diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.yaml b/.github/ISSUE_TEMPLATE/bug_report_template.yaml index 5932983ae..ede026eca 100644 --- a/.github/ISSUE_TEMPLATE/bug_report_template.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report_template.yaml @@ -84,6 +84,8 @@ body: required: false - label: "`Spark Client Integration`: This is the Apache Spark integration for OpenHouse catalog. `:integration:spark`" required: false + - label: "`Python DataLoader`: This is the distributed data loading library for OpenHouse tables. `:integrations:python:dataloader`" + required: false - label: "`Documentation`: This is the documentation for OpenHouse. `docs`" required: false - label: "`Local Docker`: This is the local Docker environment for OpenHouse. `infra/recipes/docker-compose`" diff --git a/.github/ISSUE_TEMPLATE/feature_request_template.yaml b/.github/ISSUE_TEMPLATE/feature_request_template.yaml index 428365154..7614dca41 100644 --- a/.github/ISSUE_TEMPLATE/feature_request_template.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request_template.yaml @@ -60,6 +60,8 @@ body: required: false - label: "`Spark Client Integration`: This is the Apache Spark integration for OpenHouse catalog. `:integration:spark`" required: false + - label: "`Python DataLoader`: This is the distributed data loading library for OpenHouse tables. `:integrations:python:dataloader`" + required: false - label: "`Documentation`: This is the documentation for OpenHouse. `docs`" required: false - label: "`Local Docker`: This is the local Docker environment for OpenHouse. 
`infra/recipes/docker-compose`" diff --git a/.github/workflows/build-run-tests.yml b/.github/workflows/build-run-tests.yml index 347065907..ec20d6a92 100644 --- a/.github/workflows/build-run-tests.yml +++ b/.github/workflows/build-run-tests.yml @@ -32,7 +32,16 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.x' + python-version: '3.12' + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Run Data Loader Tests + working-directory: integrations/python/dataloader + run: make sync verify - name: Install dependencies run: pip install -r scripts/python/requirements.txt diff --git a/.github/workflows/build-tag-publish.yml b/.github/workflows/build-tag-publish.yml index 6a6e88bf4..0e3f8d6da 100644 --- a/.github/workflows/build-tag-publish.yml +++ b/.github/workflows/build-tag-publish.yml @@ -1,10 +1,13 @@ -name: Gradle Build, Tag, and Publish OpenHouse +name: Build, Tag, and Publish OpenHouse on: push: branches: - main +permissions: + contents: write # For git tagging + jobs: build-and-run-tests: uses: ./.github/workflows/build-run-tests.yml @@ -13,6 +16,8 @@ jobs: name: Build tagged commit runs-on: ubuntu-latest needs: build-and-run-tests + outputs: + semVer: ${{ steps.get_tag.outputs.semVer }} steps: - name: Checkout project sources uses: actions/checkout@v6 @@ -75,3 +80,43 @@ jobs: run: | docker build -t linkedin-openhouse-docker.jfrog.io/linkedin/openhouse/jobs-scheduler:${{ steps.get_tag.outputs.semVer }} -f jobs-scheduler.Dockerfile . 
docker push linkedin-openhouse-docker.jfrog.io/linkedin/openhouse/jobs-scheduler:${{ steps.get_tag.outputs.semVer }} + + build-publish-python: + name: Build and Publish Python + runs-on: ubuntu-latest + needs: tag-publish-gradle + steps: + - name: Checkout project sources + uses: actions/checkout@v6 + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Sync dependencies + working-directory: integrations/python/dataloader + run: make sync + + - name: Build package + working-directory: integrations/python/dataloader + env: + SETUPTOOLS_SCM_PRETEND_VERSION: ${{ needs.tag-publish-gradle.outputs.semVer }} + run: make build + + - name: Validate package + working-directory: integrations/python/dataloader + run: make package-check + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: integrations/python/dataloader/dist/ + user: ${{ secrets.JFROG_PYPI_USERNAME }} + password: ${{ secrets.JFROG_PYPI_PASSWORD }} + verbose: true diff --git a/.gitignore b/.gitignore index add98add9..2a4d41781 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ hs_err_pid* *.iws .idea +# VS Code +.vscode/ + # LinkedIn / Gradle / Hardware .cache .gradle diff --git a/README.md b/README.md index 7db190667..1a31235be 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@
- + GitHub @@ -64,8 +64,19 @@ To build OpenHouse, you can use the following command: ### Running OpenHouse with Docker Compose -To run OpenHouse, we recommend the [SETUP](SETUP.md) guide. You would bring up all the OpenHouse services, MySQL, -Prometheus, Apache Spark and HDFS. +The quickest way to run OpenHouse locally: +```bash +# Build JARs, Docker images, and start containers (default: oh-hadoop-spark recipe) +./gradlew dockerUp + +# Or use a lighter recipe for faster startup +./gradlew dockerUp -Precipe=oh-only + +# Stop containers +./gradlew dockerDown +``` + +For detailed configuration options and testing instructions, see the [SETUP](SETUP.md) guide. ### Deploying OpenHouse to Kubernetes diff --git a/SETUP.md b/SETUP.md index f45215d38..9a7525433 100644 --- a/SETUP.md +++ b/SETUP.md @@ -6,24 +6,60 @@ Use this guide to setup local development environment for OpenHouse using docker-compose. -## Build Containers +## Quick Start (Recommended) + +The simplest way to build and run OpenHouse locally: + +```bash +# Build everything and start containers (uses oh-hadoop-spark recipe by default) +./gradlew dockerUp + +# Or choose a specific recipe +./gradlew dockerUp -Precipe=oh-only # Lightweight, local filesystem +./gradlew dockerUp -Precipe=oh-hadoop # With HDFS +./gradlew dockerUp -Precipe=oh-hadoop-spark # Full stack with Spark (default) + +# Stop and remove containers +./gradlew dockerDown -Precipe=oh-only +``` + +This single command: +1. Builds all required JAR files (service bootJars, Spark runtime uber JARs) +2. Builds Docker images +3. 
Starts all containers in detached mode + +**Requirements:** +- Java 17 (`export JAVA_HOME=$(/usr/libexec/java_home -v 17)` on macOS) +- Docker and Docker Compose + +### Available Gradle Docker Tasks + +| Task | Description | +|------|-------------| +| `./gradlew dockerPrereqs` | Build only the JAR files required by Docker images | +| `./gradlew dockerBuild -Precipe=` | Build JARs and Docker images | +| `./gradlew dockerUp -Precipe=` | Build everything and start containers | +| `./gradlew dockerDown -Precipe=` | Stop and remove containers | + +## Available Recipes Recipes for setting up OpenHouse in local docker are available [here](infra/recipes/docker-compose) -docker-compose.yml files are provided to build multiple container Docker applications to be able to run a fully functional -OpenHouse locally on laptop. Script has been tested to work fine on MacOS. +| Config | Recipe | Notes | +|--------|--------|-------| +| Run OpenHouse Services Only | `oh-only` | Stores data on local filesystem within the application container, with in-memory database. Least resource consuming. | +| Run OpenHouse Services on HDFS | `oh-hadoop` | Stores data on locally running Hadoop HDFS containers, with iceberg-backed database. | +| Run OpenHouse Services on HDFS with Spark | `oh-hadoop-spark` | Stores data on locally running Hadoop HDFS containers, with MySQL database. Spark available for end to end testing. Most resource consuming. Starts Livy server. | + +## Manual Docker Compose (Advanced) -Multiple recipes are provided for locally bringing up a docker-compose environment that can be used for testing. +If you prefer manual control over the build process: -Config| docker-compose Directory |Notes ----|--------------------------|--- -Run OpenHouse Services Only | oh-only | Stores data on local filesystem within the application container, with in-memory database. Least resource consuming. 
-Run OpenHouse Services on HDFS | oh-hadoop | Stores data on locally running Hadoop HDFS containers, with iceberg-backed database. -Run OpenHouse Services on HDFS. Also, available Spark | oh-hadoop-spark | Stores data on locally running Hadoop HDFS containers, with MySQL database. Spark available for end to end testing. Most resource consuming. Spark container might need more memory at time. Starts Livy server. +### Build Containers -Before building docker images, you would need to build the openhouse project by running the following command. +Before building docker images, build the openhouse project: ``` -./gradlew clean build +./gradlew build ``` Pick a config that suits your testing needs. `cd` into the respective docker-compose directory above. And run the following command to build all the necessary containers: @@ -43,7 +79,7 @@ you can remove them by running docker rmi $(docker images -f "dangling=true" -q) ``` -## Run Containers +### Run Containers Manually Choose a recipe that you want to run. `cd` into the respective docker-compose directory above. And run the following command to start running all the containers. @@ -64,6 +100,8 @@ To bring down the containers, docker compose down ``` +> **Note:** The `./gradlew dockerUp` command handles all of this automatically. + ## Container Exposed Ports Following ports can be useful while interacting from host machine with applications running in docker-compose environment. 
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/client/TablesClient.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/client/TablesClient.java index 10f0f5595..93107c99a 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/client/TablesClient.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/client/TablesClient.java @@ -74,7 +74,7 @@ public Optional getTableHistory(TableMetadata tableMetadata) { return getTableHistory(response); } - private Optional getTableRetention(GetTableResponseBody response) { + protected Optional getTableRetention(GetTableResponseBody response) { // timePartitionSpec or retention.ColumnPattern should be present to run Retention job on a // table. if (response == null @@ -104,7 +104,7 @@ private Optional getTableRetention(GetTableResponseBody respons .build()); } - private Optional getTableHistory(GetTableResponseBody response) { + protected Optional getTableHistory(GetTableResponseBody response) { if (response == null || response.getPolicies() == null || response.getPolicies().getHistory() == null) { @@ -129,7 +129,7 @@ private Optional getTableHistory(GetTableResponseBody response) { .build()); } - private Optional> getTableReplication(GetTableResponseBody response) { + protected Optional> getTableReplication(GetTableResponseBody response) { // At least one replication config must be present if (response == null || response.getPolicies() == null diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTask.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTask.java index f775cfd0d..155e0b18b 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTask.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTask.java @@ -24,6 +24,9 @@ /** * A callable class to apply an operation to some entity (table/database) by running a Spark job. 
* Takes care of the job lifecycle using /jobs API. + * + *

NOTE: Every implementation must implement a static {@code OPERATION_TYPE} field in order for + * the job scheduler to load the OperationTask. */ @Slf4j @Getter @@ -270,6 +273,20 @@ private void reportJobState( AppConstants.JOB_DURATION, System.currentTimeMillis() - startTime, attributes); + + // Granular attributes to publish entity level job metrics + Attributes granularAttributes = + Attributes.of( + AttributeKey.stringKey(AppConstants.ENTITY_NAME), + metadata.getEntityName(), + AttributeKey.stringKey(AppConstants.ENTITY_TYPE), + metadata.getClass().getSimpleName().replace("Metadata", ""), + AttributeKey.stringKey(AppConstants.JOB_TYPE), + getType().getValue(), + AttributeKey.stringKey(AppConstants.JOB_STATE), + state.name()); + + otelEmitter.count(METRICS_SCOPE, "maintenance_job_completed", 1, granularAttributes); } protected abstract boolean launchJob(); diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTasksBuilder.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTasksBuilder.java index fe0290b36..37e3df399 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTasksBuilder.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/OperationTasksBuilder.java @@ -9,12 +9,15 @@ import com.linkedin.openhouse.jobs.client.TablesClient; import com.linkedin.openhouse.jobs.client.model.JobConf; import com.linkedin.openhouse.jobs.scheduler.JobsScheduler; +import com.linkedin.openhouse.jobs.util.AppConstants; import com.linkedin.openhouse.jobs.util.DataLayoutUtil; import com.linkedin.openhouse.jobs.util.DatabaseMetadata; import com.linkedin.openhouse.jobs.util.DirectoryMetadata; import com.linkedin.openhouse.jobs.util.Metadata; import com.linkedin.openhouse.jobs.util.TableDataLayoutMetadata; import com.linkedin.openhouse.jobs.util.TableMetadata; +import io.opentelemetry.api.common.AttributeKey; +import 
io.opentelemetry.api.common.Attributes; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -168,6 +171,15 @@ private List> processMetadataList( if (optionalOperationTask.isPresent()) { taskList.add(optionalOperationTask.get()); } + + // Publish entity metrics for triggered tasks + Attributes taskAttributes = + Attributes.of( + AttributeKey.stringKey(AppConstants.ENTITY_NAME), metadata.getEntityName(), + AttributeKey.stringKey(AppConstants.ENTITY_TYPE), + metadata.getClass().getSimpleName().replace("Metadata", ""), + AttributeKey.stringKey(AppConstants.JOB_TYPE), jobType.getValue()); + otelEmitter.count(METRICS_SCOPE, "maintenance_job_triggered", 1, taskAttributes); } return taskList; } @@ -183,6 +195,15 @@ public Optional> processMetadata( task.setOtelEmitter(otelEmitter); if (!task.shouldRun()) { log.info("Skipping task {}", task); + + // Publish entity metrics for skipped tasks + Attributes taskAttributes = + Attributes.of( + AttributeKey.stringKey(AppConstants.ENTITY_NAME), metadata.getEntityName(), + AttributeKey.stringKey(AppConstants.ENTITY_TYPE), + metadata.getClass().getSimpleName().replace("Metadata", ""), + AttributeKey.stringKey(AppConstants.JOB_TYPE), task.getType().getValue()); + otelEmitter.count(METRICS_SCOPE, "maintenance_job_skipped", 1, taskAttributes); return Optional.empty(); } else { if (OperationMode.SUBMIT.equals(operationMode)) { diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BaseApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BaseApp.java index d6b315926..2c4a8d8d7 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BaseApp.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BaseApp.java @@ -65,20 +65,30 @@ protected static CommandLine createCommandLine(String[] args, List

For PARTITIONED tables: Returns one record per unique partition with aggregated + * statistics. Each partition is associated with its LATEST commit (highest committed_at + * timestamp). + * + *

For UNPARTITIONED tables: Returns a single record with aggregated statistics from ALL + * data_files and current snapshot metadata. + * + *

Key differences from collectCommitEventTablePartitions: + * + *

    + *
  • One record per unique partition (or single record for unpartitioned), not per + * commit-partition pair + *
  • Latest commit only (max committed_at or current snapshot) + *
  • Includes aggregated statistics from data_files metadata table + *
+ * + *

Returns empty list on errors. + * + * @param fqtn fully-qualified table name + * @return List of CommitEventTablePartitionStats objects (event_timestamp_ms will be set at + * publish time) + */ + public List collectCommitEventTablePartitionStats(String fqtn) { + Table table = getTable(fqtn); + + try { + TableStatsCollector tableStatsCollector = new TableStatsCollector(fs(), spark, table); + return tableStatsCollector.collectCommitEventTablePartitionStats(); + } catch (IOException e) { + log.error("Unable to initialize file system for partition stats collection", e); + return Collections.emptyList(); + } catch (Exception e) { + log.error("Failed to collect partition stats for table: {}", fqtn, e); + return Collections.emptyList(); + } + } } diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/OrphanFilesDeletionSparkApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/OrphanFilesDeletionSparkApp.java index c7e0a9faa..fb5718608 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/OrphanFilesDeletionSparkApp.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/OrphanFilesDeletionSparkApp.java @@ -29,6 +29,7 @@ public class OrphanFilesDeletionSparkApp extends BaseTableSparkApp { private final long ttlSeconds; private final String backupDir; + private final int concurrentDeletes; public OrphanFilesDeletionSparkApp( String jobId, @@ -36,10 +37,12 @@ public OrphanFilesDeletionSparkApp( String fqtn, long ttlSeconds, OtelEmitter otelEmitter, - String backupDir) { + String backupDir, + int concurrentDeletes) { super(jobId, stateManager, fqtn, otelEmitter); this.ttlSeconds = ttlSeconds; this.backupDir = backupDir; + this.concurrentDeletes = concurrentDeletes; } @Override @@ -58,7 +61,11 @@ protected void runInner(Operations ops) { backupDir); DeleteOrphanFiles.Result result = ops.deleteOrphanFiles( - ops.getTable(fqtn), olderThanTimestampMillis, backupEnabled, backupDir); + ops.getTable(fqtn), + 
olderThanTimestampMillis, + backupEnabled, + backupDir, + concurrentDeletes); List orphanFileLocations = Lists.newArrayList(result.orphanFileLocations().iterator()); log.info( "Detected {} orphan files older than {}ms", @@ -92,6 +99,7 @@ public static OrphanFilesDeletionSparkApp createApp(String[] args, OtelEmitter o new Option( "s", "skipStaging", false, "Whether to skip staging orphan files before deletion")); extraOptions.add(new Option("b", "backupDir", true, "Backup directory for deleted data")); + extraOptions.add(new Option("c", "concurrentDeletes", true, "Number of concurrent deletes")); CommandLine cmdLine = createCommandLine(args, extraOptions); return new OrphanFilesDeletionSparkApp( getJobId(cmdLine), @@ -101,6 +109,7 @@ public static OrphanFilesDeletionSparkApp createApp(String[] args, OtelEmitter o NumberUtils.toLong(cmdLine.getOptionValue("ttl"), TimeUnit.DAYS.toSeconds(7)), TimeUnit.DAYS.toSeconds(1)), otelEmitter, - cmdLine.getOptionValue("backupDir", ".backup")); + cmdLine.getOptionValue("backupDir", ".backup"), + Integer.parseInt(cmdLine.getOptionValue("concurrentDeletes", "10"))); } } diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkApp.java index b83ae67bf..099a30cb5 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkApp.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkApp.java @@ -4,6 +4,7 @@ import com.linkedin.openhouse.common.metrics.DefaultOtelConfig; import com.linkedin.openhouse.common.metrics.OtelEmitter; import com.linkedin.openhouse.common.stats.model.CommitEventTable; +import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats; import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitions; import com.linkedin.openhouse.common.stats.model.IcebergTableStats; import 
com.linkedin.openhouse.jobs.spark.state.StateManager; @@ -34,7 +35,8 @@ public TableStatsCollectionSparkApp( protected void runInner(Operations ops) { log.info("Running TableStatsCollectorApp for table {}", fqtn); - // Run stats collection, commit events collection, and partition events collection in parallel + // Run stats collection, commit events collection, partition events collection, and partition + // stats collection long startTime = System.currentTimeMillis(); IcebergTableStats icebergStats = @@ -55,11 +57,14 @@ protected void runInner(Operations ops) { () -> ops.collectCommitEventTablePartitions(fqtn), result -> String.format("%s (%d partition events)", fqtn, result.size())); + List partitionStats = + executeWithTiming( + "partition stats collection", + () -> ops.collectCommitEventTablePartitionStats(fqtn), + result -> String.format("%s (%d partition stats)", fqtn, result.size())); + long endTime = System.currentTimeMillis(); - log.info( - "Total collection time for table: {} in {} ms (parallel execution)", - fqtn, - (endTime - startTime)); + log.info("Total collection time for table: {} in {} ms", fqtn, (endTime - startTime)); if (icebergStats != null) { publishStats(icebergStats); @@ -83,6 +88,15 @@ protected void runInner(Operations ops) { + "(unpartitioned table or collection failure or no events)", fqtn); } + + if (partitionStats != null && !partitionStats.isEmpty()) { + publishPartitionStats(partitionStats); + } else { + log.info( + "Skipping partition stats publishing for table: {} " + + "(unpartitioned table or collection failure or no stats)", + fqtn); + } } /** @@ -96,33 +110,38 @@ protected void publishStats(IcebergTableStats icebergTableStats) { } /** - * Publish commit events. Override this method in li-openhouse to send to Kafka. + * Publish commit events. 
* * @param commitEvents List of commit events to publish */ protected void publishCommitEvents(List commitEvents) { - // Set event timestamp at publish time - long eventTimestampInEpochMs = System.currentTimeMillis(); - commitEvents.forEach(event -> event.setEventTimestampMs(eventTimestampInEpochMs)); - log.info("Publishing commit events for table: {}", fqtn); log.info(new Gson().toJson(commitEvents)); } /** - * Publish partition-level commit events. Override this method in li-openhouse to send to Kafka. + * Publish partition-level commit events. * * @param partitionEvents List of partition events to publish */ protected void publishPartitionEvents(List partitionEvents) { - // Set event timestamp at publish time - long eventTimestampInEpochMs = System.currentTimeMillis(); - partitionEvents.forEach(event -> event.setEventTimestampMs(eventTimestampInEpochMs)); - log.info("Publishing partition events for table: {}", fqtn); log.info(new Gson().toJson(partitionEvents)); } + /** + * Publish partition-level statistics. + * + *

This method publishes one stats record per unique partition, where each partition is + * associated with its latest commit and includes aggregated statistics from data_files. + * + * @param partitionStats List of partition statistics to publish + */ + protected void publishPartitionStats(List partitionStats) { + log.info("Publishing partition stats for table: {} ({} stats)", fqtn, partitionStats.size()); + log.info(new Gson().toJson(partitionStats)); + } + public static void main(String[] args) { OtelEmitter otelEmitter = new AppsOtelEmitter(Arrays.asList(DefaultOtelConfig.getOpenTelemetry())); diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/AppConstants.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/AppConstants.java index d001d2f23..a756a65f8 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/AppConstants.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/AppConstants.java @@ -9,7 +9,9 @@ public final class AppConstants { // Spark App observability constants public static final String TYPE = "type"; + public static final String ENTITY_TYPE = "entity_type"; public static final String JOB_TYPE = "job_type"; + public static final String JOB_STATE = "job_state"; public static final String ORPHAN_FILE_COUNT = "orphan_file_count"; public static final String STAGED_FILE_COUNT = "staged_file_count"; public static final String ORPHAN_DIRECTORY_COUNT = "orphan_directory_count"; @@ -49,6 +51,7 @@ public final class AppConstants { public static final String JOB_ID = "job_id"; public static final String QUEUED_TIME = "queued_time"; public static final String DATABASE_NAME = "database_name"; + public static final String ENTITY_NAME = "entity_name"; // Maintenance jobs table properties keys public static final String BACKUP_ENABLED_KEY = "retention.backup.enabled"; diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollector.java 
b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollector.java index ea82178ba..345ca1f0c 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollector.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollector.java @@ -1,6 +1,7 @@ package com.linkedin.openhouse.jobs.util; import com.linkedin.openhouse.common.stats.model.CommitEventTable; +import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats; import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitions; import com.linkedin.openhouse.common.stats.model.IcebergTableStats; import java.util.List; @@ -69,4 +70,35 @@ public List collectCommitEventTable() { public List collectCommitEventTablePartitions() { return TableStatsCollectorUtil.populateCommitEventTablePartitions(table, spark); } + + /** + * Collect statistics for the table (partitioned or unpartitioned). + * + *

For PARTITIONED tables: Returns one record per unique partition with aggregated + * statistics from data_files metadata table. Each partition is associated with its LATEST commit + * (highest committed_at timestamp). + * + *

For UNPARTITIONED tables: Returns a single record with aggregated statistics from ALL + * data_files and current snapshot metadata. This ensures unpartitioned tables also report stats + * with latest commit info at every job run. + * + *

Key differences from collectCommitEventTablePartitions: + * + *

    + *
  • Granularity: One record per unique partition (not per commit-partition pair), or + * single record for unpartitioned + *
  • Commit Association: Latest commit only (max committed_at or current snapshot) + *
  • Data Source: Includes statistics aggregated from data_files metadata table + *
  • Metrics: Contains row count, column count, and field-level statistics + *
+ * + *

Note: Returns List (loads into memory). Size is typically smaller than partition events + * since we deduplicate to one record per unique partition (or single record for unpartitioned). + * + * @return List of CommitEventTablePartitionStats objects (event_timestamp_ms will be set at + * publish time) + */ + public List collectCommitEventTablePartitionStats() { + return TableStatsCollectorUtil.populateCommitEventTablePartitionStats(table, spark); + } } diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtil.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtil.java index 75cf9b67f..6427c4deb 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtil.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtil.java @@ -7,8 +7,10 @@ import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; import com.linkedin.openhouse.common.stats.model.BaseEventModels; +import com.linkedin.openhouse.common.stats.model.BaseEventModels.BaseTableIdentifier; import com.linkedin.openhouse.common.stats.model.ColumnData; import com.linkedin.openhouse.common.stats.model.CommitEventTable; +import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats; import com.linkedin.openhouse.common.stats.model.CommitEventTablePartitions; import com.linkedin.openhouse.common.stats.model.CommitMetadata; import com.linkedin.openhouse.common.stats.model.CommitOperation; @@ -35,6 +37,7 @@ import org.apache.iceberg.MetadataTableType; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.ReachableFileUtil; +import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.TableIdentifier; @@ -441,23 +444,9 @@ public static List populateCommitEventTable(Table table, Spark + "FROM %s.snapshots", table.name()); - log.info("Executing snapshots query: {}", 
snapshotsQuery); + log.info("Building snapshots query: {}", snapshotsQuery); Dataset snapshotsDF = spark.sql(snapshotsQuery); - // Cache BEFORE first action to materialize during count() and reuse for collection - snapshotsDF.cache(); - - // This count() triggers cache materialization (single metadata scan) - long totalSnapshots = snapshotsDF.count(); - - if (totalSnapshots == 0) { - log.info("No snapshots found for table: {}", fullTableName); - snapshotsDF.unpersist(); // Clean up even though empty - return Collections.emptyList(); - } - - log.info("Found {} snapshots for table: {}", totalSnapshots, fullTableName); - // Get partition spec string representation String partitionSpec = table.spec().toString(); @@ -487,23 +476,105 @@ public static List populateCommitEventTable(Table table, Spark .cast("long") .multiply(1000) .as("commitTimestampMs"), - functions.col("summary").getItem("spark.app.id").as("commitAppId"), - functions.col("summary").getItem("spark.app.name").as("commitAppName"), + functions + .coalesce( + functions.col("summary").getItem("spark.app.id"), + functions.col("summary").getItem("trino_query_id")) + .as("commitAppId"), + functions + .when( + functions.col("summary").getItem("spark.app.id").isNotNull(), + functions.col("summary").getItem("spark.app.name")) + .when( + functions.col("summary").getItem("trino_query_id").isNotNull(), + functions.lit("trino")) + .as("commitAppName"), functions.upper(functions.col("operation")).as("commitOperation")) .as("commitMetadata"), - functions.lit(0L).as("eventTimestampMs")) + functions.lit(System.currentTimeMillis()).as("eventTimestampMs")) .orderBy(functions.col("commitMetadata.commitTimestampMs")) .as(commitEventEncoder) .collectAsList(); - log.info("Collected {} commit events for table: {}", totalSnapshots, fullTableName); + if (commitEventTableList.isEmpty()) { + log.info("No snapshots found for table: {}", fullTableName); + return Collections.emptyList(); + } - // Unpersist cached data to free memory - 
snapshotsDF.unpersist(); + log.info( + "Collected {} commit events for table: {}", commitEventTableList.size(), fullTableName); return commitEventTableList; } + /** + * Builds an enriched DataFrame containing partition data joined with commit metadata. + * + *

This shared helper method queries Iceberg metadata tables (all_entries and snapshots) and + * creates a DataFrame with partition information enriched with commit metadata. + * + *

This is a pure query builder - it does not manage caching or counting. The caller is + * responsible for the DataFrame lifecycle (cache, count, collect, unpersist). + * + *

Output Schema: + * + *

    + *
  • snapshot_id: long - Iceberg snapshot ID + *
  • committed_at: long - Commit timestamp in epoch seconds + *
  • operation: string - Commit operation (append, overwrite, delete, etc.) + *
  • summary: map<string,string> - Commit summary metadata + *
  • partition: struct - Partition column values as a struct + *
+ * + *

For unpartitioned tables: Returns null to indicate no partition data available. + * + *

Visibility: Package-private for testing purposes. + * + * @param table Iceberg Table + * @param spark SparkSession + * @return DataFrame with enriched partition and commit data, or null if unpartitioned + */ + static Dataset buildEnrichedPartitionDataFrame(Table table, SparkSession spark) { + String fullTableName = table.name(); + + // Check if table is partitioned + PartitionSpec spec = table.spec(); + if (spec.isUnpartitioned()) { + log.info("Table {} is unpartitioned, no enriched partition data to build", fullTableName); + return null; + } + + // Query all_entries metadata table for partitions per commit + // Use DISTINCT to deduplicate (snapshot_id, partition) pairs + // No status filter - captures all affected partitions (ADDED or DELETED files) + String allEntriesQuery = + String.format( + "SELECT DISTINCT snapshot_id, data_file.partition " + "FROM %s.all_entries", + table.name()); + + log.info("Building all_entries query for table {}: {}", fullTableName, allEntriesQuery); + Dataset partitionsPerCommitDF = spark.sql(allEntriesQuery); + + // Query snapshots to get commit metadata + String snapshotsQuery = + String.format( + "SELECT snapshot_id, committed_at, operation, summary " + "FROM %s.snapshots", + table.name()); + + Dataset snapshotsDF = spark.sql(snapshotsQuery); + + // Join partitions with commit metadata and return + // Caller manages the lifecycle (cache, count, collect, unpersist) + return partitionsPerCommitDF + .join(snapshotsDF, "snapshot_id") + .select( + functions.col("snapshot_id"), + functions.col("committed_at").cast("long"), // Cast timestamp to epoch seconds + functions.col("operation"), + functions.col("summary"), + functions.col("partition")); // Keep partition struct for transformation + } + /** * Collect partition-level commit events for a table. 
* @@ -532,14 +603,17 @@ public static List populateCommitEventTablePartition String fullTableName = table.name(); log.info("Collecting partition-level commit events for table: {}", fullTableName); - // Step 1: Check if table is partitioned - PartitionSpec spec = table.spec(); - if (spec.isUnpartitioned()) { - log.info("Table {} is unpartitioned, no partition events to collect", fullTableName); + // Step 1: Build enriched DataFrame with partition and commit data using shared helper + Dataset enrichedDF = buildEnrichedPartitionDataFrame(table, spark); + + // Check if any data was found + if (enrichedDF == null) { + log.info("No partition-level commit events found for table: {}", fullTableName); return Collections.emptyList(); } - // Step 2: Parse table name components + // Step 2: Parse table name components for transformation + PartitionSpec spec = table.spec(); String dbName = getDatabaseName(fullTableName); if (dbName == null) { return Collections.emptyList(); @@ -555,65 +629,230 @@ public static List populateCommitEventTablePartition List partitionColumnNames = spec.fields().stream().map(f -> f.name()).collect(Collectors.toList()); - // Step 3: Query all_entries metadata table for partitions per commit - // Use DISTINCT to deduplicate (snapshot_id, partition) pairs - // No status filter - captures all affected partitions (ADDED or DELETED files) - // Note: We query snapshots here even though populateCommitEventTable() also queries it. - // This is intentional to maintain parallel execution (both methods run simultaneously). - // Snapshots query is fast (~10-50ms, hits Iceberg metadata cache). 
- String allEntriesQuery = - String.format( - "SELECT DISTINCT snapshot_id, data_file.partition " + "FROM %s.all_entries", - table.name()); + // Step 3: Collect to driver + List rows = enrichedDF.collectAsList(); + long totalRecords = rows.size(); - log.info("Executing all_entries query: {}", allEntriesQuery); - Dataset partitionsPerCommitDF = spark.sql(allEntriesQuery); + // Early return if no data found + if (totalRecords == 0) { + log.info("No partition-level records found for table: {}", fullTableName); + return Collections.emptyList(); + } - // Cache for reuse - partitionsPerCommitDF.cache(); - long totalRecords = partitionsPerCommitDF.count(); + log.info("Collected {} rows to driver for transformation", totalRecords); - if (totalRecords == 0) { - log.info("No partition-level commit events found for table: {}", fullTableName); - partitionsPerCommitDF.unpersist(); + // Step 4: Delegate transformation to helper method + // Separated for testability and readability + List result = + transformRowsToPartitionEvents( + rows, + dbName, + tableName, + clusterName, + tableMetadataLocation, + partitionSpecString, + partitionColumnNames); + + log.info( + "Collected {} partition-level commit events for table: {}", result.size(), fullTableName); + + return result; + } + + /** + * Collect statistics for a table (partitioned or unpartitioned). + * + *

For PARTITIONED tables: Generates one CommitEventTablePartitionStats record per + * unique partition, containing aggregated statistics (row count, column count, and field-level + * metrics from readable_metrics). Each partition is associated with its LATEST commit metadata + * (highest committed_at timestamp). + * + *

For UNPARTITIONED tables: Generates a single CommitEventTablePartitionStats record + * with aggregated statistics from ALL data_files and current snapshot metadata. This ensures + * unpartitioned tables also report stats with latest commit info at every job run. + * + *

Key Differences from populateCommitEventTablePartitions: + * + *

    + *
  • Granularity: One record per unique partition (not per commit-partition pair), or + * single record for unpartitioned + *
  • Commit Association: Latest commit only (max committed_at or current snapshot) + *
  • Data Source: Joins with data_files metadata table for statistics + *
  • Metrics: Includes row count, column count, and field-level stats + *
+ * + *

Implementation Strategy: + * + *

    + *
  • Partitioned: Query all_entries + snapshots for latest per partition, aggregate + * data_files per partition + *
  • Unpartitioned: Use currentSnapshot(), aggregate ALL data_files (no GROUP BY) + *
+ * + * @param table Iceberg table instance + * @param spark SparkSession + * @return List of CommitEventTablePartitionStats objects (one per unique partition, or single + * record for unpartitioned) + */ + public static List populateCommitEventTablePartitionStats( + Table table, SparkSession spark) { + + String fullTableName = table.name(); + PartitionSpec spec = table.spec(); + log.info("Collecting partition stats for table: {}", fullTableName); + + // Route to appropriate handler based on partitioning strategy + if (spec.isUnpartitioned()) { + log.info("Table {} is unpartitioned, using snapshot-based stats collection", fullTableName); + return populateStatsForUnpartitionedTable(table, spark); + } else { + log.info("Table {} is partitioned, using partition-level stats collection", fullTableName); + return populateStatsForPartitionedTable(table, spark); + } + } + + /** + * Collect statistics for partitioned table (one record per unique partition with latest commit). + * Package-private for testing. 
+ */ + static List populateStatsForPartitionedTable( + Table table, SparkSession spark) { + + String fullTableName = table.name(); + PartitionSpec spec = table.spec(); + + // Step 1: Get enriched partition data with commit metadata + Dataset enrichedDF = buildEnrichedPartitionDataFrame(table, spark); + if (enrichedDF == null) { + log.warn("No partition data found for partitioned table: {}", fullTableName); + return Collections.emptyList(); + } + + // Step 2: Select latest commit per partition (handles timestamp ties) + Dataset latestCommitsDF = selectLatestCommitPerPartition(enrichedDF); + + // Step 3: Aggregate statistics from data_files per partition + Schema schema = table.schema(); + List columnNames = getColumnNamesFromReadableMetrics(table, spark, fullTableName); + + if (columnNames.isEmpty()) { + log.warn("No columns with metrics found for partitioned table: {}", fullTableName); return Collections.emptyList(); } + Dataset partitionStatsDF = + aggregatePartitionStats(table, spark, fullTableName, columnNames); + + // Step 4: Join stats with commit metadata + Dataset finalStatsDF = joinStatsWithCommitMetadata(latestCommitsDF, partitionStatsDF); + + // Step 5: Collect to driver + List rows = finalStatsDF.collectAsList(); + long totalPartitions = rows.size(); + + // Early return if no data found + if (totalPartitions == 0) { + log.warn("No partition stats found after join for table: {}", fullTableName); + return Collections.emptyList(); + } + + log.info("Collected {} partition stats rows to driver", totalPartitions); + + // Step 6: Transform to CommitEventTablePartitionStats objects + return transformToPartitionStatsObjects(rows, table, spark, schema, columnNames, spec); + } + + /** Select latest commit per partition (orders by committed_at timestamp). 
*/ + private static Dataset selectLatestCommitPerPartition(Dataset enrichedDF) { + log.info("Selecting latest commit for each unique partition using window function..."); + + org.apache.spark.sql.expressions.WindowSpec window = + org.apache.spark.sql.expressions.Window.partitionBy("partition") + .orderBy(functions.col("committed_at").desc(), functions.col("snapshot_id").desc()); + + Dataset latestCommitsDF = + enrichedDF + .withColumn("row_num", functions.row_number().over(window)) + .filter(functions.col("row_num").equalTo(1)) + .drop("row_num") + .select("snapshot_id", "committed_at", "operation", "summary", "partition"); + + log.debug("Window function applied to deduplicate partitions by latest commit"); + + return latestCommitsDF; + } + + /** Aggregate statistics from data_files per partition (row count, nulls, min/max, etc.). */ + private static Dataset aggregatePartitionStats( + Table table, SparkSession spark, String fullTableName, List columnNames) { log.info( - "Found {} partition-level commit event records for table: {}", totalRecords, fullTableName); + "Aggregating statistics for {} columns from data_files metadata...", columnNames.size()); - // Step 4: Join with snapshots to get commit metadata - String snapshotsQuery = + // Build column aggregation expressions + List columnAggExpressions = buildColumnAggregationExpressions(columnNames); + + // Build SQL query with GROUP BY partition + String aggregationQuery = String.format( - "SELECT snapshot_id, committed_at, operation, summary " + "FROM %s.snapshots", - table.name()); + "SELECT partition, sum(record_count) as total_row_count, %s FROM %s.data_files GROUP BY partition", + String.join(", ", columnAggExpressions), fullTableName); - Dataset snapshotsDF = spark.sql(snapshotsQuery); + log.debug("Building partition stats aggregation query"); + return spark.sql(aggregationQuery); + } - // Join partitions with commit metadata - Dataset enrichedDF = - partitionsPerCommitDF - .join(snapshotsDF, "snapshot_id") - 
.select( - functions.col("snapshot_id"), - functions.col("committed_at").cast("long"), // Cast timestamp to epoch seconds - functions.col("operation"), - functions.col("summary"), - functions.col("partition")); // Keep partition struct for Java transformation - - // Step 6: Collect to driver and transform in Java with type safety - // This matches populateCommitEventTable() pattern which also uses collectAsList() - // Size is manageable: typically 100K rows × 200 bytes = 20MB - log.info("Collecting {} rows to driver for transformation", totalRecords); - List rows = enrichedDF.collectAsList(); + /** Join partition statistics with latest commit metadata. */ + private static Dataset joinStatsWithCommitMetadata( + Dataset latestCommitsDF, Dataset partitionStatsDF) { + log.info("Joining partition stats with commit metadata..."); + + // Perform inner join on partition + Dataset joinedDF = + latestCommitsDF + .join( + partitionStatsDF, + latestCommitsDF.col("partition").equalTo(partitionStatsDF.col("partition")), + "inner") + .drop( + partitionStatsDF.col( + "partition")); // Drop duplicate partition column from right side + + log.debug("Join operation defined (will execute on first action)"); + return joinedDF; + } - partitionsPerCommitDF.unpersist(); + /** Transform collected rows to CommitEventTablePartitionStats objects. 
*/ + private static List transformToPartitionStatsObjects( + List rows, + Table table, + SparkSession spark, + Schema schema, + List columnNames, + PartitionSpec spec) { - // Step 7: Delegate transformation to helper method - // Separated for testability and readability - List result = - transformRowsToPartitionEvents( + // Extract table metadata + String fullTableName = table.name(); + String dbName = getDatabaseName(fullTableName); + if (dbName == null) { + return Collections.emptyList(); + } + + TableIdentifier identifier = TableIdentifier.parse(fullTableName); + String tableName = identifier.name(); + String clusterName = getClusterName(spark); + String tableMetadataLocation = table.location(); + String partitionSpecString = spec.toString(); + List partitionColumnNames = + spec.fields().stream().map(f -> f.name()).collect(Collectors.toList()); + + // Transform rows to domain objects + log.info("Transforming {} rows to CommitEventTablePartitionStats objects", rows.size()); + + List result = + transformRowsToPartitionStatsFromAggregatedSQL( rows, + schema, + columnNames, dbName, tableName, clusterName, @@ -622,7 +861,338 @@ public static List populateCommitEventTablePartition partitionColumnNames); log.info( - "Collected {} partition-level commit events for table: {}", result.size(), fullTableName); + "Collected {} partition stats for table: {} (latest commit per partition)", + result.size(), + fullTableName); + + return result; + } + + /** + * Collect statistics for unpartitioned table (single record with current snapshot). + * Package-private for testing. 
+ */ + static List populateStatsForUnpartitionedTable( + Table table, SparkSession spark) { + + String fullTableName = table.name(); + Snapshot currentSnapshot = table.currentSnapshot(); + + if (currentSnapshot == null) { + log.info("No snapshots found for unpartitioned table: {}", fullTableName); + return Collections.emptyList(); + } + + log.info( + "Using current snapshot {} for unpartitioned table: {}", + currentSnapshot.snapshotId(), + fullTableName); + + // Step 1: Get table schema and column names + Schema schema = table.schema(); + List columnNames = getColumnNamesFromReadableMetrics(table, spark, fullTableName); + log.info("Found {} columns with metrics for unpartitioned table", columnNames.size()); + + if (columnNames.isEmpty()) { + log.warn("No columns with metrics found for unpartitioned table: {}", fullTableName); + return Collections.emptyList(); + } + + // Step 2: Aggregate statistics from ALL data_files (no partitioning) + Row statsRow = aggregateUnpartitionedTableStats(spark, fullTableName, columnNames); + if (statsRow == null) { + return Collections.emptyList(); + } + + // Step 3: Build commit metadata from current snapshot + CommitMetadata commitMetadata = buildCommitMetadataFromSnapshot(currentSnapshot); + + // Step 4: Extract column-level metrics + Map> metricsMap = + extractColumnMetricsFromAggregatedRow(statsRow, schema, columnNames); + + // Step 5: Build and return stats object + CommitEventTablePartitionStats stats = + buildPartitionStatsObject( + table, spark, schema, statsRow, commitMetadata, metricsMap, Collections.emptyList()); + + if (stats == null) { + return Collections.emptyList(); + } + + log.info( + "Collected stats for unpartitioned table: {} (snapshot: {}, row count: {})", + fullTableName, + currentSnapshot.snapshotId(), + stats.getRowCount()); + + return Collections.singletonList(stats); + } + + /** Aggregate statistics for unpartitioned table (all data files, no GROUP BY). 
*/ + private static Row aggregateUnpartitionedTableStats( + SparkSession spark, String fullTableName, List columnNames) { + log.info("Aggregating statistics for unpartitioned table..."); + + // Build column aggregation expressions + List columnAggExpressions = buildColumnAggregationExpressions(columnNames); + + // Build SQL query WITHOUT GROUP BY (aggregate all files) + String aggregationQuery = + String.format( + "SELECT sum(record_count) as total_row_count, %s FROM %s.data_files", + String.join(", ", columnAggExpressions), fullTableName); + + log.debug("Building unpartitioned table stats aggregation query"); + Dataset statsDF = spark.sql(aggregationQuery); + + List rows = statsDF.collectAsList(); + if (rows.isEmpty()) { + log.warn("No data found in data_files for table: {}", fullTableName); + return null; + } + + return rows.get(0); + } + + /** Build CommitMetadata from snapshot or row data. */ + private static CommitMetadata buildCommitMetadata( + Long snapshotId, Long commitTimestampMs, String operation, Map summary) { + CommitOperation commitOp = null; + if (operation != null) { + try { + commitOp = CommitOperation.valueOf(operation.toUpperCase()); + } catch (IllegalArgumentException e) { + log.warn("Unknown commit operation: {}", operation); + } + } + return CommitMetadata.builder() + .commitId(snapshotId) + .commitTimestampMs(commitTimestampMs) + .commitAppId(summary.getOrDefault("spark.app.id", "unknown")) + .commitAppName(summary.getOrDefault("spark.app.name", "unknown")) + .commitOperation(commitOp) + .build(); + } + + private static CommitMetadata buildCommitMetadataFromSnapshot(Snapshot snapshot) { + return buildCommitMetadata( + snapshot.snapshotId(), + snapshot.timestampMillis(), + snapshot.operation(), + snapshot.summary()); + } + + /** Build CommitEventTablePartitionStats object from extracted data. 
*/ + private static CommitEventTablePartitionStats buildPartitionStatsObject( + Table table, + SparkSession spark, + Schema schema, + Row statsRow, + CommitMetadata commitMetadata, + Map> metricsMap, + List partitionData) { + + String fullTableName = table.name(); + String dbName = getDatabaseName(fullTableName); + if (dbName == null) { + return null; + } + + TableIdentifier identifier = TableIdentifier.parse(fullTableName); + String tableName = identifier.name(); + String clusterName = getClusterName(spark); + String tableMetadataLocation = table.location(); + String partitionSpecString = table.spec().toString(); + + // Extract stats from row + Long rowCount = statsRow.getAs("total_row_count"); + Long columnCount = (long) schema.columns().size(); + + // Extract column-level metrics from map + List nullCounts = metricsMap.get("nullCount"); + List nanCounts = metricsMap.get("nanCount"); + List minValues = metricsMap.get("minValue"); + List maxValues = metricsMap.get("maxValue"); + List columnSizes = metricsMap.get("columnSize"); + + // Build and return stats object + return CommitEventTablePartitionStats.builder() + .dataset( + BaseTableIdentifier.builder() + .databaseName(dbName) + .tableName(tableName) + .clusterName(clusterName) + .tableMetadataLocation(tableMetadataLocation) + .partitionSpec(partitionSpecString) + .build()) + .commitMetadata(commitMetadata) + .partitionData(partitionData) + .rowCount(rowCount != null ? rowCount : 0L) + .columnCount(columnCount) + .nullCount(nullCounts) + .nanCount(nanCounts) + .minValue(minValues) + .maxValue(maxValues) + .columnSizeInBytes(columnSizes) + .eventTimestampMs(System.currentTimeMillis()) + .build(); + } + + /** + * Get column names from readable_metrics (queries data_files to find columns with metrics). + * Package-private for testing. 
+ */ + static List getColumnNamesFromReadableMetrics( + Table table, SparkSession spark, String fullTableName) { + + log.info("Discovering columns with metrics from readable_metrics for table: {}", fullTableName); + + // Query readable_metrics structure (Scala pattern) + String readableMetricsSchemaQuery = + String.format("SELECT readable_metrics FROM %s.data_files LIMIT 1", fullTableName); + Dataset schemaDF = spark.sql(readableMetricsSchemaQuery); + + List columnNames = new ArrayList<>(); + + if (schemaDF.count() > 0) { + // Get the readable_metrics struct fields + org.apache.spark.sql.types.StructType readableMetricsStruct = + (org.apache.spark.sql.types.StructType) schemaDF.schema().fields()[0].dataType(); + + for (org.apache.spark.sql.types.StructField field : readableMetricsStruct.fields()) { + columnNames.add(field.name()); + } + log.debug("Found {} columns with metrics from readable_metrics", columnNames.size()); + } else { + log.warn("No data files found for table: {}, cannot collect column metrics", fullTableName); + } + + return columnNames; + } + + /** + * Build SQL aggregation expressions for column metrics (null_count, min/max, etc.). + * Package-private for testing. + */ + static List buildColumnAggregationExpressions(List columnNames) { + + List columnAggExpressions = new ArrayList<>(); + + for (String colName : columnNames) { + // Escape column names with backticks for SQL + String escapedColName = String.format("`%s`", colName); + + // Replace dots in column names with underscores for alias names + // (dots are not allowed in SQL alias names) + // NOTE: If a table has both "user.age" (nested) and "user_age" (flat) columns, + // this will create a duplicate alias error. This is extremely rare and violates + // naming conventions, but if it occurs, the SQL query will fail with a clear error. + // The original column names are always preserved in ColumnData objects. 
+ String aliasBase = colName.replace(".", "_"); + + // Sum of null counts across all data files + columnAggExpressions.add( + String.format( + "sum(coalesce(readable_metrics.%s.null_value_count, 0)) as %s_null_count", + escapedColName, aliasBase)); + + // Sum of NaN counts across all data files + columnAggExpressions.add( + String.format( + "sum(coalesce(readable_metrics.%s.nan_value_count, 0)) as %s_nan_count", + escapedColName, aliasBase)); + + // Minimum value across all data files + columnAggExpressions.add( + String.format( + "min(readable_metrics.%s.lower_bound) as %s_min_value", escapedColName, aliasBase)); + + // Maximum value across all data files + columnAggExpressions.add( + String.format( + "max(readable_metrics.%s.upper_bound) as %s_max_value", escapedColName, aliasBase)); + + // Sum of column sizes across all data files + columnAggExpressions.add( + String.format( + "sum(coalesce(readable_metrics.%s.column_size, 0)) as %s_column_size", + escapedColName, aliasBase)); + } + + return columnAggExpressions; + } + + /** Extract column metrics from aggregated SQL result row. Package-private for testing. 
*/ + static Map> extractColumnMetricsFromAggregatedRow( + Row statsRow, Schema schema, List columnNames) { + + Map> result = new HashMap<>(); + result.put("nullCount", new ArrayList<>()); + result.put("nanCount", new ArrayList<>()); + result.put("minValue", new ArrayList<>()); + result.put("maxValue", new ArrayList<>()); + result.put("columnSize", new ArrayList<>()); + + // Create a map for quick column type lookup + Map columnTypeMap = new HashMap<>(); + for (Types.NestedField field : schema.columns()) { + columnTypeMap.put(field.name(), field.type()); + } + + for (String colName : columnNames) { + org.apache.iceberg.types.Type columnType = columnTypeMap.get(colName); + if (columnType == null) { + log.warn("Column {} not found in schema, skipping metrics", colName); + continue; + } + + try { + // Replace dots with underscores for SQL alias lookup + // (matches the alias names generated in buildColumnAggregationExpressions) + String aliasBase = colName.replace(".", "_"); + + // Extract null count - include all columns, even with 0 nulls + Long nullCount = statsRow.getAs(aliasBase + "_null_count"); + if (nullCount != null) { + result.get("nullCount").add(new ColumnData.LongColumnData(colName, nullCount)); + } + + // Extract NaN count - include all columns, even with 0 NaNs + // Note: NaN is only meaningful for floating point types (FLOAT, DOUBLE) + Long nanCount = statsRow.getAs(aliasBase + "_nan_count"); + if (nanCount != null) { + result.get("nanCount").add(new ColumnData.LongColumnData(colName, nanCount)); + } + + // Extract min value + Object minValue = statsRow.getAs(aliasBase + "_min_value"); + if (minValue != null) { + ColumnData minData = convertValueToColumnData(colName, minValue, columnType); + if (minData != null) { + result.get("minValue").add(minData); + } + } + + // Extract max value + Object maxValue = statsRow.getAs(aliasBase + "_max_value"); + if (maxValue != null) { + ColumnData maxData = convertValueToColumnData(colName, maxValue, columnType); + 
if (maxData != null) { + result.get("maxValue").add(maxData); + } + } + + // Extract column size - include all columns, even with 0 size + Long columnSize = statsRow.getAs(aliasBase + "_column_size"); + if (columnSize != null) { + result.get("columnSize").add(new ColumnData.LongColumnData(colName, columnSize)); + } + + } catch (Exception e) { + log.warn("Failed to extract metrics for column '{}': {}", colName, e.getMessage()); + } + } return result; } @@ -702,7 +1272,7 @@ static List transformRowsToPartitionEvents( .commitOperation(commitOperation) .build()) .partitionData(partitionData) - .eventTimestampMs(0L) // Will be set at publish time + .eventTimestampMs(System.currentTimeMillis()) .build(); result.add(event); @@ -770,6 +1340,154 @@ static List transformPartitionRowToColumnData( return result; } + /** + * Transform SQL-aggregated rows to CommitEventTablePartitionStats objects. Package-private for + * testing. + */ + static List transformRowsToPartitionStatsFromAggregatedSQL( + List rows, + Schema schema, + List columnNames, + String dbName, + String tableName, + String clusterName, + String tableMetadataLocation, + String partitionSpecString, + List partitionColumnNames) { + + List result = new ArrayList<>(); + + for (Row row : rows) { + try { + // Extract partition struct + Row partitionRow = row.getAs("partition"); + List partitionData = + transformPartitionRowToColumnData(partitionRow, partitionColumnNames); + + // Extract commit metadata + Long snapshotId = row.getAs("snapshot_id"); + Long committedAt = row.getAs("committed_at"); + String operation = row.getAs("operation"); + scala.collection.Map scalaMap = row.getMap(row.fieldIndex("summary")); + Map summary = scala.collection.JavaConverters.mapAsJavaMap(scalaMap); + CommitMetadata commitMetadata = + buildCommitMetadata(snapshotId, committedAt, operation, summary); + + // Extract table-level stats + Long rowCount = row.getAs("total_row_count"); + Long columnCount = (long) schema.columns().size(); + + // 
Extract field-level stats using shared helper + Map> metricsMap = + extractColumnMetricsFromAggregatedRow(row, schema, columnNames); + + List nullCounts = metricsMap.get("nullCount"); + List nanCounts = metricsMap.get("nanCount"); + List minValues = metricsMap.get("minValue"); + List maxValues = metricsMap.get("maxValue"); + List columnSizes = metricsMap.get("columnSize"); + + // Build CommitEventTablePartitionStats object + CommitEventTablePartitionStats stats = + CommitEventTablePartitionStats.builder() + .dataset( + BaseTableIdentifier.builder() + .databaseName(dbName) + .tableName(tableName) + .clusterName(clusterName) + .tableMetadataLocation(tableMetadataLocation) + .partitionSpec(partitionSpecString) + .build()) + .commitMetadata(commitMetadata) + .partitionData(partitionData) + .rowCount(rowCount != null ? rowCount : 0L) + .columnCount(columnCount) + .nullCount(nullCounts) + .nanCount(nanCounts) + .minValue(minValues) + .maxValue(maxValues) + .columnSizeInBytes(columnSizes) + .eventTimestampMs(System.currentTimeMillis()) + .build(); + + result.add(stats); + + } catch (Exception e) { + log.error("Failed to transform row to partition stats: {}", row, e); + // Continue processing other rows + } + } + + return result; + } + + /** + * Convert a value to appropriate ColumnData subclass based on Iceberg type. + * + *

Visibility: Package-private for testing purposes. + * + * @param columnName Column name + * @param value Value to convert + * @param icebergType Iceberg type + * @return ColumnData instance + */ + static ColumnData convertValueToColumnData( + String columnName, Object value, org.apache.iceberg.types.Type icebergType) { + + if (value == null) { + return null; + } + + try { + // Handle based on Iceberg type + switch (icebergType.typeId()) { + case INTEGER: + case LONG: + case DATE: // Days since epoch (stored as int in readable_metrics) + case TIME: // Microseconds since midnight (stored as long in readable_metrics) + Long longValue; + if (value instanceof Number) { + longValue = ((Number) value).longValue(); + } else if (value instanceof java.sql.Date) { + // Handle Date objects: convert to days since epoch + longValue = ((java.sql.Date) value).toLocalDate().toEpochDay(); + } else if (value instanceof java.sql.Time) { + // Handle Time objects: convert to microseconds since midnight + longValue = ((java.sql.Time) value).toLocalTime().toNanoOfDay() / 1000; + } else { + // Fallback: try parsing as long + longValue = Long.parseLong(value.toString()); + } + return new ColumnData.LongColumnData(columnName, longValue); + + case FLOAT: + case DOUBLE: + case DECIMAL: + Double doubleValue; + if (value instanceof Number) { + doubleValue = ((Number) value).doubleValue(); + } else { + doubleValue = Double.parseDouble(value.toString()); + } + return new ColumnData.DoubleColumnData(columnName, doubleValue); + + case STRING: + case UUID: + case TIMESTAMP: + case BINARY: + case FIXED: + default: + return new ColumnData.StringColumnData(columnName, value.toString()); + } + } catch (Exception e) { + log.warn( + "Failed to convert value for column '{}', using string fallback: {}", + columnName, + e.getMessage()); + return new ColumnData.StringColumnData(columnName, value.toString()); + } + } + /** * Extract database name from fully-qualified table name. 
* diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsTest.java index 20742511b..4f3d6ea3e 100644 --- a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsTest.java +++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/OperationsTest.java @@ -18,6 +18,7 @@ import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.time.LocalDate; +import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; @@ -318,24 +319,14 @@ public void testOrphanFilesDeletionJavaAPI() throws Exception { populateTable(ops, tableName, numInserts); Table table = ops.getTable(tableName); log.info("Loaded table {}, location {}", table.name(), table.location()); - List snapshots = - ops.spark().sql(String.format("SELECT * from %s.history", tableName)).collectAsList(); - Assertions.assertEquals(numInserts, snapshots.size()); - log.info("Found {} snapshots", snapshots.size()); - for (Row metadataFileRow : snapshots) { - log.info(metadataFileRow.toString()); - } Path orphanFilePath = new Path(table.location(), testOrphanFileName); + Path dataManifestPath = new Path(table.location(), ".backup/data/data_manifest_123.json"); FileSystem fs = ops.fs(); fs.createNewFile(orphanFilePath); - log.info("Created orphan file {}", testOrphanFileName); + fs.createNewFile(dataManifestPath); DeleteOrphanFiles.Result result = - ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR, 5); List orphanFiles = Lists.newArrayList(result.orphanFileLocations().iterator()); - log.info("Detected {} orphan files", orphanFiles.size()); - for (String of : orphanFiles) { - log.info("File {}", of); - } Assertions.assertTrue( fs.exists(new Path(table.location(), new Path(BACKUP_DIR, 
testOrphanFileName)))); Assertions.assertEquals(1, orphanFiles.size()); @@ -356,25 +347,19 @@ public void testOrphanFilesDeletionIgnoresFilesInBackupDir() throws Exception { Table table = ops.getTable(tableName); log.info("Loaded table {}, location {}", table.name(), table.location()); Path orphanFilePath = new Path(table.location(), testOrphanFileName); + Path dataManifestPath = new Path(table.location(), ".backup/data/data_manifest_123.json"); FileSystem fs = ops.fs(); fs.createNewFile(orphanFilePath); - log.info("Created orphan file {}", testOrphanFileName); + fs.createNewFile(dataManifestPath); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR, 5); + Path backupFilePath = new Path(table.location(), new Path(BACKUP_DIR, testOrphanFileName)); + Assertions.assertTrue(fs.exists(backupFilePath)); + // run delete operation again and verify that files in .backup are not listed as Orphan DeleteOrphanFiles.Result result = - ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR, 5); List orphanFiles = Lists.newArrayList(result.orphanFileLocations().iterator()); - log.info("Detected {} orphan files", orphanFiles.size()); - for (String of : orphanFiles) { - log.info("File {}", of); - } - Path trashFilePath = new Path(table.location(), new Path(BACKUP_DIR, testOrphanFileName)); - Assertions.assertTrue(fs.exists(trashFilePath)); - // run delete operation again and verify that files in .trash are not listed as Orphan - DeleteOrphanFiles.Result result2 = - ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR); - List orphanFiles2 = Lists.newArrayList(result2.orphanFileLocations().iterator()); - log.info("Detected {} orphan files", orphanFiles2.size()); - Assertions.assertEquals(0, orphanFiles2.size()); - Assertions.assertTrue(fs.exists(trashFilePath)); + Assertions.assertEquals(0, orphanFiles.size()); + 
Assertions.assertTrue(fs.exists(backupFilePath)); } } @@ -388,24 +373,14 @@ public void testOrphanFilesDeletionDeleteNonDataFiles() throws Exception { populateTable(ops, tableName, numInserts); Table table = ops.getTable(tableName); log.info("Loaded table {}, location {}", table.name(), table.location()); - List snapshots = - ops.spark().sql(String.format("SELECT * from %s.history", tableName)).collectAsList(); - Assertions.assertEquals(numInserts, snapshots.size()); - log.info("Found {} snapshots", snapshots.size()); - for (Row metadataFileRow : snapshots) { - log.info(metadataFileRow.toString()); - } Path orphanFilePath = new Path(table.location(), testOrphanFileName); + Path dataManifestPath = new Path(table.location(), ".backup/data/data_manifest_123.json"); FileSystem fs = ops.fs(); fs.createNewFile(orphanFilePath); - log.info("Created orphan file {}", testOrphanFileName); + fs.createNewFile(dataManifestPath); DeleteOrphanFiles.Result result = - ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR, 5); List orphanFiles = Lists.newArrayList(result.orphanFileLocations().iterator()); - log.info("Detected {} orphan files", orphanFiles.size()); - for (String of : orphanFiles) { - log.info("File {}", of); - } Assertions.assertFalse( fs.exists(new Path(table.location(), new Path(BACKUP_DIR, testOrphanFileName)))); Assertions.assertEquals(1, orphanFiles.size()); @@ -425,24 +400,39 @@ public void testOrphanFilesDeletionBackupDisabled() throws Exception { populateTable(ops, tableName, numInserts); Table table = ops.getTable(tableName); log.info("Loaded table {}, location {}", table.name(), table.location()); - List snapshots = - ops.spark().sql(String.format("SELECT * from %s.history", tableName)).collectAsList(); - Assertions.assertEquals(numInserts, snapshots.size()); - log.info("Found {} snapshots", snapshots.size()); - for (Row metadataFileRow : snapshots) { - 
log.info(metadataFileRow.toString()); - } Path orphanFilePath = new Path(table.location(), testOrphanFileName); + Path dataManifestPath = new Path(table.location(), ".backup/data/data_manifest_123.json"); FileSystem fs = ops.fs(); fs.createNewFile(orphanFilePath); - log.info("Created orphan file {}", testOrphanFileName); + fs.createNewFile(dataManifestPath); DeleteOrphanFiles.Result result = - ops.deleteOrphanFiles(table, System.currentTimeMillis(), false, BACKUP_DIR); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), false, BACKUP_DIR, 5); + List orphanFiles = Lists.newArrayList(result.orphanFileLocations().iterator()); + Assertions.assertFalse( + fs.exists(new Path(table.location(), new Path(BACKUP_DIR, testOrphanFileName)))); + Assertions.assertEquals(1, orphanFiles.size()); + Assertions.assertTrue( + orphanFiles.get(0).endsWith(table.location() + "/" + testOrphanFileName)); + Assertions.assertFalse(fs.exists(orphanFilePath)); + } + } + + @Test + public void testOrphanFilesDeletionDeleteDataWhenDataManifestNotExists() throws Exception { + final String tableName = "db.test_ofd_java"; + final String testOrphanFileName = "data/test_orphan_file.orc"; + final int numInserts = 3; + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + prepareTable(ops, tableName); + populateTable(ops, tableName, numInserts); + Table table = ops.getTable(tableName); + log.info("Loaded table {}, location {}", table.name(), table.location()); + Path orphanFilePath = new Path(table.location(), testOrphanFileName); + FileSystem fs = ops.fs(); + fs.createNewFile(orphanFilePath); + DeleteOrphanFiles.Result result = + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, BACKUP_DIR, 5); List orphanFiles = Lists.newArrayList(result.orphanFileLocations().iterator()); - log.info("Detected {} orphan files", orphanFiles.size()); - for (String of : orphanFiles) { - log.info("File {}", of); - } Assertions.assertFalse( fs.exists(new Path(table.location(), 
new Path(BACKUP_DIR, testOrphanFileName)))); Assertions.assertEquals(1, orphanFiles.size()); @@ -697,12 +687,14 @@ public void testStagedFilesDelete() throws Exception { log.info("Loaded table {}, location {}", table.name(), table.location()); Path orphanFilePath1 = new Path(table.location(), testOrphanFile1); Path orphanFilePath2 = new Path(table.location(), testOrphanFile2); + Path dataManifestPath = new Path(table.location(), ".trash/data/data_manifest_123.json"); FileSystem fs = ops.fs(); fs.createNewFile(orphanFilePath1); fs.createNewFile(orphanFilePath2); + fs.createNewFile(dataManifestPath); log.info("Created orphan file {}", testOrphanFile1); log.info("Created orphan file {}", testOrphanFile2); - ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, TRASH_DIR); + ops.deleteOrphanFiles(table, System.currentTimeMillis(), true, TRASH_DIR, 5); Assertions.assertTrue( fs.exists(new Path(table.location(), (new Path(TRASH_DIR, testOrphanFile1))))); Assertions.assertTrue( @@ -958,7 +950,9 @@ public void testCollectEarliestPartitionDateStat() throws Exception { stats = ops.collectTableStats(tableName); Assertions.assertEquals( stats.getEarliestPartitionDate(), - LocalDate.now().minusDays(2).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))); + LocalDate.now(ZoneOffset.UTC) + .minusDays(2) + .format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))); } } @@ -1395,4 +1389,148 @@ private static List> getDataFiles(Operations ops, S r.getLong(r.fieldIndex("record_count")))) .collect(Collectors.toList()); } + + // ================================================================================== + // Tests for Operations.collectCommitEventTablePartitionStats() - Method-Level Integration + // ================================================================================== + + @Test + public void testCollectPartitionStatsForPartitionedTable() throws Exception { + final String tableName = "db.test_partition_stats_partitioned"; + + try (Operations ops = 
Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + + // Insert data across multiple partitions + populateTable(ops, tableName, 3, 0); // Partition day 0 + populateTable(ops, tableName, 2, 1); // Partition day 1 + + // Action: Collect partition stats + List stats = + ops.collectCommitEventTablePartitionStats(tableName); + + // Assert: Should have one stats record per partition + Assertions.assertNotNull(stats, "Stats should not be null"); + Assertions.assertEquals(2, stats.size(), "Should have 2 partition stats (one per partition)"); + + // Verify each stat has required fields + for (com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat : stats) { + Assertions.assertNotNull(stat.getDataset(), "Dataset should not be null"); + Assertions.assertNotNull(stat.getCommitMetadata(), "Commit metadata should not be null"); + Assertions.assertNotNull(stat.getPartitionData(), "Partition data should not be null"); + Assertions.assertFalse( + stat.getPartitionData().isEmpty(), + "Partition data should not be empty for partitioned table"); + Assertions.assertTrue(stat.getRowCount() > 0, "Row count should be greater than 0"); + Assertions.assertTrue(stat.getColumnCount() > 0, "Column count should be greater than 0"); + Assertions.assertNotNull(stat.getNullCount(), "Null count map should not be null"); + Assertions.assertFalse(stat.getNullCount().isEmpty(), "Null count map should have entries"); + + log.info( + "Partition stats: partitionData={}, rowCount={}, columnCount={}", + stat.getPartitionData(), + stat.getRowCount(), + stat.getColumnCount()); + } + } + } + + @Test + public void testCollectPartitionStatsForUnpartitionedTable() throws Exception { + final String tableName = "db.test_partition_stats_unpartitioned"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create unpartitioned table + prepareTable(ops, tableName, false); 
+ + // Insert data + populateTable(ops, tableName, 5); + + // Action: Collect partition stats + List stats = + ops.collectCommitEventTablePartitionStats(tableName); + + // Assert: Should have single stats record (snapshot-based) + Assertions.assertNotNull(stats, "Stats should not be null"); + Assertions.assertEquals( + 1, stats.size(), "Should have 1 stats record for unpartitioned table"); + + com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat = stats.get(0); + + // Verify snapshot-based stats + Assertions.assertNotNull(stat.getDataset(), "Dataset should not be null"); + Assertions.assertNotNull(stat.getCommitMetadata(), "Commit metadata should not be null"); + Assertions.assertNotNull(stat.getPartitionData(), "Partition data should not be null"); + Assertions.assertTrue( + stat.getPartitionData().isEmpty(), + "Partition data should be empty for unpartitioned table"); + Assertions.assertTrue(stat.getRowCount() > 0, "Row count should be greater than 0"); + Assertions.assertEquals(5, stat.getRowCount(), "Row count should equal inserted rows"); + Assertions.assertTrue(stat.getColumnCount() > 0, "Column count should be greater than 0"); + + log.info( + "Unpartitioned table stats: rowCount={}, columnCount={}", + stat.getRowCount(), + stat.getColumnCount()); + } + } + + @Test + public void testCollectPartitionStatsWithMultipleCommitsToSamePartition() throws Exception { + final String tableName = "db.test_partition_stats_multiple_commits"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + + // Insert to SAME partition multiple times (3 commits to partition day 0) + long timestamp = System.currentTimeMillis() / 1000; + populateTable(ops, tableName, 1, 0, timestamp); + Thread.sleep(100); // Small delay to ensure different commit timestamps + populateTable(ops, tableName, 1, 0, timestamp); + Thread.sleep(100); + populateTable(ops, 
tableName, 1, 0, timestamp); + + // Action: Collect partition stats + List stats = + ops.collectCommitEventTablePartitionStats(tableName); + + // Assert: Should have only 1 stats record (latest commit per partition) + Assertions.assertNotNull(stats, "Stats should not be null"); + Assertions.assertEquals( + 1, stats.size(), "Should have only 1 stats record (latest commit for the partition)"); + + com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat = stats.get(0); + + // Verify it's the latest commit's data + Assertions.assertNotNull(stat.getCommitMetadata(), "Commit metadata should not be null"); + Assertions.assertTrue(stat.getRowCount() > 0, "Row count should reflect latest commit"); + + log.info( + "Latest commit stats: rowCount={}, commitId={}", + stat.getRowCount(), + stat.getCommitMetadata().getCommitId()); + } + } + + @Test + public void testCollectPartitionStatsEmptyTable() throws Exception { + final String tableName = "db.test_partition_stats_empty"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create empty partitioned table (no data) + prepareTable(ops, tableName, true); + + // Action: Collect partition stats (no inserts) + List stats = + ops.collectCommitEventTablePartitionStats(tableName); + + // Assert: Should handle empty table gracefully + Assertions.assertNotNull(stats, "Stats should not be null even for empty table"); + // Empty table should return empty list or single record with 0 rows + // (implementation-specific, but should not throw exception) + log.info("Empty table stats count: {}", stats.size()); + } + } } diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkAppTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkAppTest.java index 025806871..e83c1cb6a 100644 --- a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkAppTest.java +++ 
b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/TableStatsCollectionSparkAppTest.java @@ -14,6 +14,10 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; import org.junit.jupiter.api.Assertions; @@ -173,8 +177,8 @@ public void testCommitEventsContainsCorrectSchema() throws Exception { Assertions.assertNotNull(firstEvent.getCommitMetadata().getCommitTimestampMs()); // Note: commit_app_id, commit_app_name, and commit_operation are nullable - // Verify event_timestamp_ms is placeholder (will be set at publish time) - Assertions.assertEquals(0L, firstEvent.getEventTimestampMs()); + // Verify event_timestamp_ms is set at collection time + Assertions.assertTrue(firstEvent.getEventTimestampMs() > 0); log.info("Commit events schema validated successfully"); } @@ -273,6 +277,250 @@ public void testCommitEventsOrdering() throws Exception { } } + // ==================== Commit Metadata (Spark/Trino) Tests ==================== + // + // These tests validate the coalesce logic for commitAppId and commitAppName: + // - commitAppId = coalesce(spark.app.id, trino_query_id) + // - commitAppName = when(spark.app.id.isNotNull, spark.app.name) + // .when(trino_query_id.isNotNull, "trino") + // + // We use the Iceberg Table API directly to create commits with controlled + // summary properties, bypassing Spark SQL which automatically sets spark.app.id. + + /** + * Creates a DataFile for testing commits with controlled summary properties. The file doesn't + * need to physically exist since we're testing metadata collection. 
+ */ + private DataFile createTestDataFile(Table table) { + PartitionSpec spec = table.spec(); + Schema schema = table.schema(); + + if (spec.isUnpartitioned()) { + return DataFiles.builder(spec) + .withPath("/test/data-" + System.nanoTime() + ".parquet") + .withFileSizeInBytes(100) + .withRecordCount(1) + .build(); + } else { + // For partitioned tables, we need to provide partition data + // Our test tables are partitioned by days(ts), so partition path is like "ts_day=19000" + return DataFiles.builder(spec) + .withPath("/test/data-" + System.nanoTime() + ".parquet") + .withFileSizeInBytes(100) + .withRecordCount(1) + .withPartitionPath("ts_day=19000") + .build(); + } + } + + @Test + public void testCommitMetadata_BothSparkAndTrinoNull() throws Exception { + final String tableName = "db.test_commit_metadata_both_null"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table + prepareTable(ops, tableName); + + // Create a commit using Iceberg Table API directly (without Spark SQL) + // This bypasses Spark's automatic spark.app.id/spark.app.name injection + Table table = ops.getTable(tableName); + DataFile dataFile = createTestDataFile(table); + table.newAppend().appendFile(dataFile).commit(); + + // Verify the commit actually succeeded by checking snapshot exists + table.refresh(); + Assertions.assertNotNull(table.currentSnapshot(), "Commit should have created a snapshot"); + log.info("Commit succeeded with snapshot ID: {}", table.currentSnapshot().snapshotId()); + + // Refresh Spark's catalog cache to see the new metadata + ops.spark().sql("REFRESH TABLE " + tableName); + + // Action: Collect commit events + List commitEvents = ops.collectCommitEventTable(tableName); + + // Verify: Both commitAppId and commitAppName should be null + // when neither spark.app.id nor trino_query_id is present in the summary + Assertions.assertFalse(commitEvents.isEmpty(), "Should have at least one commit event"); + 
CommitEventTable event = commitEvents.get(0); + + Assertions.assertNull( + event.getCommitMetadata().getCommitAppId(), + "commitAppId should be null when both spark.app.id and trino_query_id are absent"); + Assertions.assertNull( + event.getCommitMetadata().getCommitAppName(), + "commitAppName should be null when both spark.app.id and trino_query_id are absent"); + + log.info( + "Both null scenario validated: commitAppId={}, commitAppName={}", + event.getCommitMetadata().getCommitAppId(), + event.getCommitMetadata().getCommitAppName()); + } + } + + @Test + public void testCommitMetadata_SparkNullTrinoPresent() throws Exception { + final String tableName = "db.test_commit_metadata_trino_only"; + final String trinoQueryId = "20240101_123456_00001_abcde"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table + prepareTable(ops, tableName); + + // Create a commit using Iceberg Table API with only trino_query_id set + // This simulates a Trino commit where spark.app.id is not present + Table table = ops.getTable(tableName); + DataFile dataFile = createTestDataFile(table); + table.newAppend().appendFile(dataFile).set("trino_query_id", trinoQueryId).commit(); + + // Verify the commit actually succeeded by checking snapshot exists + table.refresh(); + Assertions.assertNotNull(table.currentSnapshot(), "Commit should have created a snapshot"); + log.info("Commit succeeded with snapshot ID: {}", table.currentSnapshot().snapshotId()); + + // Refresh Spark's catalog cache to see the new metadata + ops.spark().sql("REFRESH TABLE " + tableName); + + // Action: Collect commit events + List commitEvents = ops.collectCommitEventTable(tableName); + + // Verify: commitAppId should be trino_query_id, commitAppName should be "trino" + Assertions.assertFalse(commitEvents.isEmpty(), "Should have at least one commit event"); + CommitEventTable event = commitEvents.get(0); + + Assertions.assertEquals( + trinoQueryId, + 
event.getCommitMetadata().getCommitAppId(), + "commitAppId should be trino_query_id when spark.app.id is null"); + Assertions.assertEquals( + "trino", + event.getCommitMetadata().getCommitAppName(), + "commitAppName should be 'trino' when trino_query_id is present and spark.app.id is null"); + + log.info( + "Trino-only scenario validated: commitAppId={}, commitAppName={}", + event.getCommitMetadata().getCommitAppId(), + event.getCommitMetadata().getCommitAppName()); + } + } + + @Test + public void testCommitMetadata_SparkPresentTrinoNull() throws Exception { + final String tableName = "db.test_commit_metadata_spark_only"; + final String sparkAppId = "local-1704067200000"; + final String sparkAppName = "TestSparkApp"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table + prepareTable(ops, tableName); + + // Create a commit using Iceberg Table API with only spark.app.id and spark.app.name set + // This simulates a Spark commit where trino_query_id is not present + Table table = ops.getTable(tableName); + DataFile dataFile = createTestDataFile(table); + table + .newAppend() + .appendFile(dataFile) + .set("spark.app.id", sparkAppId) + .set("spark.app.name", sparkAppName) + .commit(); + + // Verify the commit actually succeeded by checking snapshot exists + table.refresh(); + Assertions.assertNotNull(table.currentSnapshot(), "Commit should have created a snapshot"); + log.info("Commit succeeded with snapshot ID: {}", table.currentSnapshot().snapshotId()); + + // Refresh Spark's catalog cache to see the new metadata + ops.spark().sql("REFRESH TABLE " + tableName); + + // Action: Collect commit events + List commitEvents = ops.collectCommitEventTable(tableName); + + // Verify: commitAppId should be spark.app.id, commitAppName should be spark.app.name + Assertions.assertFalse(commitEvents.isEmpty(), "Should have at least one commit event"); + CommitEventTable event = commitEvents.get(0); + + Assertions.assertEquals( 
+ sparkAppId, + event.getCommitMetadata().getCommitAppId(), + "commitAppId should be spark.app.id when present"); + Assertions.assertEquals( + sparkAppName, + event.getCommitMetadata().getCommitAppName(), + "commitAppName should be spark.app.name when spark.app.id is present"); + + log.info( + "Spark-only scenario validated: commitAppId={}, commitAppName={}", + event.getCommitMetadata().getCommitAppId(), + event.getCommitMetadata().getCommitAppName()); + } + } + + @Test + public void testCommitMetadata_BothPresentSparkTakesPrecedence() throws Exception { + final String tableName = "db.test_commit_metadata_both_present"; + final String sparkAppId = "local-1704067200000"; + final String sparkAppName = "TestSparkApp"; + final String trinoQueryId = "20240101_123456_00001_abcde"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table + prepareTable(ops, tableName); + + // Create a commit using Iceberg Table API with BOTH spark.app.id and trino_query_id set + // This edge case shouldn't happen in production but we verify Spark takes precedence + Table table = ops.getTable(tableName); + DataFile dataFile = createTestDataFile(table); + table + .newAppend() + .appendFile(dataFile) + .set("spark.app.id", sparkAppId) + .set("spark.app.name", sparkAppName) + .set("trino_query_id", trinoQueryId) + .commit(); + + // Verify the commit actually succeeded by checking snapshot exists + table.refresh(); + Assertions.assertNotNull(table.currentSnapshot(), "Commit should have created a snapshot"); + log.info("Commit succeeded with snapshot ID: {}", table.currentSnapshot().snapshotId()); + + // Refresh Spark's catalog cache to see the new metadata + ops.spark().sql("REFRESH TABLE " + tableName); + + // Action: Collect commit events + List commitEvents = ops.collectCommitEventTable(tableName); + + // Verify: spark.app.id takes precedence due to coalesce() ordering + // commitAppId = coalesce(spark.app.id, trino_query_id) → 
spark.app.id + // commitAppName = when(spark.app.id.isNotNull, spark.app.name) → spark.app.name + Assertions.assertFalse(commitEvents.isEmpty(), "Should have at least one commit event"); + CommitEventTable event = commitEvents.get(0); + + Assertions.assertEquals( + sparkAppId, + event.getCommitMetadata().getCommitAppId(), + "commitAppId should be spark.app.id (takes precedence over trino_query_id)"); + Assertions.assertEquals( + sparkAppName, + event.getCommitMetadata().getCommitAppName(), + "commitAppName should be spark.app.name when spark.app.id is present"); + + // Verify it's NOT using the Trino values + Assertions.assertNotEquals( + trinoQueryId, + event.getCommitMetadata().getCommitAppId(), + "commitAppId should NOT be trino_query_id when spark.app.id is also present"); + Assertions.assertNotEquals( + "trino", + event.getCommitMetadata().getCommitAppName(), + "commitAppName should NOT be 'trino' when spark.app.id is also present"); + + log.info( + "Both present scenario validated - Spark takes precedence: commitAppId={}, commitAppName={}", + event.getCommitMetadata().getCommitAppId(), + event.getCommitMetadata().getCommitAppName()); + } + } + // ==================== Error Handling Tests ==================== @Test @@ -533,8 +781,8 @@ public void testPartitionEventsSchemaValidation() throws Exception { Assertions.assertNotNull(firstEvent.getPartitionData()); Assertions.assertFalse(firstEvent.getPartitionData().isEmpty()); - // Verify: Event timestamp is placeholder (set at publish time) - Assertions.assertEquals(0L, firstEvent.getEventTimestampMs()); + // Verify: Event timestamp is set at collection time + Assertions.assertTrue(firstEvent.getEventTimestampMs() > 0); log.info("Partition events schema validated successfully"); } @@ -772,6 +1020,283 @@ public void testPartitionEventsParallelExecution() throws Exception { } } + // ==================== Partition Stats Tests (NEW) ==================== + + @Test + public void testPartitionStatsForPartitionedTable() 
throws Exception { + final String tableName = "db.test_partition_stats"; + final int numInserts = 3; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + populateTable(ops, tableName, numInserts); + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Partition stats collected + Assertions.assertFalse(partitionStats.isEmpty(), "Partition stats should not be empty"); + + // Verify: Each partition has latest commit only (not all commits) + Assertions.assertTrue( + partitionStats.size() <= numInserts, + "Partition stats should have at most one record per unique partition"); + + // Verify: Each stat has required fields + for (com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat : + partitionStats) { + Assertions.assertNotNull(stat.getDataset(), "Dataset should not be null"); + Assertions.assertNotNull(stat.getCommitMetadata(), "Commit metadata should not be null"); + Assertions.assertNotNull(stat.getPartitionData(), "Partition data should not be null"); + Assertions.assertNotNull(stat.getRowCount(), "Row count should not be null"); + Assertions.assertNotNull(stat.getColumnCount(), "Column count should not be null"); + + // Verify: Stats include column-level metrics + Assertions.assertNotNull(stat.getNullCount(), "Null count list should not be null"); + Assertions.assertNotNull(stat.getNanCount(), "NaN count list should not be null"); + Assertions.assertNotNull(stat.getMinValue(), "Min value list should not be null"); + Assertions.assertNotNull(stat.getMaxValue(), "Max value list should not be null"); + Assertions.assertNotNull( + stat.getColumnSizeInBytes(), "Column size list should not be null"); + + log.info( + "Partition stats: partition={}, rowCount={}, columnCount={}, commitId={}", + stat.getPartitionData(), + stat.getRowCount(), + stat.getColumnCount(), + 
stat.getCommitMetadata().getCommitId()); + } + + log.info("Collected {} partition stats for partitioned table", partitionStats.size()); + } + } + + @Test + public void testPartitionStatsForUnpartitionedTable() throws Exception { + final String tableName = "db.test_unpartitioned_stats"; + final int numInserts = 2; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create unpartitioned table + prepareTable(ops, tableName, false); + populateTable(ops, tableName, numInserts); + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Single stats record for unpartitioned table + Assertions.assertEquals( + 1, partitionStats.size(), "Unpartitioned table should return exactly one stats record"); + + com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat = + partitionStats.get(0); + + // Verify: Partition data is empty for unpartitioned table + Assertions.assertTrue( + stat.getPartitionData().isEmpty(), + "Unpartitioned table should have empty partition data"); + + // Verify: Row count includes all data + Assertions.assertTrue( + stat.getRowCount() > 0, "Row count should be positive for table with data"); + + // Verify: Has current snapshot metadata + Assertions.assertNotNull( + stat.getCommitMetadata(), "Commit metadata should not be null for unpartitioned"); + Assertions.assertNotNull( + stat.getCommitMetadata().getCommitId(), "Snapshot ID should not be null"); + + log.info( + "Collected stats for unpartitioned table: rowCount={}, commitId={}", + stat.getRowCount(), + stat.getCommitMetadata().getCommitId()); + } + } + + @Test + public void testPartitionStatsWithMultiplePartitions() throws Exception { + final String tableName = "db.test_multiple_partitions"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + + // Insert 
data into different partitions (different days) + populateTable(ops, tableName, 1, 0); // Day 0 + populateTable(ops, tableName, 1, 1); // Day 1 + populateTable(ops, tableName, 1, 2); // Day 2 + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Each partition has one stats record (latest commit) + Assertions.assertEquals( + 3, partitionStats.size(), "Should have 3 partition stats (one per unique partition)"); + + // Verify: All partitions have different partition data + java.util.Set uniquePartitions = new java.util.HashSet<>(); + for (com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat : + partitionStats) { + String partitionKey = stat.getPartitionData().toString(); + uniquePartitions.add(partitionKey); + } + + Assertions.assertEquals( + 3, uniquePartitions.size(), "All 3 partitions should have unique partition data"); + + log.info("Collected stats for {} unique partitions", partitionStats.size()); + } + } + + @Test + public void testPartitionStatsWithNestedColumns() throws Exception { + final String tableName = "db.test_nested_columns"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table with nested column + ops.spark().sql(String.format("DROP TABLE IF EXISTS %s", tableName)).show(); + ops.spark() + .sql( + String.format( + "CREATE TABLE %s (id bigint, user struct, ts timestamp) " + + "partitioned by (days(ts))", + tableName)) + .show(); + + // Insert data + ops.spark() + .sql( + String.format( + "INSERT INTO %s VALUES (1, named_struct('name', 'Alice', 'age', 30), current_timestamp())", + tableName)) + .show(); + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Stats collected for table with nested columns + Assertions.assertFalse(partitionStats.isEmpty(), "Should collect stats for nested columns"); + + 
com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat = + partitionStats.get(0); + + // Verify: Column count includes flattened nested columns if available in readable_metrics + Assertions.assertTrue( + stat.getColumnCount() >= 3, "Column count should be at least 3 (id, user, ts)"); + + log.info( + "Collected stats for table with nested columns: columnCount={}", stat.getColumnCount()); + } + } + + @Test + public void testPartitionStatsLatestCommitOnly() throws Exception { + final String tableName = "db.test_latest_commit"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + + // Insert into same partition multiple times + long timestamp = System.currentTimeMillis() / 1000; + populateTable(ops, tableName, 1, 0, timestamp); // First commit + Thread.sleep(100); // Small delay to ensure different commit timestamps + populateTable(ops, tableName, 1, 0, timestamp); // Second commit (same partition) + Thread.sleep(100); + populateTable(ops, tableName, 1, 0, timestamp); // Third commit (same partition) + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Only one stats record (latest commit per partition) + Assertions.assertEquals( + 1, + partitionStats.size(), + "Should have only 1 stats record (latest commit for the partition)"); + + // Compare with partition events (which have all commits) + List partitionEvents = + ops.collectCommitEventTablePartitions(tableName); + + Assertions.assertEquals( + 3, partitionEvents.size(), "Partition events should have 3 records (all commits)"); + + log.info( + "Verified latest commit only: {} partition stats vs {} partition events", + partitionStats.size(), + partitionEvents.size()); + } + } + + @Test + public void testPartitionStatsFullAppIntegration() throws Exception { + final String tableName = 
"db.test_full_integration_stats"; + final int numInserts = 2; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create partitioned table + prepareTable(ops, tableName, true); + populateTable(ops, tableName, numInserts); + + // Action: Run full app (collects stats, commits, partition events, and partition stats) + TableStatsCollectionSparkApp app = + new TableStatsCollectionSparkApp("test-job", null, tableName, otelEmitter); + app.runInner(ops); + + // Verify: All four types collected + IcebergTableStats tableStats = ops.collectTableStats(tableName); + List commitEvents = ops.collectCommitEventTable(tableName); + List partitionEvents = + ops.collectCommitEventTablePartitions(tableName); + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + Assertions.assertNotNull(tableStats, "Table stats should be collected"); + Assertions.assertFalse(commitEvents.isEmpty(), "Commit events should be collected"); + Assertions.assertFalse(partitionEvents.isEmpty(), "Partition events should be collected"); + Assertions.assertFalse(partitionStats.isEmpty(), "Partition stats should be collected"); + + // Verify: Partition stats count is less than or equal to partition events + // (stats have latest commit only, events have all commits) + Assertions.assertTrue( + partitionStats.size() <= partitionEvents.size(), + "Partition stats should have fewer or equal records than partition events"); + + log.info( + "Full app integration: table stats={}, commits={}, partition events={}, partition stats={}", + tableStats != null, + commitEvents.size(), + partitionEvents.size(), + partitionStats.size()); + } + } + + @Test + public void testPartitionStatsEmptyTable() throws Exception { + final String tableName = "db.test_stats_empty_table"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table with no data + prepareTable(ops, tableName, true); + + // Action: Collect 
partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Empty list (no data = no stats) + Assertions.assertTrue( + partitionStats.isEmpty(), "Empty table should return empty partition stats"); + + log.info("Empty table handled correctly for partition stats"); + } + } + // ==================== Helper Methods ==================== private static void prepareTable(Operations ops, String tableName) { @@ -803,6 +1328,124 @@ private static void populateTable(Operations ops, String tableName, int numRows, populateTable(ops, tableName, numRows, dayLag, System.currentTimeMillis() / 1000); } + @Test + public void testPartitionStatsColumnLevelMetricsPopulated() throws Exception { + final String tableName = "db.test_column_metrics"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + // Setup: Create table with multiple data types + ops.spark() + .sql( + String.format( + "CREATE TABLE %s (id INT, name STRING, age INT, score DOUBLE, ts TIMESTAMP) " + + "USING iceberg PARTITIONED BY (days(ts))", + tableName)) + .show(); + + // Insert data with intentional nulls and varied values + ops.spark() + .sql( + String.format( + "INSERT INTO %s VALUES " + + "(1, 'Alice', 25, 95.5, timestamp('2024-01-01')), " + + "(2, NULL, 30, 87.2, timestamp('2024-01-01')), " + + "(3, 'Charlie', NULL, 92.0, timestamp('2024-01-01')), " + + "(4, 'Diana', 28, NULL, timestamp('2024-01-02')), " + + "(5, 'Eve', 35, 88.8, timestamp('2024-01-02'))", + tableName)) + .show(); + + // Action: Collect partition stats + List + partitionStats = ops.collectCommitEventTablePartitionStats(tableName); + + // Verify: Stats collected for both partitions + Assertions.assertTrue( + partitionStats.size() >= 2, "Should have stats for at least 2 partitions"); + + // Verify: Each partition has column-level metrics populated + for (com.linkedin.openhouse.common.stats.model.CommitEventTablePartitionStats stat : + partitionStats) { + 
log.info("Verifying column metrics for partition: {}", stat.getPartitionData()); + + // Check null count metrics are present and valid + Assertions.assertNotNull(stat.getNullCount(), "Null count map should not be null"); + Assertions.assertFalse(stat.getNullCount().isEmpty(), "Null count map should not be empty"); + + // Log all null count entries + log.info("Null count entries:"); + for (ColumnData cd : stat.getNullCount()) { + log.info( + " Column '{}' has null count: {}", + cd.getColumnName(), + ((ColumnData.LongColumnData) cd).getValue()); + } + + // Should have null count for ALL columns (id, name, age, score, ts) + Assertions.assertTrue( + stat.getNullCount().size() >= 5, + String.format( + "Should have null count for all 5 columns (id, name, age, score, ts), got %d", + stat.getNullCount().size())); + + // At least one column should have null count > 0 (we inserted nulls) + boolean hasNonZeroNullCounts = + stat.getNullCount().stream() + .anyMatch(cd -> ((ColumnData.LongColumnData) cd).getValue() > 0); + Assertions.assertTrue( + hasNonZeroNullCounts, + "At least one column should have null count > 0 since we inserted null values"); + + // Check NaN count is populated for all columns + Assertions.assertNotNull(stat.getNanCount(), "NaN count map should not be null"); + log.info("NaN count entries: {}", stat.getNanCount().size()); + for (ColumnData cd : stat.getNanCount()) { + log.info( + " Column '{}' has NaN count: {}", + cd.getColumnName(), + ((ColumnData.LongColumnData) cd).getValue()); + } + + // Check column size metrics + Assertions.assertNotNull(stat.getColumnSizeInBytes(), "Column size map should not be null"); + Assertions.assertFalse( + stat.getColumnSizeInBytes().isEmpty(), "Column size map should not be empty"); + + // Check min/max value metrics exist + Assertions.assertNotNull(stat.getMinValue(), "Min value map should not be null"); + Assertions.assertNotNull(stat.getMaxValue(), "Max value map should not be null"); + + // Verify min/max have 
entries for non-null columns + log.info( + "Column metrics summary: nullCount={}, minValue={}, maxValue={}, columnSize={}", + stat.getNullCount().size(), + stat.getMinValue().size(), + stat.getMaxValue().size(), + stat.getColumnSizeInBytes().size()); + + // Verify we have min/max values for at least one column + Assertions.assertTrue( + stat.getMinValue().size() > 0 || stat.getMaxValue().size() > 0, + "Should have min or max values for at least one column"); + + // Verify specific column metrics - column names are preserved correctly + for (ColumnData cd : stat.getNullCount()) { + Assertions.assertNotNull(cd.getColumnName(), "Column name should not be null"); + Assertions.assertFalse(cd.getColumnName().isEmpty(), "Column name should not be empty"); + + // Verify the value is accessible and valid + Long nullCount = ((ColumnData.LongColumnData) cd).getValue(); + Assertions.assertTrue( + nullCount >= 0, + String.format( + "Null count for column '%s' should be non-negative", cd.getColumnName())); + } + } + + log.info("✅ Column-level metrics validation passed for all partitions"); + } + } + private static void populateTable( Operations ops, String tableName, int numRows, int dayLag, long timestampSeconds) { String timestampEntry = diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtilTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtilTest.java index 0f8445dbe..61e12b18d 100644 --- a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtilTest.java +++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/TableStatsCollectorUtilTest.java @@ -232,14 +232,200 @@ public void testTransformPartitionRowToColumnData_withEmptyRow() { Assertions.assertEquals(0, result.size()); } - // Note: transformRowsToPartitionEvents() is tested via integration tests in - // TableStatsCollectionSparkAppTest because it requires: - // 1. 
Complex Row structure with nested fields (partition, summary map, etc.) - // 2. Scala collection conversion (summary map) - // 3. Realistic Iceberg metadata structures + // ==================== Column Aggregation Expression Tests ==================== + + @Test + public void testBuildColumnAggregationExpressions_withSingleColumn() { + // Test: Single column generates 5 aggregation expressions + List columnNames = Arrays.asList("id"); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + Assertions.assertEquals(5, result.size()); + Assertions.assertTrue(result.get(0).contains("null_value_count")); + Assertions.assertTrue(result.get(1).contains("nan_value_count")); + Assertions.assertTrue(result.get(2).contains("lower_bound")); + Assertions.assertTrue(result.get(3).contains("upper_bound")); + Assertions.assertTrue(result.get(4).contains("column_size")); + } + + @Test + public void testBuildColumnAggregationExpressions_withMultipleColumns() { + // Test: Multiple columns generate correct number of expressions + List columnNames = Arrays.asList("id", "name", "age"); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + // 3 columns * 5 expressions each = 15 + Assertions.assertEquals(15, result.size()); + } + + @Test + public void testBuildColumnAggregationExpressions_withSpecialCharacters() { + // Test: Column names with special characters are properly escaped with backticks + // and dots in column names are replaced with underscores in alias names + List columnNames = Arrays.asList("user-id", "timestamp.value"); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + // Verify backtick escaping for column references + Assertions.assertTrue(result.get(0).contains("`user-id`")); + Assertions.assertTrue(result.get(5).contains("`timestamp.value`")); + + // Verify dots are replaced with underscores in alias names + // result.get(5) is the null_count 
expression for "timestamp.value" + Assertions.assertTrue(result.get(5).contains("as timestamp_value_null_count")); + } + + @Test + public void testBuildColumnAggregationExpressions_withEmptyList() { + // Test: Empty column list returns empty expression list + List columnNames = Arrays.asList(); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + Assertions.assertEquals(0, result.size()); + } + + @Test + public void testBuildColumnAggregationExpressions_verifyCoalesceForNullSafety() { + // Test: Expressions include coalesce() for null safety + List columnNames = Arrays.asList("value"); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + // null_count, nan_count, and column_size should use coalesce() + Assertions.assertTrue(result.get(0).contains("coalesce")); + Assertions.assertTrue(result.get(1).contains("coalesce")); + Assertions.assertTrue(result.get(4).contains("coalesce")); + + // min and max don't need coalesce (SQL handles nulls) + Assertions.assertFalse(result.get(2).contains("coalesce")); + Assertions.assertFalse(result.get(3).contains("coalesce")); + } + + @Test + public void testBuildColumnAggregationExpressions_verifyAliasNames() { + // Test: Verify alias naming convention (colname_metric_type) + List columnNames = Arrays.asList("age"); + + List result = TableStatsCollectorUtil.buildColumnAggregationExpressions(columnNames); + + Assertions.assertTrue(result.get(0).contains("as age_null_count")); + Assertions.assertTrue(result.get(1).contains("as age_nan_count")); + Assertions.assertTrue(result.get(2).contains("as age_min_value")); + Assertions.assertTrue(result.get(3).contains("as age_max_value")); + Assertions.assertTrue(result.get(4).contains("as age_column_size")); + } + + // ==================== Convert Value to ColumnData Tests ==================== + + @Test + public void testConvertValueToColumnData_withLongType() { + // Test: Long value converts to LongColumnData + 
org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.LongType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("count", 12345L, icebergType); + + Assertions.assertInstanceOf(ColumnData.LongColumnData.class, result); + Assertions.assertEquals("count", result.getColumnName()); + Assertions.assertEquals(12345L, ((ColumnData.LongColumnData) result).getValue()); + } + + @Test + public void testConvertValueToColumnData_withIntegerType() { + // Test: Integer value converts to LongColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.IntegerType.get(); + + ColumnData result = TableStatsCollectorUtil.convertValueToColumnData("age", 25, icebergType); + + Assertions.assertInstanceOf(ColumnData.LongColumnData.class, result); + Assertions.assertEquals(25L, ((ColumnData.LongColumnData) result).getValue()); + } + + @Test + public void testConvertValueToColumnData_withDoubleType() { + // Test: Double value converts to DoubleColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.DoubleType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("price", 99.99, icebergType); + + Assertions.assertInstanceOf(ColumnData.DoubleColumnData.class, result); + Assertions.assertEquals(99.99, ((ColumnData.DoubleColumnData) result).getValue()); + } + + @Test + public void testConvertValueToColumnData_withFloatType() { + // Test: Float value converts to DoubleColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.FloatType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("rating", 4.5f, icebergType); + + Assertions.assertInstanceOf(ColumnData.DoubleColumnData.class, result); + Assertions.assertEquals(4.5, ((ColumnData.DoubleColumnData) result).getValue(), 0.01); + } + + @Test + public void testConvertValueToColumnData_withStringType() { + // Test: String value converts to 
StringColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.StringType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("name", "Alice", icebergType); + + Assertions.assertInstanceOf(ColumnData.StringColumnData.class, result); + Assertions.assertEquals("Alice", ((ColumnData.StringColumnData) result).getValue()); + } + + @Test + public void testConvertValueToColumnData_withDateType() { + // Test: Date type falls back to StringColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.DateType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("date", "2024-01-01", icebergType); + + Assertions.assertInstanceOf(ColumnData.StringColumnData.class, result); + Assertions.assertEquals("2024-01-01", ((ColumnData.StringColumnData) result).getValue()); + } + + @Test + public void testConvertValueToColumnData_withNullValue() { + // Test: Null value returns null + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.StringType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("field", null, icebergType); + + Assertions.assertNull(result); + } + + @Test + public void testConvertValueToColumnData_withInvalidStringForNumericType() { + // Test: Invalid string for numeric type falls back to StringColumnData + org.apache.iceberg.types.Type icebergType = org.apache.iceberg.types.Types.LongType.get(); + + ColumnData result = + TableStatsCollectorUtil.convertValueToColumnData("value", "not-a-number", icebergType); + + // Should fall back to string (due to NumberFormatException) + Assertions.assertInstanceOf(ColumnData.StringColumnData.class, result); + Assertions.assertEquals("not-a-number", ((ColumnData.StringColumnData) result).getValue()); + } + + // Note: The following methods require Spark runtime and are tested in integration tests: + // - getColumnNamesFromReadableMetrics() - requires Spark SQL 
execution + // - extractColumnMetricsFromAggregatedRow() - requires Spark Row structure + // - transformRowsToPartitionStatsFromAggregatedSQL() - requires complex Row structures + // - populateCommitEventTablePartitionStats() - requires full Iceberg table + // - populateStatsForUnpartitionedTable() - requires full Iceberg table // - // Integration tests provide better coverage: - // - testPartitionEventsForPartitionedTable() - // - testPartitionEventsSchemaValidation() - // - testPartitionEventsWithMultiplePartitions() + // Integration tests provide better coverage for these methods: + // - testPartitionStatsForPartitionedTable() + // - testPartitionStatsForUnpartitionedTable() + // - testPartitionStatsWithNestedColumns() + // - testPartitionStatsSchemaValidation() } diff --git a/build.gradle b/build.gradle index a43d93e1b..4699ca592 100644 --- a/build.gradle +++ b/build.gradle @@ -31,7 +31,7 @@ ext { ok_http3_version = "4.11.0" junit_version = "5.11.0" iceberg_1_2_version = "1.2.0.6" - iceberg_1_5_version = "1.5.2.4" + iceberg_1_5_version = "1.5.2.7" } group = 'com.linkedin.openhouse' @@ -146,3 +146,149 @@ tasks.register('CopyGitHooksTask', Copy) { from file('scripts/git-hooks') into file('.git/hooks/') } + +// ============================================================================= +// Docker Build Prerequisites +// ============================================================================= +// These tasks make the implicit JAR dependencies for Docker builds explicit. +// Run `./gradlew dockerPrereqs` before `docker compose build`. 
+// +// JAR Dependencies by Dockerfile: +// tables-service.Dockerfile -> :services:tables:bootJar +// housetables-service.Dockerfile -> :services:housetables:bootJar +// jobs-service.Dockerfile -> :services:jobs:bootJar +// jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) +// spark-base-hadoop2.8.dockerfile -> +// :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) +// :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) +// :scripts:java:tools:dummytokens:jar +// spark-3.5-base-hadoop3.2.dockerfile -> +// :integrations:spark:spark-3.5:openhouse-spark-3.5-runtime_2.12:shadowJar (uber JAR) +// :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) +// :scripts:java:tools:dummytokens:jar +// ============================================================================= + +tasks.register('dockerPrereqs') { + description = 'Builds all JAR files required by Docker images' + group = 'docker' + + // Service bootJars (Spring Boot fat JARs) + dependsOn ':services:tables:bootJar' + dependsOn ':services:housetables:bootJar' + dependsOn ':services:jobs:bootJar' + + // Spark runtime uber JARs (shadowJar) + dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' + dependsOn ':integrations:spark:spark-3.5:openhouse-spark-3.5-runtime_2.12:shadowJar' + + // Spark apps uber JAR (shadowJar) + dependsOn ':apps:openhouse-spark-apps_2.12:shadowJar' + + // Utility JAR + dependsOn ':scripts:java:tools:dummytokens:jar' + + doLast { + println '' + println '============================================================' + println 'Docker prerequisites built successfully!' 
+ println '' + println 'JAR files created:' + println ' build/tables/libs/tables.jar' + println ' build/housetables/libs/housetables.jar' + println ' build/jobs/libs/jobs.jar' + println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' + println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' + println ' build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' + println ' build/dummytokens/libs/dummytokens*.jar' + println '' + println 'Ready for: docker compose build' + println '============================================================' + } +} + +tasks.register('dockerBuild', Exec) { + description = 'Builds Docker images after ensuring all JAR prerequisites are built' + group = 'docker' + + dependsOn 'dockerPrereqs' + + def recipe = project.hasProperty('recipe') ? project.property('recipe') : 'oh-hadoop-spark' + def recipeDir = "${rootDir}/infra/recipes/docker-compose/${recipe}" + + workingDir recipeDir + commandLine 'docker', 'compose', 'build' + + doFirst { + if (!file(recipeDir).exists()) { + throw new GradleException("Recipe directory not found: ${recipeDir}\n" + + "Available recipes: " + file("${rootDir}/infra/recipes/docker-compose") + .listFiles() + .findAll { it.isDirectory() && it.name != 'common' } + .collect { it.name } + .join(', ')) + } + println '' + println "Building Docker images for recipe: ${recipe}" + println "Recipe directory: ${recipeDir}" + println '' + } +} + +tasks.register('dockerUp', Exec) { + description = 'Builds JARs, Docker images, and starts containers' + group = 'docker' + + dependsOn 'dockerBuild' + + def recipe = project.hasProperty('recipe') ? 
project.property('recipe') : 'oh-hadoop-spark' + def recipeDir = "${rootDir}/infra/recipes/docker-compose/${recipe}" + + workingDir recipeDir + commandLine 'docker', 'compose', 'up', '-d' + + doFirst { + println '' + println "Starting containers for recipe: ${recipe}" + println '' + } + + doLast { + println '' + println '============================================================' + println 'OpenHouse containers started successfully!' + println '' + println 'Services available at:' + println ' Tables Service: http://localhost:8000' + println ' HouseTables Service: http://localhost:8001' + println ' Jobs Service: http://localhost:8002' + println ' Prometheus: http://localhost:9090' + println '' + println 'To stop: ./gradlew dockerDown -Precipe=' + recipe + println '============================================================' + } +} + +tasks.register('dockerDown', Exec) { + description = 'Stops and removes Docker containers' + group = 'docker' + + def recipe = project.hasProperty('recipe') ? 
project.property('recipe') : 'oh-hadoop-spark' + def recipeDir = "${rootDir}/infra/recipes/docker-compose/${recipe}" + + workingDir recipeDir + commandLine 'docker', 'compose', 'down' + + doFirst { + if (!file(recipeDir).exists()) { + throw new GradleException("Recipe directory not found: ${recipeDir}\n" + + "Available recipes: " + file("${rootDir}/infra/recipes/docker-compose") + .listFiles() + .findAll { it.isDirectory() && it.name != 'common' } + .collect { it.name } + .join(', ')) + } + println '' + println "Stopping containers for recipe: ${recipe}" + println '' + } +} diff --git a/buildSrc/src/main/groovy/openhouse.apps-spark-common.gradle b/buildSrc/src/main/groovy/openhouse.apps-spark-common.gradle index ed7e014b8..4a2691012 100644 --- a/buildSrc/src/main/groovy/openhouse.apps-spark-common.gradle +++ b/buildSrc/src/main/groovy/openhouse.apps-spark-common.gradle @@ -115,10 +115,6 @@ shadowJar { } } -// https://github.com/johnrengelman/shadow/issues/335 -// By default shadow doesn't configure the build task to depend on the shadowJar task. 
-tasks.build.dependsOn tasks.shadowJar - test { if (JavaVersion.current() >= JavaVersion.VERSION_1_9) { jvmArgs \ diff --git a/client/common/build.gradle b/client/common/build.gradle new file mode 100644 index 000000000..85139ca88 --- /dev/null +++ b/client/common/build.gradle @@ -0,0 +1,10 @@ +// Shared OpenAPI CLI setup for client code generation +def sharedBinDir = "${rootProject.buildDir}/openapi-cli" +def openApiCliVersion = "5.3.0" + +task setUp(type: Exec) { + commandLine "sh", "${projectDir}/jar_download.sh", + "-o", sharedBinDir, + "-v", openApiCliVersion + outputs.dir(sharedBinDir) +} diff --git a/client/common/codegen.build.gradle b/client/common/codegen.build.gradle index 5da41d86e..06b5080c4 100644 --- a/client/common/codegen.build.gradle +++ b/client/common/codegen.build.gradle @@ -1,5 +1,6 @@ def generatedCodeDir = "$buildDir/generated" -def generatedBinDir = "$buildDir/bin" +// Use a shared location for the OpenAPI CLI JAR (downloaded once by :client:common:setUp) +def sharedBinDir = "${rootProject.buildDir}/openapi-cli" def commonDir = "${project(':client:common').projectDir}" /** * Change the service name to produce respective client @@ -7,19 +8,12 @@ def commonDir = "${project(':client:common').projectDir}" def serviceName = project.ext.get("codeGenForService") def openApiCliVersion = "5.3.0" -task setUp(type: Exec) { - commandLine "sh", "$commonDir/jar_download.sh", - "-o", generatedBinDir, - "-v", openApiCliVersion - outputs.dir(generatedBinDir) -} - -task clientCodeGen(type: Exec, dependsOn: ["$serviceName:openApiOutput", setUp]) { +task clientCodeGen(type: Exec, dependsOn: ["$serviceName:openApiOutput", ":client:common:setUp"]) { def inputSpec = tasks.getByPath("$serviceName:openApiOutput").outputs.files.first() commandLine "sh", "$commonDir/client_codegen.sh", "-i", tasks.getByPath("$serviceName:openApiOutput").outputs.files.first(), // path to spec "-o", generatedCodeDir, // path to generated code - "-j", generatedBinDir, // path to 
download cli-jar + "-j", sharedBinDir, // path to shared cli-jar location "-n", project(serviceName).name, // service name to generate packages "-v", openApiCliVersion // defining the following inputs and outputs ensures this task does not rerun, if input has not changed diff --git a/client/common/jar_download.sh b/client/common/jar_download.sh index e460e2f98..5cd0729cf 100755 --- a/client/common/jar_download.sh +++ b/client/common/jar_download.sh @@ -50,6 +50,7 @@ cd "$output" || exit 1 FILE="openapi-generator-cli-${version}.jar" if test -f "$FILE"; then echo "$FILE exists." - exit + exit 0 fi -curl "https://repo.maven.apache.org/maven2/org/openapitools/openapi-generator-cli/${version}/openapi-generator-cli-${version}.jar" --output "openapi-generator-cli-${version}.jar" \ No newline at end of file +echo "Downloading $FILE..." +curl -f "https://repo.maven.apache.org/maven2/org/openapitools/openapi-generator-cli/${version}/openapi-generator-cli-${version}.jar" --output "$FILE" || exit 1 \ No newline at end of file diff --git a/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java b/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java index 34b873f13..9d0c81876 100644 --- a/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java +++ b/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java @@ -64,6 +64,25 @@ public class ClusterProperties { @Value("${HTS_DB_PASSWORD:}") private String clusterHouseTablesDatabasePassword; + // MySQL SSL/Certificate configuration for certificate-based authentication + @Value("${cluster.housetables.database.cert-based-auth.enabled:false}") + private boolean clusterHouseTablesDatabaseCertBasedAuthEnabled; + + @Value("${cluster.housetables.database.cert-based-auth.ssl-mode:VERIFY_IDENTITY}") + private String clusterHouseTablesDatabaseCertBasedAuthSslMode; + + 
@Value("${cluster.housetables.database.cert-based-auth.client-cert-keystore-url:#{null}}") + private String clusterHouseTablesDatabaseCertBasedAuthClientCertKeystoreUrl; + + @Value("${cluster.housetables.database.cert-based-auth.client-cert-keystore-password:#{null}}") + private String clusterHouseTablesDatabaseCertBasedAuthClientCertKeystorePassword; + + @Value("${cluster.housetables.database.cert-based-auth.truststore-url:#{null}}") + private String clusterHouseTablesDatabaseCertBasedAuthTruststoreUrl; + + @Value("${cluster.housetables.database.cert-based-auth.truststore-password:#{null}}") + private String clusterHouseTablesDatabaseCertBasedAuthTruststorePassword; + @Value("${cluster.security.token.interceptor.classname:#{null}}") private String clusterSecurityTokenInterceptorClassname; diff --git a/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/DiagnosticHadoopFileIO.java b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/DiagnosticHadoopFileIO.java new file mode 100644 index 000000000..0babfbe09 --- /dev/null +++ b/cluster/storage/src/main/java/com/linkedin/openhouse/cluster/storage/hdfs/DiagnosticHadoopFileIO.java @@ -0,0 +1,360 @@ +package com.linkedin.openhouse.cluster.storage.hdfs; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; +import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.hadoop.HadoopInputFile; +import org.apache.iceberg.hadoop.HadoopOutputFile; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.PositionOutputStream; +import org.apache.iceberg.io.SeekableInputStream; + +/** + * Diagnostic FileIO that logs metrics for HDFS client tuning decisions. + * + *

Focuses on 4 actionable signals: + * + *

    + *
  1. NN vs DN latency - If NN slow: tune failover. If DN slow: tune hedged reads. + *
  2. Hedged/read failover internals - Enable Hadoop loggers for DFSClient and retry + * handlers. + *
  3. Write close time - If slow: enable pipeline recovery. + *
  4. Periodic operation summaries - Track read/write and slow-path counts over time. + *
+ * + *

Enable: {@code openhouse.hdfs.diagnostic.logging.enabled=true} + * + *

Log format: + * + *

+ * HDFS_READ path=... total_ms=150 nn_ms=20 dn_ms=130 bytes=4096
+ * HDFS_WRITE path=... total_ms=250 create_ms=30 write_ms=150 close_ms=70 bytes=4096
+ * HDFS_STATS reads=10 writes=8 slow_nn=2 slow_dn=1 slow_close=0
+ * 
+ */ +@Slf4j +public class DiagnosticHadoopFileIO extends HadoopFileIO { + + private boolean enabled = false; + + // Periodic stats tracking + private long lastStatsTime = 0; + private static final long STATS_INTERVAL_MS = 60_000; + + // Aggregate counters for summary + private final AtomicLong totalReads = new AtomicLong(); + private final AtomicLong totalWrites = new AtomicLong(); + private final AtomicLong slowNnReads = new AtomicLong(); // nn_ms > dn_ms + private final AtomicLong slowDnReads = new AtomicLong(); // dn_ms > nn_ms + private final AtomicLong slowCloses = new AtomicLong(); // close_ms > 500 + + public DiagnosticHadoopFileIO() { + super(); + } + + public DiagnosticHadoopFileIO(Configuration conf) { + super(conf); + init(conf); + } + + @Override + public void initialize(Map properties) { + super.initialize(properties); + this.enabled = + Boolean.parseBoolean( + properties.getOrDefault("openhouse.hdfs.diagnostic.logging.enabled", "true")); + try { + init(conf()); + } catch (Exception e) { + log.warn("Failed to initialize HDFS diagnostics", e); + this.enabled = false; + } + } + + private void init(Configuration conf) { + if (conf == null) return; + try { + FileSystem fs = FileSystem.get(conf); + logConfig(conf); + if (fs instanceof DistributedFileSystem) { + log.info("HDFS_CONFIG fs_impl=DistributedFileSystem"); + } else { + log.info("HDFS_CONFIG fs_impl={}", fs.getClass().getName()); + } + } catch (IOException e) { + log.warn("Failed to get FileSystem for diagnostics", e); + } + } + + private void logConfig(Configuration conf) { + log.info( + "HDFS_CONFIG hedged_pool={} hedged_threshold_ms={} failover_base_ms={} failover_max_ms={} socket_timeout_ms={} replace_dn_on_failure={}", + conf.get("dfs.client.hedged.read.threadpool.size", "0"), + conf.get("dfs.client.hedged.read.threshold.millis", "500"), + conf.get("dfs.client.failover.sleep.base.millis", "500"), + conf.get("dfs.client.failover.sleep.max.millis", "15000"), + 
conf.get("dfs.client.socket-timeout", "60000"), + conf.get("dfs.client.block.write.replace-datanode-on-failure.enable", "true")); + } + + @Override + public InputFile newInputFile(String path) { + maybeLogStats(); + if (!enabled) return super.newInputFile(path); + return new DiagnosticInputFile((HadoopInputFile) super.newInputFile(path), path); + } + + @Override + public OutputFile newOutputFile(String path) { + maybeLogStats(); + if (!enabled) return super.newOutputFile(path); + return new DiagnosticOutputFile((HadoopOutputFile) super.newOutputFile(path), path); + } + + private void maybeLogStats() { + long now = System.currentTimeMillis(); + if (now - lastStatsTime < STATS_INTERVAL_MS) return; + lastStatsTime = now; + + log.info( + "HDFS_STATS reads={} writes={} slow_nn={} slow_dn={} slow_close={}", + totalReads.get(), + totalWrites.get(), + slowNnReads.get(), + slowDnReads.get(), + slowCloses.get()); + } + + private static String truncate(String path) { + if (path == null || path.length() <= 60) return path; + return "..." 
+ path.substring(path.length() - 57); + } + + // ========== Input File Wrapper ========== + + private class DiagnosticInputFile implements InputFile { + private final HadoopInputFile delegate; + private final String path; + + DiagnosticInputFile(HadoopInputFile delegate, String path) { + this.delegate = delegate; + this.path = path; + } + + @Override + public long getLength() { + return delegate.getLength(); + } + + @Override + public SeekableInputStream newStream() { + long openStart = System.currentTimeMillis(); + SeekableInputStream stream = delegate.newStream(); + long nnMs = System.currentTimeMillis() - openStart; + return new DiagnosticInputStream(stream, path, openStart, nnMs); + } + + @Override + public String location() { + return delegate.location(); + } + + @Override + public boolean exists() { + return delegate.exists(); + } + } + + // ========== Input Stream Wrapper ========== + + private class DiagnosticInputStream extends SeekableInputStream { + private final SeekableInputStream delegate; + private final String path; + private final long startTime; + private final long nnMs; + private long bytes = 0; + + DiagnosticInputStream(SeekableInputStream delegate, String path, long startTime, long nnMs) { + this.delegate = delegate; + this.path = path; + this.startTime = startTime; + this.nnMs = nnMs; + } + + @Override + public long getPos() throws IOException { + return delegate.getPos(); + } + + @Override + public void seek(long pos) throws IOException { + delegate.seek(pos); + } + + @Override + public int read() throws IOException { + int b = delegate.read(); + if (b >= 0) bytes++; + return b; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int n = delegate.read(b, off, len); + if (n > 0) bytes += n; + return n; + } + + @Override + public void close() throws IOException { + try { + delegate.close(); + } finally { + long totalMs = System.currentTimeMillis() - startTime; + long dnMs = totalMs - nnMs; + + 
totalReads.incrementAndGet(); + if (nnMs > dnMs && nnMs > 100) slowNnReads.incrementAndGet(); + if (dnMs > nnMs && dnMs > 100) slowDnReads.incrementAndGet(); + + log.info( + "HDFS_READ path={} total_ms={} nn_ms={} dn_ms={} bytes={}", + truncate(path), + totalMs, + nnMs, + dnMs, + bytes); + + if (totalMs > 500) { + if (nnMs > dnMs) { + log.warn( + "HDFS_SLOW_NN path={} nn_ms={} - consider tuning failover timeouts", + truncate(path), + nnMs); + } else { + log.warn( + "HDFS_SLOW_DN path={} dn_ms={} - consider tuning hedged reads", + truncate(path), + dnMs); + } + } + } + } + } + + // ========== Output File Wrapper ========== + + private class DiagnosticOutputFile implements OutputFile { + private final HadoopOutputFile delegate; + private final String path; + + DiagnosticOutputFile(HadoopOutputFile delegate, String path) { + this.delegate = delegate; + this.path = path; + } + + @Override + public PositionOutputStream create() { + long createStart = System.currentTimeMillis(); + PositionOutputStream stream = delegate.create(); + long createMs = System.currentTimeMillis() - createStart; + return new DiagnosticOutputStream(stream, path, createStart, createMs); + } + + @Override + public PositionOutputStream createOrOverwrite() { + long createStart = System.currentTimeMillis(); + PositionOutputStream stream = delegate.createOrOverwrite(); + long createMs = System.currentTimeMillis() - createStart; + return new DiagnosticOutputStream(stream, path, createStart, createMs); + } + + @Override + public String location() { + return delegate.location(); + } + + @Override + public InputFile toInputFile() { + return new DiagnosticInputFile((HadoopInputFile) delegate.toInputFile(), path); + } + } + + // ========== Output Stream Wrapper ========== + + private class DiagnosticOutputStream extends PositionOutputStream { + private final PositionOutputStream delegate; + private final String path; + private final long startTime; + private final long createMs; + private long bytes = 0; + + 
DiagnosticOutputStream( + PositionOutputStream delegate, String path, long startTime, long createMs) { + this.delegate = delegate; + this.path = path; + this.startTime = startTime; + this.createMs = createMs; + } + + @Override + public long getPos() throws IOException { + return delegate.getPos(); + } + + @Override + public void write(int b) throws IOException { + delegate.write(b); + bytes++; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + delegate.write(b, off, len); + bytes += len; + } + + @Override + public void flush() throws IOException { + delegate.flush(); + } + + @Override + public void close() throws IOException { + long closeStart = System.currentTimeMillis(); + long writeMs = closeStart - startTime - createMs; + + try { + delegate.close(); + } finally { + long closeMs = System.currentTimeMillis() - closeStart; + long totalMs = System.currentTimeMillis() - startTime; + + totalWrites.incrementAndGet(); + if (closeMs > 500) slowCloses.incrementAndGet(); + + log.info( + "HDFS_WRITE path={} total_ms={} create_ms={} write_ms={} close_ms={} bytes={}", + truncate(path), + totalMs, + createMs, + writeMs, + closeMs, + bytes); + + if (closeMs > 500) { + log.warn( + "HDFS_SLOW_CLOSE path={} close_ms={} - consider enabling pipeline recovery", + truncate(path), + closeMs); + } + } + } + } +} diff --git a/gradle.properties b/gradle.properties index 5ca2f967c..5b6e16558 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,8 @@ # Increase daemon memory allocation org.gradle.jvmargs=-Xmx2g "-XX:MaxMetaspaceSize=512m" + +# Enable parallel build by default for faster builds +org.gradle.parallel=true + +# Enable build cache for faster incremental builds +org.gradle.caching=true diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java 
b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java index 4c18fa952..0b132eab1 100644 --- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java +++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperations.java @@ -383,7 +383,13 @@ updatedMtDataRef, io().newOutputFile(newMetadataLocation)), e); } throw new CommitFailedException(ioe); - } catch (InvalidIcebergSnapshotException | IllegalArgumentException | ValidationException e) { + } catch (InvalidIcebergSnapshotException | IllegalArgumentException e) { + throw new BadRequestException(e, e.getMessage()); + } catch (ValidationException e) { + // Stale snapshot errors are retryable - client should refresh and retry + if (isStaleSnapshotError(e)) { + throw new CommitFailedException(e); + } throw new BadRequestException(e, e.getMessage()); } catch (CommitFailedException e) { throw e; @@ -556,6 +562,17 @@ private void failIfRetryUpdate(Map properties) { } } + /** + * Checks if a ValidationException is due to a stale snapshot (sequence number conflict). This + * happens during concurrent modifications and should be retryable (409), not a bad request (400). + */ + private boolean isStaleSnapshotError(ValidationException e) { + String msg = e.getMessage(); + return msg != null + && msg.contains("Cannot add snapshot with sequence number") + && msg.contains("older than last sequence number"); + } + /** * Process all schema-related operations including client schema (for new tables), evolved schema * (for updates), and intermediate schemas (for replication scenarios). 
This consolidates all diff --git a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/fileio/FileIOConfig.java b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/fileio/FileIOConfig.java index 44c9480dc..e42829732 100644 --- a/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/fileio/FileIOConfig.java +++ b/iceberg/openhouse/internalcatalog/src/main/java/com/linkedin/openhouse/internal/catalog/fileio/FileIOConfig.java @@ -2,16 +2,19 @@ import com.linkedin.openhouse.cluster.storage.StorageManager; import com.linkedin.openhouse.cluster.storage.StorageType; +import com.linkedin.openhouse.cluster.storage.hdfs.DiagnosticHadoopFileIO; +import java.util.HashMap; import java.util.Map; import lombok.extern.slf4j.Slf4j; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.iceberg.aws.s3.S3FileIO; import org.apache.iceberg.azure.adlsv2.ADLSFileIO; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.io.FileIO; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; import software.amazon.awssdk.services.s3.S3Client; /** @@ -24,13 +27,39 @@ * between them. */ @Slf4j -@Configuration +@org.springframework.context.annotation.Configuration public class FileIOConfig { @Autowired StorageManager storageManager; /** - * Provides the HdfsFileIO bean for HDFS storage type + * Enable diagnostic logging for HDFS operations. When enabled, detailed metrics are logged for + * every FileIO operation including: read/write latency, bytes transferred, NameNode host, hedged + * read statistics, and retry counts. + * + *

Set to true when diagnosing P90/P99 latency issues. Log output format: + * + *

+   * HDFS_STREAM_READ path=/.../metadata.json duration_ms=150 bytes_read=4096 read_calls=2
+   * HDFS_SLOW_READ path=/.../metadata.json duration_ms=1500 bytes_read=4096
+   * 
+ */ + @Value("${openhouse.hdfs.diagnostic.logging.enabled:false}") + private boolean hdfsDiagnosticLoggingEnabled; + + /** + * Provides the HdfsFileIO bean for HDFS storage type. + * + *

When diagnostic logging is enabled (openhouse.hdfs.diagnostic.logging.enabled=true), returns + * a {@link DiagnosticHadoopFileIO} that logs detailed HDFS operation metrics including: + * + *

    + *
  • Read/write duration breakdown + *
  • Bytes transferred + *
  • NameNode host (for Active vs Observer analysis) + *
  • Hedged read statistics + *
  • Slow operation warnings (>500ms reads, >1000ms writes) + *
* * @return HdfsFileIO bean for HDFS storage type, or null if HDFS storage type is not configured */ @@ -39,7 +68,24 @@ HadoopFileIO provideHdfsFileIO() { try { FileSystem fs = (FileSystem) storageManager.getStorage(StorageType.HDFS).getClient().getNativeClient(); - return new HadoopFileIO(fs.getConf()); + Configuration conf = fs.getConf(); + + if (hdfsDiagnosticLoggingEnabled) { + log.info("HDFS diagnostic logging ENABLED - detailed operation metrics will be logged"); + DiagnosticHadoopFileIO diagnosticFileIO = new DiagnosticHadoopFileIO(conf); + + // Initialize with properties to enable internal diagnostic flag + Map props = new HashMap<>(); + props.put("openhouse.hdfs.diagnostic.logging.enabled", "true"); + diagnosticFileIO.initialize(props); + + // Config is logged automatically during init() + return diagnosticFileIO; + } else { + log.info( + "HDFS diagnostic logging disabled. Enable with openhouse.hdfs.diagnostic.logging.enabled=true"); + return new HadoopFileIO(conf); + } } catch (IllegalArgumentException e) { // If the HDFS storage type is not configured, return null // Spring doesn't define the bean if the return value is null diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java index 931feef12..4a174716e 100644 --- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java +++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/IcebergTestUtil.java @@ -16,6 +16,7 @@ public final class IcebergTestUtil { private static final String SNAPSHOTS_FILE = "serialized_snapshots.json"; private static final String EXTRA_SNAPSHOTS_FILE = "extra_serialized_snapshots.json"; private static final String WAP_SNAPSHOTS_FILE = "wap_serialized_snapshots.json"; + private static final String STALE_SNAPSHOT_FILE = 
"stale_snapshot.json"; private IcebergTestUtil() {} @@ -31,6 +32,10 @@ public static List getWapSnapshots() throws IOException { return loadSnapshots(WAP_SNAPSHOTS_FILE); } + public static List getStaleSnapshots() throws IOException { + return loadSnapshots(STALE_SNAPSHOT_FILE); + } + private static List loadSnapshots(String snapshotFile) throws IOException { InputStream inputStream = IcebergTestUtil.class.getClassLoader().getResourceAsStream(snapshotFile); diff --git a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java index 29a475957..a4506ca4f 100644 --- a/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java +++ b/iceberg/openhouse/internalcatalog/src/test/java/com/linkedin/openhouse/internal/catalog/OpenHouseInternalTableOperationsTest.java @@ -52,6 +52,7 @@ import org.apache.iceberg.exceptions.BadRequestException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -77,7 +78,7 @@ public class OpenHouseInternalTableOperationsTest { Types.NestedField.required(2, "ts", Types.TimestampType.withoutZone())), PartitionSpec.unpartitioned(), getTempLocation(), - ImmutableMap.of()); + ImmutableMap.of("format-version", "2")); @Mock private HouseTableRepository mockHouseTableRepository; @Mock private HouseTableMapper mockHouseTableMapper; @Mock private HouseTable mockHouseTable; @@ -1718,4 +1719,100 @@ void testMultipleDiffCommitWithWAPSnapshots() throws IOException { 
Mockito.verify(mockHouseTableRepository, Mockito.times(1)).save(Mockito.eq(mockHouseTable)); } } + + /** + * Tests that stale snapshot detection returns 409 Conflict instead of 400 Bad Request. + * + *

This test reproduces the production scenario where: + * + *

    + *
  1. Table has 4 existing snapshots with lastSequenceNumber = 4 + *
  2. A concurrent modification occurs during commit + *
  3. Client tries to add a snapshot with sequenceNumber = 4 (now stale) + *
  4. Before fix: ValidationException → BadRequestException (400) + *
  5. After fix: Stale snapshot detected → CommitFailedException (409) + *
+ * + *

The 409 response tells clients to refresh and retry, while 400 incorrectly suggests the + * request was invalid. + */ + /** + * First verifies that Iceberg's validation actually throws for stale snapshots in format v2, then + * tests the OpenHouse doCommit path. + */ + @Test + void testStaleSnapshotDuringConcurrentModificationReturns409NotBadRequest() throws IOException { + // Build base metadata with all 4 test snapshots (lastSequenceNumber = 4) + List existingSnapshots = IcebergTestUtil.getSnapshots(); + TableMetadata tempMetadata = BASE_TABLE_METADATA; + for (Snapshot snapshot : existingSnapshots) { + tempMetadata = + TableMetadata.buildFrom(tempMetadata) + .setBranchSnapshot(snapshot, SnapshotRef.MAIN_BRANCH) + .build(); + } + final TableMetadata baseMetadata = tempMetadata; + + // Verify base metadata has format version 2 and lastSequenceNumber = 4 + Assertions.assertEquals(2, baseMetadata.formatVersion(), "Format version should be 2"); + Assertions.assertEquals(4, baseMetadata.lastSequenceNumber(), "lastSequenceNumber should be 4"); + + // Load stale snapshot with sequenceNumber = 4 (same as lastSequenceNumber) + List staleSnapshots = IcebergTestUtil.getStaleSnapshots(); + Snapshot staleSnapshot = staleSnapshots.get(0); + Assertions.assertEquals( + 4, staleSnapshot.sequenceNumber(), "Stale snapshot should have sequenceNumber = 4"); + + // Verify stale snapshot has a parent (required for Iceberg 1.5+ validation) + Assertions.assertNotNull( + staleSnapshot.parentId(), "Stale snapshot must have parentId for validation to trigger"); + + // FIRST: Verify Iceberg's validation works directly + TableMetadata.Builder directBuilder = TableMetadata.buildFrom(baseMetadata); + ValidationException icebergException = + Assertions.assertThrows( + ValidationException.class, + () -> directBuilder.addSnapshot(staleSnapshot), + "Iceberg should throw ValidationException for stale snapshot"); + Assertions.assertTrue( + icebergException.getMessage().contains("Cannot add snapshot 
with sequence number"), + "Iceberg should report sequence number issue: " + icebergException.getMessage()); + + // NOW test the full doCommit path + List allSnapshots = new ArrayList<>(existingSnapshots); + allSnapshots.addAll(staleSnapshots); + + Map properties = new HashMap<>(baseMetadata.properties()); + properties.put( + CatalogConstants.SNAPSHOTS_JSON_KEY, SnapshotsUtil.serializedSnapshots(allSnapshots)); + properties.put( + CatalogConstants.SNAPSHOTS_REFS_KEY, + SnapshotsUtil.serializeMap( + IcebergTestUtil.createMainBranchRefPointingTo(existingSnapshots.get(3)))); + properties.put(getCanonicalFieldName("tableLocation"), TEST_LOCATION); + + final TableMetadata metadataWithStaleSnapshot = baseMetadata.replaceProperties(properties); + + try (MockedStatic ignoreWriteMock = + Mockito.mockStatic(TableMetadataParser.class)) { + + // Should throw CommitFailedException (409), not BadRequestException (400) + Exception exception = + Assertions.assertThrows( + CommitFailedException.class, + () -> + openHouseInternalTableOperations.doCommit( + baseMetadata, metadataWithStaleSnapshot), + "Stale snapshot should return 409 Conflict (CommitFailedException), not 400 " + + "(BadRequestException). 
400 suggests invalid request, 409 suggests retry."); + + String message = exception.getMessage().toLowerCase(); + Assertions.assertTrue( + message.contains("stale") + || message.contains("sequence") + || message.contains("concurrent"), + "Exception message should indicate stale snapshot or sequence number issue: " + + exception.getMessage()); + } + } } diff --git a/iceberg/openhouse/internalcatalog/src/test/resources/stale_snapshot.json b/iceberg/openhouse/internalcatalog/src/test/resources/stale_snapshot.json new file mode 100644 index 000000000..34ecdf6a1 --- /dev/null +++ b/iceberg/openhouse/internalcatalog/src/test/resources/stale_snapshot.json @@ -0,0 +1,3 @@ +[ + "{ \"snapshot-id\" : 9999888777666555, \"parent-snapshot-id\" : 4151407017102313398, \"sequence-number\" : 4, \"timestamp-ms\" : 1767258030812, \"summary\" : { \"operation\" : \"append\", \"spark.app.id\" : \"local-1767258030000\", \"added-data-files\" : \"1\", \"added-records\" : \"1\", \"added-files-size\" : \"673\", \"changed-partition-count\" : \"1\", \"total-records\" : \"1\", \"total-files-size\" : \"673\", \"total-data-files\" : \"1\", \"total-delete-files\" : \"0\", \"total-position-deletes\" : \"0\", \"total-equality-deletes\" : \"0\" }, \"manifest-list\" : \"/data/openhouse/tracking_live_daily/SponsoredDocumentPageImpressionEvent/metadata/snap-9999888777666555-1-aa0dcbb9-707f-4f53-9df8-394bad8563f2.avro\", \"schema-id\" : 0}" +] diff --git a/integrations/python/dataloader/.gitignore b/integrations/python/dataloader/.gitignore new file mode 100644 index 000000000..aed472efd --- /dev/null +++ b/integrations/python/dataloader/.gitignore @@ -0,0 +1,25 @@ +# Python +__pycache__/ +*.py[cod] +*.so + +# Virtual environments +.venv/ +venv/ + +# Distribution / packaging +build/ +dist/ +*.egg-info/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# IDEs +.vscode/ +.idea/ + +# OS +.DS_Store diff --git a/integrations/python/dataloader/CLAUDE.md b/integrations/python/dataloader/CLAUDE.md new file 
mode 100644 index 000000000..14f6698ee --- /dev/null +++ b/integrations/python/dataloader/CLAUDE.md @@ -0,0 +1,58 @@ +# Claude Instructions for OpenHouse DataLoader + +## Project Overview + +Python library for distributed data loading of OpenHouse tables. Uses DataFusion for query execution and PyIceberg for table access. + +## Common Commands + +```bash +make sync # Install dependencies +make check # Run lint + format checks +make test # Run tests +make verify # Run all checks and tests +make format # Auto-format code +make build # Build package distributions +make package-check # Validate built distributions with twine +make clean # Clean build artifacts +``` + +## Workflows +When making a change run `make verify` to ensure all tests and checks pass + +## Project Structure + +``` +src/openhouse/dataloader/ +├── __init__.py # Public API exports +├── data_loader.py # Main API: OpenHouseDataLoader +├── data_loader_splits.py # DataLoaderSplits (iterable of splits) +├── data_loader_split.py # DataLoaderSplit (single callable split) +├── table_identifier.py # TableIdentifier dataclass +├── table_transformer.py # TableTransformer ABC (internal) +└── udf_registry.py # UDFRegistry ABC (internal) +``` + +## Public API + +Only these are exported in `__init__.py`: +- `OpenHouseDataLoader` - Main entry point +- `TableIdentifier` - Table reference (database, table, branch) + +Internal modules (TableTransformer, UDFRegistry) can be imported directly if needed but expose DataFusion types. 
+ +## Code Style + +- Uses `ruff` for linting and formatting +- Line length: 120 +- Python 3.12+ +- Use modern type hints (`list`, `dict`, `X | None` instead of `List`, `Dict`, `Optional`) +- Use `raise NotImplementedError` for unimplemented methods in concrete classes +- Use `pass` for abstract methods decorated with `@abstractmethod` + +## Versioning + +- Version is derived from git tags via `hatch-vcs` (no hardcoded version in `pyproject.toml`) +- `__version__` in `__init__.py` reads from package metadata at runtime +- CI sets `SETUPTOOLS_SCM_PRETEND_VERSION` to inject the monorepo semVer tag at build time +- For local builds, use `SETUPTOOLS_SCM_PRETEND_VERSION=x.y.z make build` to override diff --git a/integrations/python/dataloader/Makefile b/integrations/python/dataloader/Makefile new file mode 100644 index 000000000..bd260e2cc --- /dev/null +++ b/integrations/python/dataloader/Makefile @@ -0,0 +1,46 @@ +.PHONY: help sync clean lint format format-check check test verify build package-check + +help: + @echo "Available commands:" + @echo " make sync - Sync dependencies using uv" + @echo " make lint - Run ruff linter" + @echo " make format - Format code with ruff" + @echo " make check - Run all checks (lint + format check)" + @echo " make test - Run tests with pytest" + @echo " make verify - Run all checks and tests" + @echo " make build - Build package distributions" + @echo " make package-check - Validate built distributions with twine" + @echo " make clean - Clean build artifacts" + +sync: + uv sync --all-extras + +lint: + uv run ruff check src/ tests/ + +format: + uv run ruff format src/ tests/ + +format-check: + uv run ruff format --check src/ tests/ + +check: lint format-check + +test: + uv run pytest + +verify: check test + +build: + uv build + +package-check: + uv run twine check dist/* + +clean: + rm -rf build/ + rm -rf dist/ + rm -rf *.egg-info + rm -rf .venv/ + find . -type d -name __pycache__ -exec rm -rf {} + + find . 
-type f -name "*.pyc" -delete diff --git a/integrations/python/dataloader/README.md b/integrations/python/dataloader/README.md new file mode 100644 index 000000000..34ffbdb87 --- /dev/null +++ b/integrations/python/dataloader/README.md @@ -0,0 +1,32 @@ +# OpenHouse DataLoader + +A Python library for distributed data loading of OpenHouse tables + +## Quickstart + +```python +from openhouse.dataloader import OpenHouseDataLoader + +loader = OpenHouseDataLoader("my_database", "my_table") + +for split in loader: + # Get table properties + split.table_properties + + # Load data + for batch in split: + process(batch) +``` + +## Development + +```bash +# Set up environment +make sync + +# Run tests +make test + +# See all available commands +make help +``` \ No newline at end of file diff --git a/integrations/python/dataloader/pyproject.toml b/integrations/python/dataloader/pyproject.toml new file mode 100644 index 000000000..84247da36 --- /dev/null +++ b/integrations/python/dataloader/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "openhouse.dataloader" +dynamic = ["version"] +description = "A Python library for distributed data loading of OpenHouse tables" +readme = "README.md" +requires-python = ">=3.12" +license = {text = "BSD-2-Clause"} +keywords = ["openhouse", "data-loader", "lakehouse", "iceberg", "datafusion"] +dependencies = ["datafusion==51.0.0", "pyiceberg~=0.11.0"] + +[project.optional-dependencies] +dev = ["ruff>=0.9.0", "pytest>=8.0.0", "twine>=6.0.0"] + +[tool.hatch.version] +source = "vcs" +raw-options = { root = "../../.." 
} + +[tool.hatch.build.targets.wheel] +packages = ["src/openhouse"] + +[tool.ruff] +line-length = 120 +target-version = "py312" + +[tool.ruff.lint] +select = ["E", "F", "I", "UP", "B", "SIM"] + +[tool.ruff.format] +quote-style = "double" diff --git a/integrations/python/dataloader/src/openhouse/__init__.py b/integrations/python/dataloader/src/openhouse/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/integrations/python/dataloader/src/openhouse/dataloader/__init__.py b/integrations/python/dataloader/src/openhouse/dataloader/__init__.py new file mode 100644 index 000000000..6d535a224 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/__init__.py @@ -0,0 +1,6 @@ +from importlib.metadata import version + +from openhouse.dataloader.data_loader import DataLoaderContext, OpenHouseDataLoader + +__version__ = version("openhouse.dataloader") +__all__ = ["OpenHouseDataLoader", "DataLoaderContext"] diff --git a/integrations/python/dataloader/src/openhouse/dataloader/_table_scan_context.py b/integrations/python/dataloader/src/openhouse/dataloader/_table_scan_context.py new file mode 100644 index 000000000..5c7b5aa9c --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/_table_scan_context.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from pyiceberg.expressions import AlwaysTrue, BooleanExpression +from pyiceberg.io import FileIO +from pyiceberg.schema import Schema +from pyiceberg.table.metadata import TableMetadata + + +@dataclass(frozen=True) +class TableScanContext: + """Table-level context for reading Iceberg data files. + + Created once per table scan by OpenHouseDataLoader and shared + across all DataLoaderSplit instances for that scan. 
+ + Attributes: + table_metadata: Full Iceberg table metadata (schema, properties, partition specs, etc.) + io: FileIO configured for the table's storage location + projected_schema: Subset of columns to read (equals table schema when no projection) + row_filter: Row-level filter expression pushed down to the scan + """ + + table_metadata: TableMetadata + io: FileIO + projected_schema: Schema + row_filter: BooleanExpression = AlwaysTrue() diff --git a/integrations/python/dataloader/src/openhouse/dataloader/data_loader.py b/integrations/python/dataloader/src/openhouse/dataloader/data_loader.py new file mode 100644 index 000000000..e90067da6 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/data_loader.py @@ -0,0 +1,57 @@ +from collections.abc import Iterable, Mapping, Sequence +from dataclasses import dataclass + +from openhouse.dataloader.data_loader_split import DataLoaderSplit +from openhouse.dataloader.table_identifier import TableIdentifier +from openhouse.dataloader.table_transformer import TableTransformer +from openhouse.dataloader.udf_registry import UDFRegistry + + +@dataclass +class DataLoaderContext: + """Context and customization for the DataLoader. + + Provides execution context (e.g. tenant, environment) and optional customizations + like table transformations applied before loading data. + + Args: + execution_context: Dictionary of execution context information (e.g. tenant, environment) + table_transformer: Transformation to apply to the table before loading (e.g. 
column masking) + udf_registry: UDFs required for the table transformation + """ + + execution_context: Mapping[str, str] | None = None + table_transformer: TableTransformer | None = None + udf_registry: UDFRegistry | None = None + + +class OpenHouseDataLoader: + """An API for distributed data loading of OpenHouse tables""" + + def __init__( + self, + database: str, + table: str, + branch: str | None = None, + columns: Sequence[str] | None = None, + context: DataLoaderContext | None = None, + ): + """ + Args: + database: Database name + table: Table name + branch: Optional branch name + columns: Column names to load, or None to load all columns + context: Data loader context + """ + self._table = TableIdentifier(database, table, branch) + self._columns = columns + self._context = context or DataLoaderContext() + + def __iter__(self) -> Iterable[DataLoaderSplit]: + """Iterate over data splits for distributed data loading of the table. + + Returns: + Iterable of DataLoaderSplit, each containing table_properties + """ + raise NotImplementedError diff --git a/integrations/python/dataloader/src/openhouse/dataloader/data_loader_split.py b/integrations/python/dataloader/src/openhouse/dataloader/data_loader_split.py new file mode 100644 index 000000000..0ae9db4d0 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/data_loader_split.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from collections.abc import Iterator, Mapping + +from datafusion.plan import LogicalPlan +from pyarrow import RecordBatch +from pyiceberg.io.pyarrow import ArrowScan +from pyiceberg.table import FileScanTask + +from openhouse.dataloader._table_scan_context import TableScanContext +from openhouse.dataloader.udf_registry import NoOpRegistry, UDFRegistry + + +class DataLoaderSplit: + """A single data split""" + + def __init__( + self, + plan: LogicalPlan, + file_scan_task: FileScanTask, + scan_context: TableScanContext, + udf_registry: UDFRegistry | None = None, + ): 
+ self._plan = plan + self._file_scan_task = file_scan_task + self._udf_registry = udf_registry or NoOpRegistry() + self._scan_context = scan_context + + @property + def table_properties(self) -> Mapping[str, str]: + """Properties of the table being loaded""" + return self._scan_context.table_metadata.properties + + def __iter__(self) -> Iterator[RecordBatch]: + """Reads the file scan task and yields Arrow RecordBatches. + + Uses PyIceberg's ArrowScan to handle format dispatch, schema resolution, + delete files, and partition spec lookups. + """ + ctx = self._scan_context + arrow_scan = ArrowScan( + table_metadata=ctx.table_metadata, + io=ctx.io, + projected_schema=ctx.projected_schema, + row_filter=ctx.row_filter, + ) + yield from arrow_scan.to_record_batches([self._file_scan_task]) diff --git a/integrations/python/dataloader/src/openhouse/dataloader/table_identifier.py b/integrations/python/dataloader/src/openhouse/dataloader/table_identifier.py new file mode 100644 index 000000000..5dce6e674 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/table_identifier.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass + + +@dataclass +class TableIdentifier: + """Identifier for a table in OpenHouse + + Args: + database: Database name + table: Table name + branch: Optional branch name + """ + + database: str + table: str + branch: str | None = None + + def __str__(self) -> str: + """Return the fully qualified table name.""" + base = f"{self.database}.{self.table}" + return f"{base}.{self.branch}" if self.branch else base diff --git a/integrations/python/dataloader/src/openhouse/dataloader/table_transformer.py b/integrations/python/dataloader/src/openhouse/dataloader/table_transformer.py new file mode 100644 index 000000000..3a226d8d4 --- /dev/null +++ b/integrations/python/dataloader/src/openhouse/dataloader/table_transformer.py @@ -0,0 +1,29 @@ +from abc import ABC, abstractmethod +from collections.abc import Mapping + +from datafusion.context 
# --- openhouse/dataloader/table_transformer.py ---
from abc import ABC, abstractmethod
from collections.abc import Mapping

from datafusion.context import SessionContext
from datafusion.dataframe import DataFrame

from openhouse.dataloader.table_identifier import TableIdentifier


class TableTransformer(ABC):
    """Interface for applying additional transformation logic to the data
    being loaded (e.g. column masking, row filtering)
    """

    @abstractmethod
    def transform(
        self, session_context: SessionContext, table: TableIdentifier, context: Mapping[str, str]
    ) -> DataFrame | None:
        """Applies transformation logic to the base table that is being loaded.

        Args:
            session_context: The DataFusion session the transformation is built against
            table: Identifier for the table
            context: Dictionary of context information (e.g. tenant, environment, etc.)

        Returns:
            The DataFrame representing the transformation. This is expected to read from the exact
            base table identifier passed in as input. If no transformation is required, None is returned.
        """
        pass


# --- openhouse/dataloader/udf_registry.py ---
from abc import ABC, abstractmethod

from datafusion.context import SessionContext


class UDFRegistry(ABC):
    """Used to register DataFusion UDFs"""

    @abstractmethod
    def register_udfs(self, session_context: SessionContext) -> None:
        """Registers UDFs with DataFusion

        Args:
            session_context: The session context to register the UDFs in
        """
        pass


class NoOpRegistry(UDFRegistry):
    """Default no-op UDF registry implementation.

    This registry performs no operations when register_udfs is called.
    Use this as the default when custom UDF registration is not needed,
    or subclass UDFRegistry to provide custom UDF registration logic.
    """

    def register_udfs(self, session_context: SessionContext) -> None:
        """No-op implementation that registers no UDFs.

        Args:
            session_context: The session context (unused)
        """
        pass


# --- tests/__init__.py (new, empty file) ---


# --- tests/test_data_loader.py ---
from openhouse.dataloader import DataLoaderContext, OpenHouseDataLoader, __version__
from openhouse.dataloader.data_loader_split import DataLoaderSplit


def test_package_imports():
    """Test that package imports work correctly"""
    assert OpenHouseDataLoader is not None
    assert DataLoaderContext is not None
    assert DataLoaderSplit is not None
    assert isinstance(__version__, str)
    assert len(__version__) > 0


def test_data_loader():
    """Test placeholder until real tests are added"""
    pass


# --- tests/test_data_loader_split.py (continues in next chunk) ---
"""Tests for DataLoaderSplit functionality."""

import os

import pyarrow as pa
import pyarrow.orc as orc
import pyarrow.parquet as pq
import pytest
from datafusion.context import SessionContext
from pyiceberg.io import load_file_io
from pyiceberg.manifest import DataFile, FileFormat
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC
from pyiceberg.schema import Schema
from pyiceberg.table import FileScanTask
from pyiceberg.table.metadata import new_table_metadata
from pyiceberg.table.name_mapping import create_mapping_from_schema
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER
from pyiceberg.types import BooleanType, DoubleType, LongType, NestedField, StringType
# --- tests/test_data_loader_split.py (continued) ---
from pyiceberg.types import BooleanType, DoubleType, LongType, NestedField, StringType

from openhouse.dataloader.data_loader_split import DataLoaderSplit, TableScanContext

# Shared parametrization axis: every test in this module runs once per file format.
FILE_FORMATS = pytest.mark.parametrize("file_format", [FileFormat.PARQUET, FileFormat.ORC], ids=["parquet", "orc"])


def _create_test_split(
    tmp_path,
    table: pa.Table,
    file_format: FileFormat,
    iceberg_schema: Schema,
) -> DataLoaderSplit:
    """Create a DataLoaderSplit for testing by writing data to disk.

    Args:
        tmp_path: Pytest temporary directory path for test files
        table: PyArrow table containing test data
        file_format: File format to use (PARQUET or ORC)
        iceberg_schema: Iceberg schema with field IDs for column mapping

    Returns:
        DataLoaderSplit configured to read the written test file
    """
    suffix = file_format.name.lower()
    file_path = str(tmp_path / f"test.{suffix}")

    properties = {}
    if file_format == FileFormat.PARQUET:
        # Parquet: embed Iceberg field IDs directly in the column metadata.
        tagged_fields = [
            column.with_metadata({b"PARQUET:field_id": str(idx + 1).encode()})
            for idx, column in enumerate(table.schema)
        ]
        pq.write_table(table.cast(pa.schema(tagged_fields)), file_path)
    else:
        # ORC: no field-id metadata, so publish a name mapping as a table property.
        orc.write_table(table, file_path)
        name_mapping = create_mapping_from_schema(iceberg_schema)
        properties["schema.name-mapping.default"] = name_mapping.model_dump_json()

    metadata = new_table_metadata(
        schema=iceberg_schema,
        partition_spec=UNPARTITIONED_PARTITION_SPEC,
        sort_order=UNSORTED_SORT_ORDER,
        location=str(tmp_path),
        properties=properties,
    )

    scan_context = TableScanContext(
        table_metadata=metadata,
        io=load_file_io(properties={}, location=file_path),
        projected_schema=iceberg_schema,
    )

    # The split requires a logical plan; a trivial SELECT suffices for tests.
    session = SessionContext()
    plan = session.sql("SELECT 1 as a").logical_plan()

    data_file = DataFile.from_args(
        file_path=file_path,
        file_format=file_format,
        record_count=table.num_rows,
        file_size_in_bytes=os.path.getsize(file_path),
    )
    # Attach the unpartitioned spec id expected by ArrowScan.
    data_file._spec_id = 0
    scan_task = FileScanTask(data_file=data_file)

    return DataLoaderSplit(
        plan=plan,
        file_scan_task=scan_task,
        scan_context=scan_context,
    )
@FILE_FORMATS
def test_split_iteration_returns_all_rows_with_correct_values(tmp_path, file_format):
    """Test that iterating a DataLoaderSplit returns all rows with correct values and types."""
    iceberg_schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=False),
        NestedField(field_id=2, name="name", field_type=StringType(), required=False),
        NestedField(field_id=3, name="value", field_type=DoubleType(), required=False),
        NestedField(field_id=4, name="flag", field_type=BooleanType(), required=False),
    )

    expected_data = {
        "id": [1, 2, 3],
        "name": ["alice", "bob", "charlie"],
        "value": [1.1, 2.2, 3.3],
        "flag": [True, False, True],
    }
    source_table = pa.table(
        {
            "id": pa.array(expected_data["id"], type=pa.int64()),
            "name": pa.array(expected_data["name"], type=pa.string()),
            "value": pa.array(expected_data["value"], type=pa.float64()),
            "flag": pa.array(expected_data["flag"], type=pa.bool_()),
        }
    )

    loader_split = _create_test_split(tmp_path, source_table, file_format, iceberg_schema)
    record_batches = list(loader_split)

    assert len(record_batches) >= 1, "Split iteration should return at least one batch"

    total_rows = sum(batch.num_rows for batch in record_batches)
    assert total_rows == len(expected_data["id"]), f"Expected {len(expected_data['id'])} rows, got {total_rows}"

    # Sort by ID to ensure deterministic comparison (row order is not guaranteed)
    assembled = pa.Table.from_batches(record_batches).sort_by("id")
    assert assembled.column("id").to_pylist() == expected_data["id"], "ID column values mismatch"
    assert assembled.column("name").to_pylist() == expected_data["name"], "Name column values mismatch"
    assert assembled.column("value").to_pylist() == expected_data["value"], "Value column values mismatch"
    assert assembled.column("flag").to_pylist() == expected_data["flag"], "Flag column values mismatch"


@FILE_FORMATS
def test_split_handles_wide_tables_with_many_columns(tmp_path, file_format):
    """Test that DataLoaderSplit correctly handles tables with many columns."""
    num_cols = 50
    iceberg_schema = Schema(*[NestedField(i + 1, f"col_{i}", LongType(), required=False) for i in range(num_cols)])
    wide_table = pa.table({f"col_{i}": list(range(5)) for i in range(num_cols)})

    loader_split = _create_test_split(tmp_path, wide_table, file_format, iceberg_schema)
    assembled = pa.Table.from_batches(list(loader_split))

    assert assembled.num_rows == 5, f"Expected 5 rows, got {assembled.num_rows}"
    assert assembled.num_columns == num_cols, f"Expected {num_cols} columns, got {assembled.num_columns}"

    for i in range(num_cols):
        assert assembled.column(f"col_{i}").to_pylist() == list(range(5)), f"Column col_{i} values mismatch"


# NOTE(review): the patch continues here with integrations/python/dataloader/uv.lock —
# auto-generated lock-file data (package versions and hashes), not reproduced in this review.
"https://files.pythonhosted.org/packages/39/91/d9ae9a66b01102a18cd16db0cf4cd54187ffe10f0865cc80071a4104fbb3/cachetools-6.2.6.tar.gz", hash = "sha256:16c33e1f276b9a9c0b49ab5782d901e3ad3de0dd6da9bf9bcd29ac5672f2f9e6", size = 32363, upload-time = "2026-01-27T20:32:59.956Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/45/f458fa2c388e79dd9d8b9b0c99f1d31b568f27388f2fdba7bb66bbc0c6ed/cachetools-6.2.6-py3-none-any.whl", hash = "sha256:8c9717235b3c651603fff0076db52d6acbfd1b338b8ed50256092f7ce9c85bda", size = 11668, upload-time = "2026-01-27T20:32:58.527Z" }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash 
= "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", 
size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = 
"sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", 
size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cryptography" +version = "46.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/19/f748958276519adf6a0c1e79e7b8860b4830dda55ccdf29f2719b5fc499c/cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", size = 749301, upload-time = "2026-01-28T00:24:37.379Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/91/874b8910903159043b5c6a123b7e79c4559ddd1896e38967567942635778/cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", size = 4275871, upload-time = "2026-01-28T00:23:09.439Z" }, + { url = "https://files.pythonhosted.org/packages/c0/35/690e809be77896111f5b195ede56e4b4ed0435b428c2f2b6d35046fbb5e8/cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", size = 4423124, upload-time = "2026-01-28T00:23:11.529Z" }, + { url = "https://files.pythonhosted.org/packages/1a/5b/a26407d4f79d61ca4bebaa9213feafdd8806dc69d3d290ce24996d3cfe43/cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = 
"sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", size = 4277090, upload-time = "2026-01-28T00:23:13.123Z" }, + { url = "https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", size = 4455157, upload-time = "2026-01-28T00:23:16.443Z" }, + { url = "https://files.pythonhosted.org/packages/0a/05/19d849cf4096448779d2dcc9bb27d097457dac36f7273ffa875a93b5884c/cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", size = 3981078, upload-time = "2026-01-28T00:23:17.838Z" }, + { url = "https://files.pythonhosted.org/packages/e6/89/f7bac81d66ba7cde867a743ea5b37537b32b5c633c473002b26a226f703f/cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", size = 4276213, upload-time = "2026-01-28T00:23:19.257Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f7/6d43cbaddf6f65b24816e4af187d211f0bc536a29961f69faedc48501d8e/cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", size = 4454641, upload-time = "2026-01-28T00:23:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/9e/4f/ebd0473ad656a0ac912a16bd07db0f5d85184924e14fc88feecae2492834/cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", size = 4405159, upload-time = "2026-01-28T00:23:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d1/f7/7923886f32dc47e27adeff8246e976d77258fd2aa3efdd1754e4e323bf49/cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", size = 
4666059, upload-time = "2026-01-28T00:23:26.766Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f5/559c25b77f40b6bf828eabaf988efb8b0e17b573545edb503368ca0a2a03/cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", size = 4264508, upload-time = "2026-01-28T00:23:34.264Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/551fa162d33074b660dc35c9bc3616fefa21a0e8c1edd27b92559902e408/cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", size = 4409080, upload-time = "2026-01-28T00:23:35.793Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/4d8d129a755f5d6df1bbee69ea2f35ebfa954fa1847690d1db2e8bca46a5/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", size = 4270039, upload-time = "2026-01-28T00:23:37.263Z" }, + { url = "https://files.pythonhosted.org/packages/43/ae/9f03d5f0c0c00e85ecb34f06d3b79599f20630e4db91b8a6e56e8f83d410/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", size = 4442307, upload-time = "2026-01-28T00:23:40.56Z" }, + { url = "https://files.pythonhosted.org/packages/8b/22/e0f9f2dae8040695103369cf2283ef9ac8abe4d51f68710bec2afd232609/cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", size = 3959253, upload-time = "2026-01-28T00:23:42.827Z" }, + { url = "https://files.pythonhosted.org/packages/01/5b/6a43fcccc51dae4d101ac7d378a8724d1ba3de628a24e11bf2f4f43cba4d/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", size = 4269372, upload-time = 
"2026-01-28T00:23:44.655Z" }, + { url = "https://files.pythonhosted.org/packages/83/17/259409b8349aa10535358807a472c6a695cf84f106022268d31cea2b6c97/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", size = 4441254, upload-time = "2026-01-28T00:23:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/e4a1b0c989b00cee5ffa0764401767e2d1cf59f45530963b894129fd5dce/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", size = 4396520, upload-time = "2026-01-28T00:23:50.26Z" }, + { url = "https://files.pythonhosted.org/packages/b3/81/ba8fd9657d27076eb40d6a2f941b23429a3c3d2f56f5a921d6b936a27bc9/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", size = 4651479, upload-time = "2026-01-28T00:23:51.674Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cc/8f3224cbb2a928de7298d6ed4790f5ebc48114e02bdc9559196bfb12435d/cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", size = 4275419, upload-time = "2026-01-28T00:23:58.364Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/4a18faa7a872d00e4264855134ba82d23546c850a70ff209e04ee200e76f/cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", size = 4419058, upload-time = "2026-01-28T00:23:59.867Z" }, + { url = "https://files.pythonhosted.org/packages/ee/64/6651969409821d791ba12346a124f55e1b76f66a819254ae840a965d4b9c/cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", size = 4278151, upload-time = "2026-01-28T00:24:01.731Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/a7/20c5701e2cd3e1dfd7a19d2290c522a5f435dd30957d431dcb531d0f1413/cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", size = 4451617, upload-time = "2026-01-28T00:24:05.403Z" }, + { url = "https://files.pythonhosted.org/packages/00/dc/3e16030ea9aa47b63af6524c354933b4fb0e352257c792c4deeb0edae367/cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", size = 3977774, upload-time = "2026-01-28T00:24:06.851Z" }, + { url = "https://files.pythonhosted.org/packages/42/c8/ad93f14118252717b465880368721c963975ac4b941b7ef88f3c56bf2897/cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", size = 4277008, upload-time = "2026-01-28T00:24:08.926Z" }, + { url = "https://files.pythonhosted.org/packages/03/c3/c90a2cb358de4ac9309b26acf49b2a100957e1ff5cc1e98e6c4996576710/cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", size = 4451216, upload-time = "2026-01-28T00:24:13.975Z" }, + { url = "https://files.pythonhosted.org/packages/96/2c/8d7f4171388a10208671e181ca43cdc0e596d8259ebacbbcfbd16de593da/cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", size = 4404299, upload-time = "2026-01-28T00:24:16.169Z" }, + { url = "https://files.pythonhosted.org/packages/e9/23/cbb2036e450980f65c6e0a173b73a56ff3bccd8998965dea5cc9ddd424a5/cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", size = 4664837, upload-time = "2026-01-28T00:24:17.629Z" }, +] + +[[package]] +name = "datafusion" +version = "51.0.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "pyarrow" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2c/6d/d0e2632c93bbcca0687eeda672af3f92042ecd349df7be55da86253594a9/datafusion-51.0.0.tar.gz", hash = "sha256:1887c7d5ed3ae5d9f389e62ba869864afad4006a3f7c99ef0ca4707782a7838f", size = 193751, upload-time = "2026-01-09T13:23:41.562Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/a9/7717cec053a3309be3020fe3147e3f76e5bf21295fa8adf9b52dd44ea3ff/datafusion-51.0.0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:0c0d265fe3ee0dcbfa7cc3c64c7cd94fc493f38418bd79debb7ec29f29b7176e", size = 30389413, upload-time = "2026-01-09T13:23:23.266Z" }, + { url = "https://files.pythonhosted.org/packages/55/45/72c9874fd3740a4cb9d55049fdbae0df512dc5433e9f1176f3cfd970f1a1/datafusion-51.0.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:43e6011db86e950bf9a21ed73cc089c2346b340a41a4f1044268af6c3a357acc", size = 26982206, upload-time = "2026-01-09T13:23:27.437Z" }, + { url = "https://files.pythonhosted.org/packages/21/ac/b32ba1f25d38fc16e7623cc4bfb7bd68db61be2ef27b2d9969ea5c865765/datafusion-51.0.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e76803907150159aa059d5cc9291645bbaac1b6a46d07e56035118d327b741ae", size = 33246117, upload-time = "2026-01-09T13:23:30.981Z" }, + { url = "https://files.pythonhosted.org/packages/0b/4e/437121422ef010690fc3cdd7f080203e986ba00e0e3c3b577e03f5b54ca2/datafusion-51.0.0-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9d0cfabfe1853994adc2e6e9da5f36c1eb061102e34a2f1101fa935c6991c9e1", size = 31421867, upload-time = "2026-01-09T13:23:34.436Z" }, + { url = "https://files.pythonhosted.org/packages/db/fc/58cf27fcb85b2fd2a698253ae46213b1cbda784407e205c148f4006c1429/datafusion-51.0.0-cp310-abi3-win_amd64.whl", hash = "sha256:fd5f9abfd6669062debf0658d13e4583234c89d4df95faf381927b11cea411f5", size = 
32517679, upload-time = "2026-01-09T13:23:39.615Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496, upload-time = "2026-01-09T15:21:35.562Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838, upload-time = "2026-01-09T15:21:34.041Z" }, +] + +[[package]] +name = "id" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/04/c2156091427636080787aac190019dc64096e56a23b7364d3c1764ee3a06/id-1.6.1.tar.gz", hash = "sha256:d0732d624fb46fd4e7bc4e5152f00214450953b9e772c182c1c22964def1a069", size = 18088, upload-time = "2026-02-04T16:19:41.26Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/42/77/de194443bf38daed9452139e960c632b0ef9f9a5dd9ce605fdf18ca9f1b1/id-1.6.1-py3-none-any.whl", hash = "sha256:f5ec41ed2629a508f5d0988eda142e190c9c6da971100612c4de9ad9f9b237ca", size = 14689, upload-time = "2026-02-04T16:19:40.051Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = 
"sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = "jaraco-context" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/9c/a788f5bb29c61e456b8ee52ce76dbdd32fd72cd73dd67bc95f42c7a8d13c/jaraco_context-6.1.0.tar.gz", hash = "sha256:129a341b0a85a7db7879e22acd66902fda67882db771754574338898b2d5d86f", size = 15850, upload-time = "2026-01-13T02:53:53.847Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/48/aa685dbf1024c7bd82bede569e3a85f82c32fd3d79ba5fea578f0159571a/jaraco_context-6.1.0-py3-none-any.whl", hash = "sha256:a43b5ed85815223d0d3cfdb6d7ca0d2bc8946f28f30b6f3216bda070f68badda", size = 7065, upload-time = "2026-01-13T02:53:53.031Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/27/056e0638a86749374d6f57d0b0db39f29509cce9313cf91bdc0ac4d91084/jaraco_functools-4.4.0.tar.gz", hash = "sha256:da21933b0417b89515562656547a77b4931f98176eb173644c0d35032a33d6bb", size = 19943, upload-time = "2025-12-21T09:29:43.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/c4/813bb09f0985cb21e959f21f2464169eca882656849adf727ac7bb7e1767/jaraco_functools-4.4.0-py3-none-any.whl", hash = "sha256:9eec1e36f45c818d9bf307c8948eb03b2b56cd44087b3cdc989abca1f20b9176", size = 10481, upload-time = "2025-12-21T09:29:42.27Z" }, +] + +[[package]] +name = "jeepney" +version = "0.9.0" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jaraco-classes" }, + { name = "jaraco-context" }, + { name = "jaraco-functools" }, + { name = "jeepney", marker = "sys_platform == 'linux'" }, + { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, + { name = "secretstorage", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = 
"sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mmh3" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/af/f28c2c2f51f31abb4725f9a64bc7863d5f491f6539bd26aee2a1d21a649e/mmh3-5.2.0.tar.gz", hash = "sha256:1efc8fec8478e9243a78bb993422cf79f8ff85cb4cf6b79647480a31e0d950a8", size = 33582, upload-time = "2025-07-29T07:43:48.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/6a/d5aa7edb5c08e0bd24286c7d08341a0446f9a2fbbb97d96a8a6dd81935ee/mmh3-5.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:384eda9361a7bf83a85e09447e1feafe081034af9dd428893701b959230d84be", size = 56141, upload-time = "2025-07-29T07:42:13.456Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/49/131d0fae6447bc4a7299ebdb1a6fb9d08c9f8dcf97d75ea93e8152ddf7ab/mmh3-5.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c9da0d568569cc87315cb063486d761e38458b8ad513fedd3dc9263e1b81bcd", size = 40681, upload-time = "2025-07-29T07:42:14.306Z" }, + { url = "https://files.pythonhosted.org/packages/8f/6f/9221445a6bcc962b7f5ff3ba18ad55bba624bacdc7aa3fc0a518db7da8ec/mmh3-5.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86d1be5d63232e6eb93c50881aea55ff06eb86d8e08f9b5417c8c9b10db9db96", size = 40062, upload-time = "2025-07-29T07:42:15.08Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d4/6bb2d0fef81401e0bb4c297d1eb568b767de4ce6fc00890bc14d7b51ecc4/mmh3-5.2.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bf7bee43e17e81671c447e9c83499f53d99bf440bc6d9dc26a841e21acfbe094", size = 97333, upload-time = "2025-07-29T07:42:16.436Z" }, + { url = "https://files.pythonhosted.org/packages/44/e0/ccf0daff8134efbb4fbc10a945ab53302e358c4b016ada9bf97a6bdd50c1/mmh3-5.2.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7aa18cdb58983ee660c9c400b46272e14fa253c675ed963d3812487f8ca42037", size = 103310, upload-time = "2025-07-29T07:42:17.796Z" }, + { url = "https://files.pythonhosted.org/packages/02/63/1965cb08a46533faca0e420e06aff8bbaf9690a6f0ac6ae6e5b2e4544687/mmh3-5.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae9d032488fcec32d22be6542d1a836f00247f40f320844dbb361393b5b22773", size = 106178, upload-time = "2025-07-29T07:42:19.281Z" }, + { url = "https://files.pythonhosted.org/packages/c2/41/c883ad8e2c234013f27f92061200afc11554ea55edd1bcf5e1accd803a85/mmh3-5.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1861fb6b1d0453ed7293200139c0a9011eeb1376632e048e3766945b13313c5", size = 113035, upload-time = 
"2025-07-29T07:42:20.356Z" }, + { url = "https://files.pythonhosted.org/packages/df/b5/1ccade8b1fa625d634a18bab7bf08a87457e09d5ec8cf83ca07cbea9d400/mmh3-5.2.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:99bb6a4d809aa4e528ddfe2c85dd5239b78b9dd14be62cca0329db78505e7b50", size = 120784, upload-time = "2025-07-29T07:42:21.377Z" }, + { url = "https://files.pythonhosted.org/packages/77/1c/919d9171fcbdcdab242e06394464ccf546f7d0f3b31e0d1e3a630398782e/mmh3-5.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1f8d8b627799f4e2fcc7c034fed8f5f24dc7724ff52f69838a3d6d15f1ad4765", size = 99137, upload-time = "2025-07-29T07:42:22.344Z" }, + { url = "https://files.pythonhosted.org/packages/66/8a/1eebef5bd6633d36281d9fc83cf2e9ba1ba0e1a77dff92aacab83001cee4/mmh3-5.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b5995088dd7023d2d9f310a0c67de5a2b2e06a570ecfd00f9ff4ab94a67cde43", size = 98664, upload-time = "2025-07-29T07:42:23.269Z" }, + { url = "https://files.pythonhosted.org/packages/13/41/a5d981563e2ee682b21fb65e29cc0f517a6734a02b581359edd67f9d0360/mmh3-5.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1a5f4d2e59d6bba8ef01b013c472741835ad961e7c28f50c82b27c57748744a4", size = 106459, upload-time = "2025-07-29T07:42:24.238Z" }, + { url = "https://files.pythonhosted.org/packages/24/31/342494cd6ab792d81e083680875a2c50fa0c5df475ebf0b67784f13e4647/mmh3-5.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fd6e6c3d90660d085f7e73710eab6f5545d4854b81b0135a3526e797009dbda3", size = 110038, upload-time = "2025-07-29T07:42:25.629Z" }, + { url = "https://files.pythonhosted.org/packages/28/44/efda282170a46bb4f19c3e2b90536513b1d821c414c28469a227ca5a1789/mmh3-5.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c4a2f3d83879e3de2eb8cbf562e71563a8ed15ee9b9c2e77ca5d9f73072ac15c", size = 97545, upload-time = "2025-07-29T07:42:27.04Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/8f/534ae319c6e05d714f437e7206f78c17e66daca88164dff70286b0e8ea0c/mmh3-5.2.0-cp312-cp312-win32.whl", hash = "sha256:2421b9d665a0b1ad724ec7332fb5a98d075f50bc51a6ff854f3a1882bd650d49", size = 40805, upload-time = "2025-07-29T07:42:28.032Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f6/f6abdcfefcedab3c964868048cfe472764ed358c2bf6819a70dd4ed4ed3a/mmh3-5.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d80005b7634a3a2220f81fbeb94775ebd12794623bb2e1451701ea732b4aa3", size = 41597, upload-time = "2025-07-29T07:42:28.894Z" }, + { url = "https://files.pythonhosted.org/packages/15/fd/f7420e8cbce45c259c770cac5718badf907b302d3a99ec587ba5ce030237/mmh3-5.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:3d6bfd9662a20c054bc216f861fa330c2dac7c81e7fb8307b5e32ab5b9b4d2e0", size = 39350, upload-time = "2025-07-29T07:42:29.794Z" }, + { url = "https://files.pythonhosted.org/packages/d8/fa/27f6ab93995ef6ad9f940e96593c5dd24744d61a7389532b0fec03745607/mmh3-5.2.0-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:e79c00eba78f7258e5b354eccd4d7907d60317ced924ea4a5f2e9d83f5453065", size = 40874, upload-time = "2025-07-29T07:42:30.662Z" }, + { url = "https://files.pythonhosted.org/packages/11/9c/03d13bcb6a03438bc8cac3d2e50f80908d159b31a4367c2e1a7a077ded32/mmh3-5.2.0-cp313-cp313-android_21_x86_64.whl", hash = "sha256:956127e663d05edbeec54df38885d943dfa27406594c411139690485128525de", size = 42012, upload-time = "2025-07-29T07:42:31.539Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/0865d9765408a7d504f1789944e678f74e0888b96a766d578cb80b040999/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:c3dca4cb5b946ee91b3d6bb700d137b1cd85c20827f89fdf9c16258253489044", size = 39197, upload-time = "2025-07-29T07:42:32.374Z" }, + { url = "https://files.pythonhosted.org/packages/3e/12/76c3207bd186f98b908b6706c2317abb73756d23a4e68ea2bc94825b9015/mmh3-5.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = 
"sha256:e651e17bfde5840e9e4174b01e9e080ce49277b70d424308b36a7969d0d1af73", size = 39840, upload-time = "2025-07-29T07:42:33.227Z" }, + { url = "https://files.pythonhosted.org/packages/5d/0d/574b6cce5555c9f2b31ea189ad44986755eb14e8862db28c8b834b8b64dc/mmh3-5.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:9f64bf06f4bf623325fda3a6d02d36cd69199b9ace99b04bb2d7fd9f89688504", size = 40644, upload-time = "2025-07-29T07:42:34.099Z" }, + { url = "https://files.pythonhosted.org/packages/52/82/3731f8640b79c46707f53ed72034a58baad400be908c87b0088f1f89f986/mmh3-5.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ddc63328889bcaee77b743309e5c7d2d52cee0d7d577837c91b6e7cc9e755e0b", size = 56153, upload-time = "2025-07-29T07:42:35.031Z" }, + { url = "https://files.pythonhosted.org/packages/4f/34/e02dca1d4727fd9fdeaff9e2ad6983e1552804ce1d92cc796e5b052159bb/mmh3-5.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bb0fdc451fb6d86d81ab8f23d881b8d6e37fc373a2deae1c02d27002d2ad7a05", size = 40684, upload-time = "2025-07-29T07:42:35.914Z" }, + { url = "https://files.pythonhosted.org/packages/8f/36/3dee40767356e104967e6ed6d102ba47b0b1ce2a89432239b95a94de1b89/mmh3-5.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b29044e1ffdb84fe164d0a7ea05c7316afea93c00f8ed9449cf357c36fc4f814", size = 40057, upload-time = "2025-07-29T07:42:36.755Z" }, + { url = "https://files.pythonhosted.org/packages/31/58/228c402fccf76eb39a0a01b8fc470fecf21965584e66453b477050ee0e99/mmh3-5.2.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:58981d6ea9646dbbf9e59a30890cbf9f610df0e4a57dbfe09215116fd90b0093", size = 97344, upload-time = "2025-07-29T07:42:37.675Z" }, + { url = "https://files.pythonhosted.org/packages/34/82/fc5ce89006389a6426ef28e326fc065b0fbaaed230373b62d14c889f47ea/mmh3-5.2.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:7e5634565367b6d98dc4aa2983703526ef556b3688ba3065edb4b9b90ede1c54", size = 103325, upload-time = "2025-07-29T07:42:38.591Z" }, + { url = "https://files.pythonhosted.org/packages/09/8c/261e85777c6aee1ebd53f2f17e210e7481d5b0846cd0b4a5c45f1e3761b8/mmh3-5.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0271ac12415afd3171ab9a3c7cbfc71dee2c68760a7dc9d05bf8ed6ddfa3a7a", size = 106240, upload-time = "2025-07-29T07:42:39.563Z" }, + { url = "https://files.pythonhosted.org/packages/70/73/2f76b3ad8a3d431824e9934403df36c0ddacc7831acf82114bce3c4309c8/mmh3-5.2.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:45b590e31bc552c6f8e2150ff1ad0c28dd151e9f87589e7eaf508fbdd8e8e908", size = 113060, upload-time = "2025-07-29T07:42:40.585Z" }, + { url = "https://files.pythonhosted.org/packages/9f/b9/7ea61a34e90e50a79a9d87aa1c0b8139a7eaf4125782b34b7d7383472633/mmh3-5.2.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bdde97310d59604f2a9119322f61b31546748499a21b44f6715e8ced9308a6c5", size = 120781, upload-time = "2025-07-29T07:42:41.618Z" }, + { url = "https://files.pythonhosted.org/packages/0f/5b/ae1a717db98c7894a37aeedbd94b3f99e6472a836488f36b6849d003485b/mmh3-5.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fc9c5f280438cf1c1a8f9abb87dc8ce9630a964120cfb5dd50d1e7ce79690c7a", size = 99174, upload-time = "2025-07-29T07:42:42.587Z" }, + { url = "https://files.pythonhosted.org/packages/e3/de/000cce1d799fceebb6d4487ae29175dd8e81b48e314cba7b4da90bcf55d7/mmh3-5.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c903e71fd8debb35ad2a4184c1316b3cb22f64ce517b4e6747f25b0a34e41266", size = 98734, upload-time = "2025-07-29T07:42:43.996Z" }, + { url = "https://files.pythonhosted.org/packages/79/19/0dc364391a792b72fbb22becfdeacc5add85cc043cd16986e82152141883/mmh3-5.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:eed4bba7ff8a0d37106ba931ab03bdd3915fbb025bcf4e1f0aa02bc8114960c5", size = 106493, upload-time = "2025-07-29T07:42:45.07Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b1/bc8c28e4d6e807bbb051fefe78e1156d7f104b89948742ad310612ce240d/mmh3-5.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1fdb36b940e9261aff0b5177c5b74a36936b902f473180f6c15bde26143681a9", size = 110089, upload-time = "2025-07-29T07:42:46.122Z" }, + { url = "https://files.pythonhosted.org/packages/3b/a2/d20f3f5c95e9c511806686c70d0a15479cc3941c5f322061697af1c1ff70/mmh3-5.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7303aab41e97adcf010a09efd8f1403e719e59b7705d5e3cfed3dd7571589290", size = 97571, upload-time = "2025-07-29T07:42:47.18Z" }, + { url = "https://files.pythonhosted.org/packages/7b/23/665296fce4f33488deec39a750ffd245cfc07aafb0e3ef37835f91775d14/mmh3-5.2.0-cp313-cp313-win32.whl", hash = "sha256:03e08c6ebaf666ec1e3d6ea657a2d363bb01effd1a9acfe41f9197decaef0051", size = 40806, upload-time = "2025-07-29T07:42:48.166Z" }, + { url = "https://files.pythonhosted.org/packages/59/b0/92e7103f3b20646e255b699e2d0327ce53a3f250e44367a99dc8be0b7c7a/mmh3-5.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:7fddccd4113e7b736706e17a239a696332360cbaddf25ae75b57ba1acce65081", size = 41600, upload-time = "2025-07-29T07:42:49.371Z" }, + { url = "https://files.pythonhosted.org/packages/99/22/0b2bd679a84574647de538c5b07ccaa435dbccc37815067fe15b90fe8dad/mmh3-5.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:fa0c966ee727aad5406d516375593c5f058c766b21236ab8985693934bb5085b", size = 39349, upload-time = "2025-07-29T07:42:50.268Z" }, + { url = "https://files.pythonhosted.org/packages/f7/ca/a20db059a8a47048aaf550da14a145b56e9c7386fb8280d3ce2962dcebf7/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e5015f0bb6eb50008bed2d4b1ce0f2a294698a926111e4bb202c0987b4f89078", size = 39209, upload-time = "2025-07-29T07:42:51.559Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/dd/e5094799d55c7482d814b979a0fd608027d0af1b274bfb4c3ea3e950bfd5/mmh3-5.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e0f3ed828d709f5b82d8bfe14f8856120718ec4bd44a5b26102c3030a1e12501", size = 39843, upload-time = "2025-07-29T07:42:52.536Z" }, + { url = "https://files.pythonhosted.org/packages/f4/6b/7844d7f832c85400e7cc89a1348e4e1fdd38c5a38415bb5726bbb8fcdb6c/mmh3-5.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:f35727c5118aba95f0397e18a1a5b8405425581bfe53e821f0fb444cbdc2bc9b", size = 40648, upload-time = "2025-07-29T07:42:53.392Z" }, + { url = "https://files.pythonhosted.org/packages/1f/bf/71f791f48a21ff3190ba5225807cbe4f7223360e96862c376e6e3fb7efa7/mmh3-5.2.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3bc244802ccab5220008cb712ca1508cb6a12f0eb64ad62997156410579a1770", size = 56164, upload-time = "2025-07-29T07:42:54.267Z" }, + { url = "https://files.pythonhosted.org/packages/70/1f/f87e3d34d83032b4f3f0f528c6d95a98290fcacf019da61343a49dccfd51/mmh3-5.2.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ff3d50dc3fe8a98059f99b445dfb62792b5d006c5e0b8f03c6de2813b8376110", size = 40692, upload-time = "2025-07-29T07:42:55.234Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e2/db849eaed07117086f3452feca8c839d30d38b830ac59fe1ce65af8be5ad/mmh3-5.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:37a358cc881fe796e099c1db6ce07ff757f088827b4e8467ac52b7a7ffdca647", size = 40068, upload-time = "2025-07-29T07:42:56.158Z" }, + { url = "https://files.pythonhosted.org/packages/df/6b/209af927207af77425b044e32f77f49105a0b05d82ff88af6971d8da4e19/mmh3-5.2.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b9a87025121d1c448f24f27ff53a5fe7b6ef980574b4a4f11acaabe702420d63", size = 97367, upload-time = "2025-07-29T07:42:57.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/e0/78adf4104c425606a9ce33fb351f790c76a6c2314969c4a517d1ffc92196/mmh3-5.2.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ba55d6ca32eeef8b2625e1e4bfc3b3db52bc63014bd7e5df8cc11bf2b036b12", size = 103306, upload-time = "2025-07-29T07:42:58.522Z" }, + { url = "https://files.pythonhosted.org/packages/a3/79/c2b89f91b962658b890104745b1b6c9ce38d50a889f000b469b91eeb1b9e/mmh3-5.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9ff37ba9f15637e424c2ab57a1a590c52897c845b768e4e0a4958084ec87f22", size = 106312, upload-time = "2025-07-29T07:42:59.552Z" }, + { url = "https://files.pythonhosted.org/packages/4b/14/659d4095528b1a209be90934778c5ffe312177d51e365ddcbca2cac2ec7c/mmh3-5.2.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a094319ec0db52a04af9fdc391b4d39a1bc72bc8424b47c4411afb05413a44b5", size = 113135, upload-time = "2025-07-29T07:43:00.745Z" }, + { url = "https://files.pythonhosted.org/packages/8d/6f/cd7734a779389a8a467b5c89a48ff476d6f2576e78216a37551a97e9e42a/mmh3-5.2.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c5584061fd3da584659b13587f26c6cad25a096246a481636d64375d0c1f6c07", size = 120775, upload-time = "2025-07-29T07:43:02.124Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ca/8256e3b96944408940de3f9291d7e38a283b5761fe9614d4808fcf27bd62/mmh3-5.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecbfc0437ddfdced5e7822d1ce4855c9c64f46819d0fdc4482c53f56c707b935", size = 99178, upload-time = "2025-07-29T07:43:03.182Z" }, + { url = "https://files.pythonhosted.org/packages/8a/32/39e2b3cf06b6e2eb042c984dab8680841ac2a0d3ca6e0bea30db1f27b565/mmh3-5.2.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:7b986d506a8e8ea345791897ba5d8ba0d9d8820cd4fc3e52dbe6de19388de2e7", size = 98738, upload-time = 
"2025-07-29T07:43:04.207Z" }, + { url = "https://files.pythonhosted.org/packages/61/d3/7bbc8e0e8cf65ebbe1b893ffa0467b7ecd1bd07c3bbf6c9db4308ada22ec/mmh3-5.2.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:38d899a156549da8ef6a9f1d6f7ef231228d29f8f69bce2ee12f5fba6d6fd7c5", size = 106510, upload-time = "2025-07-29T07:43:05.656Z" }, + { url = "https://files.pythonhosted.org/packages/10/99/b97e53724b52374e2f3859046f0eb2425192da356cb19784d64bc17bb1cf/mmh3-5.2.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d86651fa45799530885ba4dab3d21144486ed15285e8784181a0ab37a4552384", size = 110053, upload-time = "2025-07-29T07:43:07.204Z" }, + { url = "https://files.pythonhosted.org/packages/ac/62/3688c7d975ed195155671df68788c83fed6f7909b6ec4951724c6860cb97/mmh3-5.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c463d7c1c4cfc9d751efeaadd936bbba07b5b0ed81a012b3a9f5a12f0872bd6e", size = 97546, upload-time = "2025-07-29T07:43:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/ca/3b/c6153250f03f71a8b7634cded82939546cdfba02e32f124ff51d52c6f991/mmh3-5.2.0-cp314-cp314-win32.whl", hash = "sha256:bb4fe46bdc6104fbc28db7a6bacb115ee6368ff993366bbd8a2a7f0076e6f0c0", size = 41422, upload-time = "2025-07-29T07:43:09.216Z" }, + { url = "https://files.pythonhosted.org/packages/74/01/a27d98bab083a435c4c07e9d1d720d4c8a578bf4c270bae373760b1022be/mmh3-5.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:7c7f0b342fd06044bedd0b6e72177ddc0076f54fd89ee239447f8b271d919d9b", size = 42135, upload-time = "2025-07-29T07:43:10.183Z" }, + { url = "https://files.pythonhosted.org/packages/cb/c9/dbba5507e95429b8b380e2ba091eff5c20a70a59560934dff0ad8392b8c8/mmh3-5.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:3193752fc05ea72366c2b63ff24b9a190f422e32d75fdeae71087c08fff26115", size = 39879, upload-time = "2025-07-29T07:43:11.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/d1/c8c0ef839c17258b9de41b84f663574fabcf8ac2007b7416575e0f65ff6e/mmh3-5.2.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:69fc339d7202bea69ef9bd7c39bfdf9fdabc8e6822a01eba62fb43233c1b3932", size = 57696, upload-time = "2025-07-29T07:43:11.989Z" }, + { url = "https://files.pythonhosted.org/packages/2f/55/95e2b9ff201e89f9fe37036037ab61a6c941942b25cdb7b6a9df9b931993/mmh3-5.2.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:12da42c0a55c9d86ab566395324213c319c73ecb0c239fad4726324212b9441c", size = 41421, upload-time = "2025-07-29T07:43:13.269Z" }, + { url = "https://files.pythonhosted.org/packages/77/79/9be23ad0b7001a4b22752e7693be232428ecc0a35068a4ff5c2f14ef8b20/mmh3-5.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f7f9034c7cf05ddfaac8d7a2e63a3c97a840d4615d0a0e65ba8bdf6f8576e3be", size = 40853, upload-time = "2025-07-29T07:43:14.888Z" }, + { url = "https://files.pythonhosted.org/packages/ac/1b/96b32058eda1c1dee8264900c37c359a7325c1f11f5ff14fd2be8e24eff9/mmh3-5.2.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:11730eeb16dfcf9674fdea9bb6b8e6dd9b40813b7eb839bc35113649eef38aeb", size = 109694, upload-time = "2025-07-29T07:43:15.816Z" }, + { url = "https://files.pythonhosted.org/packages/8d/6f/a2ae44cd7dad697b6dea48390cbc977b1e5ca58fda09628cbcb2275af064/mmh3-5.2.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:932a6eec1d2e2c3c9e630d10f7128d80e70e2d47fe6b8c7ea5e1afbd98733e65", size = 117438, upload-time = "2025-07-29T07:43:16.865Z" }, + { url = "https://files.pythonhosted.org/packages/a0/08/bfb75451c83f05224a28afeaf3950c7b793c0b71440d571f8e819cfb149a/mmh3-5.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ca975c51c5028947bbcfc24966517aac06a01d6c921e30f7c5383c195f87991", size = 120409, upload-time = "2025-07-29T07:43:18.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/ea/8b118b69b2ff8df568f742387d1a159bc654a0f78741b31437dd047ea28e/mmh3-5.2.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5b0b58215befe0f0e120b828f7645e97719bbba9f23b69e268ed0ac7adde8645", size = 125909, upload-time = "2025-07-29T07:43:19.39Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/168cc0b6a30650032e351a3b89b8a47382da541993a03af91e1ba2501234/mmh3-5.2.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29c2b9ce61886809d0492a274a5a53047742dea0f703f9c4d5d223c3ea6377d3", size = 135331, upload-time = "2025-07-29T07:43:20.435Z" }, + { url = "https://files.pythonhosted.org/packages/31/05/e3a9849b1c18a7934c64e831492c99e67daebe84a8c2f2c39a7096a830e3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a367d4741ac0103f8198c82f429bccb9359f543ca542b06a51f4f0332e8de279", size = 110085, upload-time = "2025-07-29T07:43:21.92Z" }, + { url = "https://files.pythonhosted.org/packages/d9/d5/a96bcc306e3404601418b2a9a370baec92af84204528ba659fdfe34c242f/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5a5dba98e514fb26241868f6eb90a7f7ca0e039aed779342965ce24ea32ba513", size = 111195, upload-time = "2025-07-29T07:43:23.066Z" }, + { url = "https://files.pythonhosted.org/packages/af/29/0fd49801fec5bff37198684e0849b58e0dab3a2a68382a357cfffb0fafc3/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:941603bfd75a46023807511c1ac2f1b0f39cccc393c15039969806063b27e6db", size = 116919, upload-time = "2025-07-29T07:43:24.178Z" }, + { url = "https://files.pythonhosted.org/packages/2d/04/4f3c32b0a2ed762edca45d8b46568fc3668e34f00fb1e0a3b5451ec1281c/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:132dd943451a7c7546978863d2f5a64977928410782e1a87d583cb60eb89e667", size = 123160, upload-time = "2025-07-29T07:43:25.26Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/76/3d29eaa38821730633d6a240d36fa8ad2807e9dfd432c12e1a472ed211eb/mmh3-5.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f698733a8a494466432d611a8f0d1e026f5286dee051beea4b3c3146817e35d5", size = 110206, upload-time = "2025-07-29T07:43:26.699Z" }, + { url = "https://files.pythonhosted.org/packages/44/1c/ccf35892684d3a408202e296e56843743e0b4fb1629e59432ea88cdb3909/mmh3-5.2.0-cp314-cp314t-win32.whl", hash = "sha256:6d541038b3fc360ec538fc116de87462627944765a6750308118f8b509a8eec7", size = 41970, upload-time = "2025-07-29T07:43:27.666Z" }, + { url = "https://files.pythonhosted.org/packages/75/b2/b9e4f1e5adb5e21eb104588fcee2cd1eaa8308255173481427d5ecc4284e/mmh3-5.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:e912b19cf2378f2967d0c08e86ff4c6c360129887f678e27e4dde970d21b3f4d", size = 43063, upload-time = "2025-07-29T07:43:28.582Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fc/0e61d9a4e29c8679356795a40e48f647b4aad58d71bfc969f0f8f56fb912/mmh3-5.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:e7884931fe5e788163e7b3c511614130c2c59feffdc21112290a194487efb2e9", size = 40455, upload-time = "2025-07-29T07:43:29.563Z" }, +] + +[[package]] +name = "more-itertools" +version = "10.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, +] + +[[package]] +name = "nh3" +version = "0.3.2" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/a5/34c26015d3a434409f4d2a1cd8821a06c05238703f49283ffeb937bef093/nh3-0.3.2.tar.gz", hash = "sha256:f394759a06df8b685a4ebfb1874fb67a9cbfd58c64fc5ed587a663c0e63ec376", size = 19288, upload-time = "2025-10-30T11:17:45.948Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/01/a1eda067c0ba823e5e2bb033864ae4854549e49fb6f3407d2da949106bfb/nh3-0.3.2-cp314-cp314t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:d18957a90806d943d141cc5e4a0fefa1d77cf0d7a156878bf9a66eed52c9cc7d", size = 1419839, upload-time = "2025-10-30T11:17:09.956Z" }, + { url = "https://files.pythonhosted.org/packages/30/57/07826ff65d59e7e9cc789ef1dc405f660cabd7458a1864ab58aefa17411b/nh3-0.3.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45c953e57028c31d473d6b648552d9cab1efe20a42ad139d78e11d8f42a36130", size = 791183, upload-time = "2025-10-30T11:17:11.99Z" }, + { url = "https://files.pythonhosted.org/packages/af/2f/e8a86f861ad83f3bb5455f596d5c802e34fcdb8c53a489083a70fd301333/nh3-0.3.2-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c9850041b77a9147d6bbd6dbbf13eeec7009eb60b44e83f07fcb2910075bf9b", size = 829127, upload-time = "2025-10-30T11:17:13.192Z" }, + { url = "https://files.pythonhosted.org/packages/d8/97/77aef4daf0479754e8e90c7f8f48f3b7b8725a3b8c0df45f2258017a6895/nh3-0.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:403c11563e50b915d0efdb622866d1d9e4506bce590ef7da57789bf71dd148b5", size = 997131, upload-time = "2025-10-30T11:17:14.677Z" }, + { url = "https://files.pythonhosted.org/packages/41/ee/fd8140e4df9d52143e89951dd0d797f5546004c6043285289fbbe3112293/nh3-0.3.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:0dca4365db62b2d71ff1620ee4f800c4729849906c5dd504ee1a7b2389558e31", size = 1068783, upload-time = "2025-10-30T11:17:15.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/64/bdd9631779e2d588b08391f7555828f352e7f6427889daf2fa424bfc90c9/nh3-0.3.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0fe7ee035dd7b2290715baf29cb27167dddd2ff70ea7d052c958dbd80d323c99", size = 994732, upload-time = "2025-10-30T11:17:17.155Z" }, + { url = "https://files.pythonhosted.org/packages/79/66/90190033654f1f28ca98e3d76b8be1194505583f9426b0dcde782a3970a2/nh3-0.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a40202fd58e49129764f025bbaae77028e420f1d5b3c8e6f6fd3a6490d513868", size = 975997, upload-time = "2025-10-30T11:17:18.77Z" }, + { url = "https://files.pythonhosted.org/packages/34/30/ebf8e2e8d71fdb5a5d5d8836207177aed1682df819cbde7f42f16898946c/nh3-0.3.2-cp314-cp314t-win32.whl", hash = "sha256:1f9ba555a797dbdcd844b89523f29cdc90973d8bd2e836ea6b962cf567cadd93", size = 583364, upload-time = "2025-10-30T11:17:20.286Z" }, + { url = "https://files.pythonhosted.org/packages/94/ae/95c52b5a75da429f11ca8902c2128f64daafdc77758d370e4cc310ecda55/nh3-0.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:dce4248edc427c9b79261f3e6e2b3ecbdd9b88c267012168b4a7b3fc6fd41d13", size = 589982, upload-time = "2025-10-30T11:17:21.384Z" }, + { url = "https://files.pythonhosted.org/packages/b4/bd/c7d862a4381b95f2469704de32c0ad419def0f4a84b7a138a79532238114/nh3-0.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:019ecbd007536b67fdf76fab411b648fb64e2257ca3262ec80c3425c24028c80", size = 577126, upload-time = "2025-10-30T11:17:22.755Z" }, + { url = "https://files.pythonhosted.org/packages/b6/3e/f5a5cc2885c24be13e9b937441bd16a012ac34a657fe05e58927e8af8b7a/nh3-0.3.2-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7064ccf5ace75825bd7bf57859daaaf16ed28660c1c6b306b649a9eda4b54b1e", size = 1431980, upload-time = "2025-10-30T11:17:25.457Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/f7/529a99324d7ef055de88b690858f4189379708abae92ace799365a797b7f/nh3-0.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8745454cdd28bbbc90861b80a0111a195b0e3961b9fa2e672be89eb199fa5d8", size = 820805, upload-time = "2025-10-30T11:17:26.98Z" }, + { url = "https://files.pythonhosted.org/packages/3d/62/19b7c50ccd1fa7d0764822d2cea8f2a320f2fd77474c7a1805cb22cf69b0/nh3-0.3.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72d67c25a84579f4a432c065e8b4274e53b7cf1df8f792cf846abfe2c3090866", size = 803527, upload-time = "2025-10-30T11:17:28.284Z" }, + { url = "https://files.pythonhosted.org/packages/4a/ca/f022273bab5440abff6302731a49410c5ef66b1a9502ba3fbb2df998d9ff/nh3-0.3.2-cp38-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:13398e676a14d6233f372c75f52d5ae74f98210172991f7a3142a736bd92b131", size = 1051674, upload-time = "2025-10-30T11:17:29.909Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f7/5728e3b32a11daf5bd21cf71d91c463f74305938bc3eb9e0ac1ce141646e/nh3-0.3.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03d617e5c8aa7331bd2659c654e021caf9bba704b109e7b2b28b039a00949fe5", size = 1004737, upload-time = "2025-10-30T11:17:31.205Z" }, + { url = "https://files.pythonhosted.org/packages/53/7f/f17e0dba0a99cee29e6cee6d4d52340ef9cb1f8a06946d3a01eb7ec2fb01/nh3-0.3.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f55c4d2d5a207e74eefe4d828067bbb01300e06e2a7436142f915c5928de07", size = 911745, upload-time = "2025-10-30T11:17:32.945Z" }, + { url = "https://files.pythonhosted.org/packages/42/0f/c76bf3dba22c73c38e9b1113b017cf163f7696f50e003404ec5ecdb1e8a6/nh3-0.3.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb18403f02b655a1bbe4e3a4696c2ae1d6ae8f5991f7cacb684b1ae27e6c9f7", size = 797184, upload-time = "2025-10-30T11:17:34.226Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/a1/73d8250f888fb0ddf1b119b139c382f8903d8bb0c5bd1f64afc7e38dad1d/nh3-0.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d66f41672eb4060cf87c037f760bdbc6847852ca9ef8e9c5a5da18f090abf87", size = 838556, upload-time = "2025-10-30T11:17:35.875Z" }, + { url = "https://files.pythonhosted.org/packages/d1/09/deb57f1fb656a7a5192497f4a287b0ade5a2ff6b5d5de4736d13ef6d2c1f/nh3-0.3.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f97f8b25cb2681d25e2338148159447e4d689aafdccfcf19e61ff7db3905768a", size = 1006695, upload-time = "2025-10-30T11:17:37.071Z" }, + { url = "https://files.pythonhosted.org/packages/b6/61/8f4d41c4ccdac30e4b1a4fa7be4b0f9914d8314a5058472f84c8e101a418/nh3-0.3.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:2ab70e8c6c7d2ce953d2a58102eefa90c2d0a5ed7aa40c7e29a487bc5e613131", size = 1075471, upload-time = "2025-10-30T11:17:38.225Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c6/966aec0cb4705e69f6c3580422c239205d5d4d0e50fac380b21e87b6cf1b/nh3-0.3.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1710f3901cd6440ca92494ba2eb6dc260f829fa8d9196b659fa10de825610ce0", size = 1002439, upload-time = "2025-10-30T11:17:39.553Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c8/97a2d5f7a314cce2c5c49f30c6f161b7f3617960ade4bfc2fd1ee092cb20/nh3-0.3.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:91e9b001101fb4500a2aafe3e7c92928d85242d38bf5ac0aba0b7480da0a4cd6", size = 987439, upload-time = "2025-10-30T11:17:40.81Z" }, + { url = "https://files.pythonhosted.org/packages/0d/95/2d6fc6461687d7a171f087995247dec33e8749a562bfadd85fb5dbf37a11/nh3-0.3.2-cp38-abi3-win32.whl", hash = "sha256:169db03df90da63286e0560ea0efa9b6f3b59844a9735514a1d47e6bb2c8c61b", size = 589826, upload-time = "2025-10-30T11:17:42.239Z" }, + { url = "https://files.pythonhosted.org/packages/64/9a/1a1c154f10a575d20dd634e5697805e589bbdb7673a0ad00e8da90044ba7/nh3-0.3.2-cp38-abi3-win_amd64.whl", hash = 
"sha256:562da3dca7a17f9077593214a9781a94b8d76de4f158f8c895e62f09573945fe", size = 596406, upload-time = "2025-10-30T11:17:43.773Z" }, + { url = "https://files.pythonhosted.org/packages/9e/7e/a96255f63b7aef032cbee8fc4d6e37def72e3aaedc1f72759235e8f13cb1/nh3-0.3.2-cp38-abi3-win_arm64.whl", hash = "sha256:cf5964d54edd405e68583114a7cba929468bcd7db5e676ae38ee954de1cfc104", size = 584162, upload-time = "2025-10-30T11:17:44.96Z" }, +] + +[[package]] +name = "openhouse-dataloader" +source = { editable = "." } +dependencies = [ + { name = "datafusion" }, + { name = "pyiceberg" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest" }, + { name = "ruff" }, + { name = "twine" }, +] + +[package.metadata] +requires-dist = [ + { name = "datafusion", specifier = "==51.0.0" }, + { name = "pyiceberg", specifier = "~=0.11.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" }, + { name = "twine", marker = "extra == 'dev'", specifier = ">=6.0.0" }, +] +provides-extras = ["dev"] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pyarrow" +version = "23.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" }, + { url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" }, + { url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" }, + { url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" }, + { url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" }, + { url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" }, + { url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" }, + { url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" }, + { url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" }, + { url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" }, + { url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" }, + { url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" }, + { url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" }, + { url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" }, + { url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" }, + { url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" }, + { url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" }, + { url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" }, + { url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" }, + { url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" }, + { url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" }, + { url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" }, + { url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" }, + { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash 
= "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = 
"2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = 
"2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, 
upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { 
url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pyiceberg" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "fsspec" }, + { name = "mmh3" }, + { name = "pydantic" }, + { name = "pyparsing" }, + { name = "pyroaring" }, + { name = "requests" }, + { name = "rich" }, + { name = "strictyaml" }, + { name = "tenacity" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/22/3d02ad39710bf51834d108e6d548cee9c1916850460ccba80db47a982567/pyiceberg-0.11.0.tar.gz", 
hash = "sha256:095bbafc87d204cf8d3ffc1c434e07cf9a67a709192ac0b11dcb0f8251f7ad4e", size = 1074873, upload-time = "2026-02-10T02:28:20.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/37/b5a818444f5563ee2dacac93cc690e63396ab60308be353502dc7008168b/pyiceberg-0.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6fc89c9581d42ff2383cc9ba3f443ab9f175d8e85216ecbd819e955e9069bc46", size = 532694, upload-time = "2026-02-10T02:28:01.298Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f9/ef76d6cf62a7ba9d61a5e20216000d4b366d8eac3be5c89c2ce5c8eb38f9/pyiceberg-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e2dfdf5438cc5ad8eb8b2e3f7a41ab6f286fe8b6fd6f5c1407381f627097e2e0", size = 532901, upload-time = "2026-02-10T02:28:02.517Z" }, + { url = "https://files.pythonhosted.org/packages/15/2a/bcec7d0ca75259cdb83ddceee1c59cdad619d2dfe36cee802c7e7207d96a/pyiceberg-0.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4543e93c78bb4fd78da7093c8232d62487a68661ba6bff0bafc0b346b34ca38c", size = 729261, upload-time = "2026-02-10T02:28:03.694Z" }, + { url = "https://files.pythonhosted.org/packages/99/ff/db75a2062a0b4b64ad0a6c677cab5b6e3ac19e0820584c597e1822f2cf7c/pyiceberg-0.11.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8dda2ad8d57e3af743ab67d976a23ca1cd54a4849110b5c2375f5d9466a4ae80", size = 729979, upload-time = "2026-02-10T02:28:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/d8/eb/453e8c4a7e6eb698bf1402337e3cd3516f20c4bbe0f06961d3e6c5031cca/pyiceberg-0.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b5999fb41ea0b4b153a5c80d56512ef0596f95fdd62512d1806b8db89fd4a5f9", size = 723778, upload-time = "2026-02-10T02:28:06.573Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7b/4f38016722ecc04f97000f7b7f80ba1d74e66dcbf630a4c2b620b5393ce0/pyiceberg-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:63c76f882ad30bda5b5fc685c6ab053e5b5585eadab04d1afc515eec4e272b14", size = 726955, upload-time = "2026-02-10T02:28:08.684Z" }, + { url = "https://files.pythonhosted.org/packages/56/14/dc689c0637d7f6716cae614afcce5782903cc87a781dfd47e6d6e72ce104/pyiceberg-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:4bb26a9308e8bb97c1d3518209d221f2a790a37b9806b8b91fee4c47be4919a6", size = 531019, upload-time = "2026-02-10T02:28:10.333Z" }, + { url = "https://files.pythonhosted.org/packages/c6/72/ef1e816d79d703eec1182398947a6b72f502eefeee01c4484bd5e1493b07/pyiceberg-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c707f4463dd9c1ca664d41d5ddd38babadf1bf5fa1946cb591c033a6a2827eb4", size = 532359, upload-time = "2026-02-10T02:28:11.473Z" }, + { url = "https://files.pythonhosted.org/packages/1f/41/ec85279b1b8ed57d0d27d4675203d314b8f5d69383e1df68f615f45e9dda/pyiceberg-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f1c944969fda799a2d26dc6f57448ace44ee07e334306ba6f5110df1aadeeef1", size = 532496, upload-time = "2026-02-10T02:28:13.19Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b4/02861c450057c9a6e2f2e1eb0ef735c2e28473cff60b2747c50d0427ec1c/pyiceberg-0.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1be075b9ecc175b8dd76822b081b379ce33cda33d6403eaf607268f6061f3275", size = 721917, upload-time = "2026-02-10T02:28:14.484Z" }, + { url = "https://files.pythonhosted.org/packages/16/cf/924b7b14267d47f5055bb5d032c7d24eb9542ac3631b460e1398fe9935ea/pyiceberg-0.11.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3507d079d43d724bffb80e75201f2995822af844b674642dcf73c19d5303994", size = 723754, upload-time = "2026-02-10T02:28:15.77Z" }, + { url = "https://files.pythonhosted.org/packages/24/a1/df2d73af6dc3ee301e727d0bef4421c57de02b5030cf38e39ed25ef36154/pyiceberg-0.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:eb3719cd61a0512596b4306283072de443d84ec7b68654f565b0d7c2d7cdeeeb", size = 715749, upload-time = "2026-02-10T02:28:17.034Z" }, + { url = "https://files.pythonhosted.org/packages/8e/0a/c3cdcd5ed417aceb2f73e8463d97e8dd7e3f7021015d0c8d51394a5c5a63/pyiceberg-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9a71fd6b1c3c625ed2a9ca2cecf0dc8713acc5814e78c9becde3b1f42315c35", size = 720600, upload-time = "2026-02-10T02:28:18.275Z" }, + { url = "https://files.pythonhosted.org/packages/01/b8/29ec7281fb831ab983f953b00924c1cc3ebc21e9f67a1466af9b63767ba4/pyiceberg-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:bed2df9eb7e1496af22fa2307dbd13f29865b98ba5851695ffd1f4436edc05f9", size = 530631, upload-time = "2026-02-10T02:28:19.561Z" }, +] + +[[package]] +name = "pyparsing" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574, upload-time = "2026-01-21T03:57:59.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, +] + +[[package]] +name = "pyroaring" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/e4/975f0fa77fc3590820b4a3ac49704644b389795409bc12eb91729f845812/pyroaring-1.0.3.tar.gz", hash = "sha256:cd7392d1c010c9e41c11c62cd0610c8852e7e9698b1f7f6c2fcdefe50e7ef6da", size = 188688, upload-time = "2025-10-09T09:08:22.448Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/dd/09/a5376d55672e0535019ba1469888909d0046cea0cfb969a4aa1f99caaf22/pyroaring-1.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:add3e4c78eb590a76526ecce8d1566eecdd5822e351c36b3697997f4a80ed808", size = 681056, upload-time = "2025-10-09T09:07:11.497Z" }, + { url = "https://files.pythonhosted.org/packages/23/dd/78f59d361bd9ebf8de3660408b0c48664ade0a057ebcf4b207d99ac1a698/pyroaring-1.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ebaffe846cf4ba4f00ce6b8a9f39613f24e2d09447e77be4fa6e898bc36451b6", size = 375111, upload-time = "2025-10-09T09:07:12.597Z" }, + { url = "https://files.pythonhosted.org/packages/bf/03/10dc93f83a5453eb40a69c79106a8385b40aa12cf4531ca72bd9d7f45cb2/pyroaring-1.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9459f27498f97d08031a34a5ead230b77eb0ab3cc3d85b7f54faa2fd548acd6", size = 314319, upload-time = "2025-10-09T09:07:13.579Z" }, + { url = "https://files.pythonhosted.org/packages/86/9e/b00c38a7e62a73e152055f593595c37152e61fc2896fd11538a7c71fbe4e/pyroaring-1.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2b2eb8bd1c35c772994889be9f7dda09477475d7aa1e2af9ab4ef18619326f6", size = 1869251, upload-time = "2025-10-09T09:07:14.584Z" }, + { url = "https://files.pythonhosted.org/packages/4f/33/f32d00ca105b66303deab43d027c3574c8ade8525dac0e5b50a9fb4d1b76/pyroaring-1.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d31f4c1c906f1af14ce61a3959d04a14a64c594f8a768399146a45bbd341f21f", size = 2071551, upload-time = "2025-10-09T09:07:15.713Z" }, + { url = "https://files.pythonhosted.org/packages/5d/89/e953cae181ba4c7523334855a1ca0ae8eeea3cee8d7cd39c56bd99709d3f/pyroaring-1.0.3-cp312-cp312-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53be988fc86698d56c11049bfe5113a2f6990adb1fa2782b29636509808b6aa7", size = 1781071, upload-time = "2025-10-09T09:07:17.19Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/db/65d4be532e68b62a84a9c89b24d0a1394f452f484fa29392142d9a3b9c48/pyroaring-1.0.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7df84d223424523b19a23781f4246cc247fd6d821e1bc0853c2f25669136f7d0", size = 1795670, upload-time = "2025-10-09T09:07:18.524Z" }, + { url = "https://files.pythonhosted.org/packages/f5/9e/684ea0568ce7d30fc4e01ad1c666e9ce1a5b1702fa630231f4f6bdb96539/pyroaring-1.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:34a781f1f9766897f63ef18be129827340ae37764015b83fdcff1efb9e29136d", size = 2849305, upload-time = "2025-10-09T09:07:20.388Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fd/d7773a2adf91f45d8924197954c66b1694325afd2f27e02edaac07338402/pyroaring-1.0.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1f414343b4ed0756734328cdf2a91022fc54503769e3f8d79bd0b672ea815a16", size = 2692843, upload-time = "2025-10-09T09:07:22.042Z" }, + { url = "https://files.pythonhosted.org/packages/13/72/b8a99ba138eebd8ff9bf8d15f3942e9e43e8e45723e2e6b7b09e542b7448/pyroaring-1.0.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d16ae185c72dc64f76335dbe53e53a892e78115adc92194957d1b7ef74d230b9", size = 2983440, upload-time = "2025-10-09T09:07:23.419Z" }, + { url = "https://files.pythonhosted.org/packages/ca/94/e6ed1f682d850e039c71b2032bacdefc5082dc809796cf34b9e6f24c604d/pyroaring-1.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f888447bf22dde7759108bfe6dfbeb6bbb61b14948de9c4cb6843c4dd57e2215", size = 3117542, upload-time = "2025-10-09T09:07:25.104Z" }, + { url = "https://files.pythonhosted.org/packages/8f/89/d55b0ed3e098ef89c421b43b748afe3d90eb250cab50b9e53e3a3449ac58/pyroaring-1.0.3-cp312-cp312-win32.whl", hash = "sha256:fbbdc44c51a0a3efd7be3dbe04466278ce098fcd101aa1905849319042159770", size = 205118, upload-time = "2025-10-09T09:07:26.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/e1/b71fef6a73efb50110d33d714235ff7059f4ebae98dc474b6549b322f48f/pyroaring-1.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:3b217c4b3ad953b4c759a0d2f9bd95316f0c345b9f7adb49e6ded7a1f5106bd4", size = 260629, upload-time = "2025-10-09T09:07:27.528Z" }, + { url = "https://files.pythonhosted.org/packages/57/33/66ee872079c9c47512d6e17d374bcad8d91350c24dc20fbe678c34b33745/pyroaring-1.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:e6bcf838564c21bab8fe6c2748b4990d4cd90612d8c470c04889def7bb5114ea", size = 219032, upload-time = "2025-10-09T09:07:28.754Z" }, + { url = "https://files.pythonhosted.org/packages/1f/95/97142ee32587ddda9e2cd614b865eeb5c0ee91006a51928f4074cd6e8e5f/pyroaring-1.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:20bc947054b197d1baa76cd05d70b8e04f95b82e698266e2f8f2f4b36d764477", size = 678813, upload-time = "2025-10-09T09:07:29.936Z" }, + { url = "https://files.pythonhosted.org/packages/70/5e/cff22be3a76a80024bdf00a9decdffedc6e80f037328a58b58c1b521442d/pyroaring-1.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ba5909b4c66bb85cab345e2f3a87e5ce671509c94b8c9823d8db64e107cbe854", size = 373661, upload-time = "2025-10-09T09:07:30.983Z" }, + { url = "https://files.pythonhosted.org/packages/86/73/fc406a67cd49e1707d1c3d08214458959dd579eff88c28587b356dfa068b/pyroaring-1.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b744746ba5da27fad760067f12633f5d384db6a1e65648d00244ceacbbd87731", size = 313559, upload-time = "2025-10-09T09:07:32.099Z" }, + { url = "https://files.pythonhosted.org/packages/f9/64/c7fe510523445f27e2cb04de6ffd3137f9d72db438b62db2bfa3dafcf4fc/pyroaring-1.0.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b16c2a2791a5a09c4b59c0e1069ac1c877d0df25cae3155579c7eac8844676e", size = 1875926, upload-time = "2025-10-09T09:07:33.701Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/74/da9b8ad2ca9ce6af1377f2cffdad6582a51a5f5df4f26df5c41810c9de5b/pyroaring-1.0.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7f68dfcf8d01177267f4bc06c4960fe8e39577470d1b52c9af8b61a72ca8767", size = 2064377, upload-time = "2025-10-09T09:07:35.273Z" }, + { url = "https://files.pythonhosted.org/packages/99/e3/8a70c5a5f7821c63709e2769aeccda8ae87a192198374bc475cbee543a22/pyroaring-1.0.3-cp313-cp313-manylinux_2_24_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:dba4e4700030182a981a3c887aa73887697145fc9ffb192f908aa59b718fbbdd", size = 1778320, upload-time = "2025-10-09T09:07:36.782Z" }, + { url = "https://files.pythonhosted.org/packages/04/4c/08159a07c3723a2775064887543766b6115b4975e7baaa4d51e5580701a4/pyroaring-1.0.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e26dd1dc1edba02288902914bdb559e53e346e9155defa43c31fcab831b55342", size = 1786569, upload-time = "2025-10-09T09:07:38.473Z" }, + { url = "https://files.pythonhosted.org/packages/e5/ff/55a18d0e7e0dc4cd9f43988b746e788234a8d660fa17367c5ed9fa799348/pyroaring-1.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6eb98d2cacfc6d51c6a69893f04075e07b3df761eac71ba162c43b9b4c4452ad", size = 2852766, upload-time = "2025-10-09T09:07:39.633Z" }, + { url = "https://files.pythonhosted.org/packages/24/3c/419e25c51843dd40975ae37d67dea4f2f256554b5bec32237f607ec8ef21/pyroaring-1.0.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a967e9eddb9485cbdd95d6371e3dada67880844d836c0283d3b11efe9225d1b7", size = 2683904, upload-time = "2025-10-09T09:07:41.139Z" }, + { url = "https://files.pythonhosted.org/packages/75/64/8d91f1b85b42925af632fc2c1047bb314be622dce890a4181a0a8d6e498d/pyroaring-1.0.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b12ef7f992ba7be865f91c7c098fd8ac6c413563aaa14d5b1e2bcb8cb43a4614", size = 2973884, upload-time = "2025-10-09T09:07:42.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/6d/c867625549df0dc9ad675424ecf989fa2f08f0571bd46dfc4f7218737dd2/pyroaring-1.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:82ca5be174b85c40be7b00bc6bf39b2931a1b4a465f3af17ec6b9c48e9aa6fe0", size = 3103671, upload-time = "2025-10-09T09:07:44.055Z" }, + { url = "https://files.pythonhosted.org/packages/59/b1/d47c5ec2b2580d0b94f42575be8f49907a0f4aa396fdc18660f3b5060d54/pyroaring-1.0.3-cp313-cp313-win32.whl", hash = "sha256:f758c681e63ffe74b20423695e71f0410920f41b075cee679ffb5bc2bf38440b", size = 205153, upload-time = "2025-10-09T09:07:45.496Z" }, + { url = "https://files.pythonhosted.org/packages/c4/92/3600486936eebab747ae1462d231d7f87d234da24a04e82e1915c00f4427/pyroaring-1.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:428c3bb384fe4c483feb5cf7aa3aef1621fb0a5c4f3d391da67b2c4a43f08a10", size = 260349, upload-time = "2025-10-09T09:07:46.524Z" }, + { url = "https://files.pythonhosted.org/packages/77/96/8dde074f1ad2a1c3d2091b22de80d1b3007824e649e06eeeebded83f4d48/pyroaring-1.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:9c0c856e8aa5606e8aed5f30201286e404fdc9093f81fefe82d2e79e67472bb2", size = 218775, upload-time = "2025-10-09T09:07:47.558Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = 
"sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, +] + +[[package]] +name = "readme-renderer" +version = "44.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "nh3" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/a9/104ec9234c8448c4379768221ea6df01260cd6c2ce13182d4eac531c8342/readme_renderer-44.0.tar.gz", hash 
= "sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1", size = 32056, upload-time = "2024-07-08T15:00:57.805Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/67/921ec3024056483db83953ae8e48079ad62b92db7880013ca77632921dd0/readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151", size = 13310, upload-time = "2024-07-08T15:00:56.577Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 
54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + +[[package]] +name = "rfc3986" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/40/1520d68bfa07ab5a6f065a186815fb6610c86fe957bc065754e47f7b0840/rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c", size = 49026, upload-time = "2022-01-10T00:52:30.832Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/9a/9afaade874b2fa6c752c36f1548f718b5b83af81ed9b76628329dab81c1b/rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd", size = 31326, upload-time = "2022-01-10T00:52:29.594Z" }, +] + +[[package]] +name = "rich" +version = "14.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/84/4831f881aa6ff3c976f6d6809b58cdfa350593ffc0dc3c58f5f6586780fb/rich-14.3.1.tar.gz", hash = "sha256:b8c5f568a3a749f9290ec6bddedf835cec33696bfc1e48bcfecb276c7386e4b8", size = 230125, upload-time = "2026-01-24T21:40:44.847Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/2a/a1810c8627b9ec8c57ec5ec325d306701ae7be50235e8fd81266e002a3cc/rich-14.3.1-py3-none-any.whl", hash = "sha256:da750b1aebbff0b372557426fb3f35ba56de8ef954b3190315eb64076d6fb54e", size = 309952, upload-time = "2026-01-24T21:40:42.969Z" }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, +] + +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "jeepney" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "strictyaml" +version = "1.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/08/efd28d49162ce89c2ad61a88bd80e11fb77bc9f6c145402589112d38f8af/strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407", size = 115206, upload-time = "2023-03-10T12:50:27.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7c/a81ef5ef10978dd073a854e0fa93b5d8021d0594b639cc8f6453c3c78a1d/strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7", size 
= 123917, upload-time = "2023-03-10T12:50:17.242Z" }, +] + +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + +[[package]] +name = "twine" +version = "6.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "id" }, + { name = "keyring", marker = "platform_machine != 'ppc64le' and platform_machine != 's390x'" }, + { name = "packaging" }, + { name = "readme-renderer" }, + { name = "requests" }, + { name = "requests-toolbelt" }, + { name = "rfc3986" }, + { name = "rich" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/a8/949edebe3a82774c1ec34f637f5dd82d1cf22c25e963b7d63771083bbee5/twine-6.2.0.tar.gz", hash = "sha256:e5ed0d2fd70c9959770dce51c8f39c8945c574e18173a7b81802dab51b4b75cf", size = 172262, upload-time = "2025-09-04T15:43:17.255Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/7a/882d99539b19b1490cac5d77c67338d126e4122c8276bf640e411650c830/twine-6.2.0-py3-none-any.whl", hash = "sha256:418ebf08ccda9a8caaebe414433b0ba5e25eb5e4a927667122fbe8f829f985d8", size = 42727, upload-time = "2025-09-04T15:43:15.994Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = 
"sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 5451652, upload-time = "2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" }, + { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" }, + { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 
6364818, upload-time = "2025-09-14T22:18:03.769Z" }, + { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" }, + { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" }, + { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" }, + { url 
= "https://files.pythonhosted.org/packages/c2/38/f249a2050ad1eea0bb364046153942e34abba95dd5520af199aed86fbb49/zstandard-0.25.0-cp314-cp314-win32.whl", hash = "sha256:da469dc041701583e34de852d8634703550348d5822e66a0c827d39b05365b12", size = 444513, upload-time = "2025-09-14T22:18:20.61Z" }, + { url = "https://files.pythonhosted.org/packages/3a/43/241f9615bcf8ba8903b3f0432da069e857fc4fd1783bd26183db53c4804b/zstandard-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:c19bcdd826e95671065f8692b5a4aa95c52dc7a02a4c5a0cac46deb879a017a2", size = 516118, upload-time = "2025-09-14T22:18:17.849Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ef/da163ce2450ed4febf6467d77ccb4cd52c4c30ab45624bad26ca0a27260c/zstandard-0.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:d7541afd73985c630bafcd6338d2518ae96060075f9463d7dc14cfb33514383d", size = 476940, upload-time = "2025-09-14T22:18:19.088Z" }, +] diff --git a/integrations/spark/spark-3.1/openhouse-spark-runtime/build.gradle b/integrations/spark/spark-3.1/openhouse-spark-runtime/build.gradle index 2e10a0ca1..9abb935b6 100644 --- a/integrations/spark/spark-3.1/openhouse-spark-runtime/build.gradle +++ b/integrations/spark/spark-3.1/openhouse-spark-runtime/build.gradle @@ -76,7 +76,7 @@ task runAntlr(type:JavaExec) { inputs.dir antlrMainDir outputs.dir antlrMainGeneratedSrcDir - main = "org.antlr.v4.Tool" + mainClass = "org.antlr.v4.Tool" args = ["${antlrMainDir}/OpenhouseSqlExtensions.g4", "-visitor", "-o", "${antlrMainGeneratedSrcDir}/${antlrPackageDirPrefix}", diff --git a/scripts/java/tools/dummytokens/build.gradle b/scripts/java/tools/dummytokens/build.gradle index c14715f3e..1f1db17ba 100644 --- a/scripts/java/tools/dummytokens/build.gradle +++ b/scripts/java/tools/dummytokens/build.gradle @@ -3,8 +3,10 @@ plugins { id 'openhouse.maven-publish' id 'application' } -apply plugin: 'application' -mainClassName = 'com.linkedin.openhouse.tools.dummytokens.DummyTokenGenerator' + +application { + mainClass = 
'com.linkedin.openhouse.tools.dummytokens.DummyTokenGenerator' +} dependencies { implementation(project(':services:common')){ @@ -17,6 +19,8 @@ dependencies { } jar { + // Ensure all runtime dependencies are built before creating the fat jar + dependsOn configurations.runtimeClasspath duplicatesStrategy = DuplicatesStrategy.EXCLUDE manifest { attributes( diff --git a/services/housetables/build.gradle b/services/housetables/build.gradle index a3dbcb460..ec0b8abe8 100644 --- a/services/housetables/build.gradle +++ b/services/housetables/build.gradle @@ -20,6 +20,14 @@ plugins { id 'openhouse.service-specgen-convention' } +// Configure unique port for OpenAPI spec generation to avoid conflicts during parallel builds +openApi { + customBootRun { + args = ["--server.port=8001"] + } + apiDocsUrl.set("http://localhost:8001/v3/api-docs") +} + configurations { all { exclude group: 'org.apache.logging.log4j', module: 'log4j-slf4j-impl' diff --git a/services/housetables/src/main/java/com/linkedin/openhouse/housetables/config/db/jdbc/JdbcProviderConfiguration.java b/services/housetables/src/main/java/com/linkedin/openhouse/housetables/config/db/jdbc/JdbcProviderConfiguration.java index 3fbef0910..e52f4ac5d 100644 --- a/services/housetables/src/main/java/com/linkedin/openhouse/housetables/config/db/jdbc/JdbcProviderConfiguration.java +++ b/services/housetables/src/main/java/com/linkedin/openhouse/housetables/config/db/jdbc/JdbcProviderConfiguration.java @@ -2,8 +2,12 @@ import com.linkedin.openhouse.cluster.configs.ClusterProperties; import com.linkedin.openhouse.housetables.config.db.DatabaseConfiguration; +import com.zaxxer.hikari.HikariDataSource; +import javax.sql.DataSource; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; +import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Bean; import 
org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Primary; @@ -56,4 +60,69 @@ public DataSourceProperties dataSourceProperties() { return properties; } + + /** + * Create HikariCP DataSource with SSL properties configured as DataSource properties. This is + * cleaner than appending parameters to JDBC URL and follows Spring Boot best practices. + * + *

The @ConfigurationProperties annotation ensures that spring.datasource.hikari.* properties + * (such as maximum-pool-size, connection-timeout, etc.) are automatically bound to the + * HikariDataSource. + * + *

When SSL is enabled for MySQL, this method configures certificate-based authentication + * properties including keystore paths, passwords, and SSL modes. These properties are passed + * directly to the MySQL JDBC driver via HikariCP. + * + * @param dataSourceProperties the basic datasource properties (URL, username, password) + * @return configured HikariDataSource with SSL properties if enabled + */ + @Bean + @Primary + @ConfigurationProperties("spring.datasource.hikari") + public DataSource dataSource(DataSourceProperties dataSourceProperties) { + HikariDataSource dataSource = + dataSourceProperties.initializeDataSourceBuilder().type(HikariDataSource.class).build(); + + // Add SSL/Certificate properties if enabled (MYSQL only) + DatabaseConfiguration.SupportedDbTypes dbType = + DatabaseConfiguration.SupportedDbTypes.valueOf( + clusterProperties.getClusterHouseTablesDatabaseType()); + + if (dbType == DatabaseConfiguration.SupportedDbTypes.MYSQL + && clusterProperties.isClusterHouseTablesDatabaseCertBasedAuthEnabled()) { + + log.info("Configuring MySQL certificate-based authentication"); + + // Add MySQL Connector/J specific SSL properties + dataSource.addDataSourceProperty( + "sslMode", clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthSslMode()); + + if (StringUtils.isNotBlank( + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthClientCertKeystoreUrl())) { + dataSource.addDataSourceProperty( + "clientCertificateKeyStoreUrl", + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthClientCertKeystoreUrl()); + dataSource.addDataSourceProperty( + "clientCertificateKeyStorePassword", + clusterProperties + .getClusterHouseTablesDatabaseCertBasedAuthClientCertKeystorePassword()); + } + + if (StringUtils.isNotBlank( + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthTruststoreUrl())) { + dataSource.addDataSourceProperty( + "trustCertificateKeyStoreUrl", + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthTruststoreUrl()); + 
dataSource.addDataSourceProperty( + "trustCertificateKeyStorePassword", + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthTruststorePassword()); + } + + log.info( + "MySQL SSL configuration completed successfully (sslMode={}).", + clusterProperties.getClusterHouseTablesDatabaseCertBasedAuthSslMode()); + } + + return dataSource; + } } diff --git a/services/jobs/build.gradle b/services/jobs/build.gradle index 4a2b08ef1..e7adbef3e 100644 --- a/services/jobs/build.gradle +++ b/services/jobs/build.gradle @@ -20,6 +20,14 @@ plugins { id 'openhouse.service-specgen-convention' } +// Configure unique port for OpenAPI spec generation to avoid conflicts during parallel builds +openApi { + customBootRun { + args = ["--server.port=8002"] + } + apiDocsUrl.set("http://localhost:8002/v3/api-docs") +} + dependencies { implementation project(':services:common') implementation project(':iceberg:openhouse:internalcatalog') diff --git a/services/tables/build.gradle b/services/tables/build.gradle index 62d467d64..1f9243430 100644 --- a/services/tables/build.gradle +++ b/services/tables/build.gradle @@ -22,6 +22,14 @@ plugins { id 'openhouse.service-specgen-convention' } +// Configure unique port for OpenAPI spec generation to avoid conflicts during parallel builds +openApi { + customBootRun { + args = ["--server.port=8000"] + } + apiDocsUrl.set("http://localhost:8000/v3/api-docs") +} + dependencies { implementation project(':services:common') implementation project(':iceberg:openhouse:internalcatalog') diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java index c48a5438c..0dde039c0 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java 
@@ -93,23 +93,6 @@ static Map getUserTblProps( return result; } - /** - * @param updateProperties - * @param policiesFromRequest - * @param policiesFromTable - * @return True if alteration of policies occurred. - */ - public static boolean alterPoliciesIfNeeded( - UpdateProperties updateProperties, String policiesFromRequest, String policiesFromTable) { - boolean policiesUpdated = false; - if (!policiesFromRequest.equals(policiesFromTable)) { - updateProperties.set(POLICIES_KEY, policiesFromRequest); - policiesUpdated = true; - } - - return policiesUpdated; - } - /** * Converting {@link Table} to {@link TableDto} ONLY when returning back to response, since * jsonSnapshots won't be set to a meaningful value here. diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java index 094219cc9..2f641044c 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImpl.java @@ -4,6 +4,7 @@ import static com.linkedin.openhouse.internal.catalog.mapper.HouseTableSerdeUtils.*; import static com.linkedin.openhouse.tables.repository.impl.InternalRepositoryUtils.*; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; import com.google.gson.GsonBuilder; @@ -76,7 +77,9 @@ public class OpenHouseInternalRepositoryImpl implements OpenHouseInternalReposit @Autowired TablesMapper mapper; - @Autowired PoliciesSpecMapper policiesMapper; + @Autowired private PoliciesSpecMapper policiesMapper; + + @Autowired private TablePolicyUpdater tablePolicyUpdater; @Autowired PartitionSpecMapper partitionSpecMapper; @@ -148,7 +151,7 @@ public TableDto save(TableDto 
tableDto) { boolean propsUpdated = doUpdateUserPropsIfNeeded(updateProperties, tableDto, table); boolean snapshotsUpdated = doUpdateSnapshotsIfNeeded(updateProperties, tableDto); boolean policiesUpdated = - doUpdatePoliciesIfNeeded(updateProperties, tableDto, table.properties()); + tablePolicyUpdater.updatePoliciesIfNeeded(updateProperties, tableDto, table.properties()); boolean sortOrderUpdated = doUpdateSortOrderIfNeeded(updateProperties, tableDto, table, writeSchema); // TODO remove tableTypeAdded after all existing tables have been back-filled to have a @@ -354,7 +357,8 @@ private void checkIfTableTypeModified(TableDto tableDto, Table table) { * @param tableDto * @return */ - private Map computePropsForTableCreation(TableDto tableDto) { + @VisibleForTesting + Map computePropsForTableCreation(TableDto tableDto) { // Populate non-preserved keys, mainly user defined properties. Map propertiesMap = tableDto.getTableProperties().entrySet().stream() @@ -416,9 +420,20 @@ private Map computePropsForTableCreation(TableDto tableDto) { TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, Boolean.toString( clusterProperties.isClusterIcebergWriteMetadataDeleteAfterCommitEnabled())); - propertiesMap.put( - TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, - Integer.toString(clusterProperties.getClusterIcebergWriteMetadataPreviousVersionsMax())); + if (!propertiesMap.containsKey(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX)) { + propertiesMap.put( + TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, + Integer.toString(clusterProperties.getClusterIcebergWriteMetadataPreviousVersionsMax())); + log.info( + "Setting the table property: {} to default value: {}.", + TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, + Integer.toString(clusterProperties.getClusterIcebergWriteMetadataPreviousVersionsMax())); + } else { + log.info( + "Using the value: {} for table property: {}.", + propertiesMap.get(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX), + 
TableProperties.METADATA_PREVIOUS_VERSIONS_MAX); + } propertiesMap.put( TableProperties.FORMAT_VERSION, Integer.toString(clusterProperties.getClusterIcebergFormatVersion())); @@ -523,32 +538,6 @@ private void doSetNewIntermediateSchemasIfNeeded( } } - /** - * @param updateProperties - * @param tableDto - * @param existingTableProps - * @return Whether there are any policies-updates actually materialized in properties. - */ - private boolean doUpdatePoliciesIfNeeded( - UpdateProperties updateProperties, - TableDto tableDto, - Map existingTableProps) { - boolean policiesUpdated; - String tableDtoPolicyString = policiesMapper.toPoliciesJsonString(tableDto); - - if (!existingTableProps.containsKey(InternalRepositoryUtils.POLICIES_KEY)) { - updateProperties.set(InternalRepositoryUtils.POLICIES_KEY, tableDtoPolicyString); - policiesUpdated = true; - } else { - String policiesJsonString = existingTableProps.get(InternalRepositoryUtils.POLICIES_KEY); - policiesUpdated = - InternalRepositoryUtils.alterPoliciesIfNeeded( - updateProperties, tableDtoPolicyString, policiesJsonString); - } - - return policiesUpdated; - } - /** * check if an existing table has tableType set in table properties, if not the put operation * should add an explicit entry in tableProperties for openhouse.tableType = "PRIMARY_TABLE" diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/TablePolicyUpdater.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/TablePolicyUpdater.java new file mode 100644 index 000000000..69fe4d52b --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/TablePolicyUpdater.java @@ -0,0 +1,65 @@ +package com.linkedin.openhouse.tables.repository.impl; + +import com.linkedin.openhouse.tables.dto.mapper.iceberg.PoliciesSpecMapper; +import com.linkedin.openhouse.tables.model.TableDto; +import java.util.Map; +import org.apache.iceberg.UpdateProperties; +import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +/** + * Handles updates to table policies by serializing them and storing them to table properties + * Extensions of this class can implement more granular updates to policies by examining tableDto + * class + */ +@Component +public class TablePolicyUpdater { + + @Autowired protected PoliciesSpecMapper policiesMapper; + + /** + * Default policy update behavior that preserves existing functionality. Updates all policies if + * they differ from the existing ones. + * + * @param updateProperties The properties update builder + * @param tableDto The DTO containing new policy values + * @param existingTableProps Current table properties + * @return true if any policies were updated + */ + public boolean updatePoliciesIfNeeded( + UpdateProperties updateProperties, + TableDto tableDto, + Map existingTableProps) { + + boolean policiesUpdated; + + String tableDtoPolicyString = policiesMapper.toPoliciesJsonString(tableDto); + if (!existingTableProps.containsKey(InternalRepositoryUtils.POLICIES_KEY)) { + updateProperties.set(InternalRepositoryUtils.POLICIES_KEY, tableDtoPolicyString); + policiesUpdated = true; + } else { + String policiesJsonString = existingTableProps.get(InternalRepositoryUtils.POLICIES_KEY); + policiesUpdated = + alterPoliciesIfNeeded(updateProperties, tableDtoPolicyString, policiesJsonString); + } + + return policiesUpdated; + } + + /** + * @param updateProperties + * @param policiesFromRequest + * @param policiesFromTable + * @return True if alteration of policies occurred. 
+ */ + protected boolean alterPoliciesIfNeeded( + UpdateProperties updateProperties, String policiesFromRequest, String policiesFromTable) { + boolean policiesUpdated = false; + if (!policiesFromRequest.equals(policiesFromTable)) { + updateProperties.set(InternalRepositoryUtils.POLICIES_KEY, policiesFromRequest); + policiesUpdated = true; + } + + return policiesUpdated; + } +} diff --git a/services/tables/src/main/resources/application-hdfs-diagnostics.properties b/services/tables/src/main/resources/application-hdfs-diagnostics.properties new file mode 100644 index 000000000..06304a8b9 --- /dev/null +++ b/services/tables/src/main/resources/application-hdfs-diagnostics.properties @@ -0,0 +1,9 @@ +# Enable detailed Hadoop/HDFS client diagnostics when this profile is active. +logging.level.org.apache.hadoop.hdfs.DFSClient=DEBUG +logging.level.org.apache.hadoop.hdfs.DFSInputStream=DEBUG +logging.level.org.apache.hadoop.hdfs.DFSOutputStream=DEBUG +logging.level.org.apache.hadoop.hdfs.DataStreamer=DEBUG +logging.level.org.apache.hadoop.io.retry.RetryInvocationHandler=DEBUG +logging.level.org.apache.hadoop.ipc.Client=DEBUG +logging.level.org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider=DEBUG +logging.level.org.apache.hadoop.hdfs.server.namenode.ha.RequestHedgingProxyProvider=DEBUG diff --git a/services/tables/src/main/resources/application.properties b/services/tables/src/main/resources/application.properties index 47448de1a..3db1853ee 100644 --- a/services/tables/src/main/resources/application.properties +++ b/services/tables/src/main/resources/application.properties @@ -17,4 +17,5 @@ management.endpoint.health.enabled=true management.endpoint.shutdown.enabled=true management.endpoint.prometheus.enabled=true management.endpoint.beans.enabled=true -management.metrics.distribution.percentiles-histogram.all=true \ No newline at end of file +management.metrics.distribution.percentiles-histogram.all=true 
+management.metrics.distribution.maximum-expected-value.catalog_metadata_retrieval_latency=600s \ No newline at end of file diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/MetricsHistogramConfigurationTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/MetricsHistogramConfigurationTest.java new file mode 100644 index 000000000..90106ba67 --- /dev/null +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/e2e/h2/MetricsHistogramConfigurationTest.java @@ -0,0 +1,178 @@ +package com.linkedin.openhouse.tables.e2e.h2; + +import static org.junit.jupiter.api.Assertions.*; + +import com.linkedin.openhouse.common.test.cluster.PropertyOverrideContextInitializer; +import com.linkedin.openhouse.tables.mock.properties.AuthorizationPropertiesInitializer; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Timer; +import io.micrometer.core.instrument.distribution.CountAtBucket; +import io.micrometer.core.instrument.distribution.HistogramSnapshot; +import io.micrometer.prometheus.PrometheusMeterRegistry; +import java.time.Duration; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.test.autoconfigure.actuate.metrics.AutoConfigureMetrics; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ContextConfiguration; + +/** + * Tests that verify the histogram bucket configuration from application.properties is properly + * configured for metrics. Specifically tests that the 600s maximum expected value is configured for + * the catalog_metadata_retrieval_latency metric. + * + *

These tests use the actual autowired MeterRegistry from Spring context + * (PrometheusMeterRegistry in production configuration) and verify histogram buckets by directly + * inspecting the Timer's HistogramSnapshot. + * + *

NOTE: {@code @AutoConfigureMetrics} is required because Spring Boot disables metrics exporters + * by default in tests, replacing PrometheusMeterRegistry with SimpleMeterRegistry. This annotation + * re-enables the production metrics configuration. + * + *

    + *
  1. The configuration property is correctly set in application.properties + *
  2. The MeterRegistry is a PrometheusMeterRegistry with histogram buckets + *
  3. Histogram buckets extend to 600 seconds for catalog_metadata_retrieval_latency + *
+ */ +@SpringBootTest +@AutoConfigureMetrics +@ContextConfiguration( + initializers = { + PropertyOverrideContextInitializer.class, + AuthorizationPropertiesInitializer.class + }) +public class MetricsHistogramConfigurationTest { + + @Autowired private MeterRegistry meterRegistry; + + @Value( + "${management.metrics.distribution.maximum-expected-value.catalog_metadata_retrieval_latency:}") + private String maxExpectedValueConfig; + + @Value("${management.metrics.distribution.percentiles-histogram.all:false}") + private boolean percentilesHistogramEnabled; + + /** + * Tests that the application.properties has the correct histogram configuration for + * catalog_metadata_retrieval_latency metric. + * + *

The application.properties setting: + * management.metrics.distribution.maximum-expected-value.catalog_metadata_retrieval_latency=600s + * + *

This configuration ensures histogram buckets extend to 600 seconds (10 minutes) when using a + * production MeterRegistry like Prometheus. + */ + @Test + void testMaxExpectedValueConfigurationIsSet() { + assertNotNull( + maxExpectedValueConfig, + "maximum-expected-value.catalog_metadata_retrieval_latency should be configured"); + assertFalse( + maxExpectedValueConfig.isEmpty(), + "maximum-expected-value.catalog_metadata_retrieval_latency should not be empty"); + assertEquals( + "600s", + maxExpectedValueConfig, + "maximum-expected-value.catalog_metadata_retrieval_latency should be set to 600s"); + } + + /** Tests that percentiles histogram is enabled for all metrics. */ + @Test + void testPercentilesHistogramIsEnabled() { + assertTrue( + percentilesHistogramEnabled, + "management.metrics.distribution.percentiles-histogram.all should be true"); + } + + /** + * Tests that the autowired MeterRegistry is a PrometheusMeterRegistry. This is required for + * histogram buckets to work correctly in production. + */ + @Test + void testMeterRegistryIsPrometheus() { + assertTrue( + meterRegistry instanceof PrometheusMeterRegistry, + String.format( + "MeterRegistry should be PrometheusMeterRegistry but was %s", + meterRegistry.getClass().getName())); + } + + /** + * Tests that histogram buckets are properly configured for catalog_metadata_retrieval_latency + * metric with buckets extending to 600 seconds. + * + *

This test validates that: + * + *

    + *
  1. Recording a value creates histogram bucket entries + *
  2. Histogram buckets include a boundary at or above 600 seconds + *
  3. Values are correctly counted + *
+ */ + @Test + void testHistogramBucketsExtendTo600Seconds() { + Timer timer = meterRegistry.timer("catalog_metadata_retrieval_latency"); + + // Record various latencies including some near and beyond the 600s boundary + timer.record(100, TimeUnit.MILLISECONDS); + timer.record(1, TimeUnit.SECONDS); + timer.record(10, TimeUnit.SECONDS); + timer.record(60, TimeUnit.SECONDS); + timer.record(300, TimeUnit.SECONDS); // 5 minutes + timer.record(550, TimeUnit.SECONDS); // Just under 600s + timer.record(600, TimeUnit.SECONDS); // Exactly 600s (10 minutes) + timer.record(700, TimeUnit.SECONDS); // Beyond 600s + + // Verify all recordings were captured + assertEquals(8, timer.count(), "All recordings should be captured"); + + // Get histogram snapshot and inspect buckets directly + HistogramSnapshot snapshot = timer.takeSnapshot(); + CountAtBucket[] buckets = snapshot.histogramCounts(); + + assertTrue(buckets.length > 0, "Histogram should have bucket entries"); + + // Find the maximum finite bucket boundary (in seconds) + double maxBucketSeconds = + Arrays.stream(buckets) + .mapToDouble(b -> b.bucket(TimeUnit.SECONDS)) + .filter(b -> b != Double.POSITIVE_INFINITY) + .max() + .orElse(0); + + // Build bucket list string for assertion message + String bucketList = + Arrays.stream(buckets) + .map(b -> String.valueOf(b.bucket(TimeUnit.SECONDS))) + .reduce((a, b) -> a + ", " + b) + .orElse("none"); + + assertTrue( + maxBucketSeconds >= 600.0, + String.format( + "Histogram should have buckets extending to at least 600s. " + + "This validates the maximum-expected-value.catalog_metadata_retrieval_latency=600s configuration. " + + "Found max bucket: %.1fs, all buckets: [%s]", + maxBucketSeconds, bucketList)); + } + + /** + * Tests that the 600s configuration value can be parsed as a Duration. This validates the format + * used in application.properties is correct. 
+ */ + @Test + void testConfigurationValueIsParseable() { + // Parse the configuration value as a Duration + Duration maxExpectedDuration = Duration.parse("PT" + maxExpectedValueConfig.toUpperCase()); + + assertEquals( + Duration.ofSeconds(600), maxExpectedDuration, "Configuration should parse to 600 seconds"); + + assertEquals( + 600, maxExpectedDuration.getSeconds(), "Configuration should be exactly 600 seconds"); + } +} diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImplTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImplTest.java new file mode 100644 index 000000000..e4cc20971 --- /dev/null +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/repository/impl/OpenHouseInternalRepositoryImplTest.java @@ -0,0 +1,102 @@ +package com.linkedin.openhouse.tables.repository.impl; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.linkedin.openhouse.cluster.configs.ClusterProperties; +import com.linkedin.openhouse.tables.common.TableType; +import com.linkedin.openhouse.tables.dto.mapper.iceberg.PoliciesSpecMapper; +import com.linkedin.openhouse.tables.model.TableDto; +import com.linkedin.openhouse.tables.repository.PreservedKeyChecker; +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.MeterRegistry; +import java.util.HashMap; +import java.util.Map; +import org.apache.iceberg.TableProperties; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import 
org.mockito.quality.Strictness; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +public class OpenHouseInternalRepositoryImplTest { + + @Mock private PoliciesSpecMapper policiesMapper; + @Mock private MeterRegistry meterRegistry; + @Mock private ClusterProperties clusterProperties; + @Mock private PreservedKeyChecker preservedKeyChecker; + + @InjectMocks private OpenHouseInternalRepositoryImpl openHouseInternalRepository; + + private static final String DB_ID = "db"; + private static final String TABLE_ID = "table"; + private static final String SCHEMA_JSON = + "{\"type\":\"struct\",\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":true,\"type\":\"string\"}]}"; + + @BeforeEach + void setUp() { + when(meterRegistry.counter(anyString())).thenReturn(mock(Counter.class)); + when(preservedKeyChecker.allowKeyInCreation(anyString(), any())).thenReturn(true); + when(policiesMapper.toPoliciesJsonString(any())).thenReturn("{}"); + when(clusterProperties.getClusterIcebergWriteFormatDefault()).thenReturn("parquet"); + } + + @Test + void testComputePropsForTableCreation_DefaultMetadataVersions() { + int clusterDefaultMaxMetadataVersions = 10; + when(clusterProperties.getClusterIcebergWriteMetadataPreviousVersionsMax()) + .thenReturn(clusterDefaultMaxMetadataVersions); + when(clusterProperties.isClusterIcebergWriteMetadataDeleteAfterCommitEnabled()) + .thenReturn(true); + when(clusterProperties.getClusterIcebergFormatVersion()).thenReturn(2); + + TableDto tableDto = createTableDto(new HashMap<>()); + Map actualProps = + openHouseInternalRepository.computePropsForTableCreation(tableDto); + + Assertions.assertEquals( + String.valueOf(clusterDefaultMaxMetadataVersions), + actualProps.get(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX)); + } + + @Test + void testComputePropsForTableCreation_UserProvidedMetadataVersions() { + int clusterDefaultMaxMetadataVersions = 10; + String userProvidedMaxMetadataVersions = "5"; + + // Although cluster 
property is mocked, it shouldn't be used for the key + when(clusterProperties.isClusterIcebergWriteMetadataDeleteAfterCommitEnabled()) + .thenReturn(true); + when(clusterProperties.getClusterIcebergFormatVersion()).thenReturn(2); + + Map userProps = new HashMap<>(); + userProps.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, userProvidedMaxMetadataVersions); + TableDto tableDto = createTableDto(userProps); + + Map actualProps = + openHouseInternalRepository.computePropsForTableCreation(tableDto); + + Assertions.assertEquals( + userProvidedMaxMetadataVersions, + actualProps.get(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX)); + } + + private TableDto createTableDto(Map properties) { + return TableDto.builder() + .databaseId(DB_ID) + .tableId(TABLE_ID) + .schema(SCHEMA_JSON) + .tableProperties(properties) + .tableVersion("v1") + .tableType(TableType.PRIMARY_TABLE) + .build(); + } +} diff --git a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/build.gradle b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/build.gradle index b6e49f1a7..61c247131 100644 --- a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/build.gradle +++ b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/build.gradle @@ -217,10 +217,6 @@ shadowJar { exclude '**/**/*.so' } -// https://github.com/johnrengelman/shadow/issues/335 -// By default shadow doesn't configure the build task to depend on the shadowJar task. 
-tasks.build.dependsOn tasks.shadowJar - test { if (JavaVersion.current() >= JavaVersion.VERSION_1_9){ jvmArgs '--add-opens=java.base/java.net=ALL-UNNAMED' diff --git a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/OpenHouseSparkITest.java b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/OpenHouseSparkITest.java index 4afc8548b..b49e077ce 100644 --- a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/OpenHouseSparkITest.java +++ b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/OpenHouseSparkITest.java @@ -9,27 +9,49 @@ * {@link OpenHouseSparkITest} for the test class. - Use the SparkSession returned by invoking * `getSparkSession`. * + *

This class is thread-safe and supports parallel test execution. The singleton {@link + * OpenHouseLocalServer} is lazily initialized with proper synchronization. + * *

See com.linkedin.openhouse.jobs.spark.TestSparkApp as an example. */ public class OpenHouseSparkITest { private static final String LOCALHOST = "http://localhost:"; private static final String LOCAL_FS = "file:///"; - /** Use a singleton {@link OpenHouseLocalServer} that will shutdown when the JVM exits. */ - private static OpenHouseLocalServer openHouseLocalServer = null; + /** + * Singleton {@link OpenHouseLocalServer} that will shutdown when the JVM exits. This field is + * declared as volatile to ensure proper publication of the initialized instance across threads + * when using double-checked locking pattern. + */ + private static volatile OpenHouseLocalServer openHouseLocalServer = null; /** * Get a spark session with correctly configured OpenHouse catalog. A single session will be * cached and reused for different unit tests. * + *

This method is thread-safe and can be called concurrently from multiple test instances. + * *

Do not {@link SparkSession#close()} a created SparkSession. + * + * @return configured SparkSession + * @throws Exception if session creation fails */ - protected synchronized SparkSession getSparkSession() throws Exception { + protected SparkSession getSparkSession() throws Exception { return TestSparkSessionUtil.createSparkSession(getBuilder()); } - protected synchronized SparkSession getSparkSession( - String overrideCatalogName, Map overrides) throws Exception { + /** + * Get a spark session with a custom catalog configuration. + * + *

This method is thread-safe and can be called concurrently from multiple test instances. + * + * @param overrideCatalogName the name of the catalog to override + * @param overrides additional spark configuration overrides + * @return configured SparkSession + * @throws Exception if session creation fails + */ + protected SparkSession getSparkSession(String overrideCatalogName, Map overrides) + throws Exception { SparkSession.Builder builder = getBuilder(); TestSparkSessionUtil.configureCatalogs( builder, overrideCatalogName, getOpenHouseLocalServerURI()); @@ -39,11 +61,35 @@ protected synchronized SparkSession getSparkSession( return TestSparkSessionUtil.createSparkSession(builder); } - protected SparkSession.Builder getBuilder() throws Exception { - if (openHouseLocalServer == null) { - openHouseLocalServer = new OpenHouseLocalServer(); - openHouseLocalServer.start(); + /** + * Initializes the singleton OpenHouseLocalServer if not already started. Uses double-checked + * locking pattern for optimal performance - synchronization only occurs during initialization. + * + * @throws Exception if server initialization fails + */ + private static void startOpenHouseLocalServer() throws Exception { + if (openHouseLocalServer == null) { // First check (no locking) - fast path + synchronized (OpenHouseSparkITest.class) { + if (openHouseLocalServer == null) { // Second check (with lock) - ensures singleton + openHouseLocalServer = new OpenHouseLocalServer(); + openHouseLocalServer.start(); + } + } } + } + + /** + * Build a SparkSession.Builder with OpenHouse catalog configured. This method is thread-safe and + * can be overridden by subclasses that need custom builder configuration. + * + *

Subclasses overriding this method should call super.getBuilder() to leverage the thread-safe + * server initialization. + * + * @return configured SparkSession.Builder + * @throws Exception if builder creation fails + */ + protected SparkSession.Builder getBuilder() throws Exception { + startOpenHouseLocalServer(); SparkSession.Builder builder = TestSparkSessionUtil.getBaseBuilder(URI.create(LOCAL_FS)); TestSparkSessionUtil.configureCatalogs(builder, "openhouse", getOpenHouseLocalServerURI()); // default_iceberg catalog need to point to the custom one, required by Iceberg maintenance diff --git a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/TestSparkSessionUtil.java b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/TestSparkSessionUtil.java index 2ebdf8a46..5404c3248 100644 --- a/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/TestSparkSessionUtil.java +++ b/tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/java/com/linkedin/openhouse/tablestest/TestSparkSessionUtil.java @@ -25,6 +25,12 @@ public static SparkSession.Builder getBaseBuilder(URI fsURI) { ("org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions," + "com.linkedin.openhouse.spark.extensions.OpenhouseSparkSessionExtensions")) .config("spark.hadoop.fs.defaultFS", fsURI.toString()) + // Set session timezone to UTC to ensure consistent timestamp handling across different + // system timezones. This prevents test flakiness when timestamp operations + // (like using from_unixtime) are interpreted differently based on the + // system's local timezone. With UTC, all tests behave consistently + // regardless of where they run. 
+ .config("spark.sql.session.timeZone", "UTC") .config("spark.sql.autoBroadcastJoinThreshold", "-1") .config("spark.driver.memory", "2g") .config("spark.driver.bindAddress", "127.0.0.1"); diff --git a/tables-test-fixtures/tables-test-fixtures-iceberg-1.5/build.gradle b/tables-test-fixtures/tables-test-fixtures-iceberg-1.5/build.gradle index 188f14312..4a03ec874 100644 --- a/tables-test-fixtures/tables-test-fixtures-iceberg-1.5/build.gradle +++ b/tables-test-fixtures/tables-test-fixtures-iceberg-1.5/build.gradle @@ -234,10 +234,6 @@ shadowJar { exclude '**/**/*.so' } -// https://github.com/johnrengelman/shadow/issues/335 -// By default shadow doesn't configure the build task to depend on the shadowJar task. -tasks.build.dependsOn tasks.shadowJar - test { if (JavaVersion.current() >= JavaVersion.VERSION_1_9){ jvmArgs '--add-opens=java.base/java.net=ALL-UNNAMED'