From ecab98e0389602111e3e866f3b667e295c9d9e57 Mon Sep 17 00:00:00 2001 From: Jake Wilkins Date: Mon, 13 Oct 2025 15:03:46 +0100 Subject: [PATCH 1/2] Default config values for ngpus, scheduler_ngpus and worker_ngpus are now converted to integers before being compared against zero. Previously, these values were assumed to be integers, which caused a type error for all instantiations of GCPCluster and was therefore a breaking change. --- dask_cloudprovider/gcp/instances.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dask_cloudprovider/gcp/instances.py b/dask_cloudprovider/gcp/instances.py index f0c308a2..47707b34 100644 --- a/dask_cloudprovider/gcp/instances.py +++ b/dask_cloudprovider/gcp/instances.py @@ -662,17 +662,18 @@ def __init__( self.scheduler_machine_type = machine_type self.worker_machine_type = machine_type - self.ngpus = ngpus or self.config.get("ngpus") + ngpus_value = ngpus or self.config.get("ngpus") + self.ngpus = int(ngpus_value) if ngpus_value else None if not self.ngpus: self.scheduler_ngpus = ( scheduler_ngpus if scheduler_ngpus is not None - else self.config.get("scheduler_ngpus", 0) + else int(self.config.get("scheduler_ngpus") or 0) ) self.worker_ngpus = ( worker_ngpus if worker_ngpus is not None - else self.config.get("worker_ngpus", 0) + else int(self.config.get("worker_ngpus") or 0) ) else: if scheduler_ngpus is not None or worker_ngpus is not None: From d52a2112b4dd94881ce1642c2909d21cb5629585 Mon Sep 17 00:00:00 2001 From: Jake Wilkins Date: Tue, 24 Mar 2026 17:52:51 +0000 Subject: [PATCH 2/2] Changed all 6 GPU config defaults from "" to null. This resolves the root cause: an empty string being compared to an integer. Replaced the int() conversion with `or 0` for scheduler_ngpus and worker_ngpus. Reverted ngpus back to the simpler expression, since None is already falsy. 
Added a regression test that checks ngpus is None and scheduler_ngpus/worker_ngpus are 0 when no GPU args are passed --- dask_cloudprovider/cloudprovider.yaml | 12 ++++++------ dask_cloudprovider/gcp/instances.py | 7 +++---- dask_cloudprovider/gcp/tests/test_gcp.py | 11 +++++++++++ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/dask_cloudprovider/cloudprovider.yaml b/dask_cloudprovider/cloudprovider.yaml index b8448af6..fe6c8740 100755 --- a/dask_cloudprovider/cloudprovider.yaml +++ b/dask_cloudprovider/cloudprovider.yaml @@ -105,12 +105,12 @@ cloudprovider: scheduler_machine_type: "n1-standard-1" # size of the machine type to use for the scheduler worker_machine_type: "n1-standard-1" # size of the machine type to use for all workers filesystem_size: 50 # amount in GBs of hard drive space to allocate - ngpus: "" # number of GPUs to use. If provided, will be used for both scheduler and worker - gpu_type: "" # type of gpus to use. (e.g. 'nvidia-tesla-t4'). You can view the possible values through ``gcloud compute accelerator-types list``. If provided, will be used for both scheduler and worker - scheduler_ngpus: "" # number of GPUs to use on scheduler - scheduler_gpu_type: "" # type of gpus to use. (e.g. 'nvidia-tesla-t4'). You can view the possible values through ``gcloud compute accelerator-types list``. - worker_ngpus: "" # number of GPUs to use on worker - worker_gpu_type: "" # type of gpus to use. (e.g. 'nvidia-tesla-t4'). You can view the possible values through ``gcloud compute accelerator-types list``. + ngpus: null # number of GPUs to use. If provided, will be used for both scheduler and worker + gpu_type: null # type of gpus to use. (e.g. 'nvidia-tesla-t4'). You can view the possible values through ``gcloud compute accelerator-types list``. If provided, will be used for both scheduler and worker + scheduler_ngpus: null # number of GPUs to use on scheduler + scheduler_gpu_type: null # type of gpus to use. (e.g. 'nvidia-tesla-t4'). 
You can view the possible values through ``gcloud compute accelerator-types list``. + worker_ngpus: null # number of GPUs to use on worker + worker_gpu_type: null # type of gpus to use. (e.g. 'nvidia-tesla-t4'). You can view the possible values through ``gcloud compute accelerator-types list``. disk_type: "pd-standard" # type of disk to use: pd-standard, pd-ssd docker_image: "daskdev/dask:latest" # docker image to use auto_shutdown: true # Shutdown instances automatically if the scheduler or worker services time out. diff --git a/dask_cloudprovider/gcp/instances.py b/dask_cloudprovider/gcp/instances.py index 47707b34..c39a1eae 100644 --- a/dask_cloudprovider/gcp/instances.py +++ b/dask_cloudprovider/gcp/instances.py @@ -662,18 +662,17 @@ def __init__( self.scheduler_machine_type = machine_type self.worker_machine_type = machine_type - ngpus_value = ngpus or self.config.get("ngpus") - self.ngpus = int(ngpus_value) if ngpus_value else None + self.ngpus = ngpus or self.config.get("ngpus") if not self.ngpus: self.scheduler_ngpus = ( scheduler_ngpus if scheduler_ngpus is not None - else int(self.config.get("scheduler_ngpus") or 0) + else self.config.get("scheduler_ngpus") or 0 ) self.worker_ngpus = ( worker_ngpus if worker_ngpus is not None - else int(self.config.get("worker_ngpus") or 0) + else self.config.get("worker_ngpus") or 0 ) else: if scheduler_ngpus is not None or worker_ngpus is not None: diff --git a/dask_cloudprovider/gcp/tests/test_gcp.py b/dask_cloudprovider/gcp/tests/test_gcp.py index bf96e9af..039923f4 100644 --- a/dask_cloudprovider/gcp/tests/test_gcp.py +++ b/dask_cloudprovider/gcp/tests/test_gcp.py @@ -53,6 +53,17 @@ async def test_init(): assert cluster.status == Status.created +@pytest.mark.asyncio +async def test_init_gpu_config_defaults(): + """Regression test for https://github.com/dask/dask-cloudprovider/pull/479.""" + skip_without_credentials() + + cluster = GCPCluster(asynchronous=True) + assert cluster.ngpus is None + assert 
cluster.scheduler_ngpus == 0 + assert cluster.worker_ngpus == 0 + + @pytest.mark.asyncio async def test_get_cloud_init(): skip_without_credentials()