-
Notifications
You must be signed in to change notification settings - Fork 287
[wip] Support ray/dynamo nightly + vLLM 0.22 (cu129) across all extras #2064
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4f0cdf4
122386d
670328c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| import importlib.metadata | ||
| import json | ||
| import tempfile | ||
| from functools import reduce | ||
|
|
@@ -24,6 +25,7 @@ | |
|
|
||
| import ray | ||
| from loguru import logger | ||
| from packaging.requirements import InvalidRequirement, Requirement | ||
|
|
||
| from nemo_curator.core.serve.base import BaseModelConfig | ||
| from nemo_curator.core.serve.dynamo.infra import ( | ||
|
|
@@ -50,19 +52,85 @@ | |
| from nemo_curator.core.serve.placement import ReplicaBundleSpec | ||
|
|
||
|
|
||
| # ai-dynamo[vllm]'s [vllm] extra carries a hard ray pin, but Ray refuses | ||
| # actor venvs whose ray version differs from the cluster head's. uv has no | ||
| # inline override syntax — only ``--override <file>`` — so we materialize a | ||
| # tiny constraints file at a fixed path on every node via | ||
| # ``ensure_actor_overrides_on_all_nodes``; the content is derived from the | ||
| # driver's ``ray.__version__`` at fan-out time so a future Curator ray bump | ||
| # doesn't need a code change here. | ||
| # The actor venv ``uv pip install`` needs overrides that pyproject's ``[tool.uv]`` | ||
| # can't reach (Ray runs it in an empty cwd). uv has no inline override syntax — | ||
| # only ``--override <file>`` — so we materialize a constraints file at a fixed path | ||
| # on every node via ``ensure_actor_overrides_on_all_nodes``. It carries: | ||
| # * ``ray==<driver version>`` — ai-dynamo[vllm]'s [vllm] extra has a hard ray pin, | ||
| # but Ray refuses actor venvs whose ray differs from the cluster head's. Derived | ||
| # from the driver's ``ray.__version__`` so a future Curator ray bump needs no edit. | ||
| # * ``nixl-cu13`` dropped — ai-dynamo[vllm] pulls the CUDA-13 NIXL backend, whose | ||
| # eagerly-imported ``nixl_ep_cpp.so`` dlopens libcudart.so.13 (absent on this | ||
| # CUDA-12.9 image). The base image excludes it via pyproject, but that override | ||
| # doesn't reach this standalone install; re-apply it here so the cu12 backend wins. | ||
| _ACTOR_VENV_OVERRIDES_PATH = Path(tempfile.gettempdir()) / "nemo_curator_dynamo_actor_overrides.txt" | ||
| _ACTOR_VENV_NIXL_CU13_EXCLUSION = "nixl-cu13 ; sys_platform == 'never'" | ||
| # The CUDA build the actor venv must match (torch ecosystem + vllm wheel variant). | ||
| _ACTOR_VENV_CUDA_TAG = "cu129" | ||
|
|
||
|
|
||
| def _vllm_cu129_index_url() -> str | None: | ||
| """The vLLM cu129 wheel index for the exact version ai-dynamo[vllm] pins. | ||
|
|
||
| ai-dynamo's [vllm] extra pins an exact vllm (e.g. ``==0.22.1``) that may | ||
| differ from Curator's base vllm — the base installs ai-dynamo WITHOUT its | ||
| [vllm] extra, so its vllm comes from Curator's own pin, while the actor | ||
| venv installs ``ai-dynamo[vllm]`` and must honor ai-dynamo's pin. vLLM | ||
| publishes a per-version cu129 wheel index at ``wheels.vllm.ai/<v>/cu129``; | ||
| pointing at the pinned version means its ``+cu129`` local build sorts above | ||
| the default cu130 wheel under unsafe-best-match. Derived from ai-dynamo's | ||
| own metadata so a nightly bump (which changes the vllm pin) needs no edit. | ||
|
|
||
| Returns None if ai-dynamo (or its vllm pin) can't be found — only happens | ||
| when the dynamo backend isn't actually installed, where this is unused. | ||
| """ | ||
| try: | ||
| requirements = importlib.metadata.requires("ai-dynamo") or [] | ||
| except importlib.metadata.PackageNotFoundError: | ||
| return None | ||
| for raw in requirements: | ||
| try: | ||
| req = Requirement(raw) | ||
| except InvalidRequirement: | ||
| continue # a malformed Requires-Dist line must not break module import | ||
| # Match vllm only as it applies under the [vllm] extra we install (skip a vllm | ||
| # pin that some other ai-dynamo extra might add under a different marker). | ||
| if req.name != "vllm" or (req.marker is not None and not req.marker.evaluate({"extra": "vllm"})): | ||
| continue | ||
| pinned = next((spec.version for spec in req.specifier if spec.operator in ("==", "===")), None) | ||
| if pinned: | ||
| return f"https://wheels.vllm.ai/{pinned}/{_ACTOR_VENV_CUDA_TAG}" | ||
| return None | ||
|
Comment on lines
+72
to
+103
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing test coverage: |
||
|
|
||
|
|
||
| # Ray builds the actor venv with a bare ``uv pip install`` in an empty cwd, so it | ||
| # inherits none of the project's ``[tool.uv]`` index/source/prerelease config — only | ||
| # what we pass here. Force CUDA 12.9 the way vLLM documents for uv: --torch-backend | ||
| # routes the torch ecosystem to the cu129 index, and the per-version cu129 vllm index | ||
| # (see ``_vllm_cu129_index_url``) keeps vllm on cu129. ``unsafe-best-match`` is REQUIRED | ||
| # so nixl resolves (its version is split across pypi.nvidia.com and PyPI, which the | ||
| # default first-match strategy can't combine). | ||
# Fixed uv flags first, then one ``--extra-index-url <url>`` pair per extra index.
# ``_vllm_cu129_index_url()`` may return None, in which case its pair is omitted.
_ACTOR_VENV_UV_OPTIONS = [
    "--override",
    str(_ACTOR_VENV_OVERRIDES_PATH),
    "--torch-backend",
    _ACTOR_VENV_CUDA_TAG,
    "--index-strategy",
    "unsafe-best-match",
    "--prerelease",
    "if-necessary-or-explicit",
] + [
    token
    for index_url in ("https://pypi.nvidia.com", _vllm_cu129_index_url())
    if index_url is not None
    for token in ("--extra-index-url", index_url)
]
|
|
||
| DYNAMO_VLLM_RUNTIME_ENV: dict[str, Any] = { | ||
| "uv": { | ||
| "packages": ["ai-dynamo[vllm]"], | ||
| "uv_pip_install_options": ["--override", str(_ACTOR_VENV_OVERRIDES_PATH)], | ||
| "uv_pip_install_options": _ACTOR_VENV_UV_OPTIONS, | ||
| }, | ||
| "config": {"setup_timeout_seconds": 600}, | ||
| } | ||
|
|
@@ -78,7 +146,8 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No | |
|
|
||
| The file pins ``ray=={ray.__version__}`` (read from the driver) so the | ||
| actor venv keeps the same ray patch as the cluster head — Ray rejects | ||
| any mismatch. | ||
| any mismatch — and drops ``nixl-cu13`` so the cu12 NIXL backend is used | ||
| (see module comment on :data:`_ACTOR_VENV_OVERRIDES_PATH`). | ||
|
|
||
| Must run inside an active Ray context, before any worker spawned with | ||
| :data:`DYNAMO_VLLM_RUNTIME_ENV` lands. The runtime_env_agent on each | ||
|
|
@@ -91,7 +160,7 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No | |
| run_on_each_node( | ||
| _write_actor_overrides_file, | ||
| str(_ACTOR_VENV_OVERRIDES_PATH), | ||
| f"ray=={ray.__version__}\n", | ||
| f"ray=={ray.__version__}\n{_ACTOR_VENV_NIXL_CU13_EXCLUSION}\n", | ||
| ignore_head_node=ignore_head_node, | ||
| ) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
importlib.metadata.requires("ai-dynamo") returns every requirement across all extras. The [vllm] extra's entry looks like "vllm==0.22.x ; extra == \"vllm\"", but so would any vllm pin in a hypothetical [vllm-dev] or other extra. The loop returns on the first vllm-named req without checking req.marker for extra == "vllm", so a future ai-dynamo refactor that adds a secondary vllm constraint could silently pick the wrong index URL. Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!