diff --git a/docs/user_guide/ASCEND_NPU.md b/docs/user_guide/ASCEND_NPU.md index be84d9def..8e429ceb6 100644 --- a/docs/user_guide/ASCEND_NPU.md +++ b/docs/user_guide/ASCEND_NPU.md @@ -14,10 +14,11 @@ Please refer to **[Ascend NPU Supported Matrix](../supported_matrix/ASCEND_NPU.m ## Features Support -|Device|Hybrid Cache|Context Parallel|Tensor Parallel|Text Encoder Parallel|Auto Encoder(VAE) Parallel| -|:---|:---:|:---:|:---:|:---:|:---:| -|Atlas 800T A2|✅|✅|✅|✅|✅| -|Atlas 800I A2|✅|✅|✅|✅|✅| +|Device|Hybrid Cache|Context Parallel|Tensor Parallel|Text Encoder Parallel|Auto Encoder(VAE) Parallel|Compilation| +|:---|:---:|:---:|:---:|:---:|:---:|:---:| +|Atlas 800T A2|✅|✅|✅|✅|✅|🟡| +|Atlas 800I A2|✅|✅|✅|✅|✅|🟡| +> 🟡: Experimental Feature ## Attention backend @@ -200,3 +201,36 @@ torchrun --nproc_per_node=4 -m cache_dit.generate flux --parallel ulysses --cach torchrun --nproc_per_node=4 -m cache_dit.generate zimage --parallel ulysses --cache --attn _native_npu torchrun --nproc_per_node=4 -m cache_dit.generate qwen_image --parallel ulysses --cache --attn _native_npu ``` + +## [Experimental] Speedup with MindIE-SD Compilation + +MindIE-SD is an open-source acceleration framework for diffusion models on Ascend NPU. By providing a custom `MindieSDBackend` for `torch.compile`, it enables automatic operator fusion and optimization for enhanced performance on Ascend hardware. For detailed documentation and examples, visit [MindIE-SD](https://gitcode.com/Ascend/MindIE-SD). 
+ + +### Install MindIE-SD + +```bash +git clone --branch master --single-branch https://gitcode.com/Ascend/MindIE-SD.git +cd MindIE-SD +pip install wheel +python3 setup.py bdist_wheel +pip install ./dist/*.whl --force-reinstall +``` + +### Enable MindIE-SD Compilation + +```bash +python3 generate.py flux --attn _native_npu --compile +``` + +### Performance + +The performance of MindIE-SD Compilation is as follows (device 910B3): + +|Model|Batch Size|Resolution|Compile|E2E Time(s)| +|:---|:---:|:---:|:---:|:---:| +|flux.1-dev|1|1024x1024|❌|14.09| +|flux.1-dev|1|1024x1024|✅|12.85| + +⚠️ **Experimental Feature**: MindIE-SD Compilation is currently in the experimental stage. +For bug reports, feature requests, or detailed information, please visit the [MindIE-SD Compilation Documentation](https://gitcode.com/Ascend/MindIE-SD/blob/master/docs/features/compilation.md). diff --git a/src/cache_dit/__init__.py b/src/cache_dit/__init__.py index a70ac1672..a9fdd203b 100644 --- a/src/cache_dit/__init__.py +++ b/src/cache_dit/__init__.py @@ -34,6 +34,7 @@ from .parallelism import ParallelismBackend from .parallelism import ParallelismConfig from .compile import set_compile_configs +from .compile import maybe_wrap_torch_compile from .summary import supported_matrix from .summary import summary from .summary import strify @@ -54,3 +55,5 @@ Pattern_3 = ForwardPattern.Pattern_3 Pattern_4 = ForwardPattern.Pattern_4 Pattern_5 = ForwardPattern.Pattern_5 + +maybe_wrap_torch_compile() diff --git a/src/cache_dit/compile/__init__.py b/src/cache_dit/compile/__init__.py index 5d8f9058d..28baee72c 100644 --- a/src/cache_dit/compile/__init__.py +++ b/src/cache_dit/compile/__init__.py @@ -1 +1,2 @@ from .utils import set_compile_configs +from .dispatch import maybe_wrap_torch_compile diff --git a/src/cache_dit/compile/dispatch.py b/src/cache_dit/compile/dispatch.py new file mode 100644 index 000000000..7737ad9a7 --- /dev/null +++ b/src/cache_dit/compile/dispatch.py @@ -0,0 +1,53 @@ +import 
import torch
import functools
from typing import Optional, Callable

from cache_dit.platforms import current_platform
from cache_dit.logger import init_logger

logger = init_logger(__name__)

# Saved reference to the unpatched torch.compile. Doubles as the
# "already patched" sentinel for maybe_wrap_torch_compile().
_original_torch_compile: Optional[Callable] = None


def _get_mindiesd_backend():
    """Return a ``MindieSDBackend`` instance, or ``None`` when the
    optional ``mindiesd`` package is not installed."""
    try:
        from mindiesd.compilation import MindieSDBackend
    except ImportError:
        return None
    return MindieSDBackend()


def maybe_wrap_torch_compile() -> None:
    """Monkey-patch ``torch.compile`` to default to MindIE-SD on Ascend NPU.

    On NPU platforms, calls to ``torch.compile`` that do not pass an explicit
    ``backend=`` keyword are redirected to MindIE-SD's compile backend when
    the ``mindiesd`` package is importable; otherwise an install hint is
    logged. Calls with an explicit backend, and non-NPU platforms, are left
    untouched. Idempotent: subsequent calls are no-ops once patched.
    """
    global _original_torch_compile

    # Avoid duplicate patch — the sentinel is set on first successful wrap.
    if _original_torch_compile is not None:
        return

    _original_torch_compile = torch.compile

    # Resolve the MindIE-SD backend once at patch time rather than on
    # every torch.compile() call.
    mindiesd_backend = _get_mindiesd_backend()

    @functools.wraps(_original_torch_compile)
    def patched_compile(*args, **kwargs):
        if "backend" not in kwargs and "npu" in current_platform.device_type:
            # Explicit None check: do not rely on the backend object's
            # truthiness, since a third-party object may define
            # __bool__/__len__ with surprising semantics.
            if mindiesd_backend is not None:
                logger.warning(
                    "NPU platform detected with MindIE-SD available. "
                    "torch.compile will default to MindIESDBackend. "
                    "Override it with torch.compile(backend=...) if needed."
                )
                kwargs["backend"] = mindiesd_backend
            else:
                logger.warning(
                    "NPU platform detected but MindIE-SD not found. "
                    "Run `pip install mindiesd` for better NPU performance on Compilation."
                )

        return _original_torch_compile(*args, **kwargs)

    # Patch Torch Compile: replace the public entry point with the wrapper.
    torch.compile = patched_compile