42 changes: 38 additions & 4 deletions docs/user_guide/ASCEND_NPU.md
@@ -14,10 +14,11 @@ Please refer to **[Ascend NPU Supported Matrix](../supported_matrix/ASCEND_NPU.m

## Features Support

|Device|Hybrid Cache|Context Parallel|Tensor Parallel|Text Encoder Parallel|Auto Encoder(VAE) Parallel|
|:---|:---:|:---:|:---:|:---:|:---:|
|Atlas 800T A2|✅|✅|✅|✅|✅|
|Atlas 800I A2|✅|✅|✅|✅|✅|
|Device|Hybrid Cache|Context Parallel|Tensor Parallel|Text Encoder Parallel|Auto Encoder(VAE) Parallel|Compilation|
|:---|:---:|:---:|:---:|:---:|:---:|:---:|
|Atlas 800T A2|✅|✅|✅|✅|✅|🟡|
|Atlas 800I A2|✅|✅|✅|✅|✅|🟡|
> 🟡: Experimental Feature

## Attention backend

@@ -200,3 +201,36 @@ torchrun --nproc_per_node=4 -m cache_dit.generate flux --parallel ulysses --cach
torchrun --nproc_per_node=4 -m cache_dit.generate zimage --parallel ulysses --cache --attn _native_npu
torchrun --nproc_per_node=4 -m cache_dit.generate qwen_image --parallel ulysses --cache --attn _native_npu
```

## [Experimental] Speedup with MindIE-SD Compilation

MindIE-SD is an open-source acceleration framework for diffusion models on Ascend NPU. By providing a custom `MindieSDBackend` for `torch.compile`, it enables automatic operator fusion and optimization for enhanced performance on Ascend hardware. For detailed documentation and examples, visit [MindIE-SD](https://gitcode.com/Ascend/MindIE-SD).
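The mechanism behind this integration can be sketched in plain Python: a compile entry point is wrapped so that a platform-specific backend becomes the default when the caller does not pass one. This is a minimal, hypothetical sketch — `wrap_with_default_backend` and `fake_compile` are illustrative names standing in for the real patching logic and `torch.compile`, not part of cache-dit or MindIE-SD:

```python
import functools


def wrap_with_default_backend(compile_fn, default_backend):
    """Wrap a compile entry point so that `backend` defaults to
    `default_backend` when the caller does not pass one explicitly."""

    @functools.wraps(compile_fn)
    def patched(*args, **kwargs):
        # Only inject the default; an explicit backend= always wins.
        kwargs.setdefault("backend", default_backend)
        return compile_fn(*args, **kwargs)

    return patched


# Stand-in for torch.compile that simply reports the backend it received.
def fake_compile(fn, backend="inductor"):
    return f"{fn.__name__} compiled with {backend}"


def model():
    pass


patched = wrap_with_default_backend(fake_compile, "mindiesd")
print(patched(model))                      # → model compiled with mindiesd
print(patched(model, backend="inductor"))  # → model compiled with inductor
```

An explicit `backend=` argument passed by the user still takes precedence, which is why opting out of the default backend remains possible.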


### Install MindIE-SD

```bash
git clone --branch master --single-branch https://gitcode.com/Ascend/MindIE-SD.git
cd MindIE-SD
pip install wheel
python3 setup.py bdist_wheel
pip install ./dist/*.whl --force-reinstall
```

### Enable MindIE-SD Compilation

```bash
python3 generate.py flux --attn _native_npu --compile
```

### Performance

Measured performance of MindIE-SD Compilation on an Ascend 910B3 device (in this setup, compilation reduces end-to-end time by roughly 9%):

|Model|Batch Size|Resolution|Compile|E2E Time(s)|
|:---|:---:|:---:|:---:|:---:|
|flux.1-dev|1|1024x1024|❌|14.09|
|flux.1-dev|1|1024x1024|✅|12.85|

⚠️ **Experimental Feature**: MindIE-SD Compilation is currently in the experimental stage.
For bug reports, feature requests, or detailed information, please visit the [MindIE-SD Compilation Documentation](https://gitcode.com/Ascend/MindIE-SD/blob/master/docs/features/compilation.md).
3 changes: 3 additions & 0 deletions src/cache_dit/__init__.py
@@ -34,6 +34,7 @@
from .parallelism import ParallelismBackend
from .parallelism import ParallelismConfig
from .compile import set_compile_configs
from .compile import maybe_wrap_torch_compile
from .summary import supported_matrix
from .summary import summary
from .summary import strify
@@ -54,3 +55,5 @@
Pattern_3 = ForwardPattern.Pattern_3
Pattern_4 = ForwardPattern.Pattern_4
Pattern_5 = ForwardPattern.Pattern_5

maybe_wrap_torch_compile()
1 change: 1 addition & 0 deletions src/cache_dit/compile/__init__.py
@@ -1 +1,2 @@
from .utils import set_compile_configs
from .dispatch import maybe_wrap_torch_compile
53 changes: 53 additions & 0 deletions src/cache_dit/compile/dispatch.py
@@ -0,0 +1,53 @@
import torch
import functools
from typing import Optional, Callable
from cache_dit.platforms import current_platform
from cache_dit.logger import init_logger

logger = init_logger(__name__)
_original_torch_compile: Optional[Callable] = None


def _get_mindiesd_backend():
    try:
        from mindiesd.compilation import MindieSDBackend

        _backend = MindieSDBackend()
    except ImportError:
        _backend = None

    return _backend


def maybe_wrap_torch_compile():
> **Review comment (Member):** rename to `_maybe_xxx` since we don't want users to use this helper function directly.

    global _original_torch_compile

    # Avoid duplicate patch
    if _original_torch_compile is not None:
        return

    _original_torch_compile = torch.compile

    # MindIESD Backend Available
> **Review comment (Member):** We should check the platform before patching. The patch should only be applied when the platform is NPU; otherwise, skip it.
>
> Suggested change:
>
> ```python
>     # MindIESD Backend Available
>     if current_platform.device_type != "npu":
>         return  # DO NOTHING
> ```

    mindiesd_backend = _get_mindiesd_backend()

    @functools.wraps(_original_torch_compile)
    def patched_compile(*args, **kwargs):
        if "backend" not in kwargs and "npu" in current_platform.device_type:
            if mindiesd_backend:
                logger.warning(
                    "NPU platform detected with MindIE-SD available. "
                    "torch.compile will default to MindieSDBackend. "
                    "Override it with torch.compile(backend=...) if needed."
                )
                kwargs["backend"] = mindiesd_backend
            else:
                logger.warning(
                    "NPU platform detected but MindIE-SD not found. "
                    "Run `pip install mindiesd` for better compilation performance on NPU."
                )

        return _original_torch_compile(*args, **kwargs)

    # Patch Torch Compile
    torch.compile = patched_compile