From e82259f057eacc7a73863a0403c452d36ed45f4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-Micha=C3=ABl=20Celerier?=
 <jeanmichael.celerier@gmail.com>
Date: Thu, 12 Feb 2026 14:49:12 -0500
Subject: [PATCH 1/3] sdxl

---
 src/streamdiffusion/acceleration/tensorrt/__init__.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/streamdiffusion/acceleration/tensorrt/__init__.py b/src/streamdiffusion/acceleration/tensorrt/__init__.py
index 93d541c..8fdf2f7 100644
--- a/src/streamdiffusion/acceleration/tensorrt/__init__.py
+++ b/src/streamdiffusion/acceleration/tensorrt/__init__.py
@@ -282,7 +282,7 @@ def accelerate_with_tensorrt(
     # Check if we'll use pre-built ONNX (needed for model selection)
     prebuilt_onnx_dir = os.path.join(os.path.dirname(engine_dir), "engines_sdxl_turbo")
     prebuilt_unet_onnx = os.path.join(prebuilt_onnx_dir, "unetxl.opt", "model.onnx")
-    use_prebuilt_onnx = stream.sdxl and os.path.exists(prebuilt_unet_onnx)
+    use_prebuilt_onnx = True # stream.sdxl and os.path.exists(prebuilt_unet_onnx)
 
     # Use SDXL-specific model configuration if SDXL pipeline
     if stream.sdxl:
@@ -442,9 +442,7 @@ def accelerate_with_tensorrt(
                 engine_path=unet_engine_path,
                 onnx_opt_path=onnx_opt_path,
                 model_data=unet_model,
-                opt_image_height=stream.height,
-                opt_image_width=stream.width,
-                opt_batch_size=opt_batch_size,
+    opt_batch_size=1,
                 **engine_build_options,
             )
         else:

From 5d3af48eca148bf32a529bdef1192c662cdec05f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-Micha=C3=ABl=20Celerier?=
 <jeanmichael.celerier@gmail.com>
Date: Thu, 12 Feb 2026 14:49:17 -0500
Subject: [PATCH 2/3] Revert "sdxl"

This reverts commit e82259f057eacc7a73863a0403c452d36ed45f4d (PATCH 1/3 in this series; originally d2bfc9257d04ec88869da6caaddb9271efb4d5b0).
---
 src/streamdiffusion/acceleration/tensorrt/__init__.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/streamdiffusion/acceleration/tensorrt/__init__.py b/src/streamdiffusion/acceleration/tensorrt/__init__.py
index 8fdf2f7..93d541c 100644
--- a/src/streamdiffusion/acceleration/tensorrt/__init__.py
+++ b/src/streamdiffusion/acceleration/tensorrt/__init__.py
@@ -282,7 +282,7 @@ def accelerate_with_tensorrt(
     # Check if we'll use pre-built ONNX (needed for model selection)
     prebuilt_onnx_dir = os.path.join(os.path.dirname(engine_dir), "engines_sdxl_turbo")
     prebuilt_unet_onnx = os.path.join(prebuilt_onnx_dir, "unetxl.opt", "model.onnx")
-    use_prebuilt_onnx = True # stream.sdxl and os.path.exists(prebuilt_unet_onnx)
+    use_prebuilt_onnx = stream.sdxl and os.path.exists(prebuilt_unet_onnx)
 
     # Use SDXL-specific model configuration if SDXL pipeline
     if stream.sdxl:
@@ -442,7 +442,9 @@ def accelerate_with_tensorrt(
                 engine_path=unet_engine_path,
                 onnx_opt_path=onnx_opt_path,
                 model_data=unet_model,
-    opt_batch_size=1,
+                opt_image_height=stream.height,
+                opt_image_width=stream.width,
+                opt_batch_size=opt_batch_size,
                 **engine_build_options,
             )
         else:

From 5da2c91746cfad1c50ce4ed7e95c9f85ecda8356 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-Micha=C3=ABl=20Celerier?=
 <jeanmichael.celerier@gmail.com>
Date: Thu, 2 Apr 2026 10:58:39 -0400
Subject: [PATCH 3/3] wip: disable prebuilt SDXL ONNX, add UNet I/O helpers

---
 .../acceleration/tensorrt/__init__.py         |  2 +-
 .../acceleration/tensorrt/models.py           | 24 +++++++++++++++++++
 .../acceleration/tensorrt/utilities.py        |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/streamdiffusion/acceleration/tensorrt/__init__.py b/src/streamdiffusion/acceleration/tensorrt/__init__.py
index 93d541c..9c29648 100644
--- a/src/streamdiffusion/acceleration/tensorrt/__init__.py
+++ b/src/streamdiffusion/acceleration/tensorrt/__init__.py
@@ -282,7 +282,7 @@ def accelerate_with_tensorrt(
     # Check if we'll use pre-built ONNX (needed for model selection)
     prebuilt_onnx_dir = os.path.join(os.path.dirname(engine_dir), "engines_sdxl_turbo")
     prebuilt_unet_onnx = os.path.join(prebuilt_onnx_dir, "unetxl.opt", "model.onnx")
-    use_prebuilt_onnx = stream.sdxl and os.path.exists(prebuilt_unet_onnx)
+    use_prebuilt_onnx = False#stream.sdxl and os.path.exists(prebuilt_unet_onnx)
 
     # Use SDXL-specific model configuration if SDXL pipeline
     if stream.sdxl:
diff --git a/src/streamdiffusion/acceleration/tensorrt/models.py b/src/streamdiffusion/acceleration/tensorrt/models.py
index cbf317d..c39e18c 100644
--- a/src/streamdiffusion/acceleration/tensorrt/models.py
+++ b/src/streamdiffusion/acceleration/tensorrt/models.py
@@ -565,6 +565,30 @@ def get_input_profile(self, batch_size, image_height, image_width, static_batch,
             ],
         }
 
+    def get_shape_dict(self, batch_size, image_height, image_width):  # maps each tensor name to its concrete shape tuple
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)  # check_dims supplies the latent H/W used below
+        return {
+            "sample": (batch_size, self.unet_dim, latent_height, latent_width),
+            "timestep": (batch_size,),  # one entry per batch element
+            "encoder_hidden_states": (batch_size, self.text_maxlen, self.embedding_dim),
+            "text_embeds": (batch_size, self.pooled_embedding_dim),
+            "time_ids": (batch_size, 6),  # presumably SDXL's 6 size/crop conditioning values -- TODO confirm
+            "latent": (batch_size, 4, latent_height, latent_width),  # NOTE(review): looks like the output tensor; 4 channels are hard-coded -- confirm
+        }
+
+    def get_sample_input(self, batch_size, image_height, image_width):  # tuple of dummy tensors; shapes match the first five get_shape_dict entries
+        latent_height, latent_width = self.check_dims(batch_size, image_height, image_width)
+        dtype = torch.float16 if self.fp16 else torch.float32  # only applied to the last three tensors below
+        return (
+            torch.randn(  # sample: always float32, regardless of self.fp16
+                batch_size, self.unet_dim, latent_height, latent_width, dtype=torch.float32, device=self.device
+            ),
+            torch.ones((batch_size,), dtype=torch.float32, device=self.device),  # timestep: all ones, always float32
+            torch.randn(batch_size, self.text_maxlen, self.embedding_dim, dtype=dtype, device=self.device),  # encoder_hidden_states
+            torch.randn(batch_size, self.pooled_embedding_dim, dtype=dtype, device=self.device),  # text_embeds
+            torch.randn(batch_size, 6, dtype=dtype, device=self.device),  # time_ids
+        )
+
 
 class SDXLUNetPrebuilt(SDXLUNet):
     """SDXL UNet for pre-built ONNX from HuggingFace (static timestep shape)"""
diff --git a/src/streamdiffusion/acceleration/tensorrt/utilities.py b/src/streamdiffusion/acceleration/tensorrt/utilities.py
index a4a4b29..9180d0a 100644
--- a/src/streamdiffusion/acceleration/tensorrt/utilities.py
+++ b/src/streamdiffusion/acceleration/tensorrt/utilities.py
@@ -550,6 +550,7 @@ def build_engine(
         static_batch=build_static_batch,
         static_shape=not build_dynamic_shape,
     )
+    print(input_profile)
     engine.build(
         onnx_opt_path,
         fp16=True,