From 8584d5bb66f3562444f143ed903cdf4aef48a289 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Artur=20K=C5=82oniecki?= <arturx.kloniecki@intel.com>
Date: Mon, 18 May 2026 12:20:13 +0000
Subject: [PATCH 1/2] Fix arange error type for fractional step with integer
 output on XPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When torch.arange is called with a fractional step (e.g., 0.5) and an
integer output tensor, the step truncates to 0 when it is converted to
the integer accumulate type (accscalar_t). The CUDA backend raises
ValueError for this case via TORCH_CHECK_VALUE, but XPU raised
RuntimeError via TORCH_CHECK.

Replace inline bounds checks with arange_check_bounds() (from
RangeUtils.h) and add TORCH_CHECK_VALUE in the int64_t branch to match
CUDA behavior.

Fixes #3699

Signed-off-by: Artur Kłoniecki <arturx.kloniecki@intel.com>
---
 src/ATen/native/xpu/RangeFactories.cpp | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)
diff --git a/src/ATen/native/xpu/RangeFactories.cpp b/src/ATen/native/xpu/RangeFactories.cpp
index cfb538c7b6..14c3945623 100644
--- a/src/ATen/native/xpu/RangeFactories.cpp
+++ b/src/ATen/native/xpu/RangeFactories.cpp
@@ -19,6 +19,7 @@
 #include <ATen/core/Tensor.h>
 #include <ATen/native/DispatchStub.h>
 #include <ATen/native/RangeFactories.h>
+#include <ATen/native/RangeUtils.h>
 #include <ATen/native/TensorIterator.h>
 #include <ATen/native/xpu/sycl/RangeFactoriesKernel.h>
 #include <comm/xpu_aten.h>
@@ -48,18 +49,7 @@ Tensor& arange_out_xpu(
         auto xend = end.to<accscalar_t>();
         auto xstep = step.to<accscalar_t>();
 
-        TORCH_CHECK(xstep > 0 || xstep < 0, "step must be nonzero");
-        TORCH_CHECK(
-            std::isfinite(static_cast<double>(xstart)) &&
-                std::isfinite(static_cast<double>(xend)),
-            "unsupported range: ",
-            xstart,
-            " -> ",
-            xend);
-        TORCH_CHECK(
-            ((xstep > 0) && (xend >= xstart)) ||
-                ((xstep < 0) && (xend <= xstart)),
-            "upper bound and larger bound inconsistent with step sign");
+        arange_check_bounds(start, end, step);
 
         // we use double precision for (start - end) / step
         // to compute size_d for consistency across devices.
@@ -71,6 +61,7 @@ Tensor& arange_out_xpu(
         // than double
         double size_d;
         if constexpr (std::is_same_v<scalar_t, int64_t>) {
+          TORCH_CHECK_VALUE(xstep != 0, "step must be nonzero");
           int64_t sgn = (xstep > 0) - (xstep < 0);
           size_d = std::ceil((xend - xstart + xstep - sgn) / xstep);
         } else {

From 3e68faf3a86260ab50cdb83fa09b263dc799a1bc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 18 May 2026 19:52:48 +0000
Subject: [PATCH 2/2] Scope arange int64 temporaries to constexpr branch

Agent-Logs-Url: https://github.com/intel/torch-xpu-ops/sessions/b20c84de-bfc4-446e-b55f-028f700c7dea

Co-authored-by: chuanqi129 <13608516+chuanqi129@users.noreply.github.com>
---
 src/ATen/native/xpu/RangeFactories.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/ATen/native/xpu/RangeFactories.cpp b/src/ATen/native/xpu/RangeFactories.cpp
index 14c3945623..131164f11d 100644
--- a/src/ATen/native/xpu/RangeFactories.cpp
+++ b/src/ATen/native/xpu/RangeFactories.cpp
@@ -44,11 +44,6 @@ Tensor& arange_out_xpu(
       out.scalar_type(),
       "arange_xpu_preprocess",
       [&]() {
-        using accscalar_t = at::acc_type_device<scalar_t, kXPU>;
-        auto xstart = start.to<accscalar_t>();
-        auto xend = end.to<accscalar_t>();
-        auto xstep = step.to<accscalar_t>();
-
         arange_check_bounds(start, end, step);
 
         // we use double precision for (start - end) / step
@@ -61,6 +56,10 @@ Tensor& arange_out_xpu(
         // than double
         double size_d;
         if constexpr (std::is_same_v<scalar_t, int64_t>) {
+          using accscalar_t = at::acc_type_device<scalar_t, kXPU>;
+          auto xstart = start.to<accscalar_t>();
+          auto xend = end.to<accscalar_t>();
+          auto xstep = step.to<accscalar_t>();
           TORCH_CHECK_VALUE(xstep != 0, "step must be nonzero");
           int64_t sgn = (xstep > 0) - (xstep < 0);
           size_d = std::ceil((xend - xstart + xstep - sgn) / xstep);