GridTools · philip-paul-mueller · Dec 2, 2025 · Dec 4, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/.coveragerc b/.coveragerc
@@ -19,14 +19,16 @@ exclude_lines =
     if False:
     if __name__ == .__main__.:
     pass
+    if TYPE_CHECKING:
+    if typing.TYPE_CHECKING:
 
 omit =
     # Omit files that cannot be tested
     dace/jupyter.py
 
     # Omit deprecated files
-    dace/frontend/tensorflow/__init__.py
-    dace/frontend/tensorflow/tensorflow.py
-    dace/frontend/tensorflow/winograd.py
-    dace/frontend/tensorflow/transformations/__init__.py
-    dace/frontend/tensorflow/transformations/redundant_array.py
+    dace/frontend/ml/tensorflow/__init__.py
+    dace/frontend/ml/tensorflow/tensorflow.py
+    dace/frontend/ml/tensorflow/winograd.py
+    dace/frontend/ml/tensorflow/transformations/__init__.py
+    dace/frontend/ml/tensorflow/transformations/redundant_array.py
diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml
@@ -35,6 +35,6 @@ jobs:
 
       - name: Install DaCe in development mode
         run: |
-          python -m pip install --editable ".[testing,linting]"
+          python -m pip install --editable ".[testing,linting,ml]"
           pre-commit install
           pre-commit run
diff --git a/.github/workflows/dace-updater.yml b/.github/workflows/dace-updater.yml
@@ -0,0 +1,50 @@
+name: Inform the Python package index about a new DaCe release.
+# Sends a `repository_dispatch` event (`update_package_index`) carrying the triggering commit to the index repository.
+on:
+  # Trigger for all pushes to tags matching this pattern
+  push:
+    tags:
+      - __gt4py-next-integration_*
+
+  # To "install" this workflow you must enable this trigger, such that the workflow runs at least once.
+  #  You should also disable any processing, such that no commit is performed in the index repo.
+  #  See https://stackoverflow.com/a/71057825
+  #pull_request:
+
+  # Allows to trigger the update manually.
+  # NOTE: This is only possible if the workflow file exists on the default branch and on the branch it should run on.
+  workflow_dispatch:
+
+jobs:
+  update-dace:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Inform Index
+        shell: bash
+        run: |
+          INDEX_ORGANIZATION="gridtools"
+          INDEX_REPO="pypi"
+
+          # We are using `github.sha` here to be sure that we transmit an identifier to the index
+          #  that can be checked out. Before we used `github.ref_name` but got strange results
+          #  with it.
+          DEPENDENCY_REF="${{ github.sha }}"
+          SOURCE_REPO="dace"
+          SOURCE_OWNER="gridtools"
+
+          # `shell: bash` runs with `-e`: capture the status via `||`, otherwise a failing curl aborts before the check below.
+          CURL_RET=0
+          curl -L -v --fail-with-body \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.PKG_UPDATE_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "https://api.github.com/repos/${INDEX_ORGANIZATION}/${INDEX_REPO}/dispatches" \
+            -d '{"event_type":"update_package_index","client_payload":{"source_repo":"'"${SOURCE_REPO}"'","source_org":"'"${SOURCE_OWNER}"'","dependency_ref":"'"${DEPENDENCY_REF}"'"}}' || CURL_RET=$?
+
+          if [ "${CURL_RET}" -ne 0 ]
+          then
+            echo "POST to '${INDEX_ORGANIZATION}:${INDEX_REPO}' failed with error code '${CURL_RET}'"
+            exit 1
+          fi
+          exit 0
diff --git a/.github/workflows/fpga-ci.yml b/.github/workflows/fpga-ci.yml
diff --git a/.github/workflows/general-ci.yml b/.github/workflows/general-ci.yml
@@ -59,7 +59,7 @@ jobs:
         else
             export DACE_optimizer_automatic_simplification=${{ matrix.simplify }}
         fi
-        pytest -n auto --cov-report=xml --cov=dace --tb=short --timeout_method thread --timeout=300 -m "not gpu and not verilator and not tensorflow and not mkl and not sve and not papi and not mlir and not lapack and not fpga and not mpi and not rtl_hardware and not scalapack and not datainstrument and not long and not sequential"
+        pytest -n auto --cov-report=xml --cov=dace --tb=short --timeout_method thread --timeout=300 -m "not gpu and not autodiff and not torch and not onnx and not tensorflow and not mkl and not sve and not papi and not mlir and not lapack and not mpi and not scalapack and not datainstrument and not long and not sequential"
         ./codecov
 
     - name: Test OpenBLAS LAPACK

diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml
@@ -37,7 +37,7 @@ jobs:
         pip install mpi4py
         pip install cupy
         pip uninstall -y dace
-        pip install -e ".[testing]"
+        pip install -e ".[testing,ml]"
         curl -Os https://uploader.codecov.io/latest/linux/codecov
         chmod +x codecov
 

diff --git a/.github/workflows/hardware_test.yml b/.github/workflows/hardware_test.yml
diff --git a/.github/workflows/heterogeneous-ci.yml b/.github/workflows/heterogeneous-ci.yml
@@ -48,7 +48,7 @@ jobs:
       run: |
         source ~/.venv/bin/activate # activate venv
         export DACE_cache=unique
-        pytest --cov-report=xml --cov=dace --tb=short --timeout_method thread --timeout=300 -m "verilator or mkl or papi or datainstrument"
+        pytest --cov-report=xml --cov=dace --tb=short --timeout_method thread --timeout=300 -m "mkl or papi or datainstrument"
 
     - name: Run MPI tests
       run: |

diff --git a/.github/workflows/ml-ci.yml b/.github/workflows/ml-ci.yml
@@ -0,0 +1,62 @@
+name: Machine Learning and Autodiff Tests
+
+on:  # runs for pushes, pull requests and merge-queue groups on the listed branches
+  push:
+    branches: [ main, ci-fix ]
+  pull_request:
+    branches: [ main, ci-fix ]
+  merge_group:
+    branches: [ main, ci-fix ]
+
+concurrency:
+  group: ${{github.workflow}}-${{github.ref}}  # one concurrency group per workflow and ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
+
+jobs:
+  test:
+    if: "!contains(github.event.pull_request.labels.*.name, 'no-ci')"  # skip when the pull request carries the 'no-ci' label
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.13']
+        simplify: [0,1,autoopt]  # translated into DACE_optimizer_* variables in the "Test with pytest" step
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: 'recursive'  # also check out nested git submodules
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y libyaml-dev cmake
+        sudo apt-get install -y libblas-dev libopenblas-dev liblapacke-dev
+        python -m pip install --upgrade pip
+        pip install flake8 pytest-xdist coverage
+        pip install -e ".[ml-testing,ml]"
+        curl -Os https://uploader.codecov.io/latest/linux/codecov
+        chmod +x codecov
+
+    - name: Test with pytest
+      run: |
+        export NOSTATUSBAR=1
+        export DACE_testing_serialization=1
+        export DACE_testing_deserialize_exception=1
+        export DACE_cache=unique
+        if [ "${{ matrix.simplify }}" = "autoopt" ]; then
+            export DACE_optimizer_automatic_simplification=1
+            export DACE_optimizer_autooptimize=1
+            echo "Auto-optimization heuristics"
+        else
+            export DACE_optimizer_automatic_simplification=${{ matrix.simplify }}
+        fi
+        pytest --cov-report=xml --cov=dace --tb=short --timeout_method thread --timeout=600 -v -m "(torch or onnx or autodiff) and not gpu"
+        ./codecov
+
+    - uses: codecov/codecov-action@v4  # coverage upload through the action, in addition to the ./codecov call in the previous step
+      with:
+        token: ${{ secrets.CODECOV_TOKEN }}
+        verbose: true
diff --git a/.github/workflows/verilator_compatibility.yml b/.github/workflows/verilator_compatibility.yml
diff --git a/.gitignore b/.gitignore
@@ -150,12 +150,6 @@ perf.json
 perf*.csv
 /dace/frontend/octave/parsetab.py
 
-# Xilinx
-xilinx_vcu1525_*
-sdaccel_profile_*
-sdaccel_timeline_*
-.run/
-
 # NVIDIA
 *.nvprof
 out.sdfg
@@ -195,3 +189,8 @@ _build/
 
 # Ignoring the test junk
 _all_tests/
+
+
+# Ignore downloaded ONNX models
+/*.onnx
+/*.bin
diff --git a/.gitmodules b/.gitmodules
@@ -5,12 +5,6 @@
 [submodule "dace/external/moodycamel"]
 	path = dace/external/moodycamel
 	url = https://github.com/cameron314/concurrentqueue.git
-[submodule "dace/external/hlslib"]
-	path = dace/external/hlslib
-	url = https://github.com/definelicht/hlslib.git
 [submodule "dace/viewer/webclient"]
 	path = dace/viewer/webclient
 	url = https://github.com/spcl/dace-webclient.git
-[submodule "dace/external/rtllib"]
-	path = dace/external/rtllib
-	url = https://github.com/carljohnsen/rtllib.git
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,12 +1,8 @@
 global-include LICENSE LICENSE.*
-include dace/*.yml dace/codegen/CMakeLists.txt dace/codegen/tools/*.cpp dace/external/moodycamel/*.h dace/codegen/Xilinx_HLS.tcl.in dace/viewer/webclient/*.css dace/viewer/webclient/*.html dace/viewer/webclient/dist/*.js
+include dace/*.yml dace/codegen/CMakeLists.txt dace/codegen/tools/*.cpp dace/external/moodycamel/*.h dace/viewer/webclient/*.css dace/viewer/webclient/*.html dace/viewer/webclient/dist/*.js
 recursive-include dace/codegen *.cmake
 graft dace/runtime/include
 graft dace/libraries
 graft dace/viewer/webclient/external_lib
 graft dace/viewer/templates
 graft dace/external/cub/cub
-graft dace/external/hlslib/cmake
-graft dace/external/hlslib/include
-graft dace/external/rtllib/cmake
-graft dace/external/rtllib/templates
diff --git a/README.md b/README.md
@@ -1,6 +1,5 @@
 [![General Tests](https://github.com/spcl/dace/actions/workflows/general-ci.yml/badge.svg)](https://github.com/spcl/dace/actions/workflows/general-ci.yml)
 [![GPU Tests](https://github.com/spcl/dace/actions/workflows/gpu-ci.yml/badge.svg)](https://github.com/spcl/dace/actions/workflows/gpu-ci.yml)
-[![FPGA Tests](https://github.com/spcl/dace/actions/workflows/fpga-ci.yml/badge.svg)](https://github.com/spcl/dace/actions/workflows/fpga-ci.yml)
 [![Documentation Status](https://readthedocs.org/projects/spcldace/badge/?version=latest)](https://spcldace.readthedocs.io/en/latest/?badge=latest)
 [![PyPI version](https://badge.fury.io/py/dace.svg)](https://badge.fury.io/py/dace)
 [![codecov](https://codecov.io/gh/spcl/dace/branch/main/graph/badge.svg)](https://codecov.io/gh/spcl/dace)
@@ -13,7 +12,7 @@ _Decoupling domain science from performance optimization._
 
 DaCe is a [fast](https://nbviewer.org/github/spcl/dace/blob/main/tutorials/benchmarking.ipynb) parallel programming
 framework that takes code in Python/NumPy and other programming languages, and maps it to high-performance
-**CPU, GPU, and FPGA** programs, which can be optimized to achieve state-of-the-art. Internally, DaCe
+**CPU, GPU, and [FPGA](https://github.com/spcl/dace-fpga)** programs, which can be optimized to achieve state-of-the-art performance. Internally, DaCe
 uses the Stateful DataFlow multiGraph (SDFG) *data-centric intermediate
 representation*: A transformable, interactive representation of code based on
 data movement.
@@ -27,7 +26,7 @@ of performance optimization, regardless of the application or the target process
 DaCe generates high-performance programs for:
  * Multi-core CPUs (tested on Intel, IBM POWER9, and ARM with SVE)
  * NVIDIA GPUs and AMD GPUs (with HIP)
- * Xilinx and Intel FPGAs
+ * [Xilinx and Intel FPGAs](https://github.com/spcl/dace-fpga)
 
 DaCe can be written inline in Python and transformed in the command-line/Jupyter
 Notebooks or SDFGs can be interactively modified using our [Visual Studio Code extension](https://marketplace.visualstudio.com/items?itemName=phschaad.sdfv).