diff --git a/.gitignore b/.gitignore
index 62a814bde..790e5bc66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,6 +67,7 @@ dmypy.json
 # Sphinx
 docs/_build/
 docs/source/api/generated/
+docs/source/tutorials/
 
 # docs/_doctrees/
 # docs/_static_gen/
@@ -126,8 +127,14 @@ certificates/
 # Benchmark outputs
 benchmark_results*/
 
+# Local dataset files
+liander_dataset/
+
 # Mlflow
 /mlflow
 /mlflow_artifacts_local
 
-.github/instructions
\ No newline at end of file
+.github/instructions
+
+# Jupyter notebook cache (myst-nb execution outputs)
+.jupyter_cache/
diff --git a/REUSE.toml b/REUSE.toml
index 0d4f42da5..b222eb543 100644
--- a/REUSE.toml
+++ b/REUSE.toml
@@ -6,6 +6,7 @@ path = [
     ".python-version",
     "uv.lock",
     "examples/*/*.ipynb",
+    "examples/tutorials/*.py",
 ]
 precedence = "override"
 SPDX-FileCopyrightText = "2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>"
diff --git a/docs/pyproject.toml b/docs/pyproject.toml
index f06651cfc..c3624285b 100644
--- a/docs/pyproject.toml
+++ b/docs/pyproject.toml
@@ -29,7 +29,9 @@ dependencies = [
   "openstef",
   "sphinx-autobuild>=2024.10.3",
   "sphinx-autodoc-typehints>=3.2.0",
-  "myst-parser>=4.0.1",
+  "myst-nb>=1.2.0",
+  "jupyter-cache>=1.0.0",
+  "jupytext>=1.16.0",
 ]
 
 [tool.uv.sources]
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 932a5d101..c517dcfe9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -38,7 +38,7 @@
     "sphinx.ext.mathjax",
     "sphinx.ext.napoleon",
     "sphinx.ext.viewcode",
-    "myst_parser",
+    "myst_nb",
     "sphinx_design",
     "sphinx_copybutton",
     "matplotlib.sphinxext.plot_directive",
@@ -50,7 +50,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns = ["conf.py", "**/*.ipynb"]
 
 # Specify how to identify the prompt when copying code snippets
 copybutton_prompt_text = r">>> |\.\.\. "
@@ -124,6 +124,14 @@
     "colon_fence",
 ]
 
+# -- Notebook execution (myst-nb) -------------------------------------------
+nb_custom_formats = {".py": ["jupytext.reads", {"fmt": "py:percent"}]}
+nb_execution_mode = "off"  # TODO(#884): enable "cache" once tutorials are optimized for faster execution
+nb_execution_timeout = 120
+nb_execution_raise_on_error = True
+# TODO(#884): backtesting notebook exceeds timeout — needs rewrite or execution split
+nb_execution_excludepatterns = ["tutorials/backtesting_openstef_with_beam*"]
+
 # Sphinx version switcher
 config = SphinxConfig("../../pyproject.toml", globalns=globals())
 version = config.version
@@ -300,12 +308,17 @@ def cff_to_bibtex(cff_data: dict[str, Any]) -> str:
 # -- Sphinx setup ------------------------------------------------------------
 
 
-def rstjinja(app: Sphinx, _docname: str, source: list[str]) -> None:
+def rstjinja(app: Sphinx, docname: str, source: list[str]) -> None:
     """Render RST files as Jinja templates for variable substitution."""
     # Only process HTML builds
     if app.builder.format != "html":  # type: ignore[attr-defined]
         return
 
+    # Only process .rst sources — skip notebooks/MyST which contain {} literals
+    rst_path = Path(app.srcdir) / f"{docname}.rst"
+    if not rst_path.is_file() or not source[0].strip():
+        return
+
     src: str = source[0]
     rendered: str = app.builder.templates.render_string(  # type: ignore[attr-defined]
         src,
diff --git a/docs/source/contribute/document.rst b/docs/source/contribute/document.rst
index 06c42be5a..c267239b3 100644
--- a/docs/source/contribute/document.rst
+++ b/docs/source/contribute/document.rst
@@ -390,6 +390,43 @@ Common issues
 
 .. include:: _getting_help.rst
 
+Working with tutorial notebooks
+===============================
+
+Tutorials live in ``examples/tutorials/`` as paired `Jupytext <https://jupytext.readthedocs.io/>`_
+files: a ``.py`` (percent format) source of truth and a ``.ipynb`` companion kept in sync.
+
+Key rules
+---------
+
+* **Edit the** ``.py`` **file**, not the ``.ipynb`` — the script is the single source of truth.
+* **Never commit notebook outputs.** The ``.ipynb`` on ``main`` must be output-free.
+* Notebooks are rendered into the docs via `myst-nb <https://myst-nb.readthedocs.io/>`_. Execution is
+  currently disabled (``nb_execution_mode = "off"``); cached execution (``"cache"``) is planned, see #884.
+
+Workflow
+--------
+
+.. code-block:: bash
+
+    # After editing a .py tutorial:
+    poe notebooks          # Sync .py → .ipynb
+
+    # Before committing:
+    poe notebooks-clear    # Strip any outputs from .ipynb
+    poe notebooks-check    # Verify sync + no outputs (runs in CI)
+
+Creating a new tutorial
+-----------------------
+
+.. code-block:: bash
+
+    # Create the .py file in percent format, then pair it:
+    jupytext --set-formats "ipynb,py:percent" examples/tutorials/my_tutorial.py
+
+    # Add a toctree entry in docs/source/examples.rst
+    # Optionally tag the first SPDX cell with "remove-cell" (see existing tutorials)
+
 Additional documentation resources
 ==================================
 
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 23023da77..d184ef7b0 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -7,6 +7,12 @@
 Examples
 ========
 
-.. admonition:: This page is under construction. Will be autogenerated from examples/examples.
+End-to-end tutorials demonstrating OpenSTEF workflows. Each example is a runnable Jupyter notebook;
+cell outputs are not rendered yet because notebook execution is disabled in the docs build.
 
-    Want to help? Check :ref:`Contributing <contributing>` for more information.
+.. toctree::
+   :maxdepth: 1
+
+   Forecasting with Presets <tutorials/forecasting_with_workflow_presets>
+   Hyperparameter Tuning <tutorials/hyperparameter_tuning_with_optuna>
+   Backtesting with BEAM <tutorials/backtesting_openstef_with_beam>
diff --git a/examples/tutorials/backtesting_openstef_with_beam.ipynb b/examples/tutorials/backtesting_openstef_with_beam.ipynb
index e58b73f32..3b48e1738 100644
--- a/examples/tutorials/backtesting_openstef_with_beam.ipynb
+++ b/examples/tutorials/backtesting_openstef_with_beam.ipynb
@@ -37,6 +37,7 @@
     "# --- Thread Configuration ---\n",
     "# Prevent thread contention when running parallel backtests with XGBoost\n",
     "import os\n",
+    "\n",
     "os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n",
     "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n",
     "os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n",
@@ -70,8 +71,8 @@
    "outputs": [],
    "source": [
     "# Import types for configuration\n",
-    "from openstef_core.types import LeadTime, Q  # LeadTime: forecast horizon, Q: quantile\n",
     "from openstef_beam.benchmarking.benchmarks.liander2024 import Liander2024Category\n",
+    "from openstef_core.types import LeadTime, Q  # LeadTime: forecast horizon, Q: quantile\n",
     "\n",
     "# --- Output Paths ---\n",
     "OUTPUT_PATH = Path(\"./benchmark_results\")\n",
@@ -87,9 +88,13 @@
     "\n",
     "# Quantiles for probabilistic forecasting (7 quantiles covering 5th to 95th percentile)\n",
     "PREDICTION_QUANTILES = [\n",
-    "    Q(0.05), Q(0.1), Q(0.3),  # Lower quantiles\n",
-    "    Q(0.5),                    # Median\n",
-    "    Q(0.7), Q(0.9), Q(0.95),  # Upper quantiles\n",
+    "    Q(0.05),\n",
+    "    Q(0.1),\n",
+    "    Q(0.3),  # Lower quantiles\n",
+    "    Q(0.5),  # Median\n",
+    "    Q(0.7),\n",
+    "    Q(0.9),\n",
+    "    Q(0.95),  # Upper quantiles\n",
     "]\n",
     "\n",
     "# --- Benchmark Filter (optional) ---\n",
@@ -128,26 +133,21 @@
     "    model_id=\"benchmark_model_\",\n",
     "    run_name=None,\n",
     "    model=\"flatliner\",  # Placeholder - will be overwritten per model\n",
-    "    \n",
     "    # Forecast settings\n",
     "    horizons=FORECAST_HORIZONS,\n",
     "    quantiles=PREDICTION_QUANTILES,\n",
-    "    \n",
     "    # Model reuse: reuse trained model for same target (speeds up backtesting)\n",
     "    model_reuse_enable=True,\n",
     "    mlflow_storage=None,  # Disable MLflow for this demo\n",
-    "    \n",
     "    # Weather feature column mappings (match dataset column names)\n",
     "    radiation_column=\"shortwave_radiation\",\n",
     "    wind_speed_column=\"wind_speed_80m\",  # 80m wind speed for better wind park predictions\n",
     "    pressure_column=\"surface_pressure\",\n",
     "    temperature_column=\"temperature_2m\",\n",
     "    relative_humidity_column=\"relative_humidity_2m\",\n",
-    "    \n",
     "    # Additional features\n",
     "    energy_price_column=\"EPEX_NL\",  # Day-ahead electricity price\n",
     "    rolling_aggregate_features=[\"mean\", \"median\", \"max\", \"min\"],  # Rolling window stats\n",
-    "    \n",
     "    # Logging\n",
     "    verbosity=0,  # Quiet mode for batch processing\n",
     ")"
@@ -228,9 +228,9 @@
    "outputs": [],
    "source": [
     "# Import benchmark components\n",
+    "from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster\n",
     "from openstef_beam.benchmarking.benchmarks.liander2024 import create_liander2024_benchmark_runner\n",
     "from openstef_beam.benchmarking.callbacks.strict_execution_callback import StrictExecutionCallback\n",
-    "from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster\n",
     "\n",
     "# --- Run XGBoost Benchmark ---\n",
     "print(\"🌲 Running XGBoost benchmark...\")\n",
@@ -300,10 +300,12 @@
     "\n",
     "# Generate comparison reports\n",
     "print(\"📊 Generating comparison analysis...\")\n",
-    "comparison_pipeline.run(run_data={\n",
-    "    \"xgboost\": storage_xgboost,\n",
-    "    \"gblinear\": storage_gblinear,\n",
-    "})\n",
+    "comparison_pipeline.run(\n",
+    "    run_data={\n",
+    "        \"xgboost\": storage_xgboost,\n",
+    "        \"gblinear\": storage_gblinear,\n",
+    "    }\n",
+    ")\n",
     "print(\"✅ Comparison analysis complete!\")"
    ]
   },
@@ -331,11 +333,11 @@
    "source": [
     "# Open key analysis plots in browser\n",
     "# HTML visualizations are interactive and best viewed in a browser\n",
-    "import webbrowser\n",
     "import os\n",
+    "import webbrowser\n",
     "\n",
     "# Base path for analysis results\n",
-    "analysis_base = os.path.abspath('./benchmark_results/analysis/D-1T06:00')\n",
+    "analysis_base = os.path.abspath(\"./benchmark_results/analysis/D-1T06:00\")\n",
     "\n",
     "# Define key visualizations to open\n",
     "visualizations = [\n",
@@ -346,9 +348,9 @@
     "print(\"🌐 Opening analysis visualizations in browser...\\n\")\n",
     "for name, filename in visualizations:\n",
     "    filepath = os.path.join(analysis_base, filename)\n",
-    "    if os.path.exists(filepath):\n",
+    "    if Path(filepath).exists():\n",
     "        print(f\"   📊 {name}\")\n",
-    "        webbrowser.open(f'file://{filepath}')\n",
+    "        webbrowser.open(f\"file://{filepath}\")\n",
     "    else:\n",
     "        print(f\"   ⚠️  {name} not found at {filepath}\")"
    ]
@@ -374,22 +376,22 @@
     "import glob\n",
     "\n",
     "# Find all time series plots for individual targets\n",
-    "target_plots = glob.glob('./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html')\n",
+    "target_plots = glob.glob(\"./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html\")\n",
     "\n",
     "if target_plots:\n",
     "    print(\"📊 Available target-specific time series plots:\\n\")\n",
     "    for i, plot in enumerate(sorted(target_plots)[:5]):  # Show first 5\n",
-    "        parts = plot.split('/')\n",
+    "        parts = plot.split(\"/\")\n",
     "        category = parts[-3]  # e.g., \"transformer\"\n",
-    "        target = parts[-2]    # e.g., \"OS Apeldoorn\"\n",
-    "        print(f\"   {i+1}. {category}/{target}\")\n",
-    "    \n",
+    "        target = parts[-2]  # e.g., \"OS Apeldoorn\"\n",
+    "        print(f\"   {i + 1}. {category}/{target}\")\n",
+    "\n",
     "    # Open the first transformer plot as an example\n",
-    "    transformer_plots = [p for p in target_plots if 'transformer' in p]\n",
+    "    transformer_plots = [p for p in target_plots if \"transformer\" in p]\n",
     "    if transformer_plots:\n",
     "        example_plot = os.path.abspath(transformer_plots[0])\n",
     "        print(f\"\\n🌐 Opening example: {transformer_plots[0]}\")\n",
-    "        webbrowser.open(f'file://{example_plot}')\n",
+    "        webbrowser.open(f\"file://{example_plot}\")\n",
     "else:\n",
     "    print(\"⚠️  No target-specific plots found. Run the benchmark first.\")"
    ]
@@ -437,6 +439,9 @@
   }
  ],
  "metadata": {
+  "jupytext": {
+   "formats": "ipynb,py:percent"
+  },
   "kernelspec": {
    "display_name": ".venv",
    "language": "python",
diff --git a/examples/tutorials/backtesting_openstef_with_beam.py b/examples/tutorials/backtesting_openstef_with_beam.py
new file mode 100644
index 000000000..0c30580ec
--- /dev/null
+++ b/examples/tutorials/backtesting_openstef_with_beam.py
@@ -0,0 +1,338 @@
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:percent
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.19.1
+#   kernelspec:
+#     display_name: .venv
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # 📊 Backtesting OpenSTEF Models with OpenSTEF-BEAM
+#
+# This tutorial demonstrates how to use **OpenSTEF-BEAM** (Backtesting, Evaluation, Analysis, Metrics) to systematically evaluate forecasting models. You'll learn how to:
+#
+# 1. **Configure benchmark experiments** with multiple model types
+# 2. **Run parallel backtests** across dozens of energy assets
+# 3. **Compare model performance** with standardized metrics
+# 4. **Generate analysis reports** with interactive visualizations
+#
+# > **BEAM** provides a rigorous framework for model evaluation, ensuring fair comparisons and reproducible results.
+
+# %% [markdown]
+# ## 🔧 Environment Setup
+#
+# First, we configure thread settings to prevent conflicts with XGBoost's internal parallelization when running multiple processes.
+
+# %%
+# --- Thread Configuration ---
+# Prevent thread contention when running parallel backtests with XGBoost
+import os
+
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["OPENBLAS_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+
+# --- Standard Imports ---
+import logging
+import multiprocessing
+from pathlib import Path
+
+logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s] %(message)s")
+
+# %% [markdown]
+# ## ⚙️ Benchmark Configuration
+#
+# Configure the benchmark parameters:
+# - **Output paths** — where to store results for each model
+# - **Forecast horizons** — how far ahead to predict (using ISO 8601 duration format)
+# - **Quantiles** — prediction intervals for probabilistic evaluation
+
+# %%
+# Import types for configuration
+from openstef_beam.benchmarking.benchmarks.liander2024 import Liander2024Category
+from openstef_core.types import LeadTime, Q  # LeadTime: forecast horizon, Q: quantile
+
+# --- Output Paths ---
+OUTPUT_PATH = Path("./benchmark_results")
+BENCHMARK_RESULTS_PATH_XGBOOST = OUTPUT_PATH / "XGBoost"
+BENCHMARK_RESULTS_PATH_GBLINEAR = OUTPUT_PATH / "GBLinear"
+
+# --- Parallelization ---
+N_PROCESSES = multiprocessing.cpu_count()  # Use all available CPU cores
+print(f"🖥️  Running with {N_PROCESSES} parallel processes")
+
+# --- Forecast Configuration ---
+FORECAST_HORIZONS = [LeadTime.from_string("P3D")]  # 3-day ahead forecast (ISO 8601: P3D)
+
+# Quantiles for probabilistic forecasting (7 quantiles covering 5th to 95th percentile)
+PREDICTION_QUANTILES = [
+    Q(0.05),
+    Q(0.1),
+    Q(0.3),  # Lower quantiles
+    Q(0.5),  # Median
+    Q(0.7),
+    Q(0.9),
+    Q(0.95),  # Upper quantiles
+]
+
+# --- Benchmark Filter (optional) ---
+# Set to None to run all categories, or specify categories like:
+# BENCHMARK_FILTER = [Liander2024Category.TRANSFORMER, Liander2024Category.MV_FEEDER]
+BENCHMARK_FILTER: list[Liander2024Category] | None = None
+
+# %% [markdown]
+# ## 🛠️ Model Configuration
+#
+# We define a **common configuration** that both models share, then create model-specific variants. This ensures fair comparison by keeping all settings identical except the model type.
+#
+# ### Available Models:
+# - **XGBoost** — Gradient boosting trees (handles complex nonlinear patterns)
+# - **GBLinear** — Gradient boosted linear model (better extrapolation, faster)
+
+# %%
+# Import workflow configuration
+from openstef_models.presets import ForecastingWorkflowConfig
+
+# Common configuration shared by all models
+# This ensures fair comparison by keeping all settings identical
+common_config = ForecastingWorkflowConfig(
+    model_id="benchmark_model_",
+    run_name=None,
+    model="flatliner",  # Placeholder - will be overwritten per model
+    # Forecast settings
+    horizons=FORECAST_HORIZONS,
+    quantiles=PREDICTION_QUANTILES,
+    # Model reuse: reuse trained model for same target (speeds up backtesting)
+    model_reuse_enable=True,
+    mlflow_storage=None,  # Disable MLflow for this demo
+    # Weather feature column mappings (match dataset column names)
+    radiation_column="shortwave_radiation",
+    wind_speed_column="wind_speed_80m",  # 80m wind speed for better wind park predictions
+    pressure_column="surface_pressure",
+    temperature_column="temperature_2m",
+    relative_humidity_column="relative_humidity_2m",
+    # Additional features
+    energy_price_column="EPEX_NL",  # Day-ahead electricity price
+    rolling_aggregate_features=["mean", "median", "max", "min"],  # Rolling window stats
+    # Logging
+    verbosity=0,  # Quiet mode for batch processing
+)
+
+# %%
+# Create model-specific configurations by copying common config and updating model type
+xgboost_config = common_config.model_copy(update={"model": "xgboost"})
+gblinear_config = common_config.model_copy(update={"model": "gblinear"})
+
+print("✅ Model configurations created:")
+print(f"   - XGBoost: {xgboost_config.model}")
+print(f"   - GBLinear: {gblinear_config.model}")
+
+# %% [markdown]
+# ## 💾 Storage Configuration
+#
+# **LocalBenchmarkStorage** manages the file structure for benchmark results:
+# ```
+# benchmark_results/
+# ├── XGBoost/
+# │   ├── backtest/      # Raw predictions
+# │   ├── evaluation/    # Metrics per target
+# │   └── analysis/      # Visualizations (HTML)
+# └── GBLinear/
+#     └── ...
+# ```
+
+# %%
+# Initialize storage backends for each model
+from openstef_beam.benchmarking.storage.local_storage import LocalBenchmarkStorage
+
+storage_xgboost = LocalBenchmarkStorage(base_path=BENCHMARK_RESULTS_PATH_XGBOOST)
+storage_gblinear = LocalBenchmarkStorage(base_path=BENCHMARK_RESULTS_PATH_GBLINEAR)
+
+print(f"📁 XGBoost results: {BENCHMARK_RESULTS_PATH_XGBOOST}")
+print(f"📁 GBLinear results: {BENCHMARK_RESULTS_PATH_GBLINEAR}")
+
+# %% [markdown]
+# ## 🚀 Run Backtests
+#
+# Now we run the **Liander 2024 Benchmark** — a comprehensive evaluation suite that:
+# 1. Downloads the benchmark dataset from HuggingFace Hub (if needed)
+# 2. Runs backtests across 5 asset categories (transformers, feeders, solar/wind parks)
+# 3. Computes metrics and generates analysis visualizations
+#
+# ⚠️ **Note**: This may take several minutes depending on your hardware.
+
+# %%
+# Import benchmark components
+from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster
+from openstef_beam.benchmarking.benchmarks.liander2024 import create_liander2024_benchmark_runner
+from openstef_beam.benchmarking.callbacks.strict_execution_callback import StrictExecutionCallback
+
+# --- Run XGBoost Benchmark ---
+print("🌲 Running XGBoost benchmark...")
+create_liander2024_benchmark_runner(
+    storage=storage_xgboost,
+    callbacks=[StrictExecutionCallback()],  # Fail fast on errors
+).run(
+    forecaster_factory=create_openstef4_preset_backtest_forecaster(
+        workflow_config=xgboost_config,
+    ),
+    run_name="xgboost",
+    n_processes=N_PROCESSES,
+    filter_args=BENCHMARK_FILTER,
+)
+print("✅ XGBoost benchmark complete!")
+
+# --- Run GBLinear Benchmark ---
+print("\n📈 Running GBLinear benchmark...")
+create_liander2024_benchmark_runner(
+    storage=storage_gblinear,
+    callbacks=[StrictExecutionCallback()],
+).run(
+    forecaster_factory=create_openstef4_preset_backtest_forecaster(
+        workflow_config=gblinear_config,
+    ),
+    run_name="gblinear",
+    n_processes=N_PROCESSES,
+    filter_args=BENCHMARK_FILTER,
+)
+print("✅ GBLinear benchmark complete!")
+
+# %% [markdown]
+# ## 📊 Compare Model Performance
+#
+# The **BenchmarkComparisonPipeline** generates side-by-side analysis of multiple models:
+# - Global metrics across all targets
+# - Per-category breakdowns (transformers, feeders, etc.)
+# - Time-windowed performance analysis
+
+# %%
+# Run model comparison analysis
+from openstef_beam.benchmarking import BenchmarkComparisonPipeline
+from openstef_beam.benchmarking.benchmarks.liander2024 import LIANDER2024_ANALYSIS_CONFIG
+
+# Create comparison pipeline
+target_provider = create_liander2024_benchmark_runner(
+    storage=LocalBenchmarkStorage(base_path=OUTPUT_PATH),
+).target_provider
+
+comparison_pipeline = BenchmarkComparisonPipeline(
+    analysis_config=LIANDER2024_ANALYSIS_CONFIG,
+    storage=LocalBenchmarkStorage(base_path=OUTPUT_PATH),
+    target_provider=target_provider,
+)
+
+# Generate comparison reports
+print("📊 Generating comparison analysis...")
+comparison_pipeline.run(
+    run_data={
+        "xgboost": storage_xgboost,
+        "gblinear": storage_gblinear,
+    }
+)
+print("✅ Comparison analysis complete!")
+
+# %% [markdown]
+# ## 📈 View Analysis Results
+#
+# The benchmark generates interactive HTML visualizations. Let's open the most important ones:
+#
+# ### Key Metrics:
+# - **rCRPS** (relative Continuous Ranked Probability Score) — measures probabilistic forecast accuracy
+# - **rMAE** (relative Mean Absolute Error) — measures point forecast accuracy
+# - Lower values = better performance
+
+# %%
+# Open key analysis plots in browser
+# HTML visualizations are interactive and best viewed in a browser
+import os
+import webbrowser
+
+# Base path for analysis results
+analysis_base = os.path.abspath("./benchmark_results/analysis/D-1T06:00")
+
+# Define key visualizations to open
+visualizations = [
+    ("rCRPS Grouped by Category", "rCRPS_grouped.html"),
+    ("rCRPS Time-Windowed (7 days)", "rCRPS_windowed_7D.html"),
+]
+
+print("🌐 Opening analysis visualizations in browser...\n")
+for name, filename in visualizations:
+    filepath = os.path.join(analysis_base, filename)
+    if Path(filepath).exists():
+        print(f"   📊 {name}")
+        webbrowser.open(f"file://{filepath}")
+    else:
+        print(f"   ⚠️  {name} not found at {filepath}")
+
+# %% [markdown]
+# ### 🔍 Explore Individual Target Results
+#
+# You can also view time series plots for individual targets. Let's look at a transformer forecast:
+
+# %%
+# List available target-specific visualizations
+import glob
+
+# Find all time series plots for individual targets
+target_plots = glob.glob("./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html")
+
+if target_plots:
+    print("📊 Available target-specific time series plots:\n")
+    for i, plot in enumerate(sorted(target_plots)[:5]):  # Show first 5
+        parts = plot.split("/")
+        category = parts[-3]  # e.g., "transformer"
+        target = parts[-2]  # e.g., "OS Apeldoorn"
+        print(f"   {i + 1}. {category}/{target}")
+
+    # Open the first transformer plot as an example
+    transformer_plots = [p for p in target_plots if "transformer" in p]
+    if transformer_plots:
+        example_plot = os.path.abspath(transformer_plots[0])
+        print(f"\n🌐 Opening example: {transformer_plots[0]}")
+        webbrowser.open(f"file://{example_plot}")
+else:
+    print("⚠️  No target-specific plots found. Run the benchmark first.")
+
+# %% [markdown]
+# ---
+#
+# ## 🎯 Summary
+#
+# In this tutorial, you learned how to:
+#
+# 1. ✅ **Configure benchmark experiments** with `ForecastingWorkflowConfig`
+# 2. ✅ **Run parallel backtests** using the Liander 2024 benchmark
+# 3. ✅ **Compare models** (XGBoost vs GBLinear) with `BenchmarkComparisonPipeline`
+# 4. ✅ **Analyze results** with interactive HTML visualizations
+#
+# ### 📁 Output Structure
+#
+# ```
+# benchmark_results/
+# ├── XGBoost/
+# │   ├── backtest/       # Raw predictions (parquet)
+# │   ├── evaluation/     # Metrics per target
+# │   └── analysis/       # HTML visualizations
+# ├── GBLinear/
+# │   └── ...
+# └── analysis/           # Comparison analysis (both models)
+#     └── D-1T06:00/
+#         ├── rCRPS_grouped.html      # Probabilistic accuracy by category
+#         ├── rMAE_grouped.html       # Point forecast accuracy
+#         └── summary.html            # Overall summary
+# ```
+#
+# ### 🚀 Next Steps
+#
+# - Experiment with different `FORECAST_HORIZONS` (e.g., `"PT6H"`, `"P7D"`)
+# - Add more quantiles for higher resolution prediction intervals
+# - Filter specific categories with `BENCHMARK_FILTER`
+# - Integrate MLflow for experiment tracking
diff --git a/examples/tutorials/forecasting_with_workflow_presets.ipynb b/examples/tutorials/forecasting_with_workflow_presets.ipynb
index 01a3a6881..cdf4a3800 100644
--- a/examples/tutorials/forecasting_with_workflow_presets.ipynb
+++ b/examples/tutorials/forecasting_with_workflow_presets.ipynb
@@ -4,7 +4,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "e8eae4f7",
-   "metadata": {},
+   "metadata": {
+    "tags": [
+     "remove-cell"
+    ]
+   },
    "outputs": [],
    "source": [
     "# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>\n",
diff --git a/examples/tutorials/forecasting_with_workflow_presets.py b/examples/tutorials/forecasting_with_workflow_presets.py
index 4c7f8be3d..aaba18a08 100644
--- a/examples/tutorials/forecasting_with_workflow_presets.py
+++ b/examples/tutorials/forecasting_with_workflow_presets.py
@@ -13,7 +13,7 @@
 #     name: python3
 # ---
 
-# %%
+# %% tags=["remove-cell"]
 # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
 #
 # SPDX-License-Identifier: MPL-2.0
diff --git a/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb b/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb
index 84e8f6685..ecf1de68d 100644
--- a/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb
+++ b/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb
@@ -4,7 +4,11 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "feff3c78",
-   "metadata": {},
+   "metadata": {
+    "tags": [
+     "remove-cell"
+    ]
+   },
    "outputs": [],
    "source": [
     "# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>\n",
diff --git a/examples/tutorials/hyperparameter_tuning_with_optuna.py b/examples/tutorials/hyperparameter_tuning_with_optuna.py
index ad4608b19..6014e1afa 100644
--- a/examples/tutorials/hyperparameter_tuning_with_optuna.py
+++ b/examples/tutorials/hyperparameter_tuning_with_optuna.py
@@ -13,7 +13,7 @@
 #     name: python3
 # ---
 
-# %%
+# %% tags=["remove-cell"]
 # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
 #
 # SPDX-License-Identifier: MPL-2.0
diff --git a/pyproject.toml b/pyproject.toml
index 6dfe79d66..bdb01fff2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -130,9 +130,11 @@ lint.ignore = [
 lint.per-file-ignores."./examples/tutorials/*" = [
   "D100",   # Notebooks don't need module docstrings
   "E402",   # Imports not at top — notebook cells have natural ordering
+  "E501",   # Long lines in notebooks — readability is cell-scoped
   "ERA001", # Jupytext YAML frontmatter looks like commented-out code
   "F821",   # Cell-scoped names appear undefined to Ruff's module-level analysis
   "INP001", # Not a namespace package — notebooks don't need __init__.py
+  "PTH",    # Tutorials may use os.path for simplicity
   "S101",   # Tutorials may use assert for runtime checks
   "T201",   # Tutorials may use print for demonstration
 ]
@@ -230,6 +232,9 @@ include = [
   "packages/*/examples/**/*.py",
   "packages/*/tests/**/*.py",
 ]
+exclude = [
+  "examples/tutorials/**/*.py",
+]
 
 typeCheckingMode = "strict"
 reportPrivateImportUsage = false
@@ -282,11 +287,16 @@ cmd = "pytest --numprocesses=auto --doctest-modules packages/*/src --maxfail=1"
 help = "Sync jupytext .py percent sources → .ipynb notebooks (and vice versa)"
 cmd = "jupytext --sync examples/tutorials/*.py"
 
+[tool.poe.tasks.notebooks-clear]
+help = "Strip outputs from all .ipynb notebooks"
+cmd = "jupyter nbconvert --clear-output --inplace examples/tutorials/*.ipynb"
+
 [tool.poe.tasks.notebooks-check]
-help = "Check that .ipynb notebooks are in sync with their jupytext .py sources"
+help = "Check that .ipynb notebooks are in sync with their jupytext .py sources and have no outputs"
 sequence = [
   { cmd = "jupytext --sync examples/tutorials/*.py" },
   { cmd = "git diff --exit-code -- examples/tutorials/" },
+  { script = "tools.check_notebook_outputs:main" },
 ]
 
 [tool.poe.tasks.report]
@@ -375,8 +385,13 @@ sequence = [
   { cmd = "uv build .", help = "Build root meta package separately (not part of --all-packages)" },
 ]
 
+[tool.poe.tasks._docs_sync]
+help = "Sync tutorial sources into docs/source for Sphinx"
+cmd = "python -c \"import shutil; shutil.rmtree('docs/source/tutorials', ignore_errors=True); shutil.copytree('examples/tutorials', 'docs/source/tutorials')\""
+
 [tool.poe.tasks.docs]
 help = "Build the documentation"
+deps = [ "_docs_sync" ]
 control.expr = "serve"
 switch = [
   { case = "True", cmd = "sphinx-autobuild docs/source docs/build/html --watch packages" },
diff --git a/tools/check_notebook_outputs.py b/tools/check_notebook_outputs.py
new file mode 100644
index 000000000..37f039822
--- /dev/null
+++ b/tools/check_notebook_outputs.py
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project <openstef@lfenergy.org>
+#
+# SPDX-License-Identifier: MPL-2.0
+"""Check that committed .ipynb notebooks have no cell outputs."""
+
+import json
+import sys
+from pathlib import Path
+
+TUTORIALS_DIR = Path("examples/tutorials")
+
+
+def main() -> None:
+    """Validate that no .ipynb in tutorials has stored outputs."""
+    failures: list[str] = []
+
+    for nb_path in sorted(TUTORIALS_DIR.glob("*.ipynb")):
+        nb = json.loads(nb_path.read_text(encoding="utf-8"))
+        for i, cell in enumerate(nb.get("cells", [])):
+            if cell.get("cell_type") == "code" and cell.get("outputs"):
+                failures.append(f"  {nb_path.name}: cell {i} has outputs")
+                break
+
+    if failures:
+        print("ERROR: Notebooks with outputs found (run `poe notebooks-clear`):")
+        print("\n".join(failures))
+        sys.exit(1)
+
+    print(f"OK: {len(list(TUTORIALS_DIR.glob('*.ipynb')))} notebooks checked, no outputs found")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/uv.lock b/uv.lock
index 2271380e5..b53637146 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1740,6 +1740,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/64/285f20a31679bf547b75602702f7800e74dbabae36ef324f716c02804753/jupyter-1.1.1-py2.py3-none-any.whl", hash = "sha256:7a59533c22af65439b24bbe60373a4e95af8f16ac65a6c00820ad378e3f7cc83", size = 2657, upload-time = "2024-08-30T07:15:47.045Z" },
 ]
 
+[[package]]
+name = "jupyter-cache"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs" },
+    { name = "click" },
+    { name = "importlib-metadata" },
+    { name = "nbclient" },
+    { name = "nbformat" },
+    { name = "pyyaml" },
+    { name = "sqlalchemy" },
+    { name = "tabulate" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bb/f7/3627358075f183956e8c4974603232b03afd4ddc7baf72c2bc9fff522291/jupyter_cache-1.0.1.tar.gz", hash = "sha256:16e808eb19e3fb67a223db906e131ea6e01f03aa27f49a7214ce6a5fec186fb9", size = 32048, upload-time = "2024-11-15T16:03:55.322Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/64/6b/67b87da9d36bff9df7d0efbd1a325fa372a43be7158effaf43ed7b22341d/jupyter_cache-1.0.1-py3-none-any.whl", hash = "sha256:9c3cafd825ba7da8b5830485343091143dff903e4d8c69db9349b728b140abf6", size = 33907, upload-time = "2024-11-15T16:03:54.021Z" },
+]
+
 [[package]]
 name = "jupyter-client"
 version = "8.7.0"
@@ -2517,6 +2536,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" },
 ]
 
+[[package]]
+name = "myst-nb"
+version = "1.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "importlib-metadata" },
+    { name = "ipykernel" },
+    { name = "ipython" },
+    { name = "jupyter-cache" },
+    { name = "myst-parser" },
+    { name = "nbclient" },
+    { name = "nbformat" },
+    { name = "pyyaml" },
+    { name = "sphinx" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bd/b4/ff1abeea67e8cfe0a8c033389f6d1d8b0bfecfd611befb5cbdeab884fce6/myst_nb-1.4.0.tar.gz", hash = "sha256:c145598de62446a6fd009773dd071a40d3b76106ace780de1abdfc6961f614c2", size = 82285, upload-time = "2026-03-02T21:14:56.95Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/93/0a378b48488879a1d925b42a804edfc6e0cd0ef854220f2dce738a46e7e9/myst_nb-1.4.0-py3-none-any.whl", hash = "sha256:0e2c86e7d3b82c3aa51383f82d6268f7714f3b772c23a796ab09538a8e68b4e4", size = 82555, upload-time = "2026-03-02T21:14:55.652Z" },
+]
+
 [[package]]
 name = "myst-parser"
 version = "4.0.1"
@@ -2936,8 +2976,10 @@ version = "0.0.0"
 source = { editable = "docs" }
 dependencies = [
     { name = "ipython" },
+    { name = "jupyter-cache" },
+    { name = "jupytext" },
     { name = "matplotlib" },
-    { name = "myst-parser" },
+    { name = "myst-nb" },
     { name = "openstef" },
     { name = "openstef-beam" },
     { name = "openstef-core" },
@@ -2956,8 +2998,10 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "ipython", specifier = ">=9.4.0" },
+    { name = "jupyter-cache", specifier = ">=1.0.0" },
+    { name = "jupytext", specifier = ">=1.16.0" },
     { name = "matplotlib", specifier = ">=3.10.5" },
-    { name = "myst-parser", specifier = ">=4.0.1" },
+    { name = "myst-nb", specifier = ">=1.2.0" },
     { name = "openstef", editable = "." },
     { name = "openstef-beam", editable = "packages/openstef-beam" },
     { name = "openstef-core", editable = "packages/openstef-core" },
@@ -4992,6 +5036,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" },
 ]
 
+[[package]]
+name = "tabulate"
+version = "0.10.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" },
+]
+
 [[package]]
 name = "terminado"
 version = "0.18.1"