diff --git a/.gitignore b/.gitignore index 62a814bde..790e5bc66 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,7 @@ dmypy.json # Sphinx docs/_build/ docs/source/api/generated/ +docs/source/tutorials/ # docs/_doctrees/ # docs/_static_gen/ @@ -126,8 +127,14 @@ certificates/ # Benchmark outputs benchmark_results*/ +# Local dataset files +liander_dataset/ + # Mlflow /mlflow /mlflow_artifacts_local -.github/instructions \ No newline at end of file +.github/instructions + +# Jupyter notebook cache (myst-nb execution outputs) +.jupyter_cache/ diff --git a/REUSE.toml b/REUSE.toml index 0d4f42da5..b222eb543 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -6,6 +6,7 @@ path = [ ".python-version", "uv.lock", "examples/*/*.ipynb", + "examples/tutorials/*.py", ] precedence = "override" SPDX-FileCopyrightText = "2025 Contributors to the OpenSTEF project " diff --git a/docs/pyproject.toml b/docs/pyproject.toml index f06651cfc..c3624285b 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -29,7 +29,9 @@ dependencies = [ "openstef", "sphinx-autobuild>=2024.10.3", "sphinx-autodoc-typehints>=3.2.0", - "myst-parser>=4.0.1", + "myst-nb>=1.2.0", + "jupyter-cache>=1.0.0", + "jupytext>=1.16.0", ] [tool.uv.sources] diff --git a/docs/source/conf.py b/docs/source/conf.py index 932a5d101..c517dcfe9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -38,7 +38,7 @@ "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.viewcode", - "myst_parser", + "myst_nb", "sphinx_design", "sphinx_copybutton", "matplotlib.sphinxext.plot_directive", @@ -50,7 +50,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns = ["conf.py", "**/*.ipynb"] # Specify how to identify the prompt when copying code snippets copybutton_prompt_text = r">>> |\.\.\. 
" @@ -124,6 +124,14 @@ "colon_fence", ] +# -- Notebook execution (myst-nb) ------------------------------------------- +nb_custom_formats = {".py": ["jupytext.reads", {"fmt": "py:percent"}]} +nb_execution_mode = "off" # TODO(#884): enable "cache" once tutorials are optimized for faster execution +nb_execution_timeout = 120 +nb_execution_raise_on_error = True +# TODO(#884): backtesting notebook exceeds timeout — needs rewrite or execution split +nb_execution_excludepatterns = ["tutorials/backtesting_openstef_with_beam*"] + # Sphinx version switcher config = SphinxConfig("../../pyproject.toml", globalns=globals()) version = config.version @@ -300,12 +308,17 @@ def cff_to_bibtex(cff_data: dict[str, Any]) -> str: # -- Sphinx setup ------------------------------------------------------------ -def rstjinja(app: Sphinx, _docname: str, source: list[str]) -> None: +def rstjinja(app: Sphinx, docname: str, source: list[str]) -> None: """Render RST files as Jinja templates for variable substitution.""" # Only process HTML builds if app.builder.format != "html": # type: ignore[attr-defined] return + # Only process .rst sources — skip notebooks/MyST which contain {} literals + rst_path = Path(app.srcdir) / f"{docname}.rst" + if not rst_path.is_file() or not source[0].strip(): + return + src: str = source[0] rendered: str = app.builder.templates.render_string( # type: ignore[attr-defined] src, diff --git a/docs/source/contribute/document.rst b/docs/source/contribute/document.rst index 06c42be5a..c267239b3 100644 --- a/docs/source/contribute/document.rst +++ b/docs/source/contribute/document.rst @@ -390,6 +390,43 @@ Common issues .. include:: _getting_help.rst +Working with tutorial notebooks +=============================== + +Tutorials live in ``examples/tutorials/`` as paired `Jupytext `_ +files: a ``.py`` (percent format) source of truth and a ``.ipynb`` companion kept in sync. 
+ +Key rules +--------- + +* **Edit the** ``.py`` **file**, not the ``.ipynb`` — the script is the single source of truth. +* **Never commit notebook outputs.** The ``.ipynb`` on ``main`` must be output-free. +* Notebooks are rendered into the docs via `myst-nb <https://myst-nb.readthedocs.io/>`_. Execution is + currently disabled (``nb_execution_mode = "off"``); cached execution (``"cache"``) is planned (#884). + +Workflow +-------- + +.. code-block:: bash + + # After editing a .py tutorial: + poe notebooks # Sync .py → .ipynb + + # Before committing: + poe notebooks-clear # Strip any outputs from .ipynb + poe notebooks-check # Verify sync + no outputs (runs in CI) + +Creating a new tutorial +----------------------- + +.. code-block:: bash + + # Create the .py file in percent format, then pair it: + jupytext --set-formats "ipynb,py:percent" examples/tutorials/my_tutorial.py + + # Add a toctree entry in docs/source/examples.rst + # Optionally tag the first SPDX cell with "remove-cell" (see existing tutorials) + Additional documentation resources ================================== diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 23023da77..d184ef7b0 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -7,6 +7,12 @@ Examples ======== -.. admonition:: This page is under construction. Will be autogenerated from examples/examples. +End-to-end tutorials demonstrating OpenSTEF workflows. Each example is a runnable +Jupyter notebook. Outputs are not rendered yet (notebook execution is disabled); run the notebook locally to see them. - Want to help? Check :ref:`Contributing ` for more information. +.. 
toctree:: + :maxdepth: 1 + + Forecasting with Presets <tutorials/forecasting_with_workflow_presets> + Hyperparameter Tuning <tutorials/hyperparameter_tuning_with_optuna> + Backtesting with BEAM <tutorials/backtesting_openstef_with_beam> diff --git a/examples/tutorials/backtesting_openstef_with_beam.ipynb b/examples/tutorials/backtesting_openstef_with_beam.ipynb index e58b73f32..3b48e1738 100644 --- a/examples/tutorials/backtesting_openstef_with_beam.ipynb +++ b/examples/tutorials/backtesting_openstef_with_beam.ipynb @@ -37,6 +37,7 @@ "# --- Thread Configuration ---\n", "# Prevent thread contention when running parallel backtests with XGBoost\n", "import os\n", + "\n", "os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n", "os.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\n", "os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n", @@ -70,8 +71,8 @@ "outputs": [], "source": [ "# Import types for configuration\n", - "from openstef_core.types import LeadTime, Q # LeadTime: forecast horizon, Q: quantile\n", "from openstef_beam.benchmarking.benchmarks.liander2024 import Liander2024Category\n", + "from openstef_core.types import LeadTime, Q # LeadTime: forecast horizon, Q: quantile\n", "\n", "# --- Output Paths ---\n", "OUTPUT_PATH = Path(\"./benchmark_results\")\n", @@ -87,9 +88,13 @@ "\n", "# Quantiles for probabilistic forecasting (7 quantiles covering 5th to 95th percentile)\n", "PREDICTION_QUANTILES = [\n", - " Q(0.05), Q(0.1), Q(0.3), # Lower quantiles\n", - " Q(0.5), # Median\n", - " Q(0.7), Q(0.9), Q(0.95), # Upper quantiles\n", + " Q(0.05),\n", + " Q(0.1),\n", + " Q(0.3), # Lower quantiles\n", + " Q(0.5), # Median\n", + " Q(0.7),\n", + " Q(0.9),\n", + " Q(0.95), # Upper quantiles\n", "]\n", "\n", "# --- Benchmark Filter (optional) ---\n", @@ -128,26 +133,21 @@ " model_id=\"benchmark_model_\",\n", " run_name=None,\n", " model=\"flatliner\", # Placeholder - will be overwritten per model\n", - " \n", " # Forecast settings\n", " horizons=FORECAST_HORIZONS,\n", " quantiles=PREDICTION_QUANTILES,\n", - " \n", " # Model reuse: reuse trained model for same target (speeds up backtesting)\n", " model_reuse_enable=True,\n", " 
mlflow_storage=None, # Disable MLflow for this demo\n", - " \n", " # Weather feature column mappings (match dataset column names)\n", " radiation_column=\"shortwave_radiation\",\n", " wind_speed_column=\"wind_speed_80m\", # 80m wind speed for better wind park predictions\n", " pressure_column=\"surface_pressure\",\n", " temperature_column=\"temperature_2m\",\n", " relative_humidity_column=\"relative_humidity_2m\",\n", - " \n", " # Additional features\n", " energy_price_column=\"EPEX_NL\", # Day-ahead electricity price\n", " rolling_aggregate_features=[\"mean\", \"median\", \"max\", \"min\"], # Rolling window stats\n", - " \n", " # Logging\n", " verbosity=0, # Quiet mode for batch processing\n", ")" @@ -228,9 +228,9 @@ "outputs": [], "source": [ "# Import benchmark components\n", + "from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster\n", "from openstef_beam.benchmarking.benchmarks.liander2024 import create_liander2024_benchmark_runner\n", "from openstef_beam.benchmarking.callbacks.strict_execution_callback import StrictExecutionCallback\n", - "from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster\n", "\n", "# --- Run XGBoost Benchmark ---\n", "print(\"🌲 Running XGBoost benchmark...\")\n", @@ -300,10 +300,12 @@ "\n", "# Generate comparison reports\n", "print(\"šŸ“Š Generating comparison analysis...\")\n", - "comparison_pipeline.run(run_data={\n", - " \"xgboost\": storage_xgboost,\n", - " \"gblinear\": storage_gblinear,\n", - "})\n", + "comparison_pipeline.run(\n", + " run_data={\n", + " \"xgboost\": storage_xgboost,\n", + " \"gblinear\": storage_gblinear,\n", + " }\n", + ")\n", "print(\"āœ… Comparison analysis complete!\")" ] }, @@ -331,11 +333,11 @@ "source": [ "# Open key analysis plots in browser\n", "# HTML visualizations are interactive and best viewed in a browser\n", - "import webbrowser\n", "import os\n", + "import webbrowser\n", "\n", "# Base path for analysis 
results\n", - "analysis_base = os.path.abspath('./benchmark_results/analysis/D-1T06:00')\n", + "analysis_base = os.path.abspath(\"./benchmark_results/analysis/D-1T06:00\")\n", "\n", "# Define key visualizations to open\n", "visualizations = [\n", @@ -346,9 +348,9 @@ "print(\"🌐 Opening analysis visualizations in browser...\\n\")\n", "for name, filename in visualizations:\n", " filepath = os.path.join(analysis_base, filename)\n", - " if os.path.exists(filepath):\n", + " if Path(filepath).exists():\n", " print(f\" šŸ“Š {name}\")\n", - " webbrowser.open(f'file://{filepath}')\n", + " webbrowser.open(f\"file://{filepath}\")\n", " else:\n", " print(f\" āš ļø {name} not found at {filepath}\")" ] @@ -374,22 +376,22 @@ "import glob\n", "\n", "# Find all time series plots for individual targets\n", - "target_plots = glob.glob('./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html')\n", + "target_plots = glob.glob(\"./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html\")\n", "\n", "if target_plots:\n", " print(\"šŸ“Š Available target-specific time series plots:\\n\")\n", " for i, plot in enumerate(sorted(target_plots)[:5]): # Show first 5\n", - " parts = plot.split('/')\n", + " parts = plot.split(\"/\")\n", " category = parts[-3] # e.g., \"transformer\"\n", - " target = parts[-2] # e.g., \"OS Apeldoorn\"\n", - " print(f\" {i+1}. {category}/{target}\")\n", - " \n", + " target = parts[-2] # e.g., \"OS Apeldoorn\"\n", + " print(f\" {i + 1}. 
{category}/{target}\")\n", + "\n", " # Open the first transformer plot as an example\n", - " transformer_plots = [p for p in target_plots if 'transformer' in p]\n", + " transformer_plots = [p for p in target_plots if \"transformer\" in p]\n", " if transformer_plots:\n", " example_plot = os.path.abspath(transformer_plots[0])\n", " print(f\"\\n🌐 Opening example: {transformer_plots[0]}\")\n", - " webbrowser.open(f'file://{example_plot}')\n", + " webbrowser.open(f\"file://{example_plot}\")\n", "else:\n", " print(\"āš ļø No target-specific plots found. Run the benchmark first.\")" ] @@ -437,6 +439,9 @@ } ], "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, "kernelspec": { "display_name": ".venv", "language": "python", diff --git a/examples/tutorials/backtesting_openstef_with_beam.py b/examples/tutorials/backtesting_openstef_with_beam.py new file mode 100644 index 000000000..0c30580ec --- /dev/null +++ b/examples/tutorials/backtesting_openstef_with_beam.py @@ -0,0 +1,338 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.1 +# kernelspec: +# display_name: .venv +# language: python +# name: python3 +# --- + +# %% [markdown] +# # šŸ“Š Backtesting OpenSTEF Models with OpenSTEF-BEAM +# +# This tutorial demonstrates how to use **OpenSTEF-BEAM** (Backtesting, Evaluation, Analysis, Metrics) to systematically evaluate forecasting models. You'll learn how to: +# +# 1. **Configure benchmark experiments** with multiple model types +# 2. **Run parallel backtests** across dozens of energy assets +# 3. **Compare model performance** with standardized metrics +# 4. **Generate analysis reports** with interactive visualizations +# +# > **BEAM** provides a rigorous framework for model evaluation, ensuring fair comparisons and reproducible results. 
+ +# %% [markdown] +# ## šŸ”§ Environment Setup +# +# First, we configure thread settings to prevent conflicts with XGBoost's internal parallelization when running multiple processes. + +# %% +# --- Thread Configuration --- +# Prevent thread contention when running parallel backtests with XGBoost +import os + +os.environ["OMP_NUM_THREADS"] = "1" +os.environ["OPENBLAS_NUM_THREADS"] = "1" +os.environ["MKL_NUM_THREADS"] = "1" + +# --- Standard Imports --- +import logging +import multiprocessing +from pathlib import Path + +logging.basicConfig(level=logging.INFO, format="[%(asctime)s][%(levelname)s] %(message)s") + +# %% [markdown] +# ## āš™ļø Benchmark Configuration +# +# Configure the benchmark parameters: +# - **Output paths** — where to store results for each model +# - **Forecast horizons** — how far ahead to predict (using ISO 8601 duration format) +# - **Quantiles** — prediction intervals for probabilistic evaluation + +# %% +# Import types for configuration +from openstef_beam.benchmarking.benchmarks.liander2024 import Liander2024Category +from openstef_core.types import LeadTime, Q # LeadTime: forecast horizon, Q: quantile + +# --- Output Paths --- +OUTPUT_PATH = Path("./benchmark_results") +BENCHMARK_RESULTS_PATH_XGBOOST = OUTPUT_PATH / "XGBoost" +BENCHMARK_RESULTS_PATH_GBLINEAR = OUTPUT_PATH / "GBLinear" + +# --- Parallelization --- +N_PROCESSES = multiprocessing.cpu_count() # Use all available CPU cores +print(f"šŸ–„ļø Running with {N_PROCESSES} parallel processes") + +# --- Forecast Configuration --- +FORECAST_HORIZONS = [LeadTime.from_string("P3D")] # 3-day ahead forecast (ISO 8601: P3D) + +# Quantiles for probabilistic forecasting (7 quantiles covering 5th to 95th percentile) +PREDICTION_QUANTILES = [ + Q(0.05), + Q(0.1), + Q(0.3), # Lower quantiles + Q(0.5), # Median + Q(0.7), + Q(0.9), + Q(0.95), # Upper quantiles +] + +# --- Benchmark Filter (optional) --- +# Set to None to run all categories, or specify categories like: +# BENCHMARK_FILTER = 
[Liander2024Category.TRANSFORMER, Liander2024Category.MV_FEEDER] +BENCHMARK_FILTER: list[Liander2024Category] | None = None + +# %% [markdown] +# ## šŸ› ļø Model Configuration +# +# We define a **common configuration** that both models share, then create model-specific variants. This ensures fair comparison by keeping all settings identical except the model type. +# +# ### Available Models: +# - **XGBoost** — Gradient boosting trees (handles complex nonlinear patterns) +# - **GBLinear** — Gradient boosted linear model (better extrapolation, faster) + +# %% +# Import workflow configuration +from openstef_models.presets import ForecastingWorkflowConfig + +# Common configuration shared by all models +# This ensures fair comparison by keeping all settings identical +common_config = ForecastingWorkflowConfig( + model_id="benchmark_model_", + run_name=None, + model="flatliner", # Placeholder - will be overwritten per model + # Forecast settings + horizons=FORECAST_HORIZONS, + quantiles=PREDICTION_QUANTILES, + # Model reuse: reuse trained model for same target (speeds up backtesting) + model_reuse_enable=True, + mlflow_storage=None, # Disable MLflow for this demo + # Weather feature column mappings (match dataset column names) + radiation_column="shortwave_radiation", + wind_speed_column="wind_speed_80m", # 80m wind speed for better wind park predictions + pressure_column="surface_pressure", + temperature_column="temperature_2m", + relative_humidity_column="relative_humidity_2m", + # Additional features + energy_price_column="EPEX_NL", # Day-ahead electricity price + rolling_aggregate_features=["mean", "median", "max", "min"], # Rolling window stats + # Logging + verbosity=0, # Quiet mode for batch processing +) + +# %% +# Create model-specific configurations by copying common config and updating model type +xgboost_config = common_config.model_copy(update={"model": "xgboost"}) +gblinear_config = common_config.model_copy(update={"model": "gblinear"}) + +print("āœ… Model 
configurations created:") +print(f" - XGBoost: {xgboost_config.model}") +print(f" - GBLinear: {gblinear_config.model}") + +# %% [markdown] +# ## šŸ’¾ Storage Configuration +# +# **LocalBenchmarkStorage** manages the file structure for benchmark results: +# ``` +# benchmark_results/ +# ā”œā”€ā”€ XGBoost/ +# │ ā”œā”€ā”€ backtest/ # Raw predictions +# │ ā”œā”€ā”€ evaluation/ # Metrics per target +# │ └── analysis/ # Visualizations (HTML) +# └── GBLinear/ +# └── ... +# ``` + +# %% +# Initialize storage backends for each model +from openstef_beam.benchmarking.storage.local_storage import LocalBenchmarkStorage + +storage_xgboost = LocalBenchmarkStorage(base_path=BENCHMARK_RESULTS_PATH_XGBOOST) +storage_gblinear = LocalBenchmarkStorage(base_path=BENCHMARK_RESULTS_PATH_GBLINEAR) + +print(f"šŸ“ XGBoost results: {BENCHMARK_RESULTS_PATH_XGBOOST}") +print(f"šŸ“ GBLinear results: {BENCHMARK_RESULTS_PATH_GBLINEAR}") + +# %% [markdown] +# ## šŸš€ Run Backtests +# +# Now we run the **Liander 2024 Benchmark** — a comprehensive evaluation suite that: +# 1. Downloads the benchmark dataset from HuggingFace Hub (if needed) +# 2. Runs backtests across 5 asset categories (transformers, feeders, solar/wind parks) +# 3. Computes metrics and generates analysis visualizations +# +# āš ļø **Note**: This may take several minutes depending on your hardware. 
+ +# %% +# Import benchmark components +from openstef_beam.benchmarking.baselines.openstef4 import create_openstef4_preset_backtest_forecaster +from openstef_beam.benchmarking.benchmarks.liander2024 import create_liander2024_benchmark_runner +from openstef_beam.benchmarking.callbacks.strict_execution_callback import StrictExecutionCallback + +# --- Run XGBoost Benchmark --- +print("🌲 Running XGBoost benchmark...") +create_liander2024_benchmark_runner( + storage=storage_xgboost, + callbacks=[StrictExecutionCallback()], # Fail fast on errors +).run( + forecaster_factory=create_openstef4_preset_backtest_forecaster( + workflow_config=xgboost_config, + ), + run_name="xgboost", + n_processes=N_PROCESSES, + filter_args=BENCHMARK_FILTER, +) +print("āœ… XGBoost benchmark complete!") + +# --- Run GBLinear Benchmark --- +print("\nšŸ“ˆ Running GBLinear benchmark...") +create_liander2024_benchmark_runner( + storage=storage_gblinear, + callbacks=[StrictExecutionCallback()], +).run( + forecaster_factory=create_openstef4_preset_backtest_forecaster( + workflow_config=gblinear_config, + ), + run_name="gblinear", + n_processes=N_PROCESSES, + filter_args=BENCHMARK_FILTER, +) +print("āœ… GBLinear benchmark complete!") + +# %% [markdown] +# ## šŸ“Š Compare Model Performance +# +# The **BenchmarkComparisonPipeline** generates side-by-side analysis of multiple models: +# - Global metrics across all targets +# - Per-category breakdowns (transformers, feeders, etc.) 
+# - Time-windowed performance analysis + +# %% +# Run model comparison analysis +from openstef_beam.benchmarking import BenchmarkComparisonPipeline +from openstef_beam.benchmarking.benchmarks.liander2024 import LIANDER2024_ANALYSIS_CONFIG + +# Create comparison pipeline +target_provider = create_liander2024_benchmark_runner( + storage=LocalBenchmarkStorage(base_path=OUTPUT_PATH), +).target_provider + +comparison_pipeline = BenchmarkComparisonPipeline( + analysis_config=LIANDER2024_ANALYSIS_CONFIG, + storage=LocalBenchmarkStorage(base_path=OUTPUT_PATH), + target_provider=target_provider, +) + +# Generate comparison reports +print("šŸ“Š Generating comparison analysis...") +comparison_pipeline.run( + run_data={ + "xgboost": storage_xgboost, + "gblinear": storage_gblinear, + } +) +print("āœ… Comparison analysis complete!") + +# %% [markdown] +# ## šŸ“ˆ View Analysis Results +# +# The benchmark generates interactive HTML visualizations. Let's open the most important ones: +# +# ### Key Metrics: +# - **rCRPS** (relative Continuous Ranked Probability Score) — measures probabilistic forecast accuracy +# - **rMAE** (relative Mean Absolute Error) — measures point forecast accuracy +# - Lower values = better performance + +# %% +# Open key analysis plots in browser +# HTML visualizations are interactive and best viewed in a browser +import os +import webbrowser + +# Base path for analysis results +analysis_base = os.path.abspath("./benchmark_results/analysis/D-1T06:00") + +# Define key visualizations to open +visualizations = [ + ("rCRPS Grouped by Category", "rCRPS_grouped.html"), + ("rCRPS Time-Windowed (7 days)", "rCRPS_windowed_7D.html"), +] + +print("🌐 Opening analysis visualizations in browser...\n") +for name, filename in visualizations: + filepath = os.path.join(analysis_base, filename) + if Path(filepath).exists(): + print(f" šŸ“Š {name}") + webbrowser.open(f"file://{filepath}") + else: + print(f" āš ļø {name} not found at {filepath}") + +# %% [markdown] +# ### 
šŸ” Explore Individual Target Results +# +# You can also view time series plots for individual targets. Let's look at a transformer forecast: + +# %% +# List available target-specific visualizations +import glob + +# Find all time series plots for individual targets +target_plots = glob.glob("./benchmark_results/XGBoost/analysis/*/*/time_series_plot*.html") + +if target_plots: + print("šŸ“Š Available target-specific time series plots:\n") + for i, plot in enumerate(sorted(target_plots)[:5]): # Show first 5 + parts = plot.split("/") + category = parts[-3] # e.g., "transformer" + target = parts[-2] # e.g., "OS Apeldoorn" + print(f" {i + 1}. {category}/{target}") + + # Open the first transformer plot as an example + transformer_plots = [p for p in target_plots if "transformer" in p] + if transformer_plots: + example_plot = os.path.abspath(transformer_plots[0]) + print(f"\n🌐 Opening example: {transformer_plots[0]}") + webbrowser.open(f"file://{example_plot}") +else: + print("āš ļø No target-specific plots found. Run the benchmark first.") + +# %% [markdown] +# --- +# +# ## šŸŽÆ Summary +# +# In this tutorial, you learned how to: +# +# 1. āœ… **Configure benchmark experiments** with `ForecastingWorkflowConfig` +# 2. āœ… **Run parallel backtests** using the Liander 2024 benchmark +# 3. āœ… **Compare models** (XGBoost vs GBLinear) with `BenchmarkComparisonPipeline` +# 4. āœ… **Analyze results** with interactive HTML visualizations +# +# ### šŸ“ Output Structure +# +# ``` +# benchmark_results/ +# ā”œā”€ā”€ XGBoost/ +# │ ā”œā”€ā”€ backtest/ # Raw predictions (parquet) +# │ ā”œā”€ā”€ evaluation/ # Metrics per target +# │ └── analysis/ # HTML visualizations +# ā”œā”€ā”€ GBLinear/ +# │ └── ... 
+# └── analysis/ # Comparison analysis (both models) +# └── D-1T06:00/ +# ā”œā”€ā”€ rCRPS_grouped.html # Probabilistic accuracy by category +# ā”œā”€ā”€ rMAE_grouped.html # Point forecast accuracy +# └── summary.html # Overall summary +# ``` +# +# ### šŸš€ Next Steps +# +# - Experiment with different `FORECAST_HORIZONS` (e.g., `"PT6H"`, `"P7D"`) +# - Add more quantiles for higher resolution prediction intervals +# - Filter specific categories with `BENCHMARK_FILTER` +# - Integrate MLflow for experiment tracking diff --git a/examples/tutorials/forecasting_with_workflow_presets.ipynb b/examples/tutorials/forecasting_with_workflow_presets.ipynb index 01a3a6881..cdf4a3800 100644 --- a/examples/tutorials/forecasting_with_workflow_presets.ipynb +++ b/examples/tutorials/forecasting_with_workflow_presets.ipynb @@ -4,7 +4,11 @@ "cell_type": "code", "execution_count": null, "id": "e8eae4f7", - "metadata": {}, + "metadata": { + "tags": [ + "remove-cell" + ] + }, "outputs": [], "source": [ "# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project \n", diff --git a/examples/tutorials/forecasting_with_workflow_presets.py b/examples/tutorials/forecasting_with_workflow_presets.py index 4c7f8be3d..aaba18a08 100644 --- a/examples/tutorials/forecasting_with_workflow_presets.py +++ b/examples/tutorials/forecasting_with_workflow_presets.py @@ -13,7 +13,7 @@ # name: python3 # --- -# %% +# %% tags=["remove-cell"] # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project # # SPDX-License-Identifier: MPL-2.0 diff --git a/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb b/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb index 84e8f6685..ecf1de68d 100644 --- a/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb +++ b/examples/tutorials/hyperparameter_tuning_with_optuna.ipynb @@ -4,7 +4,11 @@ "cell_type": "code", "execution_count": null, "id": "feff3c78", - "metadata": {}, + "metadata": { + "tags": [ + "remove-cell" + ] + }, "outputs": [], 
"source": [ "# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project \n", diff --git a/examples/tutorials/hyperparameter_tuning_with_optuna.py b/examples/tutorials/hyperparameter_tuning_with_optuna.py index ad4608b19..6014e1afa 100644 --- a/examples/tutorials/hyperparameter_tuning_with_optuna.py +++ b/examples/tutorials/hyperparameter_tuning_with_optuna.py @@ -13,7 +13,7 @@ # name: python3 # --- -# %% +# %% tags=["remove-cell"] # SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project # # SPDX-License-Identifier: MPL-2.0 diff --git a/pyproject.toml b/pyproject.toml index 6dfe79d66..bdb01fff2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,9 +130,11 @@ lint.ignore = [ lint.per-file-ignores."./examples/tutorials/*" = [ "D100", # Notebooks don't need module docstrings "E402", # Imports not at top — notebook cells have natural ordering + "E501", # Long lines in notebooks — readability is cell-scoped "ERA001", # Jupytext YAML frontmatter looks like commented-out code "F821", # Cell-scoped names appear undefined to Ruff's module-level analysis "INP001", # Not a namespace package — notebooks don't need __init__.py + "PTH", # Tutorials may use os.path for simplicity "S101", # Tutorials may use assert for runtime checks "T201", # Tutorials may use print for demonstration ] @@ -230,6 +232,9 @@ include = [ "packages/*/examples/**/*.py", "packages/*/tests/**/*.py", ] +exclude = [ + "examples/tutorials/**/*.py", +] typeCheckingMode = "strict" reportPrivateImportUsage = false @@ -282,11 +287,16 @@ cmd = "pytest --numprocesses=auto --doctest-modules packages/*/src --maxfail=1" help = "Sync jupytext .py percent sources → .ipynb notebooks (and vice versa)" cmd = "jupytext --sync examples/tutorials/*.py" +[tool.poe.tasks.notebooks-clear] +help = "Strip outputs from all .ipynb notebooks" +cmd = "jupyter nbconvert --clear-output --inplace examples/tutorials/*.ipynb" + [tool.poe.tasks.notebooks-check] -help = "Check that .ipynb notebooks are in sync with 
their jupytext .py sources" +help = "Check that .ipynb notebooks are in sync with their jupytext .py sources and have no outputs" sequence = [ { cmd = "jupytext --sync examples/tutorials/*.py" }, { cmd = "git diff --exit-code -- examples/tutorials/" }, + { script = "tools.check_notebook_outputs:main" }, ] [tool.poe.tasks.report] @@ -375,8 +385,13 @@ sequence = [ { cmd = "uv build .", help = "Build root meta package separately (not part of --all-packages)" }, ] +[tool.poe.tasks._docs_sync] +help = "Sync tutorial sources into docs/source for Sphinx" +cmd = "python -c \"import shutil; shutil.rmtree('docs/source/tutorials', ignore_errors=True); shutil.copytree('examples/tutorials', 'docs/source/tutorials')\"" + [tool.poe.tasks.docs] help = "Build the documentation" +deps = [ "_docs_sync" ] control.expr = "serve" switch = [ { case = "True", cmd = "sphinx-autobuild docs/source docs/build/html --watch packages" }, diff --git a/tools/check_notebook_outputs.py b/tools/check_notebook_outputs.py new file mode 100644 index 000000000..37f039822 --- /dev/null +++ b/tools/check_notebook_outputs.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2025 Contributors to the OpenSTEF project +# +# SPDX-License-Identifier: MPL-2.0 +"""Check that committed .ipynb notebooks have no cell outputs.""" + +import json +import sys +from pathlib import Path + +TUTORIALS_DIR = Path("examples/tutorials") + + +def main() -> None: + """Validate that no .ipynb in tutorials has stored outputs.""" + failures: list[str] = [] + + for nb_path in sorted(TUTORIALS_DIR.glob("*.ipynb")): + nb = json.loads(nb_path.read_text(encoding="utf-8")) + for i, cell in enumerate(nb.get("cells", [])): + if cell.get("cell_type") == "code" and cell.get("outputs"): + failures.append(f" {nb_path.name}: cell {i} has outputs") + break + + if failures: + print("ERROR: Notebooks with outputs found (run `poe notebooks-clear`):") + print("\n".join(failures)) + sys.exit(1) + + print(f"OK: {len(list(TUTORIALS_DIR.glob('*.ipynb')))} 
notebooks checked, no outputs found") + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index 2271380e5..b53637146 100644 --- a/uv.lock +++ b/uv.lock @@ -1740,6 +1740,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/64/285f20a31679bf547b75602702f7800e74dbabae36ef324f716c02804753/jupyter-1.1.1-py2.py3-none-any.whl", hash = "sha256:7a59533c22af65439b24bbe60373a4e95af8f16ac65a6c00820ad378e3f7cc83", size = 2657, upload-time = "2024-08-30T07:15:47.045Z" }, ] +[[package]] +name = "jupyter-cache" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "click" }, + { name = "importlib-metadata" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "pyyaml" }, + { name = "sqlalchemy" }, + { name = "tabulate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/f7/3627358075f183956e8c4974603232b03afd4ddc7baf72c2bc9fff522291/jupyter_cache-1.0.1.tar.gz", hash = "sha256:16e808eb19e3fb67a223db906e131ea6e01f03aa27f49a7214ce6a5fec186fb9", size = 32048, upload-time = "2024-11-15T16:03:55.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/6b/67b87da9d36bff9df7d0efbd1a325fa372a43be7158effaf43ed7b22341d/jupyter_cache-1.0.1-py3-none-any.whl", hash = "sha256:9c3cafd825ba7da8b5830485343091143dff903e4d8c69db9349b728b140abf6", size = 33907, upload-time = "2024-11-15T16:03:54.021Z" }, +] + [[package]] name = "jupyter-client" version = "8.7.0" @@ -2517,6 +2536,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, ] +[[package]] +name = "myst-nb" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "ipykernel" }, 
+ { name = "ipython" }, + { name = "jupyter-cache" }, + { name = "myst-parser" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "pyyaml" }, + { name = "sphinx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/b4/ff1abeea67e8cfe0a8c033389f6d1d8b0bfecfd611befb5cbdeab884fce6/myst_nb-1.4.0.tar.gz", hash = "sha256:c145598de62446a6fd009773dd071a40d3b76106ace780de1abdfc6961f614c2", size = 82285, upload-time = "2026-03-02T21:14:56.95Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/93/0a378b48488879a1d925b42a804edfc6e0cd0ef854220f2dce738a46e7e9/myst_nb-1.4.0-py3-none-any.whl", hash = "sha256:0e2c86e7d3b82c3aa51383f82d6268f7714f3b772c23a796ab09538a8e68b4e4", size = 82555, upload-time = "2026-03-02T21:14:55.652Z" }, +] + [[package]] name = "myst-parser" version = "4.0.1" @@ -2936,8 +2976,10 @@ version = "0.0.0" source = { editable = "docs" } dependencies = [ { name = "ipython" }, + { name = "jupyter-cache" }, + { name = "jupytext" }, { name = "matplotlib" }, - { name = "myst-parser" }, + { name = "myst-nb" }, { name = "openstef" }, { name = "openstef-beam" }, { name = "openstef-core" }, @@ -2956,8 +2998,10 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "ipython", specifier = ">=9.4.0" }, + { name = "jupyter-cache", specifier = ">=1.0.0" }, + { name = "jupytext", specifier = ">=1.16.0" }, { name = "matplotlib", specifier = ">=3.10.5" }, - { name = "myst-parser", specifier = ">=4.0.1" }, + { name = "myst-nb", specifier = ">=1.2.0" }, { name = "openstef", editable = "." 
}, { name = "openstef-beam", editable = "packages/openstef-beam" }, { name = "openstef-core", editable = "packages/openstef-core" }, @@ -4992,6 +5036,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "tabulate" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/58/8c37dea7bbf769b20d58e7ace7e5edfe65b849442b00ffcdd56be88697c6/tabulate-0.10.0.tar.gz", hash = "sha256:e2cfde8f79420f6deeffdeda9aaec3b6bc5abce947655d17ac662b126e48a60d", size = 91754, upload-time = "2026-03-04T18:55:34.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/55/db07de81b5c630da5cbf5c7df646580ca26dfaefa593667fc6f2fe016d2e/tabulate-0.10.0-py3-none-any.whl", hash = "sha256:f0b0622e567335c8fabaaa659f1b33bcb6ddfe2e496071b743aa113f8774f2d3", size = 39814, upload-time = "2026-03-04T18:55:31.284Z" }, +] + [[package]] name = "terminado" version = "0.18.1"