PyPSA · fneum · Mar 27, 2025 · May 20, 2026 · brynpickering · May 20, 2026
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,8 @@ paper
 .coverage*
 !.coveragerc
 
+notebooks
+
 # Ignore IDE project files
 .idea/
 .vscode

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
@@ -11,12 +11,18 @@ Release Notes
 Upcoming Release
 ================
 
-.. warning:: 
-  
-   The features listed below are not released yet, but will be part of the next release! 
-   To use the features already you have to install the ``master`` branch, e.g. 
+.. warning::
+
+   The features listed below are not released yet, but will be part of the next release!
+   To use the features already you have to install the ``master`` branch, e.g.
    ``pip install git+https://github.com/pypsa/atlite``.
 
+* The method ``runoff(normalize_using_yearly=...)`` now supports handling of
+  partial years when normalizing runoff data based on annual data. The
+  normalization is applied proportionally based on the time overlap. A warning
+  is shown for partial years noting the strong assumption of evenly distributed
+  runoff throughout the year.
+
 `v0.6.1 <https://github.com/PyPSA/atlite/releases/tag/v0.6.1>`__ (21st April 2026)
 =======================================================================================
 
@@ -29,8 +35,8 @@ Upcoming Release
 
 **Features**
 
-* Add ``aggregate_time={"sum", "mean", None}`` to ``convert_and_aggregate`` for temporal 
-  aggregation with and without spatial aggregation, and deprecate ``capacity_factor``/``capacity_factor_timeseries`` 
+* Add ``aggregate_time={"sum", "mean", None}`` to ``convert_and_aggregate`` for temporal
+  aggregation with and without spatial aggregation, and deprecate ``capacity_factor``/``capacity_factor_timeseries``
   in favor of it
 
 **Bug fixes**
@@ -42,8 +48,8 @@ Upcoming Release
 
 
 `v0.5.0 <https://github.com/PyPSA/atlite/releases/tag/v0.5.0>`__ (13th March 2026)
-=======================================================================================   
-   
+=======================================================================================
+
 **Breaking**
 
 * Default ``add_cutout_windspeed`` to ``True`` in ``get_windturbineconfig``, meaning power curves
@@ -108,14 +114,14 @@ Upcoming Release
   active support is only provided for the most recent versions (see `here
   <https://endoflife.date/python>`_). It is recommended to upgrade to the latest
   Python version if possible. Note that there might be some issues with
-  Windows and Python 3.13, which are not yet resolved. 
+  Windows and Python 3.13, which are not yet resolved.
   (https://github.com/PyPSA/atlite/pull/418)
 
 * Added support for ``numpy>=2``. (https://github.com/PyPSA/atlite/pull/419)
 
 **Bug fixes**
 
-* Fix mismatched dim lengths during rechunking. 
+* Fix mismatched dim lengths during rechunking.
   (https://github.com/PyPSA/atlite/pull/423)
 
 * Exclude versions 1.4.0 and 1.4.1 of ``rasterio`` due to a bug in these
@@ -127,13 +133,13 @@ Upcoming Release
 
 **Features**
 
-* Add power law interpolation method as a new argument to `cutout.wind` 
+* Add power law interpolation method as a new argument to ``cutout.wind``
   (`#402 <https://github.com/PyPSA/atlite/pull/402>`_)
 
-* Use ``dask.array`` functions in favour of ``numpy`` functions 
+* Use ``dask.array`` functions in favour of ``numpy`` functions
   (`#367 <https://github.com/PyPSA/atlite/pull/367>`_)
 
-* Improved CI, testing, linting and build process 
+* Improved CI, testing, linting and build process
   (`#388 <https://github.com/PyPSA/atlite/pull/388>`_,
   `#392 <https://github.com/PyPSA/atlite/pull/392>`_,
   `#394 <https://github.com/PyPSA/atlite/pull/394>`_,
@@ -144,7 +150,7 @@ Upcoming Release
 
 * Adapt ERA5T merge to new CDS API (`#391 <https://github.com/PyPSA/atlite/pull/391>`_)
 
-* Fixes issues with dependeny updates 
+* Fixes issues with dependency updates
   (`#381 <https://github.com/PyPSA/atlite/pull/381>`_,
   `#387 <https://github.com/PyPSA/atlite/pull/387>`_)
 

diff --git a/atlite/convert.py b/atlite/convert.py
@@ -1066,20 +1066,68 @@ def runoff(
         else:
             normalize_using_yearly_i = normalize_using_yearly_i.astype(int)
 
-        years = (
-            pd.Series(pd.to_datetime(result.coords["time"].values).year)
-            .value_counts()
-            .loc[lambda x: x > 8700]
-            .index.intersection(normalize_using_yearly_i)
-        )
-        assert len(years), "Need at least a full year of data (more is better)"
-        years_overlap = slice(str(min(years)), str(max(years)))
+        result_time = pd.DatetimeIndex(result.coords["time"].values)
-        result_time = pd.DatetimeIndex(result.coords["time"].values)
+        result_time = result.coords["time"].to_index()
-        result_time = pd.DatetimeIndex(result.coords["time"].values)
+        result_time = result.coords["time"].to_index()
+        result_period = (result_time.min(), result_time.max())
 
+        if isinstance(normalize_using_yearly.index, pd.DatetimeIndex):
+            norm_period = (
+                normalize_using_yearly.index.min(),
+                normalize_using_yearly.index.max(),
+            )
+        else:
+            min_year, max_year = (
+                normalize_using_yearly_i.min(),
+                normalize_using_yearly_i.max(),
+            )
+            norm_period = (pd.Timestamp(f"{min_year}"), pd.Timestamp(f"{max_year + 1}"))
+
+        overlap_start = max(result_period[0], norm_period[0])
+        overlap_end = min(result_period[1], norm_period[1])
+
+        if overlap_start >= overlap_end:
+            raise ValueError(
+                f"No overlap between runoff data ({result_period}) and normalization data ({norm_period})"
+            )
+
+        years_overlap = slice(str(overlap_start.date()), str(overlap_end.date()))
         dim = result.dims[1 - result.get_axis_num("time")]
-        dim = result.dims[1 - result.get_axis_num("time")]
+        non_time_dims = list(set(result) - {"time"}) 
-        dim = result.dims[1 - result.get_axis_num("time")]
+        non_time_dims = list(set(result) - {"time"}) 
+
+        if isinstance(normalize_using_yearly.index, pd.DatetimeIndex):
+            norm_data = normalize_using_yearly.loc[overlap_start:overlap_end].sum()
+        else:
+            # Handle year-based index with proportional scaling
+            overlap_years = set(pd.date_range(overlap_start, overlap_end).year)
+            norm_years = sorted(overlap_years.intersection(normalize_using_yearly_i))
+
+            partial_years = []
+            norm_data = 0
+
+            for year in norm_years:
+                year_start = pd.Timestamp(f"{year}")
+                year_end = pd.Timestamp(f"{year}-12-31 23:59:59")
+                start = max(overlap_start, year_start)
+                end = min(overlap_end, year_end)
+
+                if start > year_start or end < year_end:
+                    partial_years.append(year)
+
+                fraction = (end - start).total_seconds() / (
+                    year_end - year_start
+                ).total_seconds()
+                norm_data += normalize_using_yearly.loc[year] * fraction
+
+            if partial_years:
+                logger.warning(
+                    f"Normalizing partial year data for year(s) {partial_years}. "
+                    f"This assumes runoff is evenly distributed throughout the year, "
+                    f"which may not be accurate for seasonal patterns. Consider using "
+                    f"time-based normalization data for more precise results."
+                )
+
         result *= (
-            xr.DataArray(normalize_using_yearly.loc[years_overlap].sum(), dims=[dim])
+            xr.DataArray(norm_data, dims=[dim])
             / result.sel(time=years_overlap).sum("time")
-        ).reindex(countries=result.coords["countries"])
+        ).reindex({dim: result.coords[dim]})
 
     return result