Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,9 @@ paper
# Ignore IDE project files
.idea/
.vscode
AGENTS.md
uv.lock

tmp
test_ba
data
115 changes: 114 additions & 1 deletion atlite/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,7 @@ def runoff(
return result


def hydro(
def _hydro_from_runoff(
cutout,
plants,
hydrobasins,
Expand Down Expand Up @@ -1045,6 +1045,119 @@ def hydro(
)


def _hydro_from_discharge(
    cutout,
    plants,
):
    """
    Get inflow time-series for `plants` from gridded river discharge.

    For every plant, the discharge time series of the nearest grid point of
    the cutout is extracted. This is meant for cutouts that carry a
    ``discharge`` variable (the GloFAS option of `hydro`), as opposed to the
    runoff-based computation used for ERA5 data.

    Parameters
    ----------
    cutout : atlite.Cutout
        Cutout whose ``data`` provides a ``discharge`` variable that can be
        selected by ``x``/``y`` and has a ``time`` index.
    plants : pd.DataFrame
        Run-of-river plants or dams with lon, lat columns.

    Returns
    -------
    xr.DataArray
        Inflow with dimensions ``(plant, time)``; ``plant`` is labelled with
        ``plants.index`` and ``time`` with the time coordinate of the
        discharge data.

    Notes
    -----
    Open review feedback on this function:

    * The plain nearest-cell lookup can miss the actual river cell when the
      plant coordinates and the discharge grid are slightly misaligned. A
      suggested remedy is to snap each plant to the neighbouring cell with
      the largest value in the static ``uparea`` map (total catchment area
      per cell, see
      https://confluence.ecmwf.int/display/CEMS/Auxiliary+Data), possibly as
      a separate pre-processing helper for the plant dataset. Caveat: a
      plant located right before a fork would then latch onto the larger
      river. This needs testing.
    * Reviewers would prefer this function to live in the ``hydro.py``
      module.
    """
    # Function-scope import keeps the block self-contained; progress is
    # reported through logging instead of printing banners to stdout.
    import logging

    logging.getLogger(__name__).info(
        "Extracting discharge time series for nearest grid points to plants..."
    )

    discharge = cutout.data.discharge

    # Pre-allocate the (plant, time) result and fill it plant by plant.
    inflow = xr.DataArray(
        np.zeros((len(plants), discharge.indexes["time"].size)),
        [("plant", plants.index), discharge.coords["time"]],
    )
    for plant in plants.itertuples():
        # Extract the discharge time series for the nearest point
        # TODO(review): replace the nearest-cell heuristic by river-aware
        # snapping (see Notes in the docstring).
        inflow.loc[dict(plant=plant.Index)] = discharge.sel(
            x=plant.lon, y=plant.lat, method="nearest"
        )

    return inflow


def hydro(
    cutout,
    plants,
    module="auto",
    hydrobasins=None,
    flowspeed=1,
    weight_with_height=False,
    show_progress=False,
    **kwargs,
):
    """
    Get inflow time-series for `plants`.

    The time series are obtained either by extracting the discharge time
    series for the nearest grid points or by computing runoff-based inflow
    time series.

    Parameters
    ----------
    cutout : atlite.Cutout
        Cutout providing the discharge and/or runoff data.
    plants : pd.DataFrame
        Run-of-river plants or dams with lon, lat columns.
    module : str
        The method to compute hydro time series (matched
        case-insensitively). "auto" will prefer discharge but fall back to
        runoff-based computation, "glofas" will use discharge directly,
        "era5" will use runoff-based computation.
    hydrobasins : str|gpd.GeoDataFrame
        Filename or GeoDataFrame of one level of the HydroBASINS dataset. Only
        required for runoff-based computation.
    flowspeed : float
        Average speed of water flows to estimate the water travel time from
        basin to plant (default: 1 m/s). Only relevant for runoff-based
        computation.
    weight_with_height : bool
        Whether surface runoff should be weighted by potential height
        (probably better for coarser resolution). Only relevant for
        runoff-based computation.
    show_progress : bool
        Whether to display progressbars. Only relevant for runoff-based
        computation.
    **kwargs
        Additional arguments for runoff-based computation.

    Returns
    -------
    The result of `_hydro_from_discharge` or `_hydro_from_runoff`, depending
    on the selected method.

    Raises
    ------
    ValueError
        If `module` is not one of "auto", "glofas" or "era5", or if the data
        required by the selected method is missing.
    """
    # Normalise once so that every option, including "auto", is matched
    # case-insensitively.
    method = str(module).lower()
    if method not in ("auto", "glofas", "era5"):
        raise ValueError(f'Unknown hydro module option "{module}".')

    if method == "glofas":
        # The discharge path reads `cutout.data.discharge`, so test for
        # exactly that variable.
        if "discharge" not in cutout.data:
            raise ValueError(
                "For GloFAS-based hydro time series, the cutout must include discharge data."
            )
        return _hydro_from_discharge(cutout, plants)

    if method == "auto":
        # Prefer discharge when the cutout actually holds it, otherwise fall
        # back to the runoff-based computation.
        if "discharge" in cutout.data:
            return _hydro_from_discharge(cutout, plants)
        # NOTE(review): the runoff check is kept as in the original; how
        # `_hydro_from_runoff` reads runoff is not visible here - confirm
        # whether it should also test `cutout.data`.
        if hydrobasins is None or "runoff" not in cutout.available_features.values:
            raise ValueError(
                "For runoff-based hydro time series, hydrobasins and runoff data must be provided."
            )
    elif hydrobasins is None:
        # method == "era5"
        raise ValueError(
            "For ERA5-based hydro time series, the hydrobasins dataset must be provided."
        )

    # Shared runoff-based path for "era5" and the "auto" fallback.
    return _hydro_from_runoff(
        cutout,
        plants,
        hydrobasins,
        flowspeed=flowspeed,
        weight_with_height=weight_with_height,
        show_progress=show_progress,
        **kwargs,
    )


def convert_line_rating(
ds, psi, R, D=0.028, Ts=373, epsilon=0.6, alpha=0.6, per_unit=False
):
Expand Down
4 changes: 2 additions & 2 deletions atlite/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
atlite datasets.
"""

from atlite.datasets import era5, gebco, sarah
from atlite.datasets import era5, gebco, glofas, sarah

# Registry mapping dataset names to their dataset modules. In this diff view
# the first assignment is the previous mapping; the second one supersedes it
# and additionally registers the "glofas" module.
modules = {"era5": era5, "sarah": sarah, "gebco": gebco}
modules = {"era5": era5, "sarah": sarah, "gebco": gebco, "glofas": glofas}
Loading