Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 29 additions & 56 deletions docs/Multi-year_on_year_example.ipynb

Large diffs are not rendered by default.

24 changes: 8 additions & 16 deletions docs/sphinx/source/changelog/v3.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,33 @@ v3.2.0 (X, X, 2026)

Enhancements
------------
* :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``label=``
to return the calc_info['YoY_values'] as either right labeled (default), left or
center labeled. (:issue:`459`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` now defaults to rolling
median, centered on the timestamp (pd.rolling(center=True)), and reduces
``min_periods`` from ``rolling_days//2`` to ``rolling_days//4``.
(:issue:`455`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` refactored to do a rolling
median, for all slopes whose center timestamp is inside the window.
``min_periods`` reduced to ``rolling_days//4``.
(:issue:`455`) (:pull:`498`)
* :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``multi_yoy``
(default False) to trigger multiple YoY degradation calculations similar to Hugo Quest et
al. (2023). In this mode, instead of a series of 1-year duration slopes, 2-year, 3-year, etc.,
slopes are also included. calc_info['YoY_values'] returns a non-monotonic index
in this mode due to multiple overlapping annual slopes. (:issue:`394`)
* :py:func:`~rdtools.plotting.degradation_timeseries_plot` now supports ``multi_yoy=True``
data by resampling overlapping YoY values to their mean. A warning is issued when this
resampling occurs. (:issue:`394`)
data by resampling overlapping YoY values to their mean. A warning is issued to
notify that time-series sensitivity is reduced by multi_yoy. (:issue:`394`) (:pull:`498`)
* :py:func:`~rdtools.plotting.degradation_summary_plots` ``detailed=True`` mode now
properly handles points that are used an odd vs. even number of times (not just 0, 1, or 2).
(:issue:`394`)
* :py:func:`~rdtools.degradation.degradation_year_on_year` now returns
``calc_info['YoY_times']`` DataFrame with ``dt_right``, ``dt_center``, and ``dt_left``
columns for each YoY slope. (:issue:`459`)
* Added new example notebook ``docs/Multi-year_on_year_example.ipynb`` demonstrating the
``label='center'`` and ``multi_yoy=True`` features of
``multi_yoy=True`` feature of
:py:func:`~rdtools.degradation.degradation_year_on_year`. (:issue:`394`)
* :py:meth:`~rdtools.analysis_chains.TrendAnalysis.sensor_analysis` and
:py:meth:`~rdtools.analysis_chains.TrendAnalysis.clearsky_analysis` now
explicitly default ``yoy_kwargs={"label": "right"}``.

Bug Fixes
---------
* Fixed ``usage_of_points`` calculation in :py:func:`~rdtools.degradation.degradation_year_on_year`
to properly handle ``multi_yoy=True`` mode with overlapping slopes. (:issue:`394`)


Maintenance
-----------
* Added ``_avg_timestamp_old_Pandas`` helper function for pandas <2.0 compatibility
Expand All @@ -63,8 +56,7 @@ Testing
decreasing).
* Added test for ``multi_yoy=True`` parameter in ``degradation_year_on_year``.
* Added tests for :py:func:`~rdtools.plotting.degradation_timeseries_plot`
covering ``label='center'``, ``label='left'``, multi-YoY duplicate index
handling, and ``KeyError`` path.
covering multi-YoY duplicate index handling, and ``KeyError`` path.
* Set matplotlib backend to ``Agg`` in test ``conftest.py`` to avoid tkinter issues.


Expand Down
6 changes: 2 additions & 4 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ def _clearsky_preprocess(self):
)

def sensor_analysis(
self, analyses=["yoy_degradation"], yoy_kwargs={"label": "right"}, srr_kwargs={}
self, analyses=["yoy_degradation"], yoy_kwargs={}, srr_kwargs={}
):
"""
Perform entire sensor-based analysis workflow.
Expand All @@ -1014,7 +1014,6 @@ def sensor_analysis(
and 'srr_soiling'
yoy_kwargs : dict
kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`
default is {"label": "right"}, which will right-label the YoY slope values.
srr_kwargs : dict
kwargs to pass to :py:func:`rdtools.soiling.soiling_srr`

Expand Down Expand Up @@ -1042,7 +1041,7 @@ def sensor_analysis(
self.results["sensor"] = sensor_results

def clearsky_analysis(
self, analyses=["yoy_degradation"], yoy_kwargs={"label": "right"}, srr_kwargs={}
self, analyses=["yoy_degradation"], yoy_kwargs={}, srr_kwargs={}
):
"""
Perform entire clear-sky-based analysis workflow. Results are stored
Expand All @@ -1055,7 +1054,6 @@ def clearsky_analysis(
and 'srr_soiling'
yoy_kwargs : dict
kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`.
default is {"label": "right"}, which will right-label the YoY slope values.
srr_kwargs : dict
kwargs to pass to :py:func:`rdtools.soiling.soiling_srr`

Expand Down
17 changes: 4 additions & 13 deletions rdtools/degradation.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ def degradation_year_on_year(energy_normalized, recenter=True,
If `uncertainty_method` is 'circular_block', `block_length`
determines the length of the blocks used in the circular block bootstrapping
in number of days. Must be shorter than a third of the time series.
label : {'right', 'center', 'left'}, default 'right'
Which Year-on-Year slope edge to label.
multi_yoy : bool, default False
Whether to return the standard Year-on-Year slopes where each slope
is calculated over points separated by 365 days (default) or
Expand All @@ -226,8 +224,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
degradation rate estimate
calc_info : dict

* `YoY_values` - pandas series of year on year slopes, either right
left or center labeled, depending on the `label` parameter.
* `YoY_values` - pandas series of year on year slopes, right
labeled.
* `renormalizing_factor` - float of value used to recenter data
* `exceedance_level` - the degradation rate that was outperformed with
probability of `exceedance_prob`
Expand All @@ -242,10 +240,6 @@ def degradation_year_on_year(energy_normalized, recenter=True,
energy_normalized.name = 'energy'
energy_normalized.index.name = 'dt'

if label not in {"right", "left", "center"}:
raise ValueError(f"Unsupported value {label} for `label`."
" Must be 'right', 'left' or 'center'.")

# Detect less than 2 years of data. This is complicated by two things:
# - leap days muddle the precise meaning of "two years of data".
# - can't just check the number of days between the first and last
Expand Down Expand Up @@ -332,11 +326,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']]
YoY_times = YoY_times.rename(columns={'dt': 'dt_right'})

YoY_times.set_index(YoY_times[f'dt_{label}'], inplace=True)
YoY_times.index.name = 'dt'

# now apply either right, left, or center label index to the yoy_result
yoy_result.index = YoY_times[f'dt_{label}']
# now apply right label index to the yoy_result
yoy_result.index = YoY_times.index
yoy_result.index.name = 'dt'

# the following is throwing a futurewarning if infer_objects() isn't included here.
Expand Down
81 changes: 65 additions & 16 deletions rdtools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
yoy_info : dict
a dictionary with keys:
* YoY_values - pandas series of year on year slopes
* YoY_times - pandas series of corresponding timestamps
rolling_days: int, default 365
Number of days for rolling window. Note that
the window must contain at least 25% of datapoints to be included in
Expand Down Expand Up @@ -477,49 +478,97 @@ def _bootstrap(x, percentile, reps):
mb1 = np.nanmedian(xb1, axis=0)
return np.percentile(mb1, percentile)

def _roll_median(df, win_right, rolling_days, min_periods):
"""
rolling median
df includes following columns: dt_center, yoy
win_right: Datetime of the right end of the rolling window
rolling_days: number of days in the rolling window
min_periods: minimum number of points in the rolling window to return a value

returns: median of yoy values if the center of the slope is in the rolling window,
or NaN if there are fewer than min_periods points. Time index of the returned
value is centered on the rolling window.
"""
win_left = win_right - pd.Timedelta(days=rolling_days)
in_window = (df['dt_center'] <= win_right) & (df['dt_center'] >= win_left)
if in_window.sum() < min_periods:
return np.nan
else:
return df.loc[in_window, 'yoy'].median()

try:
results_values = yoy_info['YoY_values']
results = yoy_info['YoY_times'].join(yoy_info['YoY_values'].rename('yoy'))
except KeyError:
raise KeyError("yoy_info input dictionary does not contain key `YoY_values`.")
raise KeyError("yoy_info input dictionary does not contain keys"
" `YoY_times` and `YoY_values`.")

if plot_color is None:
plot_color = 'tab:orange'
if ci_color is None:
ci_color = 'C0'

results_values = results_values.sort_index()
if results_values.index.has_duplicates:
if yoy_info['YoY_values'].index.has_duplicates:
multi_yoy = True
# this occurs with degradation_year_on_year(multi_yoy=True). resample to daily mean
warnings.warn(
"Input `yoy_info['YoY_values']` appears to have multiple annual "
"slopes per day, which is the case if "
"degradation_year_on_year(multi_yoy=True). "
"Proceeding to plot with a daily mean which will average out the "
"Long-term multi-yoy slopes will tend to dominate the "
"time-series trend. Recommend re-running with "
"degradation_year_on_year(multi_yoy=False)."
)
roller = results_values.resample('D').mean().rolling(f'{rolling_days}d',
min_periods=rolling_days//4,
center=True)
else:
roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//4,
center=True)
# unfortunately it seems that you can't return multiple values in the rolling.apply() kernel.
# TODO: figure out some workaround to return both percentiles in a single pass
multi_yoy = False

# loop through results in a daily timeindex from min(dt_left) to max(dt_right)
# Apply rolling median and bootstrap confidence intervals

timeindex = pd.date_range(start=results['dt_left'].min(),
end=results['dt_right'].max(), freq='D')
results_median = pd.Series(index=timeindex, dtype=float)
for win_center in timeindex:
win_right = win_center + pd.Timedelta(days=rolling_days/2)
results_median.loc[win_center] = _roll_median(df=results,
win_right=win_right,
rolling_days=rolling_days,
min_periods=rolling_days//4)

# calculate confidence intervals for each point in the rolling median.
if include_ci:
ci_lower = roller.apply(_bootstrap, kwargs={'percentile': 2.5, 'reps': 100}, raw=True)
ci_upper = roller.apply(_bootstrap, kwargs={'percentile': 97.5, 'reps': 100}, raw=True)
# if multi_yoy is True, downsample the timeindex to every 7 days to speed up the
# bootstrap calculation, since it is slow. Otherwise downsample to every 2 days.
if multi_yoy:
timeindex = timeindex[::7]
else:
timeindex = timeindex[::2]
ci_lower = pd.Series(index=timeindex, dtype=float)
ci_upper = pd.Series(index=timeindex, dtype=float)
for win_center in timeindex:
win_right = win_center + pd.Timedelta(days=rolling_days/2)
win_left = win_center - pd.Timedelta(days=rolling_days/2)
in_window = (results['dt_center'] <= win_right) & (results['dt_center'] >= win_left)
if in_window.sum() < rolling_days//4:
ci_lower.loc[win_center] = np.nan
ci_upper.loc[win_center] = np.nan
else:
ci_lower.loc[win_center] = _bootstrap(results.loc[in_window, 'yoy'],
percentile=2.5, reps=50)
ci_upper.loc[win_center] = _bootstrap(results.loc[in_window, 'yoy'],
percentile=97.5, reps=50)

if fig is None:
fig, ax = plt.subplots()
else:
ax = fig.axes[0]
if include_ci:
ax.fill_between(ci_lower.index,
ci_lower, ci_upper, color=ci_color)
median = roller.median()
median = results_median.sort_index()
ax.plot(median.index,
median, color=plot_color, **kwargs)
ax.axhline(results_values.median(), c='k', ls='--')
ax.axhline(results_median.median(), c='k', ls='--')
plt.ylabel('Degradation trend (%/yr)')
fig.autofmt_xdate()

Expand Down
29 changes: 0 additions & 29 deletions rdtools/test/degradation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,35 +202,6 @@ def test_usage_of_points(self):
self.test_corr_energy[input_freq])
self.assertTrue((np.sum(rd_result[2]['usage_of_points'])) == 1462)

def test_degradation_year_on_year_label_center(self):
''' Test degradation_year_on_year with label="center". '''

funcName = sys._getframe().f_code.co_name
logging.debug('Running {}'.format(funcName))

# test YOY degradation calc with label='center'
input_freq = 'D'
rd_result = degradation_year_on_year(
self.test_corr_energy[input_freq], label='center')
self.assertAlmostEqual(rd_result[0], 100 * self.rd, places=1)
rd_result1 = degradation_year_on_year(
self.test_corr_energy[input_freq])
rd_result2 = degradation_year_on_year(
self.test_corr_energy[input_freq], label='right')
pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index,
rd_result2[2]['YoY_values'].index)
# 365/2 days difference between center and right label
assert (rd_result2[2]['YoY_values'].index -
rd_result[2]['YoY_values'].index).mean().days == \
pytest.approx(183, abs=1)

with pytest.raises(ValueError):
degradation_year_on_year(self.test_corr_energy[input_freq],
label='LEFT')
with pytest.raises(ValueError):
degradation_year_on_year(self.test_corr_energy[input_freq],
label=None)

def test_avg_timestamp_old_Pandas(self):
"""Test the _avg_timestamp_old_Pandas function for correct averaging."""
from rdtools.degradation import _avg_timestamp_old_Pandas
Expand Down
56 changes: 3 additions & 53 deletions rdtools/test/plotting_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,6 @@ def degradation_info(degradation_power_signal):
return degradation_power_signal, rd, rd_ci, calc_info


@pytest.fixture()
def degradation_info_center(degradation_power_signal):
# center-labeled YOY output for time-series degradation plot
rd, rd_ci, calc_info = degradation_year_on_year(degradation_power_signal, label='center')
return degradation_power_signal, rd, rd_ci, calc_info


@pytest.fixture()
def degradation_info_left(degradation_power_signal):
# left-labeled YOY output for time-series degradation plot
rd, rd_ci, calc_info = degradation_year_on_year(degradation_power_signal, label='left')
return degradation_power_signal, rd, rd_ci, calc_info


def test_degradation_summary_plots(degradation_info):
power, yoy_rd, yoy_ci, yoy_info = degradation_info

Expand Down Expand Up @@ -265,49 +251,13 @@ def test_availability_summary_plots_empty(availability_analysis_object):
plt.close('all')


def test_degradation_timeseries_plot(degradation_info, degradation_info_center,
degradation_info_left):
def test_degradation_timeseries_plot(degradation_info):
power, yoy_rd, yoy_ci, yoy_info = degradation_info

# test defaults (label='right')
# test defaults
result_right = degradation_timeseries_plot(yoy_info)
assert_isinstance(result_right, plt.Figure)
xlim_right = result_right.get_axes()[0].get_xlim()[0]

# test label='center'
result_center = degradation_timeseries_plot(yoy_info=degradation_info_center[3],
include_ci=False)
assert_isinstance(result_center, plt.Figure)
xlim_center = result_center.get_axes()[0].get_xlim()[0]

# test label='left'
result_left = degradation_timeseries_plot(yoy_info=degradation_info_left[3],
include_ci=False)
assert_isinstance(result_left, plt.Figure)
xlim_left = result_left.get_axes()[0].get_xlim()[0]

# test default label matches label='right'
result_default = degradation_timeseries_plot(yoy_info=yoy_info, include_ci=False)
xlim_default = result_default.get_axes()[0].get_xlim()[0]
assert xlim_default == xlim_right

# Check that the xlim values are offset as expected
# right > center > left (since offset_days increases)
assert xlim_right > xlim_center > xlim_left

# The expected difference from right to left is 365 days (1 yrs), allow 5% tolerance
expected_diff = 365
actual_diff = (xlim_right - xlim_left)
tolerance = expected_diff * 0.05
assert abs(actual_diff - expected_diff) <= tolerance, \
f"difference of right-left xlim {actual_diff} not within 5% of 1 yr."

# The expected difference from right to center is 182 days, allow 5% tolerance
expected_diff2 = 182
actual_diff2 = (xlim_right - xlim_center)
tolerance2 = expected_diff2 * 0.05
assert abs(actual_diff2 - expected_diff2) <= tolerance2, \
f"difference of right-center xlim {actual_diff2} not within 5% of 1/2 year."
result_right.get_axes()[0].get_xlim()[0]

with pytest.raises(KeyError):
degradation_timeseries_plot({'a': 1}, include_ci=False)
Expand Down
Loading