Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions jetstream/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,11 +1380,13 @@ def run(
continue

segment_labels = ["all"] + [s.name for s in self.config.experiment.segments]
analysis_length_dates = 1
if period == AnalysisPeriod.OVERALL:
analysis_length_dates = time_limits.analysis_length_dates
elif period == AnalysisPeriod.WEEK:
analysis_length_dates = 7
# Derive the analysis window length (in days) from TimeLimits so that
# normalize_over_analysis_period scales correctly for every period (day=1, week=7,
# days28=28, overall=N, preenrollment_week=7, preenrollment_days28=28). All windows
# in a period share the same length; start/end are inclusive days since enrollment,
# so length = end - start + 1. (TimeLimits has no `analysis_length_dates` attribute.)
last_window = time_limits.analysis_windows[-1]
analysis_length_dates = last_window.end - last_window.start + 1

for analysis_basis in analysis_bases:
segment_data: DataFrame
Expand Down
83 changes: 83 additions & 0 deletions jetstream/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,3 +1791,86 @@ def capturing_bind(thing, deps):
"Expected at least one subset_metric_table bind call with prerequisites, "
"but all bind calls had empty deps. The covariate cross-period edge is missing."
)


@pytest.mark.parametrize(
("period", "time_limits_factory_kwargs", "expected_length"),
[
# OVERALL: single window. TimeLimits has no `analysis_length_dates` attribute, so the
# original code raised AttributeError and killed the run entirely.
(
AnalysisPeriod.OVERALL,
{"single_window": True, "analysis_start_days": 0, "analysis_length_dates": 30},
30,
),
# DAYS_28: time series. The original code fell through to the default of 1, so
# normalize_over_analysis_period silently divided by 1 instead of 28.
(
AnalysisPeriod.DAYS_28,
{"single_window": False, "time_series_period": "28_day"},
28,
),
],
)
def test_run_derives_analysis_length_dates(
experiments, monkeypatch, period, time_limits_factory_kwargs, expected_length
):
"""analysis_length_dates must be derived from the TimeLimits window for every period so that
normalize_over_analysis_period scales correctly. Regression test for both the OVERALL
AttributeError and the default-of-1 fall-through for non-WEEK/OVERALL periods."""
from mozanalysis.experiment import TimeLimits

from jetstream.statistics import Summary as JetstreamSummary

config = AnalysisSpec.default_for_experiment(experiments[0], ConfigLoader.configs).resolve(
experiments[0], ConfigLoader.configs
)

metric = Metric(
name="active_hours",
data_source=DataSource(name="clients_daily", from_expression="test.test"),
select_expression="SUM(ah)",
analysis_bases=[AnalysisBasis.ENROLLMENTS],
)
plain_statistic = MagicMock()
plain_statistic.params = {}
config.metrics = {period: [JetstreamSummary(metric, plain_statistic, [])]}

common = {"first_enrollment_date": "2024-01-01", "num_dates_enrollment": 8}
if time_limits_factory_kwargs.pop("single_window"):
time_limits = TimeLimits.for_single_analysis_window(
last_date_full_data="2024-04-01", **time_limits_factory_kwargs, **common
)
else:
time_limits = TimeLimits.for_ts(
last_date_full_data="2024-04-01", **time_limits_factory_kwargs, **common
)
assert not hasattr(time_limits, "analysis_length_dates")

stats_mock = MagicMock()
stats_mock.model_dump.return_value = []
calc_stats = MagicMock(return_value=stats_mock)

monkeypatch.setattr("jetstream.analysis.bind", lambda thing, deps: thing)
monkeypatch.setattr("jetstream.analysis.Analysis.ensure_enrollments", Mock())
monkeypatch.setattr(
"jetstream.analysis.Analysis._get_timelimits_if_ready",
MagicMock(return_value=time_limits),
)
monkeypatch.setattr("jetstream.analysis.Analysis.calculate_metrics", MagicMock())
monkeypatch.setattr("jetstream.analysis.Analysis.calculate_statistics", calc_stats)
monkeypatch.setattr("jetstream.analysis.Analysis.counts", MagicMock(return_value=stats_mock))
monkeypatch.setattr("jetstream.analysis.Analysis.save_statistics", MagicMock())
monkeypatch.setattr("jetstream.analysis.Analysis.publish_view", MagicMock())
monkeypatch.setattr("jetstream.analysis.LocalCluster", MagicMock())
monkeypatch.setattr("jetstream.analysis.Client", MagicMock())
monkeypatch.setattr("jetstream.analysis.as_completed", Mock(return_value=[]))

# must not raise AttributeError on time_limits.analysis_length_dates
Analysis("test", "test", config).run(current_date=dt.datetime(2020, 1, 1, tzinfo=pytz.utc))

assert calc_stats.called, f"calculate_statistics was never called for the {period} period"
# analysis_length_dates is the 5th positional arg and must be the derived window length,
# not the default of 1
analysis_length_dates_args = {call.args[4] for call in calc_stats.call_args_list}
assert analysis_length_dates_args == {expected_length}
Loading