diff --git a/jetstream/analysis.py b/jetstream/analysis.py index 4982ab7c..aa432934 100644 --- a/jetstream/analysis.py +++ b/jetstream/analysis.py @@ -1380,11 +1380,13 @@ def run( continue segment_labels = ["all"] + [s.name for s in self.config.experiment.segments] - analysis_length_dates = 1 - if period == AnalysisPeriod.OVERALL: - analysis_length_dates = time_limits.analysis_length_dates - elif period == AnalysisPeriod.WEEK: - analysis_length_dates = 7 + # Derive the analysis window length (in days) from TimeLimits so that + # normalize_over_analysis_period scales correctly for every period (day=1, week=7, + # days28=28, overall=N, preenrollment_week=7, preenrollment_days28=28). All windows + # in a period share the same length; start/end are inclusive days since enrollment, + # so length = end - start + 1. (TimeLimits has no `analysis_length_dates` attribute.) + last_window = time_limits.analysis_windows[-1] + analysis_length_dates = last_window.end - last_window.start + 1 for analysis_basis in analysis_bases: segment_data: DataFrame diff --git a/jetstream/tests/test_analysis.py b/jetstream/tests/test_analysis.py index 05ecad79..ca553cb0 100644 --- a/jetstream/tests/test_analysis.py +++ b/jetstream/tests/test_analysis.py @@ -1791,3 +1791,86 @@ def capturing_bind(thing, deps): "Expected at least one subset_metric_table bind call with prerequisites, " "but all bind calls had empty deps. The covariate cross-period edge is missing." ) + + +@pytest.mark.parametrize( + ("period", "time_limits_factory_kwargs", "expected_length"), + [ + # OVERALL: single window. TimeLimits has no `analysis_length_dates` attribute, so the + # original code raised AttributeError and killed the run entirely. + ( + AnalysisPeriod.OVERALL, + {"single_window": True, "analysis_start_days": 0, "analysis_length_dates": 30}, + 30, + ), + # DAYS_28: time series. The original code fell through to the default of 1, so + # normalize_over_analysis_period silently divided by 1 instead of 28. + ( + AnalysisPeriod.DAYS_28, + {"single_window": False, "time_series_period": "28_day"}, + 28, + ), + ], +) +def test_run_derives_analysis_length_dates( + experiments, monkeypatch, period, time_limits_factory_kwargs, expected_length +): + """analysis_length_dates must be derived from the TimeLimits window for every period so that + normalize_over_analysis_period scales correctly. Regression test for both the OVERALL + AttributeError and the default-of-1 fall-through for non-WEEK/OVERALL periods.""" + from mozanalysis.experiment import TimeLimits + + from jetstream.statistics import Summary as JetstreamSummary + + config = AnalysisSpec.default_for_experiment(experiments[0], ConfigLoader.configs).resolve( + experiments[0], ConfigLoader.configs + ) + + metric = Metric( + name="active_hours", + data_source=DataSource(name="clients_daily", from_expression="test.test"), + select_expression="SUM(ah)", + analysis_bases=[AnalysisBasis.ENROLLMENTS], + ) + plain_statistic = MagicMock() + plain_statistic.params = {} + config.metrics = {period: [JetstreamSummary(metric, plain_statistic, [])]} + + common = {"first_enrollment_date": "2024-01-01", "num_dates_enrollment": 8} + if time_limits_factory_kwargs.pop("single_window"): + time_limits = TimeLimits.for_single_analysis_window( + last_date_full_data="2024-04-01", **time_limits_factory_kwargs, **common + ) + else: + time_limits = TimeLimits.for_ts( + last_date_full_data="2024-04-01", **time_limits_factory_kwargs, **common + ) + assert not hasattr(time_limits, "analysis_length_dates") + + stats_mock = MagicMock() + stats_mock.model_dump.return_value = [] + calc_stats = MagicMock(return_value=stats_mock) + + monkeypatch.setattr("jetstream.analysis.bind", lambda thing, deps: thing) + monkeypatch.setattr("jetstream.analysis.Analysis.ensure_enrollments", Mock()) + monkeypatch.setattr( + "jetstream.analysis.Analysis._get_timelimits_if_ready", + MagicMock(return_value=time_limits), + ) + monkeypatch.setattr("jetstream.analysis.Analysis.calculate_metrics", MagicMock()) + monkeypatch.setattr("jetstream.analysis.Analysis.calculate_statistics", calc_stats) + monkeypatch.setattr("jetstream.analysis.Analysis.counts", MagicMock(return_value=stats_mock)) + monkeypatch.setattr("jetstream.analysis.Analysis.save_statistics", MagicMock()) + monkeypatch.setattr("jetstream.analysis.Analysis.publish_view", MagicMock()) + monkeypatch.setattr("jetstream.analysis.LocalCluster", MagicMock()) + monkeypatch.setattr("jetstream.analysis.Client", MagicMock()) + monkeypatch.setattr("jetstream.analysis.as_completed", Mock(return_value=[])) + + # must not raise AttributeError on time_limits.analysis_length_dates + Analysis("test", "test", config).run(current_date=dt.datetime(2020, 1, 1, tzinfo=pytz.utc)) + + assert calc_stats.called, f"calculate_statistics was never called for the {period} period" + # analysis_length_dates is the 5th positional arg and must be the derived window length, + # not the default of 1 + analysis_length_dates_args = {call.args[4] for call in calc_stats.call_args_list} + assert analysis_length_dates_args == {expected_length}