Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion jetstream/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,7 @@ def run(

if self.log_config:
log_plugin = LogPlugin(self.log_config)
client.register_worker_plugin(log_plugin)
client.register_plugin(log_plugin)

# add profiling plugins
# resource_profiling_plugin = ResourceProfilingPlugin(
Expand Down
41 changes: 23 additions & 18 deletions jetstream/tests/integration/test_analysis_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from metric_config_parser.statistic import Statistic
from mozanalysis.metrics import agg_sum
from mozilla_nimbus_schemas.experimenter_apis.experiments import RandomizationUnit
from mozilla_nimbus_schemas.jetstream import AnalysisBasis
from mozilla_nimbus_schemas.jetstream import AnalysisBasis, Statistics

from jetstream.analysis import Analysis
from jetstream.bigquery_client import BigQueryClient
Expand Down Expand Up @@ -229,14 +229,14 @@ def test_metrics(
).to_dataframe()

count_by_branch = stats.query("statistic == 'count'").set_index("branch")
assert count_by_branch.loc["branch1", "point"][0] == 1.0
assert count_by_branch.loc["branch2", "point"][0] == 1.0
assert count_by_branch.loc["branch1", "point"].iloc[0] == 1.0
assert count_by_branch.loc["branch2", "point"].iloc[0] == 1.0

if count_by_branch.loc["branch2", "analysis_basis"][0] == "exposures":
assert count_by_branch.loc["branch2", "analysis_basis"][1] == "enrollments"
if count_by_branch.loc["branch2", "analysis_basis"].iloc[0] == "exposures":
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[1] == "enrollments"
else:
assert count_by_branch.loc["branch2", "analysis_basis"][0] == "enrollments"
assert count_by_branch.loc["branch2", "analysis_basis"][1] == "exposures"
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[0] == "enrollments"
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[1] == "exposures"

assert (
client.client.get_table(
Expand Down Expand Up @@ -396,14 +396,14 @@ def test_discrete_metrics(
assert len(stats[stats["metric"] == "active_hours_doubled"]) == 12

count_by_branch = stats.query("statistic == 'count'").set_index("branch")
assert count_by_branch.loc["branch1", "point"][0] == 1.0
assert count_by_branch.loc["branch2", "point"][0] == 1.0
assert count_by_branch.loc["branch1", "point"].iloc[0] == 1.0
assert count_by_branch.loc["branch2", "point"].iloc[0] == 1.0

if count_by_branch.loc["branch2", "analysis_basis"][0] == "exposures":
assert count_by_branch.loc["branch2", "analysis_basis"][1] == "enrollments"
if count_by_branch.loc["branch2", "analysis_basis"].iloc[0] == "exposures":
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[1] == "enrollments"
else:
assert count_by_branch.loc["branch2", "analysis_basis"][0] == "enrollments"
assert count_by_branch.loc["branch2", "analysis_basis"][1] == "exposures"
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[0] == "enrollments"
assert count_by_branch.loc["branch2", "analysis_basis"].iloc[1] == "exposures"

def test_metrics_preenrollment(
self,
Expand Down Expand Up @@ -870,10 +870,10 @@ def test_no_enrollments(
).to_dataframe()

count_by_branch = stats.query("statistic == 'count'").set_index("branch")
assert count_by_branch.loc["a", "point"][0] == 0.0
assert count_by_branch.loc["a", "point"][1] == 0.0
assert count_by_branch.loc["b", "point"][0] == 0.0
assert count_by_branch.loc["b", "point"][1] == 0.0
assert count_by_branch.loc["a", "point"].iloc[0] == 0.0
assert count_by_branch.loc["a", "point"].iloc[1] == 0.0
assert count_by_branch.loc["b", "point"].iloc[0] == 0.0
assert count_by_branch.loc["b", "point"].iloc[1] == 0.0
assert len(count_by_branch.loc["b", "analysis_basis"]) == 2

assert (
Expand Down Expand Up @@ -1302,11 +1302,16 @@ def test_statistics_export(
"""
)

statistics_export_data = query_job.to_dataframe().to_dict(orient="records")
# drop NaN before validation to match BQ NDJSON extract behavior
statistics_export_data = [
{k: v for k, v in record.items() if v == v}
for record in query_job.to_dataframe().to_dict(orient="records")
]
Comment on lines +1305 to +1309

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to cover a scenario that won't happen because to_dict already converts pd.NA to None. We can revisit if the test starts failing, but I'm satisfied that this covers the expectation here.

schema = json.loads(
(Path(__file__).parent.parent / "data/Statistics_v1.0.json").read_text()
)
jsonschema.validate(statistics_export_data, schema)
Statistics.model_validate(statistics_export_data)

def test_subset_metric_table(
self,
Expand Down
2 changes: 1 addition & 1 deletion jetstream/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def test_population_ratio_non_existing_metrics(self):
{"branch": ["treatment"] * 10 + ["control"] * 10, "ad_ratio": np.nan}
)
error_str = (
"None of [Index(['non_existing', 'non_existing'], dtype='object')] are in the [columns]"
"None of [Index(['non_existing', 'non_existing'], dtype='str')] are in the [columns]"
)
with pytest.raises(Exception, match=re.escape(error_str)):
stat.transform(test_data, "ad_ratio", "control", None, AnalysisBasis.ENROLLMENTS, "all")
Expand Down
Loading
Loading