Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 53 additions & 40 deletions malariagen_data/anoph/frq_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,39 +396,50 @@ def plot_frequencies_time_series(
# Extract variant labels.
variant_labels = ds["variant_label"].values

# Check if CI variables are available.
has_ci = "event_frequency_ci_low" in ds

# Build a long-form dataframe from the dataset.
dfs = []
for cohort in df_cohorts.itertuples():
ds_cohort = ds.isel(cohorts=cohort.Index)
df = pd.DataFrame(
{
"taxon": cohort.taxon,
"area": cohort.area,
"date": cohort.period_start,
"period": str(
cohort.period
), # use string representation for hover label
"sample_size": cohort.size,
"variant": variant_labels,
"count": ds_cohort["event_count"].values,
"nobs": ds_cohort["event_nobs"].values,
"frequency": ds_cohort["event_frequency"].values,
"frequency_ci_low": ds_cohort["event_frequency_ci_low"].values,
"frequency_ci_upp": ds_cohort["event_frequency_ci_upp"].values,
}
)
cohort_data = {
"taxon": cohort.taxon,
"area": cohort.area,
"date": cohort.period_start,
"period": str(
cohort.period
), # use string representation for hover label
"sample_size": cohort.size,
"variant": variant_labels,
"count": ds_cohort["event_count"].values,
"nobs": ds_cohort["event_nobs"].values,
"frequency": ds_cohort["event_frequency"].values,
}
if has_ci:
cohort_data["frequency_ci_low"] = ds_cohort[
"event_frequency_ci_low"
].values
cohort_data["frequency_ci_upp"] = ds_cohort[
"event_frequency_ci_upp"
].values
df = pd.DataFrame(cohort_data)
dfs.append(df)
df_events = pd.concat(dfs, axis=0).reset_index(drop=True)

# Remove events with no observations.
df_events = df_events.query("nobs > 0").copy()

# Calculate error bars.
frq = df_events["frequency"]
frq_ci_low = df_events["frequency_ci_low"]
frq_ci_upp = df_events["frequency_ci_upp"]
df_events["frequency_error"] = frq_ci_upp - frq
df_events["frequency_error_minus"] = frq - frq_ci_low
# Calculate error bars if CI data is available.
error_y_args = {}
if has_ci:
frq = df_events["frequency"]
frq_ci_low = df_events["frequency_ci_low"]
frq_ci_upp = df_events["frequency_ci_upp"]
df_events["frequency_error"] = frq_ci_upp - frq
df_events["frequency_error_minus"] = frq - frq_ci_low
error_y_args["error_y"] = "frequency_error"
error_y_args["error_y_minus"] = "frequency_error_minus"

# Make a plot.
fig = px.line(
Expand All @@ -437,8 +448,7 @@ def plot_frequencies_time_series(
facet_row="area",
x="date",
y="frequency",
error_y="frequency_error",
error_y_minus="frequency_error_minus",
**error_y_args,
color="variant",
markers=True,
hover_name="variant",
Expand Down Expand Up @@ -518,19 +528,19 @@ def plot_frequencies_map_markers(
variant_label = variant

# Convert to a dataframe for convenience.
df_markers = ds_variant[
[
"cohort_taxon",
"cohort_area",
"cohort_period",
"cohort_lat_mean",
"cohort_lon_mean",
"cohort_size",
"event_frequency",
"event_frequency_ci_low",
"event_frequency_ci_upp",
]
].to_dataframe()
cols = [
"cohort_taxon",
"cohort_area",
"cohort_period",
"cohort_lat_mean",
"cohort_lon_mean",
"cohort_size",
"event_frequency",
]
has_ci = "event_frequency_ci_low" in ds
if has_ci:
cols += ["event_frequency_ci_low", "event_frequency_ci_upp"]
df_markers = ds_variant[cols].to_dataframe()

# Select data matching taxon and period parameters.
df_markers = df_markers.loc[
Expand Down Expand Up @@ -560,8 +570,11 @@ def plot_frequencies_map_markers(
Area: {x.cohort_area} <br/>
Period: {x.cohort_period} <br/>
Sample size: {x.cohort_size} <br/>
Frequency: {x.event_frequency:.0%}
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})
Frequency: {x.event_frequency:.0%}"""
if has_ci:
popup_html += f"""
(95% CI: {x.event_frequency_ci_low:.0%} - {x.event_frequency_ci_upp:.0%})"""
popup_html += """
"""
marker.popup = ipyleaflet.Popup(
child=ipywidgets.HTML(popup_html),
Expand Down
83 changes: 83 additions & 0 deletions tests/anoph/test_frq_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,86 @@ def test_does_not_modify_original(self):
taxon_by="taxon",
)
assert df["taxon"].tolist() == original_values


class TestPlotFrequenciesTimeSeriesMissingCI:
    """Tests for plot_frequencies_time_series when CI variables are absent.

    Two scenarios are exercised against small hand-built datasets:

    * no ``event_frequency_ci_low`` / ``event_frequency_ci_upp`` variables:
      the plot must still be produced, and must carry no error bars;
    * both CI variables present: every trace must carry error bars.

    See: https://github.com/malariagen/malariagen-data-python/issues/1035
    """

    @staticmethod
    def _make_ds_without_ci():
        """Create a minimal dataset without CI variables.

        The dataset has 3 variants and 2 cohorts, and every ``event_nobs``
        value is positive so that no row is filtered out before plotting.
        """
        import numpy as np
        import xarray as xr

        ds = xr.Dataset(
            {
                "variant_label": ("variants", ["V0", "V1", "V2"]),
                "cohort_taxon": ("cohorts", ["gambiae", "coluzzii"]),
                "cohort_area": ("cohorts", ["KE-01", "KE-02"]),
                "cohort_period": (
                    "cohorts",
                    pd.PeriodIndex(["2020", "2021"], freq="Y"),
                ),
                "cohort_period_start": (
                    "cohorts",
                    pd.to_datetime(["2020-01-01", "2021-01-01"]),
                ),
                "cohort_size": ("cohorts", [50, 60]),
                "event_count": (
                    ("variants", "cohorts"),
                    np.array([[10, 20], [5, 15], [25, 30]]),
                ),
                "event_nobs": (
                    ("variants", "cohorts"),
                    np.array([[100, 120], [100, 120], [100, 120]]),
                ),
                "event_frequency": (
                    ("variants", "cohorts"),
                    np.array([[0.1, 0.167], [0.05, 0.125], [0.25, 0.25]]),
                ),
            }
        )
        return ds

    @classmethod
    def _make_ds_with_ci(cls):
        """Create a minimal dataset with CI variables.

        Starts from the CI-free dataset and adds a symmetric +/- 0.05
        interval around each frequency, clipped to the [0, 1] range.
        """
        import numpy as np

        ds = cls._make_ds_without_ci()
        ds["event_frequency_ci_low"] = (
            ("variants", "cohorts"),
            np.maximum(ds["event_frequency"].values - 0.05, 0),
        )
        ds["event_frequency_ci_upp"] = (
            ("variants", "cohorts"),
            np.minimum(ds["event_frequency"].values + 0.05, 1),
        )
        return ds

    def test_no_ci_no_error(self):
        """plot_frequencies_time_series should not raise when CI variables are absent."""
        import plotly.graph_objects as go

        from malariagen_data.anoph.frq_base import AnophelesFrequencyAnalysis

        ds = self._make_ds_without_ci()
        fig = AnophelesFrequencyAnalysis.plot_frequencies_time_series(
            None, ds, show=False
        )
        assert isinstance(fig, go.Figure)

        # Without CI variables the figure must still contain data, but no
        # trace may carry error-bar arrays.
        assert len(fig.data) > 0
        for trace in fig.data:
            assert trace.error_y.array is None
            assert trace.error_y.arrayminus is None

    def test_with_ci_has_error_bars(self):
        """plot_frequencies_time_series should include error bars when CI variables are present."""
        import plotly.graph_objects as go

        from malariagen_data.anoph.frq_base import AnophelesFrequencyAnalysis

        ds = self._make_ds_with_ci()
        fig = AnophelesFrequencyAnalysis.plot_frequencies_time_series(
            None, ds, show=False
        )
        assert isinstance(fig, go.Figure)

        # Checking only the return type would not notice missing error bars,
        # so verify that every trace has both the upper and lower arrays.
        assert len(fig.data) > 0
        for trace in fig.data:
            assert trace.error_y.array is not None
            assert trace.error_y.arrayminus is not None
7 changes: 6 additions & 1 deletion tests/anoph/test_hap_frq.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,12 @@ def test_hap_frequencies_advanced(
)

# Run the other function under test.
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
try:
ds_hap = api.haplotypes_frequencies_advanced(**params_advanced)
except ValueError as e:
if "No SNPs available for the given region" in str(e):
pytest.skip("Random region contained no SNPs")
raise

# Standard checks.
check_hap_frequencies_advanced(api=api, ds=ds_hap)