Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/access/profiling/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@
logger = logging.getLogger(__name__)


def _make_unique_region_names(regions: list[object]) -> list[object]:
"""Return region names with deterministic suffixes for duplicates."""

counts: dict[object, int] = {}
unique_regions: list[object] = []
for region in regions:
count = counts.get(region, 0) + 1
counts[region] = count
unique_regions.append(region if count == 1 else f"{region}_{count}")
return unique_regions


class ProfilingLog:
"""Represents a profiling log file.

Expand Down Expand Up @@ -61,7 +73,7 @@ def parse(self) -> xr.Dataset:

has_pe = "pe" in data
dims = ["region", "pe"] if has_pe else ["region"]
coords: dict = {"region": data["region"]}
coords: dict = {"region": _make_unique_region_names(list(data["region"]))}
if has_pe:
coords["pe"] = data["pe"]

Expand Down
33 changes: 25 additions & 8 deletions src/access/profiling/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@ def __repr__(self) -> str:
if self.data == {}:
summary += indent * 2 + "No parsed data.\n"
else:
for name, ds in self.data.items():
for name, exp_data in self.data.items():
summary += indent * 2 + f"'{name}':\n"
summary += textwrap.indent(f"{ds}\n", indent * 3)
for comp_name, ds in exp_data.items():
summary += indent * 3 + f"'{comp_name}':\n"
summary += textwrap.indent(f"{ds}\n", indent * 4)
return summary

@abstractmethod
Expand Down Expand Up @@ -202,9 +204,20 @@ def plot_scaling_data(
region_relabel_map (dict | None): Optional mapping to relabel regions in the plots.
"""

exp_names = experiments if experiments is not None else list(self.data.keys())
if not exp_names:
raise ValueError("No experiments selected for scaling plot.")

missing_experiments = [exp_name for exp_name in exp_names if exp_name not in self.data]
if missing_experiments:
raise ValueError(
f"No parsed profiling data found for experiment(s): {missing_experiments}. "
f"Available experiments: {list(self.data.keys())}."
)

# Find number of cpus used for each experiment
ncpus = {}
for exp_name in self.data:
for exp_name in exp_names:
with self.experiments[exp_name].directory() as (exp_path, run_path):
# Find number of cpus used
ncpus[exp_name] = self.parse_ncpus(exp_path, run_path)
Expand All @@ -213,15 +226,19 @@ def plot_scaling_data(
scaling_data = []
for component, component_regions in zip(components, regions, strict=True):
component_data = None
for exp_name in self.data:
# Skip experiments not in the specified list
if experiments is not None and exp_name not in experiments:
continue

for exp_name in exp_names:
ds = self.data[exp_name].get(component)
if ds is None:
raise ValueError(f"No profiling data found for component '{component}' in experiment '{exp_name}'.")

available_regions = ds.coords["region"].values.tolist()
missing_regions = [region for region in component_regions if region not in available_regions]
if missing_regions:
raise ValueError(
f"Requested region(s) {missing_regions} not found for component '{component}' "
f"in experiment '{exp_name}'. Available regions: {available_regions}."
)

# Select only the desired regions
ds = ds.sel(region=component_regions)

Expand Down
22 changes: 22 additions & 0 deletions tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,28 @@ def test_profiling_log():
mock_parser.parse.assert_called_once_with(mock_path)


def test_profiling_log_duplicate_regions():
"""Test duplicate region names are renamed correctly during parsing."""

mock_parser = mock.MagicMock(autospec=True)
mock_parser.metrics = [tavg]
mock_parser.parse.return_value = {
"region": ["Region 1", "Region 1", "Region 2", "Region 1"],
tavg: [1.0, 2.0, 3.0, 4.0],
}

mock_path = mock.MagicMock()
profiling_log = ProfilingLog(filepath=mock_path, parser=mock_parser)

dataset = profiling_log.parse()

expected_unique_regions = ["Region 1", "Region 1_2", "Region 2", "Region 1_3"]
assert list(dataset["region"].values) == expected_unique_regions
assert list(dataset[tavg].values) == [1.0, 2.0, 3.0, 4.0]
assert mock_parser.parse.call_count == 1
assert mock_parser.parse.return_value["region"] == ["Region 1", "Region 1", "Region 2", "Region 1"]


def test_profiling_log_hierarchical():
"""Test ProfilingLog.parse() with hierarchical (nested dict) parser output."""
mock_parser = mock.MagicMock(autospec=True)
Expand Down
54 changes: 54 additions & 0 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,60 @@ def test_scaling_data(mock_plot, scaling_data):
assert (Path("4cpu"), Path("/fake/runs/4cpu")) in manager._parse_ncpus_calls


def test_scaling_data_missing_region_raises_value_error(scaling_data):
"""Test plot_scaling_data raises a clear error when a requested region is missing."""

paths, ncpus, datasets = scaling_data
manager = MockProfilingManager(paths, ncpus, datasets)

with pytest.raises(ValueError, match="Requested region\\(s\\)") as exc_info:
manager.plot_scaling_data(
components=["component"],
regions=[["Missing Region"]],
metric=tavg,
experiments=["1cpu"],
)

message = str(exc_info.value)
assert "Missing Region" in message
assert "component 'component'" in message
assert "experiment '1cpu'" in message


def test_scaling_data_no_selected_experiments_raises_value_error(scaling_data):
"""Test plot_scaling_data raises when no experiments are selected."""

paths, ncpus, datasets = scaling_data
manager = MockProfilingManager(paths, ncpus, datasets)

with pytest.raises(ValueError, match="No experiments selected for scaling plot"):
manager.plot_scaling_data(
components=["component"],
regions=[["Region 1"]],
metric=tavg,
experiments=[],
)


def test_scaling_data_missing_experiment_data_raises_value_error(scaling_data):
"""Test plot_scaling_data raises when selected experiments have no parsed data."""

paths, ncpus, datasets = scaling_data
manager = MockProfilingManager(paths, ncpus, datasets)

with pytest.raises(ValueError, match="No parsed profiling data found for experiment\(s\)") as exc_info:
manager.plot_scaling_data(
components=["component"],
regions=[["Region 1"]],
metric=tavg,
experiments=["missing_exp"],
)

message = str(exc_info.value)
assert "missing_exp" in message
assert "Available experiments" in message


@mock.patch("access.profiling.manager.plot_bar_metrics")
def test_bar_chart_data(mock_plot, scaling_data):
"""Test the plot_bar_chart method of ProfilingManager.
Expand Down
Loading