diff --git a/src/access/profiling/experiment.py b/src/access/profiling/experiment.py index c402790..8391d06 100644 --- a/src/access/profiling/experiment.py +++ b/src/access/profiling/experiment.py @@ -15,6 +15,18 @@ logger = logging.getLogger(__name__) +def _make_unique_region_names(regions: list[object]) -> list[object]: + """Return region names with deterministic suffixes for duplicates.""" + + counts: dict[object, int] = {} + unique_regions: list[object] = [] + for region in regions: + count = counts.get(region, 0) + 1 + counts[region] = count + unique_regions.append(region if count == 1 else f"{region}_{count}") + return unique_regions + + class ProfilingLog: """Represents a profiling log file. @@ -61,7 +73,7 @@ def parse(self) -> xr.Dataset: has_pe = "pe" in data dims = ["region", "pe"] if has_pe else ["region"] - coords: dict = {"region": data["region"]} + coords: dict = {"region": _make_unique_region_names(list(data["region"]))} if has_pe: coords["pe"] = data["pe"] diff --git a/src/access/profiling/manager.py b/src/access/profiling/manager.py index e87328b..6dad61f 100644 --- a/src/access/profiling/manager.py +++ b/src/access/profiling/manager.py @@ -65,9 +65,11 @@ def __repr__(self) -> str: if self.data == {}: summary += indent * 2 + "No parsed data.\n" else: - for name, ds in self.data.items(): + for name, exp_data in self.data.items(): summary += indent * 2 + f"'{name}':\n" - summary += textwrap.indent(f"{ds}\n", indent * 3) + for comp_name, ds in exp_data.items(): + summary += indent * 3 + f"'{comp_name}':\n" + summary += textwrap.indent(f"{ds}\n", indent * 4) return summary @abstractmethod @@ -202,9 +204,20 @@ def plot_scaling_data( region_relabel_map (dict | None): Optional mapping to relabel regions in the plots. """ + exp_names = experiments if experiments is not None else list(self.data.keys()) + if not exp_names: + raise ValueError("No experiments selected for scaling plot.") + + missing_experiments = [exp_name for exp_name in exp_names if exp_name not in self.data] + if missing_experiments: + raise ValueError( + f"No parsed profiling data found for experiment(s): {missing_experiments}. " + f"Available experiments: {list(self.data.keys())}." + ) + # Find number of cpus used for each experiment ncpus = {} - for exp_name in self.data: + for exp_name in exp_names: with self.experiments[exp_name].directory() as (exp_path, run_path): # Find number of cpus used ncpus[exp_name] = self.parse_ncpus(exp_path, run_path) @@ -213,15 +226,19 @@ def plot_scaling_data( scaling_data = [] for component, component_regions in zip(components, regions, strict=True): component_data = None - for exp_name in self.data: - # Skip experiments not in the specified list - if experiments is not None and exp_name not in experiments: - continue - + for exp_name in exp_names: ds = self.data[exp_name].get(component) if ds is None: raise ValueError(f"No profiling data found for component '{component}' in experiment '{exp_name}'.") + available_regions = ds.coords["region"].values.tolist() + missing_regions = [region for region in component_regions if region not in available_regions] + if missing_regions: + raise ValueError( + f"Requested region(s) {missing_regions} not found for component '{component}' " + f"in experiment '{exp_name}'. Available regions: {available_regions}." + ) + # Select only the desired regions ds = ds.sel(region=component_regions) diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 982e3f2..e1c486d 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -44,6 +44,28 @@ def test_profiling_log(): mock_parser.parse.assert_called_once_with(mock_path) +def test_profiling_log_duplicate_regions(): + """Test duplicate region names are renamed correctly during parsing.""" + + mock_parser = mock.MagicMock(autospec=True) + mock_parser.metrics = [tavg] + mock_parser.parse.return_value = { + "region": ["Region 1", "Region 1", "Region 2", "Region 1"], + tavg: [1.0, 2.0, 3.0, 4.0], + } + + mock_path = mock.MagicMock() + profiling_log = ProfilingLog(filepath=mock_path, parser=mock_parser) + + dataset = profiling_log.parse() + + expected_unique_regions = ["Region 1", "Region 1_2", "Region 2", "Region 1_3"] + assert list(dataset["region"].values) == expected_unique_regions + assert list(dataset[tavg].values) == [1.0, 2.0, 3.0, 4.0] + assert mock_parser.parse.call_count == 1 + assert mock_parser.parse.return_value["region"] == ["Region 1", "Region 1", "Region 2", "Region 1"] + + def test_profiling_log_hierarchical(): """Test ProfilingLog.parse() with hierarchical (nested dict) parser output.""" mock_parser = mock.MagicMock(autospec=True) diff --git a/tests/test_manager.py b/tests/test_manager.py index 9fbed3b..807602e 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -316,6 +316,60 @@ def test_scaling_data(mock_plot, scaling_data): assert (Path("4cpu"), Path("/fake/runs/4cpu")) in manager._parse_ncpus_calls +def test_scaling_data_missing_region_raises_value_error(scaling_data): + """Test plot_scaling_data raises a clear error when a requested region is missing.""" + + paths, ncpus, datasets = scaling_data + manager = MockProfilingManager(paths, ncpus, datasets) + + with pytest.raises(ValueError, match="Requested region\\(s\\)") as exc_info: + manager.plot_scaling_data( + components=["component"], + regions=[["Missing Region"]], + metric=tavg, + experiments=["1cpu"], + ) + + message = str(exc_info.value) + assert "Missing Region" in message + assert "component 'component'" in message + assert "experiment '1cpu'" in message + + +def test_scaling_data_no_selected_experiments_raises_value_error(scaling_data): + """Test plot_scaling_data raises when no experiments are selected.""" + + paths, ncpus, datasets = scaling_data + manager = MockProfilingManager(paths, ncpus, datasets) + + with pytest.raises(ValueError, match="No experiments selected for scaling plot"): + manager.plot_scaling_data( + components=["component"], + regions=[["Region 1"]], + metric=tavg, + experiments=[], + ) + + +def test_scaling_data_missing_experiment_data_raises_value_error(scaling_data): + """Test plot_scaling_data raises when selected experiments have no parsed data.""" + + paths, ncpus, datasets = scaling_data + manager = MockProfilingManager(paths, ncpus, datasets) + + with pytest.raises(ValueError, match="No parsed profiling data found for experiment\(s\)") as exc_info: + manager.plot_scaling_data( + components=["component"], + regions=[["Region 1"]], + metric=tavg, + experiments=["missing_exp"], + ) + + message = str(exc_info.value) + assert "missing_exp" in message + assert "Available experiments" in message + + @mock.patch("access.profiling.manager.plot_bar_metrics") def test_bar_chart_data(mock_plot, scaling_data): """Test the plot_bar_chart method of ProfilingManager.