Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 62 additions & 2 deletions .github/actions/collect_data/src/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,14 @@ def map_benchmark_data(self, pipeline, job_id, report_data, model_spec_data=None
metadata,
model_spec_data,
)
return benchmark_runs + benchmark_summary_runs + eval_runs
acceptance_summary_runs = self._process_acceptance_summary(
pipeline,
job,
report_data.get("acceptance_summary", {}),
metadata,
model_spec_data,
)
return benchmark_runs + benchmark_summary_runs + eval_runs + acceptance_summary_runs
except ValidationError as e:
failure_happened()
logger.error(f"Validation error: {e}")
Expand Down Expand Up @@ -380,6 +387,58 @@ def _process_evals(self, pipeline, job, evals, metadata=None, model_spec_data=No
)
return results

def _process_acceptance_summary(self, pipeline, job, acceptance_summary, metadata=None, model_spec_data=None):
"""
Processes acceptance summary entries and creates CompleteBenchmarkRun objects for each entry.
"""
results = []
if acceptance_summary:
if metadata:
logger.debug(f"Processing acceptance summary with metadata included...")
acceptance_summary = {**acceptance_summary, **metadata} # metadata values take precedence
measurements = self._create_measurements(
job,
"acceptance_summary",
acceptance_summary,
[
"acceptance_criteria",
],
)

# Merge leftover key-values from acceptance_summary into model_spec_data for config_params
acceptance_criteria_metadata = model_spec_data.copy() if model_spec_data else {}
for key, value in acceptance_summary.items():
if key not in acceptance_criteria_metadata:
acceptance_criteria_metadata[key] = value
results.append(
self._create_complete_benchmark_run(
pipeline=pipeline,
job=job,
data=acceptance_summary,
run_type="acceptance_summary",
measurements=measurements,
device_info=acceptance_summary.get("device"),
model_name=acceptance_summary.get("model"),
model_type=model_spec_data.get("model_type") if model_spec_data else None,
input_seq_length=None,
output_seq_length=None,
dataset_name=None,
batch_size=None,
config_params=acceptance_criteria_metadata,
)
)
return results

def _normalize_measurement_value(self, value):
if isinstance(value, str):
if value.lower() == "true":
return 1.0
elif value.lower() == "false":
return 0.0
elif value == "":
return None
return value

def _create_measurements(self, job, step_name, data, keys):
"""
Creates BenchmarkMeasurement objects for the specified keys in the data.
Expand All @@ -388,14 +447,15 @@ def _create_measurements(self, job, step_name, data, keys):
for key in keys:
if key in data:
try:
value = self._normalize_measurement_value(data.get(key))
measurement = BenchmarkMeasurement(
step_start_ts=job.job_start_ts,
step_end_ts=job.job_end_ts,
iteration=1,
step_name=step_name,
step_warm_up_num_iterations=None,
name=key,
value=data.get(key),
value=value,
target=None,
device_power=None,
device_temperature=None,
Expand Down
134 changes: 134 additions & 0 deletions .github/actions/collect_data/test/test_benchmark_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,140 @@ def test_evals_model_type_without_model_spec(mapper, pipeline):
assert result[0].ml_model_type is None


def test_process_acceptance_summary(mapper, pipeline):
    """A "true" acceptance_criteria maps to one acceptance_summary run with a 1.0 measurement."""
    metadata = {
        "report_id": "test_report",
        "model": "test_model",
        "device": "test_device",
        "model_id": "id_test_spec_test_model_test_device",
        "inference_engine": "vllm",
    }
    acceptance_summary = {
        "acceptance_criteria": "true",
        "acceptance_blockers": "",
        "acceptance_summary_markdown": "## Summary\nAll good",
    }
    model_spec = {"model_name": "test_model", "device_type": "test_device"}

    result = mapper.map_benchmark_data(
        pipeline,
        1,
        {"metadata": metadata, "acceptance_summary": acceptance_summary},
        model_spec,
    )

    assert len(result) == 1
    run = result[0]
    assert isinstance(run, CompleteBenchmarkRun)
    assert run.run_type == "acceptance_summary"
    assert len(run.measurements) == 1
    measurement = run.measurements[0]
    assert measurement.name == "acceptance_criteria"
    assert measurement.value == 1.0


def test_process_acceptance_summary_config_params(mapper, pipeline):
    """config_params carries both the model spec extras and the non-measured summary fields."""
    metadata = {
        "report_id": "test_report",
        "model": "test_model",
        "device": "test_device",
        "model_id": "id_test_spec_test_model_test_device",
        "inference_engine": "vllm",
    }
    acceptance_summary = {
        "acceptance_criteria": "true",
        "acceptance_blockers": "",
        "acceptance_summary_markdown": "## Summary\nAll good",
    }
    report_data = {"metadata": metadata, "acceptance_summary": acceptance_summary}
    model_spec_data = {
        "model_name": "test_model",
        "device_type": "test_device",
        "extra_param": "extra_value",
    }

    result = mapper.map_benchmark_data(pipeline, 1, report_data, model_spec_data)

    assert len(result) == 1
    run = result[0]
    assert isinstance(run, CompleteBenchmarkRun)
    assert run.run_type == "acceptance_summary"
    assert isinstance(run.config_params, dict)
    config = run.config_params
    assert config.get("extra_param") == "extra_value"
    assert config.get("acceptance_blockers") == ""
    assert config.get("acceptance_summary_markdown") == "## Summary\nAll good"


def test_process_acceptance_summary_with_metadata(mapper, pipeline):
    """Report metadata overrides the summary's own model/device; "false" is measured as 0.0."""
    metadata = {
        "report_id": "test_report",
        "model": "test_model",
        "device": "test_device",
        "model_id": "id_test_spec_test_model_test_device",
        "inference_engine": "vllm",
    }
    # The summary deliberately names a different model and device than the metadata.
    acceptance_summary = {
        "model": "test_model_2",
        "device": "test_device_2",
        "acceptance_criteria": "false",
        "acceptance_blockers": "Test blocker 1, Test blocker 2",
        "acceptance_summary_markdown": "## Summary\nSome issues found",
    }
    report_data = {"metadata": metadata, "acceptance_summary": acceptance_summary}

    result = mapper.map_benchmark_data(pipeline, 1, report_data, {"model_name": "test_model"})

    assert len(result) == 1
    run = result[0]
    assert isinstance(run, CompleteBenchmarkRun)
    assert run.run_type == "acceptance_summary"
    assert run.ml_model_name == "test_model"
    assert run.device_info == {"device_name": "test_device"}
    assert len(run.measurements) == 1
    measurement = run.measurements[0]
    assert measurement.name == "acceptance_criteria"
    assert measurement.value == 0.0


def test_process_acceptance_summary_empty(mapper, pipeline):
    """A report whose only section is an empty acceptance_summary yields no runs."""
    runs = mapper.map_benchmark_data(pipeline, 1, {"acceptance_summary": {}})
    assert len(runs) == 0


def test_process_acceptance_summary_missing(mapper, pipeline):
    """A report with no sections at all yields no runs."""
    runs = mapper.map_benchmark_data(pipeline, 1, {})
    assert len(runs) == 0


def test_process_acceptance_summary_model_spec_precedence(mapper, pipeline):
    """On a key collision in config_params, the model spec value beats the acceptance summary value."""
    model_spec_data = {
        "model_name": "model_from_spec",
        "device_type": "tt",
        "shared_key": "value_from_model_spec",
        "only_in_spec": "spec_value",
    }
    metadata = {
        "report_id": "test_report",
        "model": "test_model",
        "device": "test_device",
        "model_id": "id_test_spec_test_model_test_device",
        "inference_engine": "vllm",
    }
    acceptance_summary = {
        "acceptance_criteria": "true",
        "shared_key": "value_from_acceptance_summary",
        "only_in_acceptance": "acceptance_value",
    }
    report_data = {"metadata": metadata, "acceptance_summary": acceptance_summary}

    result = mapper.map_benchmark_data(pipeline, 1, report_data, model_spec_data)

    assert len(result) == 1
    run = result[0]
    assert isinstance(run, CompleteBenchmarkRun)
    assert run.run_type == "acceptance_summary"

    # Checked in order: the colliding key (spec wins), the keys unique to
    # each source, then the remaining model spec keys.
    expected_config = {
        "shared_key": "value_from_model_spec",
        "only_in_spec": "spec_value",
        "only_in_acceptance": "acceptance_value",
        "model_name": "model_from_spec",
        "device_type": "tt",
    }
    for key, expected_value in expected_config.items():
        assert run.config_params.get(key) == expected_value


@pytest.mark.parametrize(
"input_val, expected",
[
Expand Down
Loading