Skip to content

Commit 2f04c54

Browse files
Add PerfTools MLP family estimator packages
Signed-off-by: Yoshifumi Nakamura <nakamura@riken.jp>
1 parent 6ce377b commit 2f04c54

11 files changed

Lines changed: 261 additions & 15 deletions

docs/guides/add-estimation-package.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
- `overlap_max_basic.sh`
4646
- `gpu_kernel_lightgbm_v10.sh`
4747
- `gpu_kernel_mlp_v15.sh`
48+
- `gpu_kernel_mlp_v21.sh`
49+
- `gpu_kernel_mlp_v40.sh`
50+
- `gpu_kernel_mlp_v41.sh`
4851

4952
## 3. top-level package の責務
5053

@@ -75,6 +78,18 @@ GPU kernel 単位の外部推定ツールは、通常は section package とし
7578
- `gpu_kernel_mlp_v15`
7679
- PerfTools `MLP_NN/v1.5`
7780
- 主な依存: numpy/pandas/torch
81+
- `gpu_kernel_mlp_v21`
82+
- PerfTools `MLP_NN/v2.1`
83+
- v1.5 NN と analytical anchor を組み合わせた hybrid/reference 系
84+
- 主な依存: numpy/pandas/torch
85+
- `gpu_kernel_mlp_v40`
86+
- PerfTools `MLP_NN/v4.0`
87+
- no-ET pure NN 系
88+
- 主な依存: numpy/pandas/torch
89+
- `gpu_kernel_mlp_v41`
90+
- PerfTools `MLP_NN/v4.1`
91+
- v4.0 に single-axis trend 対応を加えた NN 系
92+
- 主な依存: numpy/pandas/torch
7893
- `gpu_kernel_lightgbm_v10`
7994
- PerfTools `LightGBM_model/1.0`
8095
- 主な依存: numpy/pandas/lightgbm/pyyaml と `libgomp`

programs/genesis/README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,19 @@ Single-package selection:
124124
```bash
125125
BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v15
126126
# or
127+
BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v21
128+
# or
129+
BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v40
130+
# or
131+
BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v41
132+
# or
127133
BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_lightgbm_v10
128134
```
129135

130136
Multiple-package comparison:
131137

132138
```bash
133-
BK_GENESIS_GPU_SECTION_PACKAGES=gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15
139+
BK_GENESIS_GPU_SECTION_PACKAGES=gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15,gpu_kernel_mlp_v21,gpu_kernel_mlp_v40,gpu_kernel_mlp_v41
134140
```
135141

136142
When multiple packages are selected, the app wrapper asks for

programs/genesis/estimate.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ genesis_gpu_section_packages() {
99
elif [[ -n "${BK_GENESIS_GPU_SECTION_PACKAGE:-}" ]]; then
1010
raw="$BK_GENESIS_GPU_SECTION_PACKAGE"
1111
else
12-
raw="gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15"
12+
raw="gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15,gpu_kernel_mlp_v21,gpu_kernel_mlp_v40,gpu_kernel_mlp_v41"
1313
fi
1414

1515
printf '%s\n' "$raw" |

scripts/estimation/packages/instrumented_app_sections_dummy.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ bk_estimation_package_metadata() {
3434
"gpu_kernel_ensemble_average",
3535
"gpu_kernel_lightgbm_v10",
3636
"gpu_kernel_mlp_v15",
37+
"gpu_kernel_mlp_v21",
38+
"gpu_kernel_mlp_v40",
39+
"gpu_kernel_mlp_v41",
3740
"logp"
3841
],
3942
"supported_overlap_packages": [

scripts/estimation/prepare_gpu_mlp_ncu_input.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python3
2-
"""Prepare a PerfTools MLP_NN/v1.5 input CSV from an Nsight Compute archive.
2+
"""Prepare a PerfTools MLP_NN input CSV from an Nsight Compute archive.
33
44
This is a small compatibility bridge for BenchKit. It converts the wide
55
Nsight Compute raw CSV exported from ``profile.ncu-rep`` into the CSV layout

scripts/estimation/section_packages/gpu_kernel_mlp_v15.sh

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
2-
# gpu_kernel_mlp_v15.sh - Section package for the PerfTools MLP_NN/v1.5 GPU estimator.
2+
# gpu_kernel_mlp_v15.sh - Section package and shared implementation for
3+
# PerfTools MLP_NN GPU estimators.
34

45
bk_section_package_metadata_gpu_kernel_mlp_v15() {
56
cat <<'EOF'
@@ -143,7 +144,7 @@ _bk_gpu_mlp_ensure_perftools_root() {
143144

144145
mkdir -p "$(dirname "$root")"
145146
if [[ ! -d "$root/.git" ]]; then
146-
echo "Fetching PerfTools for gpu_kernel_mlp_v15: ${repo} (${ref})" >&2
147+
echo "Fetching PerfTools for ${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}: ${repo} (${ref})" >&2
147148
git clone --depth 1 "$repo" "$root" >&2 || {
148149
printf '%s\n' "$root"
149150
return 0
@@ -161,13 +162,15 @@ _bk_gpu_mlp_ensure_perftools_root() {
161162

162163
_bk_gpu_mlp_predictor() {
163164
local root="$1"
165+
local version_dir="${BK_GPU_MLP_VERSION_DIR:-v1.5}"
166+
local predictor_script="${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
164167

165168
if [[ -z "$root" ]]; then
166169
printf '%s\n' ""
167170
return 0
168171
fi
169172

170-
printf '%s\n' "${root}/MLP_NN/v1.5/predict_v15.py"
173+
printf '%s\n' "${root}/MLP_NN/${version_dir}/${predictor_script}"
171174
}
172175

173176
_bk_gpu_mlp_python_exists() {
@@ -346,6 +349,7 @@ bk_section_package_check_applicability_gpu_kernel_mlp_v15() {
346349
local root
347350
local predictor
348351
local python_bin="${BK_GPU_MLP_PYTHON:-$(_bk_gpu_mlp_default_python)}"
352+
local predictor_rel="MLP_NN/${BK_GPU_MLP_VERSION_DIR:-v1.5}/${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
349353
local missing=()
350354

351355
if [[ "$item_kind" != "section" ]]; then
@@ -387,7 +391,7 @@ EOF
387391
missing+=('"BK_GPU_MLP_PERFTOOLS_ROOT"')
388392
fi
389393
if [[ -z "$predictor" || ! -f "$predictor" ]]; then
390-
missing+=('"PerfTools MLP_NN/v1.5/predict_v15.py"')
394+
missing+=("\"PerfTools predictor:${predictor_rel}\"")
391395
fi
392396
fi
393397

@@ -572,7 +576,7 @@ if nonpositive_prediction_count:
572576
"severity": "warning",
573577
"reason": "nonpositive_predicted_execution_time",
574578
"message": (
575-
"PerfTools MLP_NN/v1.5 returned non-positive predicted execution "
579+
f"PerfTools MLP_NN/{model_version} returned non-positive predicted execution "
576580
"time for one or more kernel rows. Check target GPU selection and "
577581
"required NCU feature coverage."
578582
),
@@ -601,7 +605,7 @@ print(json.dumps({
601605
},
602606
"model": {
603607
"type": "cross_gpu_kernel_prediction_model",
604-
"name": "PerfTools MLP_NN/v1.5",
608+
"name": "PerfTools MLP_NN/" + model_version,
605609
"version": model_version,
606610
"repository": "https://github.com/masaaki-kondo/PerfTools",
607611
},
@@ -645,7 +649,10 @@ _bk_gpu_mlp_run_predictor() {
645649
local root
646650
local input_csv
647651
local ncu_archive
648-
local output_dir="${BK_GPU_MLP_OUTPUT_DIR:-results/estimation_artifacts/gpu_kernel_mlp_v15}"
652+
local package_name="${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}"
653+
local version_dir="${BK_GPU_MLP_VERSION_DIR:-v1.5}"
654+
local predictor_script="${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
655+
local output_dir="${BK_GPU_MLP_OUTPUT_DIR:-results/estimation_artifacts/${package_name}}"
649656
local prediction_csv
650657
local prediction_log
651658
local input_csv_abs
@@ -672,18 +679,18 @@ _bk_gpu_mlp_run_predictor() {
672679

673680
if ! (
674681
cd "$root"
675-
"$python_bin" MLP_NN/v1.5/predict_v15.py \
682+
"$python_bin" "MLP_NN/${version_dir}/${predictor_script}" \
676683
--csv "$input_csv_abs" \
677684
--row "${BK_GPU_MLP_ROW:-all}" \
678685
--out "$prediction_csv_abs" \
679686
--log "$prediction_log_abs"
680687
) >/dev/null; then
681-
echo "ERROR: PerfTools MLP_NN/v1.5 inference failed" >&2
688+
echo "ERROR: PerfTools MLP_NN/${version_dir} inference failed" >&2
682689
return 1
683690
fi
684691

685692
if [[ ! -s "$prediction_csv_abs" ]]; then
686-
echo "ERROR: PerfTools MLP_NN/v1.5 did not create prediction CSV: ${prediction_csv_abs}" >&2
693+
echo "ERROR: PerfTools MLP_NN/${version_dir} did not create prediction CSV: ${prediction_csv_abs}" >&2
687694
return 1
688695
fi
689696

@@ -702,8 +709,9 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
702709
local prediction_log=""
703710
local run_outputs
704711
local parsed_json
705-
local package_name="gpu_kernel_mlp_v15"
712+
local package_name="${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}"
706713
local model_version="${BK_GPU_MLP_MODEL_VERSION:-v1.5}"
714+
local scaling_method="${BK_GPU_MLP_SCALING_METHOD:-gpu-kernel-mlp-${model_version}}"
707715
local selector_kind=""
708716
local selector_value=""
709717
local selector
@@ -727,6 +735,7 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
727735
--arg prediction_log "$prediction_log" \
728736
--arg selector_kind "$selector_kind" \
729737
--arg selector_value "$selector_value" \
738+
--arg scaling_method "$scaling_method" \
730739
--argjson parsed "$parsed_json" '
731740
def selector_matches($kind; $value):
732741
if $kind == "" or $value == "" then true
@@ -767,7 +776,7 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
767776
end
768777
),
769778
bench_time: $source_section_time,
770-
scaling_method: (if $can_identity_fallback then "identity" else "gpu-kernel-mlp-v1.5" end),
779+
scaling_method: (if $can_identity_fallback then "identity" else $scaling_method end),
771780
estimation_package: (if $can_identity_fallback then "identity" else $parsed.estimation_package end),
772781
requested_estimation_package: (if $can_identity_fallback then $parsed.estimation_package else (.requested_estimation_package // $parsed.estimation_package) end),
773782
fallback_used: (if $can_identity_fallback then "identity" else null end),
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
2+
# gpu_kernel_mlp_v21.sh - Thin package wrapper for PerfTools MLP_NN/v2.1.
3+
4+
#######################################
# Print the static metadata record of the gpu_kernel_mlp_v21 section package.
# Arguments: none
# Outputs:   one JSON object on stdout (emitted verbatim, no expansion)
#######################################
bk_section_package_metadata_gpu_kernel_mlp_v21() {
  # Quoted delimiter: the payload is literal text, nothing is expanded.
  cat <<'JSON'
{
"name": "gpu_kernel_mlp_v21",
"fallback_target": "identity",
"source_system_scope": {
"kind": "benchmark_system",
"accepted_values": ["any"]
},
"target_system_scope": {
"accepted_values": ["any"]
},
"item_kind_scope": ["section"],
"required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
"required_artifact_kinds": [
"PerfTools MLP_NN/v2.1 prepared input CSV",
"precomputed prediction CSV",
"or BenchKit padata archive with Nsight Compute raw CSV"
],
"acquisition_mode": "external",
"output_fields": [
"time",
"bench_time",
"scaling_method",
"metrics",
"package_applicability"
]
}
JSON
}
34+
35+
#######################################
# Applicability check for the gpu_kernel_mlp_v21 package.
# Selects the MLP_NN/v2.1 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 applicability check.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_check_applicability_gpu_kernel_mlp_v21() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v21" \
      BK_GPU_MLP_VERSION_DIR="v2.1" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v21.py" \
      BK_GPU_MLP_MODEL_VERSION="v2.1" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v2.1"
    bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
  )
}
43+
44+
#######################################
# Transform entry point for the gpu_kernel_mlp_v21 package.
# Selects the MLP_NN/v2.1 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 transform.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_transform_gpu_kernel_mlp_v21() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v21" \
      BK_GPU_MLP_VERSION_DIR="v2.1" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v21.py" \
      BK_GPU_MLP_MODEL_VERSION="v2.1" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v2.1"
    bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
  )
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
2+
# gpu_kernel_mlp_v40.sh - Thin package wrapper for PerfTools MLP_NN/v4.0.
3+
4+
#######################################
# Print the static metadata record of the gpu_kernel_mlp_v40 section package.
# Arguments: none
# Outputs:   one JSON object on stdout (emitted verbatim, no expansion)
#######################################
bk_section_package_metadata_gpu_kernel_mlp_v40() {
  # Quoted delimiter: the payload is literal text, nothing is expanded.
  cat <<'JSON'
{
"name": "gpu_kernel_mlp_v40",
"fallback_target": "identity",
"source_system_scope": {
"kind": "benchmark_system",
"accepted_values": ["any"]
},
"target_system_scope": {
"accepted_values": ["any"]
},
"item_kind_scope": ["section"],
"required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
"required_artifact_kinds": [
"PerfTools MLP_NN/v4.0 prepared input CSV",
"precomputed prediction CSV",
"or BenchKit padata archive with Nsight Compute raw CSV"
],
"acquisition_mode": "external",
"output_fields": [
"time",
"bench_time",
"scaling_method",
"metrics",
"package_applicability"
]
}
JSON
}
34+
35+
#######################################
# Applicability check for the gpu_kernel_mlp_v40 package.
# Selects the MLP_NN/v4.0 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 applicability check.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_check_applicability_gpu_kernel_mlp_v40() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v40" \
      BK_GPU_MLP_VERSION_DIR="v4.0" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v40.py" \
      BK_GPU_MLP_MODEL_VERSION="v4.0" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.0"
    bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
  )
}
43+
44+
#######################################
# Transform entry point for the gpu_kernel_mlp_v40 package.
# Selects the MLP_NN/v4.0 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 transform.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_transform_gpu_kernel_mlp_v40() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v40" \
      BK_GPU_MLP_VERSION_DIR="v4.0" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v40.py" \
      BK_GPU_MLP_MODEL_VERSION="v4.0" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.0"
    bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
  )
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
2+
# gpu_kernel_mlp_v41.sh - Thin package wrapper for PerfTools MLP_NN/v4.1.
3+
4+
#######################################
# Print the static metadata record of the gpu_kernel_mlp_v41 section package.
# Arguments: none
# Outputs:   one JSON object on stdout (emitted verbatim, no expansion)
#######################################
bk_section_package_metadata_gpu_kernel_mlp_v41() {
  # Quoted delimiter: the payload is literal text, nothing is expanded.
  cat <<'JSON'
{
"name": "gpu_kernel_mlp_v41",
"fallback_target": "identity",
"source_system_scope": {
"kind": "benchmark_system",
"accepted_values": ["any"]
},
"target_system_scope": {
"accepted_values": ["any"]
},
"item_kind_scope": ["section"],
"required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
"required_artifact_kinds": [
"PerfTools MLP_NN/v4.1 prepared input CSV",
"precomputed prediction CSV",
"or BenchKit padata archive with Nsight Compute raw CSV"
],
"acquisition_mode": "external",
"output_fields": [
"time",
"bench_time",
"scaling_method",
"metrics",
"package_applicability"
]
}
JSON
}
34+
35+
#######################################
# Applicability check for the gpu_kernel_mlp_v41 package.
# Selects the MLP_NN/v4.1 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 applicability check.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_check_applicability_gpu_kernel_mlp_v41() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v41" \
      BK_GPU_MLP_VERSION_DIR="v4.1" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v41.py" \
      BK_GPU_MLP_MODEL_VERSION="v4.1" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.1"
    bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
  )
}
43+
44+
#######################################
# Transform entry point for the gpu_kernel_mlp_v41 package.
# Selects the MLP_NN/v4.1 settings through BK_GPU_MLP_* environment variables
# and delegates to the gpu_kernel_mlp_v15 transform.
# Arguments: forwarded unchanged to the v15 function
# Outputs:   whatever the v15 function writes
# Returns:   exit status of the v15 function
#######################################
bk_section_package_transform_gpu_kernel_mlp_v41() {
  # Subshell: the exported settings must not leak into the calling shell.
  (
    export \
      BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v41" \
      BK_GPU_MLP_VERSION_DIR="v4.1" \
      BK_GPU_MLP_PREDICT_SCRIPT="predict_v41.py" \
      BK_GPU_MLP_MODEL_VERSION="v4.1" \
      BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.1"
    bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
  )
}

0 commit comments

Comments
 (0)