Add PerfTools MLP family estimator packages

yoshifuminakamura · yoshifuminakamura · commit 2f04c54065de · 2026-06-19T23:29:41.000+09:00
Signed-off-by: Yoshifumi Nakamura <nakamura@riken.jp>
diff --git a/docs/guides/add-estimation-package.md b/docs/guides/add-estimation-package.md
@@ -45,6 +45,9 @@
   - `overlap_max_basic.sh`
   - `gpu_kernel_lightgbm_v10.sh`
   - `gpu_kernel_mlp_v15.sh`
+  - `gpu_kernel_mlp_v21.sh`
+  - `gpu_kernel_mlp_v40.sh`
+  - `gpu_kernel_mlp_v41.sh`
 
 ## 3. top-level package の責務
 
@@ -75,6 +78,18 @@ GPU kernel 単位の外部推定ツールは、通常は section package とし
 - `gpu_kernel_mlp_v15`
   - PerfTools `MLP_NN/v1.5`
   - 主な依存: numpy/pandas/torch
+- `gpu_kernel_mlp_v21`
+  - PerfTools `MLP_NN/v2.1`
+  - v1.5 NN と analytical anchor を組み合わせた hybrid/reference 系
+  - 主な依存: numpy/pandas/torch
+- `gpu_kernel_mlp_v40`
+  - PerfTools `MLP_NN/v4.0`
+  - no-ET pure NN 系
+  - 主な依存: numpy/pandas/torch
+- `gpu_kernel_mlp_v41`
+  - PerfTools `MLP_NN/v4.1`
+  - v4.0 に single-axis trend 対応を加えた NN 系
+  - 主な依存: numpy/pandas/torch
 - `gpu_kernel_lightgbm_v10`
   - PerfTools `LightGBM_model/1.0`
   - 主な依存: numpy/pandas/lightgbm/pyyaml と `libgomp`
diff --git a/programs/genesis/README.md b/programs/genesis/README.md
@@ -124,13 +124,19 @@ Single-package selection:
 ```bash
 BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v15
 # or
+BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v21
+# or
+BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v40
+# or
+BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_mlp_v41
+# or
 BK_GENESIS_GPU_SECTION_PACKAGE=gpu_kernel_lightgbm_v10
 ```
 
 Multiple-package comparison:
 
 ```bash
-BK_GENESIS_GPU_SECTION_PACKAGES=gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15
+BK_GENESIS_GPU_SECTION_PACKAGES=gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15,gpu_kernel_mlp_v21,gpu_kernel_mlp_v40,gpu_kernel_mlp_v41
 ```
 
 When multiple packages are selected, the app wrapper asks for
diff --git a/programs/genesis/estimate.sh b/programs/genesis/estimate.sh
@@ -9,7 +9,7 @@ genesis_gpu_section_packages() {
   elif [[ -n "${BK_GENESIS_GPU_SECTION_PACKAGE:-}" ]]; then
     raw="$BK_GENESIS_GPU_SECTION_PACKAGE"
   else
-    raw="gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15"
+    raw="gpu_kernel_lightgbm_v10,gpu_kernel_mlp_v15,gpu_kernel_mlp_v21,gpu_kernel_mlp_v40,gpu_kernel_mlp_v41"
   fi
 
   printf '%s\n' "$raw" |
diff --git a/scripts/estimation/packages/instrumented_app_sections_dummy.sh b/scripts/estimation/packages/instrumented_app_sections_dummy.sh
@@ -34,6 +34,9 @@ bk_estimation_package_metadata() {
     "gpu_kernel_ensemble_average",
     "gpu_kernel_lightgbm_v10",
     "gpu_kernel_mlp_v15",
+    "gpu_kernel_mlp_v21",
+    "gpu_kernel_mlp_v40",
+    "gpu_kernel_mlp_v41",
     "logp"
   ],
   "supported_overlap_packages": [
diff --git a/scripts/estimation/prepare_gpu_mlp_ncu_input.py b/scripts/estimation/prepare_gpu_mlp_ncu_input.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Prepare a PerfTools MLP_NN/v1.5 input CSV from an Nsight Compute archive.
+"""Prepare a PerfTools MLP_NN input CSV from an Nsight Compute archive.
 
 This is a small compatibility bridge for BenchKit.  It converts the wide
 Nsight Compute raw CSV exported from ``profile.ncu-rep`` into the CSV layout
diff --git a/scripts/estimation/section_packages/gpu_kernel_mlp_v15.sh b/scripts/estimation/section_packages/gpu_kernel_mlp_v15.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-# gpu_kernel_mlp_v15.sh - Section package for the PerfTools MLP_NN/v1.5 GPU estimator.
+# gpu_kernel_mlp_v15.sh - Section package and shared implementation for
+# PerfTools MLP_NN GPU estimators.
 
 bk_section_package_metadata_gpu_kernel_mlp_v15() {
   cat <<'EOF'
@@ -143,7 +144,7 @@ _bk_gpu_mlp_ensure_perftools_root() {
 
     mkdir -p "$(dirname "$root")"
     if [[ ! -d "$root/.git" ]]; then
-      echo "Fetching PerfTools for gpu_kernel_mlp_v15: ${repo} (${ref})" >&2
+      echo "Fetching PerfTools for ${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}: ${repo} (${ref})" >&2
       git clone --depth 1 "$repo" "$root" >&2 || {
         printf '%s\n' "$root"
         return 0
@@ -161,13 +162,15 @@ _bk_gpu_mlp_ensure_perftools_root() {
 
 _bk_gpu_mlp_predictor() {
   local root="$1"
+  local version_dir="${BK_GPU_MLP_VERSION_DIR:-v1.5}"
+  local predictor_script="${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
 
   if [[ -z "$root" ]]; then
     printf '%s\n' ""
     return 0
   fi
 
-  printf '%s\n' "${root}/MLP_NN/v1.5/predict_v15.py"
+  printf '%s\n' "${root}/MLP_NN/${version_dir}/${predictor_script}"
 }
 
 _bk_gpu_mlp_python_exists() {
@@ -346,6 +349,7 @@ bk_section_package_check_applicability_gpu_kernel_mlp_v15() {
   local root
   local predictor
   local python_bin="${BK_GPU_MLP_PYTHON:-$(_bk_gpu_mlp_default_python)}"
+  local predictor_rel="MLP_NN/${BK_GPU_MLP_VERSION_DIR:-v1.5}/${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
   local missing=()
 
   if [[ "$item_kind" != "section" ]]; then
@@ -387,7 +391,7 @@ EOF
       missing+=('"BK_GPU_MLP_PERFTOOLS_ROOT"')
     fi
     if [[ -z "$predictor" || ! -f "$predictor" ]]; then
-      missing+=('"PerfTools MLP_NN/v1.5/predict_v15.py"')
+      missing+=("\"PerfTools predictor:${predictor_rel}\"")
     fi
   fi
 
@@ -572,7 +576,7 @@ if nonpositive_prediction_count:
         "severity": "warning",
         "reason": "nonpositive_predicted_execution_time",
         "message": (
-            "PerfTools MLP_NN/v1.5 returned non-positive predicted execution "
+            f"PerfTools MLP_NN/{model_version} returned non-positive predicted execution "
             "time for one or more kernel rows. Check target GPU selection and "
             "required NCU feature coverage."
         ),
@@ -601,7 +605,7 @@ print(json.dumps({
     },
     "model": {
         "type": "cross_gpu_kernel_prediction_model",
-        "name": "PerfTools MLP_NN/v1.5",
+        "name": "PerfTools MLP_NN/" + model_version,
         "version": model_version,
         "repository": "https://github.com/masaaki-kondo/PerfTools",
     },
@@ -645,7 +649,10 @@ _bk_gpu_mlp_run_predictor() {
   local root
   local input_csv
   local ncu_archive
-  local output_dir="${BK_GPU_MLP_OUTPUT_DIR:-results/estimation_artifacts/gpu_kernel_mlp_v15}"
+  local package_name="${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}"
+  local version_dir="${BK_GPU_MLP_VERSION_DIR:-v1.5}"
+  local predictor_script="${BK_GPU_MLP_PREDICT_SCRIPT:-predict_v15.py}"
+  local output_dir="${BK_GPU_MLP_OUTPUT_DIR:-results/estimation_artifacts/${package_name}}"
   local prediction_csv
   local prediction_log
   local input_csv_abs
@@ -672,18 +679,18 @@ _bk_gpu_mlp_run_predictor() {
 
   if ! (
     cd "$root"
-    "$python_bin" MLP_NN/v1.5/predict_v15.py \
+    "$python_bin" "MLP_NN/${version_dir}/${predictor_script}" \
       --csv "$input_csv_abs" \
       --row "${BK_GPU_MLP_ROW:-all}" \
       --out "$prediction_csv_abs" \
       --log "$prediction_log_abs"
   ) >/dev/null; then
-    echo "ERROR: PerfTools MLP_NN/v1.5 inference failed" >&2
+    echo "ERROR: PerfTools MLP_NN/${version_dir} inference failed" >&2
     return 1
   fi
 
   if [[ ! -s "$prediction_csv_abs" ]]; then
-    echo "ERROR: PerfTools MLP_NN/v1.5 did not create prediction CSV: ${prediction_csv_abs}" >&2
+    echo "ERROR: PerfTools MLP_NN/${version_dir} did not create prediction CSV: ${prediction_csv_abs}" >&2
     return 1
   fi
 
@@ -702,8 +709,9 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
   local prediction_log=""
   local run_outputs
   local parsed_json
-  local package_name="gpu_kernel_mlp_v15"
+  local package_name="${BK_GPU_MLP_PACKAGE_NAME:-gpu_kernel_mlp_v15}"
   local model_version="${BK_GPU_MLP_MODEL_VERSION:-v1.5}"
+  local scaling_method="${BK_GPU_MLP_SCALING_METHOD:-gpu-kernel-mlp-${model_version}}"
   local selector_kind=""
   local selector_value=""
   local selector
@@ -727,6 +735,7 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
     --arg prediction_log "$prediction_log" \
     --arg selector_kind "$selector_kind" \
     --arg selector_value "$selector_value" \
+    --arg scaling_method "$scaling_method" \
     --argjson parsed "$parsed_json" '
     def selector_matches($kind; $value):
       if $kind == "" or $value == "" then true
@@ -767,7 +776,7 @@ bk_section_package_transform_gpu_kernel_mlp_v15() {
           end
         ),
         bench_time: $source_section_time,
-        scaling_method: (if $can_identity_fallback then "identity" else "gpu-kernel-mlp-v1.5" end),
+        scaling_method: (if $can_identity_fallback then "identity" else $scaling_method end),
         estimation_package: (if $can_identity_fallback then "identity" else $parsed.estimation_package end),
         requested_estimation_package: (if $can_identity_fallback then $parsed.estimation_package else (.requested_estimation_package // $parsed.estimation_package) end),
         fallback_used: (if $can_identity_fallback then "identity" else null end),
diff --git a/scripts/estimation/section_packages/gpu_kernel_mlp_v21.sh b/scripts/estimation/section_packages/gpu_kernel_mlp_v21.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# gpu_kernel_mlp_v21.sh - Thin package wrapper for PerfTools MLP_NN/v2.1.
+
+bk_section_package_metadata_gpu_kernel_mlp_v21() {
+  cat <<'EOF'
+{
+  "name": "gpu_kernel_mlp_v21",
+  "fallback_target": "identity",
+  "source_system_scope": {
+    "kind": "benchmark_system",
+    "accepted_values": ["any"]
+  },
+  "target_system_scope": {
+    "accepted_values": ["any"]
+  },
+  "item_kind_scope": ["section"],
+  "required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
+  "required_artifact_kinds": [
+    "PerfTools MLP_NN/v2.1 prepared input CSV",
+    "precomputed prediction CSV",
+    "or BenchKit padata archive with Nsight Compute raw CSV"
+  ],
+  "acquisition_mode": "external",
+  "output_fields": [
+    "time",
+    "bench_time",
+    "scaling_method",
+    "metrics",
+    "package_applicability"
+  ]
+}
+EOF
+}
+
+bk_section_package_check_applicability_gpu_kernel_mlp_v21() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v21"
+  export BK_GPU_MLP_VERSION_DIR="v2.1"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v21.py"
+  export BK_GPU_MLP_MODEL_VERSION="v2.1"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v2.1"
+  bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
+)
+
+bk_section_package_transform_gpu_kernel_mlp_v21() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v21"
+  export BK_GPU_MLP_VERSION_DIR="v2.1"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v21.py"
+  export BK_GPU_MLP_MODEL_VERSION="v2.1"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v2.1"
+  bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
+)
diff --git a/scripts/estimation/section_packages/gpu_kernel_mlp_v40.sh b/scripts/estimation/section_packages/gpu_kernel_mlp_v40.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# gpu_kernel_mlp_v40.sh - Thin package wrapper for PerfTools MLP_NN/v4.0.
+
+bk_section_package_metadata_gpu_kernel_mlp_v40() {
+  cat <<'EOF'
+{
+  "name": "gpu_kernel_mlp_v40",
+  "fallback_target": "identity",
+  "source_system_scope": {
+    "kind": "benchmark_system",
+    "accepted_values": ["any"]
+  },
+  "target_system_scope": {
+    "accepted_values": ["any"]
+  },
+  "item_kind_scope": ["section"],
+  "required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
+  "required_artifact_kinds": [
+    "PerfTools MLP_NN/v4.0 prepared input CSV",
+    "precomputed prediction CSV",
+    "or BenchKit padata archive with Nsight Compute raw CSV"
+  ],
+  "acquisition_mode": "external",
+  "output_fields": [
+    "time",
+    "bench_time",
+    "scaling_method",
+    "metrics",
+    "package_applicability"
+  ]
+}
+EOF
+}
+
+bk_section_package_check_applicability_gpu_kernel_mlp_v40() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v40"
+  export BK_GPU_MLP_VERSION_DIR="v4.0"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v40.py"
+  export BK_GPU_MLP_MODEL_VERSION="v4.0"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.0"
+  bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
+)
+
+bk_section_package_transform_gpu_kernel_mlp_v40() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v40"
+  export BK_GPU_MLP_VERSION_DIR="v4.0"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v40.py"
+  export BK_GPU_MLP_MODEL_VERSION="v4.0"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.0"
+  bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
+)
diff --git a/scripts/estimation/section_packages/gpu_kernel_mlp_v41.sh b/scripts/estimation/section_packages/gpu_kernel_mlp_v41.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# gpu_kernel_mlp_v41.sh - Thin package wrapper for PerfTools MLP_NN/v4.1.
+
+bk_section_package_metadata_gpu_kernel_mlp_v41() {
+  cat <<'EOF'
+{
+  "name": "gpu_kernel_mlp_v41",
+  "fallback_target": "identity",
+  "source_system_scope": {
+    "kind": "benchmark_system",
+    "accepted_values": ["any"]
+  },
+  "target_system_scope": {
+    "accepted_values": ["any"]
+  },
+  "item_kind_scope": ["section"],
+  "required_result_fields": ["name", "app-side GPU section time as time or bench_time"],
+  "required_artifact_kinds": [
+    "PerfTools MLP_NN/v4.1 prepared input CSV",
+    "precomputed prediction CSV",
+    "or BenchKit padata archive with Nsight Compute raw CSV"
+  ],
+  "acquisition_mode": "external",
+  "output_fields": [
+    "time",
+    "bench_time",
+    "scaling_method",
+    "metrics",
+    "package_applicability"
+  ]
+}
+EOF
+}
+
+bk_section_package_check_applicability_gpu_kernel_mlp_v41() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v41"
+  export BK_GPU_MLP_VERSION_DIR="v4.1"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v41.py"
+  export BK_GPU_MLP_MODEL_VERSION="v4.1"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.1"
+  bk_section_package_check_applicability_gpu_kernel_mlp_v15 "$@"
+)
+
+bk_section_package_transform_gpu_kernel_mlp_v41() (
+  export BK_GPU_MLP_PACKAGE_NAME="gpu_kernel_mlp_v41"
+  export BK_GPU_MLP_VERSION_DIR="v4.1"
+  export BK_GPU_MLP_PREDICT_SCRIPT="predict_v41.py"
+  export BK_GPU_MLP_MODEL_VERSION="v4.1"
+  export BK_GPU_MLP_SCALING_METHOD="gpu-kernel-mlp-v4.1"
+  bk_section_package_transform_gpu_kernel_mlp_v15 "$@"
+)
diff --git a/scripts/tests/test_estimation_gpu_kernel_mlp_v15.sh b/scripts/tests/test_estimation_gpu_kernel_mlp_v15.sh
diff --git a/scripts/tests/test_genesis_gpu_mlp_estimation.sh b/scripts/tests/test_genesis_gpu_mlp_estimation.sh