From 20d1561a545551393ede3d5ebafcd42f7dc07659 Mon Sep 17 00:00:00 2001
From: Xinqi Li <lixinqi2010@gmail.com>
Date: Mon, 16 Mar 2026 07:49:13 +0000
Subject: [PATCH 1/2] add scripts for evaluation subset selection

---
 ...graph_sample_uids_and_op_seq_bucket_id.sql | 22 +++++++++++++++++++
 .../select_representive_sample_uids.viba      | 11 ++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql
 create mode 100644 graph_net/sqlite_util/select_representive_sample_uids.viba

diff --git a/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql b/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql
new file mode 100644
index 000000000..35f62fe68
--- /dev/null
+++ b/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql
@@ -0,0 +1,22 @@
+-- Samples joined to their bucket and group rows; only rows with deleted = 0 are kept.
+CREATE TEMP VIEW v_sample_buckets_groups AS
+SELECT
+    smp.sample_type, smp.relative_model_path,
+    bkt.sample_uid, bkt.op_seq_bucket_id,
+    grp.group_uid
+FROM graph_sample AS smp
+INNER JOIN graph_net_sample_buckets AS bkt ON bkt.sample_uid = smp.uuid
+INNER JOIN graph_net_sample_groups AS grp ON grp.sample_uid = smp.uuid
+WHERE smp.deleted = 0
+  AND bkt.deleted = 0 AND grp.deleted = 0;
+
+select distinct group_concat(sample_uid) as sample_uids, op_seq_bucket_id  -- one row per group_uid: its sample uids joined with ','
+from (
+    select *
+    from v_sample_buckets_groups
+    order by sample_uid asc  -- NOTE(review): meant to sort uids inside group_concat; SQLite documents that order as arbitrary - confirm
+)
+where sample_type = 'fusible_graph'
+group by group_uid  -- NOTE(review): op_seq_bucket_id is a bare column here; assumes one bucket per group - confirm
+order by op_seq_bucket_id asc
+;
diff --git a/graph_net/sqlite_util/select_representive_sample_uids.viba b/graph_net/sqlite_util/select_representive_sample_uids.viba
new file mode 100644
index 000000000..cc433124e
--- /dev/null
+++ b/graph_net/sqlite_util/select_representive_sample_uids.viba
@@ -0,0 +1,11 @@
+# Spec for select_representive_sample_uids.py: map each selected op_seq to one sample_uids string from its group.
+select_representive_sample_uids :=
+	$ret list[$sample_uids str]
+  <- $total_sample_uids_and_op_seqs ArgParse[FileContentEachLine[$sample_uids str, TAB, $op_seq str]] # file: one "sample_uids TAB op_seq" record per line
+  <- $selected_op_seqs ArgParse[FileContentEachLine[$op_seq str]] # file: one op_seq per line
+  # inline (steps below: group by op_seq, reduce each group to one string, look up the selected op_seqs; presumably the longest string wins - confirm)
+  <- ($grouped_by_op_seq dict[$op_seq str, list[$sample_uids str]] <- $total_sample_uids_and_op_seqs)
+  <- $get_max_len_sample_uids (str <- list[$sample_uids str])
+  <- ($op_seq_to_max_len_sample_uids dict[$op_seq, $max_len_sample_uids str]
+     <- $get_max_len_sample_uids <- $grouped_by_op_seq)
+  <- ($ret <- $op_seq_to_max_len_sample_uids <- $selected_op_seqs)

From d1d20a9230f282e1e977fd79e62d485101a8fd11 Mon Sep 17 00:00:00 2001
From: Xinqi Li <lixinqi2010@gmail.com>
Date: Mon, 16 Mar 2026 08:01:55 +0000
Subject: [PATCH 2/2] add script_select_evaluation_subset.py

---
 .../script_select_evaluation_subset.py        | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 graph_net/sqlite_util/script_select_evaluation_subset.py

diff --git a/graph_net/sqlite_util/script_select_evaluation_subset.py b/graph_net/sqlite_util/script_select_evaluation_subset.py
new file mode 100644
index 000000000..8849256b5
--- /dev/null
+++ b/graph_net/sqlite_util/script_select_evaluation_subset.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+Generated from Viba specification:
+    select_evaluation_subset := ImportFrom[
+        "graph_net.sqlite_util.select_evaluation_subset",
+        list[$op_seq list[str]]
+        <- list[$op_seq list[str]]
+        <- $k int
+    ]
+
+    main :=
+        void
+        <- $op_seq_file ArgParse[FilePathContent[EachLine[JsonStr[$op_seq list[str]]]]]
+        <- $k ArgParse[int] # default 200
+        # inline
+        <- (list[$selected_op_seq list[str]] <- select_evaluation_subset <- list[$op_seq] <- $k)
+        <- ($console <- JsonStr[$selected_op_seq])
+"""
+
+import argparse
+import json
+import sys
+from typing import List
+
+# Import the real selector. Expected signature:
+#   (op_seq_list: List[List[str]], k: int) -> List[List[str]]
+try:
+    from graph_net.sqlite_util.select_evaluation_subset import select_evaluation_subset
+except ImportError as import_err:
+    # Development fallback: warn so its output is not mistaken for a real selection.
+    print(f"Warning: using dummy select_evaluation_subset ({import_err})", file=sys.stderr)
+
+    def select_evaluation_subset(
+        op_seq_list: List[List[str]], k: int
+    ) -> List[List[str]]:
+        """Dummy implementation: returns the first k elements (none if k <= 0)."""
+        return op_seq_list[: max(k, 0)]
+
+
+def parse_op_seq_file(file_path: str) -> List[List[str]]:
+    """Read op sequences from a file holding one JSON list of strings per line.
+
+    Lines that are empty after stripping whitespace are skipped.
+
+    Raises:
+        ValueError: a line is not valid JSON (chained to the decoder error)
+            or does not decode to a list of strings.
+    """
+    op_seq_list = []
+    with open(file_path, "r", encoding="utf-8") as f:
+        for line_num, line in enumerate(f, 1):
+            line = line.strip()
+            if not line:
+                continue
+            try:  # only the decode step can raise JSONDecodeError
+                data = json.loads(line)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Line {line_num}: invalid JSON – {e}") from e
+            if not (isinstance(data, list) and all(isinstance(x, str) for x in data)):
+                raise ValueError(f"Line {line_num}: expected list of strings, got {type(data)}")
+            op_seq_list.append(data)
+    return op_seq_list
+
+
+def main() -> None:
+    """Parse CLI arguments, select a subset of op sequences and print it.
+
+    Reads ``op_seq_file`` (one JSON list of strings per line), passes the
+    sequences and ``k`` to ``select_evaluation_subset``, and writes each
+    selected sequence to stdout as one JSON line. Exits with status 1 when
+    reading or selection fails, and with a usage error when ``k`` is negative.
+    """
+    parser = argparse.ArgumentParser(
+        description="Select a subset of operation sequences using the specified evaluation strategy."
+    )
+    parser.add_argument(
+        "op_seq_file",
+        help="Path to a file where each line is a JSON list of strings (an op_seq).",
+    )
+    parser.add_argument(
+        "-k",
+        "--k",
+        type=int,
+        default=200,
+        help="Number of sequences to select (default: 200).",
+    )
+    args = parser.parse_args()
+    if args.k < 0:  # a negative count is meaningless; reject it before any I/O
+        parser.error(f"-k/--k must be non-negative, got {args.k}")
+    try:
+        op_seq_list = parse_op_seq_file(args.op_seq_file)
+    except Exception as e:  # CLI boundary: report on stderr and exit with status 1
+        sys.exit(f"Error reading op_seq file: {e}")
+    try:
+        selected = select_evaluation_subset(op_seq_list, args.k)
+    except Exception as e:
+        sys.exit(f"Error during subset selection: {e}")
+    for s in selected:
+        print(json.dumps(s))
+
+
+if __name__ == "__main__":
+    main()