From 20d1561a545551393ede3d5ebafcd42f7dc07659 Mon Sep 17 00:00:00 2001 From: Xinqi Li Date: Mon, 16 Mar 2026 07:49:13 +0000 Subject: [PATCH 1/2] add scripts for evaluation subset selection --- ...graph_sample_uids_and_op_seq_bucket_id.sql | 22 +++++++++++++++++++ .../select_representive_sample_uids.viba | 11 ++++++++++ 2 files changed, 33 insertions(+) create mode 100644 graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql create mode 100644 graph_net/sqlite_util/select_representive_sample_uids.viba diff --git a/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql b/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql new file mode 100644 index 000000000..35f62fe68 --- /dev/null +++ b/graph_net/sqlite_util/select_fusible_graph_sample_uids_and_op_seq_bucket_id.sql @@ -0,0 +1,22 @@ +CREATE TEMP VIEW v_sample_buckets_groups AS +SELECT + g.sample_type, + g.relative_model_path, + b.sample_uid, + b.op_seq_bucket_id, + grp.group_uid +FROM graph_sample g +JOIN graph_net_sample_buckets b ON g.uuid = b.sample_uid +JOIN graph_net_sample_groups grp ON g.uuid = grp.sample_uid +WHERE g.deleted = 0 AND b.deleted = 0 AND grp.deleted = 0; + +select distinct group_concat(sample_uid) as sample_uids, op_seq_bucket_id +from ( + select * + from v_sample_buckets_groups + order by sample_uid asc +) +where sample_type = 'fusible_graph' +group by group_uid +order by op_seq_bucket_id asc +; diff --git a/graph_net/sqlite_util/select_representive_sample_uids.viba b/graph_net/sqlite_util/select_representive_sample_uids.viba new file mode 100644 index 000000000..cc433124e --- /dev/null +++ b/graph_net/sqlite_util/select_representive_sample_uids.viba @@ -0,0 +1,11 @@ +# select_representive_sample_uids.py +select_representive_sample_uids := + $ret list[$sample_uids str] + <- $total_sample_uids_and_op_seqs ArgParse[FileContentEachLine[$sample_uids str, TAB, $op_seq str]] + <- $selected_op_seqs 
ArgParse[FileContentEachLine[$op_seq str]] + # inline + <- ($grouped_by_op_seq dict[$op_seq str, list[$sample_uids str]] <- $total_sample_uids_and_op_seqs) + <- $get_max_len_sample_uids (str <- list[$sample_uids str]) + <- ($op_seq_to_max_len_sample_uids dict[$op_seq, $max_len_sample_uids str] + <- $get_max_len_sample_uids <- $grouped_by_op_seq) + <- ($ret <- $op_seq_to_max_len_sample_uids <- $selected_op_seqs) From d1d20a9230f282e1e977fd79e62d485101a8fd11 Mon Sep 17 00:00:00 2001 From: Xinqi Li Date: Mon, 16 Mar 2026 08:01:55 +0000 Subject: [PATCH 2/2] add script_select_evaluation_subset.py --- .../script_select_evaluation_subset.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 graph_net/sqlite_util/script_select_evaluation_subset.py diff --git a/graph_net/sqlite_util/script_select_evaluation_subset.py b/graph_net/sqlite_util/script_select_evaluation_subset.py new file mode 100644 index 000000000..8849256b5 --- /dev/null +++ b/graph_net/sqlite_util/script_select_evaluation_subset.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +Generated from Viba specification: + select_evaluation_subset := ImportFrom[ + "graph_net.sqlite_util.select_evaluation_subset", + list[$op_seq list[str]] + <- list[$op_seq list[str]] + <- $k int + ] + + main := + void + <- $op_seq_file ArgParse[FilePathContent[EachLine[JsonStr[$op_seq list[str]]]]] + <- $k ArgParse[int] # default 200 + # inline + <- (list[$selected_op_seq list[str]] <- select_evaluation_subset <- list[$op_seq] <- $k) + <- ($console <- JsonStr[$selected_op_seq]) +""" + +import argparse +import json +import sys +from typing import List + +# Import the target function from the specified module. +# Assumes the module exists and provides a function named 'select_evaluation_subset'. +# The expected signature is: (op_seq_list: List[List[str]], k: int) -> List[List[str]] +# Adjust if the actual signature differs (e.g., curried). 
try:
    from graph_net.sqlite_util.select_evaluation_subset import select_evaluation_subset
except ImportError:
    # The real selector could not be imported. Keep the script usable for
    # development, but record the fact so main() can warn the user instead of
    # silently emitting results from a placeholder strategy.
    _USING_FALLBACK_SELECTOR = True

    def select_evaluation_subset(
        op_seq_list: List[List[str]], k: int
    ) -> List[List[str]]:
        """Placeholder selector: return the first ``k`` sequences (or fewer)."""
        return op_seq_list[:k]

else:
    _USING_FALLBACK_SELECTOR = False


def parse_op_seq_file(file_path: str) -> List[List[str]]:
    """Read a file in which every non-blank line is a JSON list of strings.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The decoded lists, in file order. Blank lines are skipped.

    Raises:
        ValueError: If a line is not valid JSON, is not a JSON list, or
            contains an item that is not a string. The message starts with
            the 1-based line number.
        OSError: If the file cannot be opened or read.
    """
    op_seq_list: List[List[str]] = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:  # skip empty lines
                continue
            # Keep the try body minimal: only the decode can raise
            # JSONDecodeError; shape validation happens afterwards.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: invalid JSON – {e}") from e
            if not isinstance(data, list):
                raise ValueError(
                    f"Line {line_num}: expected list of strings, "
                    f"got {type(data).__name__}"
                )
            for idx, item in enumerate(data):
                if not isinstance(item, str):
                    # Name the offending item; reporting the container type
                    # ("list") would be misleading here.
                    raise ValueError(
                        f"Line {line_num}: expected list of strings, "
                        f"but item {idx} is {type(item).__name__}"
                    )
            op_seq_list.append(data)
    return op_seq_list


def main(argv=None) -> None:
    """Command-line entry point: select op sequences and print them as JSON.

    Reads the op_seq file, passes the parsed sequences and ``k`` to
    ``select_evaluation_subset``, and prints each selected sequence as one
    JSON document per line on stdout.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of a plain
            ``main()`` call.

    Exits with status 1 if the input file cannot be read or parsed, or if the
    selection function raises; argparse exits with status 2 on invalid
    arguments, including a negative ``k``.
    """
    parser = argparse.ArgumentParser(
        description="Select a subset of operation sequences using the specified evaluation strategy."
    )
    parser.add_argument(
        "op_seq_file",
        help="Path to a file where each line is a JSON list of strings (an op_seq).",
    )
    parser.add_argument(
        "-k",
        "--k",
        type=int,
        default=200,
        help="Number of sequences to select (default: 200).",
    )
    args = parser.parse_args(argv)

    # A negative count is meaningless, and with slice-based selection it would
    # silently drop items from the end instead of failing.
    if args.k < 0:
        parser.error(f"-k/--k must be non-negative, got {args.k}")

    if _USING_FALLBACK_SELECTOR:
        print(
            "Warning: graph_net.sqlite_util.select_evaluation_subset could not "
            "be imported; using the placeholder 'first k' selection.",
            file=sys.stderr,
        )

    # 1. Parse input file
    try:
        op_seq_list = parse_op_seq_file(args.op_seq_file)
    except (OSError, ValueError) as e:
        print(f"Error reading op_seq file: {e}", file=sys.stderr)
        sys.exit(1)

    # 2. Apply the selection function.
    # The selector is external code, so this top-level boundary stays broad.
    try:
        selected = select_evaluation_subset(op_seq_list, args.k)
    except Exception as e:
        print(f"Error during subset selection: {e}", file=sys.stderr)
        sys.exit(1)

    # 3. Output result as JSON to console, one sequence per line
    for s in selected:
        print(json.dumps(s))


if __name__ == "__main__":
    main()