frc971 · yasen5 · Apr 4, 2026 · Apr 4, 2026
diff --git a/scripts/gamepiece/README.md b/scripts/gamepiece/README.md
@@ -0,0 +1,45 @@
+## Making the Python Environment
+- TODO use uv and write out list of files.
+- NOTE: Separate envs are currently required for autolabeler and main training/testing
+## Dataset Creation
+### Getting Images
+- Download a dataset from Kaggle or Roboflow, preferably already labeled
+- NOTE: If you concatenate multiple datasets, ensure that the names of the labels are the same (don't end up training on Coral vs CORAL)
+  - Convention: Make everything lowercase
+- Put everything into a central folder
+- Fallbacks for shortage of labeled data:
+  - Find a dataset of similarly shaped objects and use `color_shifter.py` to make its colors approximately the same as the target object
+  - Make your own dataset: `video_clip_extractor.py` (NOT WORKING because ffmpeg and video scraping are hard)
+  - (Currently not working) Run autolabeler on the images if the labels are sketch or missing
+- Use `move_dataset.py` to move images between folders
+- Use `dataset_image_extractor.py` to remove the distinction of train/valid/test if it doesn't matter
+### Validating Dataset/Autodistill
+- Use `draw_detections.py` and run through the images to make sure the detections look reasonable
+- `img_printer.py` because I don't like windows
+### Image Modifications (NEEDS TESTING)
+- Most useful modifications are Shear, Rotate, Mosaic (always run mosaic), Translate, Scale, Blur, Noise
+  - Noise isn't necessary for better cameras, should check what the actual feed looks like
+  - Same for blur
+- Don't go too overboard especially if the dataset is small, can cause overfitting
+- Decision for grayscale: uniformly shaped objects could probably use grayscale, which will be much faster. Otherwise, color can help a fair amount. Alternative (UNTESTED): Use gray detection with manual pixel color verification
+## Training
+### GPU options
+- Free: Google Colab or Kaggle
+  - Google Colab free is insanely slow, also need to keep browser tab open
+  - Idk about Kaggle
+- If you have a PC with reasonable graphics card, use that. Ex: 4070 made a usable coral detector on 150 epochs on several thousand images in ~1 hour
+  - Run `gpu_specs_inspector.py` to check
+### Running the Train
+- Run the `v2_train.py` script to start training
+- Make sure to save to a unique output location, and save a checkpoint every n epochs so that a random failure doesn't delete your progress
+- Run `onnx_exporter.py` to convert pt binary to ONNX format with NMS ENABLED
+  - It is extremely important that the binary be in NMS format, otherwise it is difficult to work with. Agents love to assume that NMS isn't enabled.
+## Validation (NEEDS IMPROVEMENT)
+- After receiving the pt binary, test it using `pt_tester.py`
+  - Can also run `onnx_tester.py`, but I don't remember if this works
+- On the device that is actually running the model (should be Jetson Orin):
+  - Run: `/usr/src/tensorrt/bin/trtexec --onnx=<onnx_file_name>.onnx --saveEngine=<DESCRIPTIVE_engine_name>.engine --fp16`
+  - Either `claude_validation.py` or `test.py` or `simple_test.py` or `old_model_tester.py` (all of them are sketch, needs testing)
+
+  - Note: The validators often perform differently on onnx/pt (running using simple YOLO python functions) vs .engine (running in C++). TODO do more validation
+
diff --git a/scripts/gamepiece/autolabeling/autolabeling.py b/scripts/gamepiece/autolabeling/autolabeling.py
@@ -0,0 +1,26 @@
+from autodistill_grounding_dino import GroundingDINO
+from autodistill.detection import CaptionOntology
+from autodistill_yolov8 import YOLOv8
+from autodistill.utils import plot
+import os
+from pathlib import Path
+import shutil
+import constants as constants
+
+def main():
+    if os.path.exists(constants.OUTPUT_FOLDER):
+        response = input(f"Output folder {constants.OUTPUT_FOLDER} already exists. Delete and continue?").lower()
+        if response == "y" or response == "yes":
+            shutil.rmtree(constants.OUTPUT_FOLDER)
+            print("removed folder")
+        else:
+            return
+    base_model = GroundingDINO(ontology=CaptionOntology({"striped foam dodgeball": "ball"}), box_threshold=0.3, text_threshold=0.6)
+    base_model.label(
+        input_folder=constants.INPUT_FOLDER,
+        extension=".jpg",
+        output_folder=constants.OUTPUT_FOLDER
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/gamepiece/autolabeling/clean_detections.py b/scripts/gamepiece/autolabeling/clean_detections.py
@@ -0,0 +1,131 @@
+from PIL import Image
+import numpy as np
+import torch
+import open_clip
+import constants as constants
+from pathlib import Path
+import cv2
+
+class CLIPReranker:
+    def __init__(self, device="cuda"):
+        self.device = device
+
+        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
+            "ViT-B-32", pretrained="openai"
+        )
+        self.model.to(self.device)
+        self.model.eval()
+
+        self.positive_prompts = [
+            "a " + constants.OBJECT
+        ]
+
+        self.negative_prompts = [
+            "a space heater",
+            "a rock",
+            "a stone",
+            "a metal cylinder",
+            "a round household object"
+            "a tree"
+        ]
+
+        with torch.no_grad():
+            self.text_features = self._encode_text(
+                self.positive_prompts + self.negative_prompts
+            )
+
+    def _encode_text(self, prompts):
+        tokens = open_clip.tokenize(prompts).to(self.device)
+        text_features = self.model.encode_text(tokens)
+        return text_features / text_features.norm(dim=-1, keepdim=True)
+
+    def score_crop(self, image_crop: Image.Image):
+        image_tensor = self.preprocess(image_crop).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            image_features = self.model.encode_image(image_tensor)
+            image_features /= image_features.norm(dim=-1, keepdim=True)
+
+            logits = (image_features @ self.text_features.T).squeeze(0)
+            probs = logits.softmax(dim=0)
+
+        pos_count = len(self.positive_prompts)
+        pos_score = probs[:pos_count].max().item()
+        neg_score = probs[pos_count:].max().item()
+
+        return pos_score, neg_score
+
+    def is_positive(self, image_crop):
+        pos, neg = self.score_crop(image_crop)
+        return pos > 0 and pos >= neg
+
+def show_blocking(pil_img, title="image"):
+    img = np.array(pil_img)[:, :, ::-1]  # RGB → BGR
+    cv2.imshow(title, img)
+    cv2.waitKey(0)
+    cv2.destroyWindow(title)
+
+def yolo_to_xyxy(label, img_w, img_h):
+    _, cx, cy, w, h = label
+    cx *= img_w
+    cy *= img_h
+    w *= img_w
+    h *= img_h
+
+    x1 = int(cx - w / 2)
+    y1 = int(cy - h / 2)
+    x2 = int(cx + w / 2)
+    y2 = int(cy + h / 2)
+
+    x1 = max(0, min(x1, img_w - 1))
+    y1 = max(0, min(y1, img_h - 1))
+    x2 = max(1, min(x2, img_w))
+    y2 = max(1, min(y2, img_h))
+
+    return x1, y1, x2, y2
+
+def clean_split(reranker: CLIPReranker, images_dir: Path, labels_dir: Path):
+    for label_path in labels_dir.glob("*.txt"):
+        image_path = images_dir / (label_path.stem + ".jpg")
+        if not image_path.exists():
+            continue
+
+        image = Image.open(image_path).convert("RGB")
+        img_w, img_h = image.size
+
+        kept_labels = []
+
+        with open(label_path, "r") as f:
+            lines = f.readlines()
+
+        for line in lines:
+            parts = list(map(float, line.strip().split()))
+            if len(parts) != 5:
+                continue
+
+            x1, y1, x2, y2 = yolo_to_xyxy(parts, img_w, img_h)
+
+            if x2 <= x1 or y2 <= y1:
+                continue
+
+            crop = image.crop((x1, y1, x2, y2))
+
+            if reranker.is_positive(crop):
+                print("keeping")
+                kept_labels.append(line)
+            else:
+                print("removing")
+                show_blocking(crop)
+
+        # Overwrite label file with cleaned annotations
+        with open(label_path, "w") as f:
+            f.writelines(kept_labels)
+
+def main():
+    reranker = CLIPReranker()
+    for split in ["train", "val"]:
+        dir = constants.OUTPUT_FOLDER + "/" + split
+        clean_split(reranker=reranker, images_dir=Path(dir + "/images"), labels_dir=Path(dir + "/labels"))
+
+if __name__=="__main__":
+    main()
diff --git a/scripts/gamepiece/autolabeling/constants.py b/scripts/gamepiece/autolabeling/constants.py
@@ -0,0 +1,3 @@
+OBJECT: str="striped foam dodgeball"
+INPUT_FOLDER: str = "./datasets/striped_dodgeballs"
+OUTPUT_FOLDER: str = INPUT_FOLDER + "_labeled"
diff --git a/scripts/gamepiece/claude_validation.py b/scripts/gamepiece/claude_validation.py
@@ -0,0 +1,80 @@
+import onnxruntime as ort
+import cv2
+import yaml
+import numpy as np
+from pathlib import Path
+import constants
+
+# Config
+YAML_PATH = constants.MODEL_PATH + "data.yaml"
+
+with open(YAML_PATH) as f:
+    data = yaml.safe_load(f)
+yaml_dir = Path(YAML_PATH)
+val_path = Path(constants.DATASET + "valid/images")
+img_files = list(val_path.glob('*.jpg')) + list(val_path.glob('*.png'))
+
+print(f"YAML path: {YAML_PATH}")
+print(f"Val path: {val_path}")
+print(f"Val path exists: {val_path.exists()}")
+print(f"Images found: {len(img_files)}")
+if len(img_files) == 0:
+    print("ERROR: No images found!")
+    exit(1)
+print()
+
+session = ort.InferenceSession(constants.MODEL_PATH)
+input_name = session.get_inputs()[0].name
+input_shape = session.get_inputs()[0].shape
+img_size = input_shape[2]
+
+print(f"Input shape:{img_size}")
+
+# Metrics
+tp = fp = fn = 0
+
+for img_file in img_files:
+    # Load image
+    img = cv2.imread(str(img_file))
+    img_resized = cv2.resize(img, (img_size, img_size))
+    img_norm = img_resized.astype(np.float32) / 255.0
+    img_input = np.transpose(img_norm, (2, 0, 1))[None, ...]
+
+    # Inference
+    outputs = session.run(None, {input_name: img_input})[0]
+    num_preds = len(outputs)
+
+    img_draw = img_resized.copy()
+
+    print(f"Output shape: {outputs.shape}")
+    for thingy in outputs:
+        for det in thingy:
+            print(f"Detection: {det}")
+            x1, y1, x2, y2 = int(det[0]), int(det[1]), int(det[2]), int(det[3])
+            cv2.rectangle(img_draw, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.imshow('Detections', img_draw)
+        cv2.waitKey(0)
+
+    # Load ground truth
+    label_file = img_file.parent.parent / 'labels' / img_file.name
+    label_file = label_file.with_suffix('.txt')
+    num_gt = 0
+    if label_file.exists():
+        with open(label_file) as f:
+            num_gt = len(f.readlines())
+    else:
+        print(f"Label not found: {label_file}")
+        exit(1)
+
+    # Count matches (assume model output is correct)
+    matched = min(num_preds, num_gt)
+    tp += matched
+    fp += num_preds - matched
+    fn += num_gt - matched
+
+print(f"True Positives: {tp}")
+print(f"False Positives: {fp}")
+print(f"False Negatives: {fn}")
+print(f"True Negatives: N/A (object detection)")
+print(f"\nPrecision: {tp/(tp+fp) if tp+fp > 0 else 0:.3f}")
+print(f"Recall: {tp/(tp+fn) if tp+fn > 0 else 0:.3f}")