Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions scripts/gamepiece/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
## Making the Python Environment
- TODO use uv and write out list of files.
- NOTE: Separate envs are currently required for autolabeler and main training/testing
## Dataset Creation
### Getting Images
- Download a dataset from Kaggle or Roboflow, preferably one that is already labeled
- NOTE: If you concatenate multiple datasets, ensure that the names of the labels are the same (don't end up training on Coral vs CORAL)
- Convention: Make everything lowercase
- Put everything into a central folder
- Fallbacks for shortage of labeled data:
- Find a dataset of similarly shaped objects and use `color_shifter.py` to make it approximately the same color
- Make your own dataset: `video_clip_extractor.py` (NOT WORKING because ffmpeg and video scraping are hard)
- (Currently not working) Run autolabeler on the images if the labels are sketch or missing
- Use `move_dataset.py` to move images between folders
- `dataset_image_extractor.py` to remove the distinction of train/valid/test if it doesn't matter
### Validating Dataset/Autodistill
- Use `draw_detections.py` and run through the images to make sure the detections look reasonable
- `img_printer.py` because I don't like windows
### Image Modifications (NEEDS TESTING)
- Most useful modifications are Shear, Rotate, Mosaic (always run mosaic), Translate, Scale, Blur, Noise
- Noise isn't necessary for better cameras, should check what the actual feed looks like
- Same for blur
- Don't go too overboard especially if the dataset is small, can cause overfitting
- Decision for grayscale: uniformly shaped objects could probably use grayscale, which will be much faster. Otherwise, color can help a fair amount. Alternative (UNTESTED): Use gray detection with manual pixel color verification
## Training
### GPU options
- Free: Google Colab or Kaggle
- The free tier of Google Colab is insanely slow, and you also need to keep the browser tab open
- Idk about Kaggle
- If you have a PC with reasonable graphics card, use that. Ex: 4070 made a usable coral detector on 150 epochs on several thousand images in ~1 hour
- Run `gpu_specs_inspector.py` to check
### Running the Train
- Run the `v2_train.py` script to start training
- Make sure each run writes to a unique output location, and save a checkpoint every n epochs so that a crash or interruption does not wipe out your progress
- Run `onnx_exporter.py` to convert pt binary to ONNX format with NMS ENABLED
- It is extremely important that the binary be in NMS format, otherwise it is difficult to work with. Agents love to assume that NMS isn't enabled.
## Validation (NEEDS IMPROVEMENT)
- After receiving the pt binary, test it using `pt_tester.py`
- Can also run `onnx_tester.py`, but I don't remember if this works
- On the device that is actually running the model (should be Jetson Orin):
- Run: `/usr/src/tensorrt/bin/trtexec --onnx=<onnx_file_name>.onnx --saveEngine=<DESCRIPTIVE_engine_name>.engine --fp16`
- Either `claude_validation.py` or `test.py` or `simple_test.py` or `old_model_tester.py` (all of them are sketch, needs testing)

- Note: The validators often perform differently on onnx/pt (running using simple YOLO python functions) vs .engine (running in C++). TODO do more validation

26 changes: 26 additions & 0 deletions scripts/gamepiece/autolabeling/autolabeling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from autodistill_grounding_dino import GroundingDINO
from autodistill.detection import CaptionOntology
from autodistill_yolov8 import YOLOv8
from autodistill.utils import plot
import os
from pathlib import Path
import shutil
import constants as constants

def main():
    """Auto-label every ``.jpg`` in the input folder with GroundingDINO.

    If the output folder already exists the user is asked whether it may be
    deleted; any answer other than ``y``/``yes`` aborts without labeling.
    """
    if os.path.exists(constants.OUTPUT_FOLDER):
        # Strip whitespace so answers such as "y " or " yes" are still accepted.
        response = input(
            f"Output folder {constants.OUTPUT_FOLDER} already exists. "
            "Delete and continue? [y/N] "
        ).strip().lower()
        if response not in ("y", "yes"):
            return
        shutil.rmtree(constants.OUTPUT_FOLDER)
        print("removed folder")

    # Use the shared prompt from constants so the labeler and the CLIP cleaner
    # always describe the same object.
    base_model = GroundingDINO(
        ontology=CaptionOntology({constants.OBJECT: "ball"}),
        box_threshold=0.3,
        text_threshold=0.6,
    )
    base_model.label(
        input_folder=constants.INPUT_FOLDER,
        extension=".jpg",
        output_folder=constants.OUTPUT_FOLDER,
    )


if __name__ == "__main__":
    main()
131 changes: 131 additions & 0 deletions scripts/gamepiece/autolabeling/clean_detections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from PIL import Image
import numpy as np
import torch
import open_clip
import constants as constants
from pathlib import Path
import cv2

class CLIPReranker:
    """Zero-shot CLIP check that an image crop looks like the target object.

    The crop is compared against one positive prompt (built from
    ``constants.OBJECT``) and a list of negative "look-alike" prompts; the
    crop is accepted when the positive prompt scores at least as high as the
    best negative prompt.
    """

    def __init__(self, device="cuda"):
        """Load the CLIP model and pre-compute the normalized text features.

        Args:
            device: Torch device string that the model and all tensors are
                moved to.
        """
        self.device = device

        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            "ViT-B-32", pretrained="openai"
        )
        self.model.to(self.device)
        self.model.eval()

        self.positive_prompts = [
            "a " + constants.OBJECT,
        ]

        # Every entry needs its own comma: adjacent string literals are
        # silently concatenated by Python, which would merge two prompts
        # into a single meaningless one.
        self.negative_prompts = [
            "a space heater",
            "a rock",
            "a stone",
            "a metal cylinder",
            "a round household object",
            "a tree",
        ]

        # Text features never change, so encode them once up front.
        # Positive prompts come first; score_crop relies on that ordering.
        with torch.no_grad():
            self.text_features = self._encode_text(
                self.positive_prompts + self.negative_prompts
            )

    def _encode_text(self, prompts):
        """Return unit-length CLIP text embeddings, one row per prompt."""
        tokens = open_clip.tokenize(prompts).to(self.device)
        text_features = self.model.encode_text(tokens)
        return text_features / text_features.norm(dim=-1, keepdim=True)

    def score_crop(self, image_crop: Image.Image):
        """Score a crop against the positive and negative prompts.

        Args:
            image_crop: The cropped detection to evaluate.

        Returns:
            ``(pos_score, neg_score)``: the highest softmax probability among
            the positive prompts and among the negative prompts.
        """
        image_tensor = self.preprocess(image_crop).unsqueeze(0).to(self.device)

        with torch.no_grad():
            image_features = self.model.encode_image(image_tensor)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            # Both sides are unit-normalized, so these are cosine similarities.
            logits = (image_features @ self.text_features.T).squeeze(0)
            probs = logits.softmax(dim=0)

        pos_count = len(self.positive_prompts)
        pos_score = probs[:pos_count].max().item()
        neg_score = probs[pos_count:].max().item()

        return pos_score, neg_score

    def is_positive(self, image_crop):
        """Return True when the best positive prompt ties or beats the best negative."""
        pos, neg = self.score_crop(image_crop)
        return pos > 0 and pos >= neg

def show_blocking(pil_img, title="image"):
    """Display an image in an OpenCV window and wait for a key press.

    Args:
        pil_img: Image to show; its channels are reversed before display.
        title: Name of the window, which is destroyed after the key press.
    """
    rgb_pixels = np.array(pil_img)
    # OpenCV displays BGR, so flip the channel axis (RGB -> BGR).
    bgr_pixels = rgb_pixels[:, :, ::-1]
    cv2.imshow(title, bgr_pixels)
    cv2.waitKey(0)
    cv2.destroyWindow(title)

def yolo_to_xyxy(label, img_w, img_h):
    """Convert a normalized YOLO label to clamped pixel corner coordinates.

    Args:
        label: Five values ``(class, cx, cy, w, h)``; the class is ignored and
            the remaining values are multiplied by the image size.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(x1, y1, x2, y2)`` as ints, with the top-left corner clamped to
        ``[0, size - 1]`` and the bottom-right corner clamped to ``[1, size]``.
    """
    def clamp(value, low, high):
        return max(low, min(value, high))

    _, cx, cy, w, h = label

    # Scale the normalized center and size up to pixel units.
    center_x = cx * img_w
    center_y = cy * img_h
    box_w = w * img_w
    box_h = h * img_h

    # int() truncates toward zero, matching the corner rounding used so far.
    left = int(center_x - box_w / 2)
    top = int(center_y - box_h / 2)
    right = int(center_x + box_w / 2)
    bottom = int(center_y + box_h / 2)

    return (
        clamp(left, 0, img_w - 1),
        clamp(top, 0, img_h - 1),
        clamp(right, 1, img_w),
        clamp(bottom, 1, img_h),
    )

def clean_split(reranker: CLIPReranker, images_dir: Path, labels_dir: Path):
    """Remove label lines whose image crop the reranker rejects.

    For every ``*.txt`` file in ``labels_dir`` the matching ``.jpg`` in
    ``images_dir`` is loaded, each labeled box is cropped and passed to
    ``reranker.is_positive``, and the label file is overwritten with only the
    accepted lines. Rejected crops are shown in a blocking window. Label
    files without a matching image are left untouched.

    Note that lines that cannot be parsed as exactly five numbers, and boxes
    that are empty after clamping, are dropped from the rewritten file.

    Args:
        reranker: Classifier used to accept or reject each crop.
        images_dir: Directory holding the ``.jpg`` images.
        labels_dir: Directory holding the YOLO-format ``.txt`` label files.
    """
    for label_path in labels_dir.glob("*.txt"):
        image_path = images_dir / (label_path.stem + ".jpg")
        if not image_path.exists():
            continue

        # convert() returns an independent, fully loaded copy, so the source
        # file can be closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        img_w, img_h = image.size

        with open(label_path, "r") as f:
            lines = f.readlines()

        kept_labels = []
        for line in lines:
            try:
                parts = [float(token) for token in line.split()]
            except ValueError:
                # A non-numeric token must not abort the whole cleaning run.
                print(f"skipping malformed label line in {label_path}: {line.strip()!r}")
                continue
            if len(parts) != 5:
                continue

            x1, y1, x2, y2 = yolo_to_xyxy(parts, img_w, img_h)
            if x2 <= x1 or y2 <= y1:
                continue

            crop = image.crop((x1, y1, x2, y2))

            if reranker.is_positive(crop):
                print("keeping")
                kept_labels.append(line)
            else:
                print("removing")
                show_blocking(crop)

        # Overwrite label file with cleaned annotations
        with open(label_path, "w") as f:
            f.writelines(kept_labels)

def main():
    """Run the CLIP cleaner over every dataset split in the output folder.

    Both ``valid`` and ``val`` are tried for the validation split because the
    folder name differs between tools; splits whose ``labels`` directory does
    not exist are skipped.
    """
    reranker = CLIPReranker()
    output_root = Path(constants.OUTPUT_FOLDER)
    for split in ("train", "valid", "val"):
        split_dir = output_root / split
        labels_dir = split_dir / "labels"
        if not labels_dir.is_dir():
            continue
        clean_split(
            reranker=reranker,
            images_dir=split_dir / "images",
            labels_dir=labels_dir,
        )


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions scripts/gamepiece/autolabeling/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Text description of the object being labeled.
OBJECT: str = "striped foam dodgeball"

# Folder holding the raw, unlabeled images.
INPUT_FOLDER: str = "./datasets/striped_dodgeballs"

# Folder the labeled dataset is written to; derived from the input folder.
OUTPUT_FOLDER: str = f"{INPUT_FOLDER}_labeled"
80 changes: 80 additions & 0 deletions scripts/gamepiece/claude_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import onnxruntime as ort
import cv2
import yaml
import numpy as np
from pathlib import Path
import constants

# Config
# NOTE(review): constants.MODEL_PATH is used as a directory prefix here but is
# passed to InferenceSession as the ONNX file path further down; confirm which
# one constants.py actually provides.
YAML_PATH = constants.MODEL_PATH + "data.yaml"
# Minimum confidence for an output row to count as a detection.
CONF_THRESHOLD = 0.25

with open(YAML_PATH) as f:
    # Parsed only to verify that the dataset YAML exists and is valid.
    data = yaml.safe_load(f)
val_path = Path(constants.DATASET + "valid/images")
img_files = list(val_path.glob('*.jpg')) + list(val_path.glob('*.png'))

print(f"YAML path: {YAML_PATH}")
print(f"Val path: {val_path}")
print(f"Val path exists: {val_path.exists()}")
print(f"Images found: {len(img_files)}")
if len(img_files) == 0:
    print("ERROR: No images found!")
    raise SystemExit(1)
print()

session = ort.InferenceSession(constants.MODEL_PATH)
input_name = session.get_inputs()[0].name
input_shape = session.get_inputs()[0].shape
img_size = input_shape[2]

print(f"Input shape:{img_size}")
if not isinstance(img_size, int):
    # Dynamic axes are reported as names/None and cannot be used for resizing.
    print(f"ERROR: model input size is not fixed ({img_size!r})")
    raise SystemExit(1)

# Metrics
tp = fp = fn = 0

for img_file in img_files:
    # Load image
    img = cv2.imread(str(img_file))
    if img is None:
        # cv2.imread returns None instead of raising on unreadable files.
        print(f"Could not read image, skipping: {img_file}")
        continue
    img_resized = cv2.resize(img, (img_size, img_size))
    # OpenCV loads BGR; convert to RGB before feeding the model.
    # NOTE(review): assumes the model was trained on RGB input - confirm.
    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    img_norm = img_rgb.astype(np.float32) / 255.0
    img_input = np.transpose(img_norm, (2, 0, 1))[None, ...]

    # Inference
    outputs = session.run(None, {input_name: img_input})[0]
    print(f"Output shape: {outputs.shape}")

    # The first axis of `outputs` is the batch, so len(outputs) is not the
    # number of detections. Count the rows that pass the confidence threshold.
    # NOTE(review): assumes NMS-enabled rows are
    # [x1, y1, x2, y2, confidence, class] - confirm against the export.
    detections = [
        det
        for batch in outputs
        for det in batch
        if det[4] >= CONF_THRESHOLD
    ]
    num_preds = len(detections)

    img_draw = img_resized.copy()
    for det in detections:
        print(f"Detection: {det}")
        x1, y1, x2, y2 = int(det[0]), int(det[1]), int(det[2]), int(det[3])
        cv2.rectangle(img_draw, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imshow('Detections', img_draw)
    cv2.waitKey(0)

    # Load ground truth
    label_file = img_file.parent.parent / 'labels' / img_file.name
    label_file = label_file.with_suffix('.txt')
    num_gt = 0
    if label_file.exists():
        with open(label_file) as f:
            # Ignore blank lines so they are not counted as boxes.
            num_gt = sum(1 for line in f if line.strip())
    else:
        print(f"Label not found: {label_file}")
        raise SystemExit(1)

    # Count matches (assume model output is correct): boxes are compared by
    # count only, no IoU matching is performed.
    matched = min(num_preds, num_gt)
    tp += matched
    fp += num_preds - matched
    fn += num_gt - matched

print(f"True Positives: {tp}")
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")
print(f"True Negatives: N/A (object detection)")
print(f"\nPrecision: {tp/(tp+fp) if tp+fp > 0 else 0:.3f}")
print(f"Recall: {tp/(tp+fn) if tp+fn > 0 else 0:.3f}")
Loading
Loading