diff --git a/scripts/gamepiece/README.md b/scripts/gamepiece/README.md new file mode 100644 index 00000000..c6f7cb12 --- /dev/null +++ b/scripts/gamepiece/README.md @@ -0,0 +1,45 @@ +## Making the Python Environment +- TODO use uv and write out list of files. +- NOTE: Separate envs are currently required for autolabeler and main training/testing +## Dataset Creation +### Getting Images +- Download a dataset from Kaggle or Roboflow, preferably already labeled +- NOTE: If you concatenate multiple datasets, ensure that the names of the labels are the same (don't end up training on Coral vs CORAL) + - Convention: Make everything lowercase +- Put everything into a central folder +- Fallbacks for shortage of labeled data: + - Find a dataset of similarly shaped objects and use `color_changer.py` to make it approximately the same color + - Make your own dataset: `video_clip_extractor.py` (NOT WORKING because ffmpeg and video scraping are hard) + - (Currently not working) Run autolabeler on the images if the labels are sketchy or missing +- Use `move_dataset.py` to move images between folders +- Use `dataset_image_extractor.py` to remove the distinction of train/valid/test if it doesn't matter +### Validating Dataset/Autodistill +- Use `draw_detections.py` and run through the images to make sure the detections look reasonable +- Use `img_printer.py` to view a single image (because I don't like windows) +### Image Modifications (NEEDS TESTING) +- Most useful modifications are Shear, Rotate, Mosaic (always run mosaic), Translate, Scale, Blur, Noise + - Noise isn't necessary for better cameras, should check what the actual feed looks like + - Same for blur +- Don't go overboard, especially if the dataset is small; it can cause overfitting +- Decision for grayscale: uniformly shaped objects could probably use grayscale, which will be much faster. Otherwise, color can help a fair amount. 
Alternative (UNTESTED): Use gray detection with manual pixel color verification +## Training +### GPU options +- Free: Google Colab or Kaggle + - Google Colab free is insanely slow, and you also need to keep the browser tab open + - Kaggle has not been tried yet +- If you have a PC with a reasonable graphics card, use that. Ex: a 4070 made a usable coral detector with 150 epochs on several thousand images in ~1 hour + - Run `gpu_specs_inspector.py` to check +### Running the Training +- Run the `v2_train.py` script to start training +- Make sure to write the output to a unique location, and save every n epochs to prevent random chance from deleting your progress +- Run `onnx_exporter.py` to convert pt binary to ONNX format with NMS ENABLED + - It is extremely important that the binary be in NMS format, otherwise it is difficult to work with. Agents love to assume that NMS isn't enabled. +## Validation (NEEDS IMPROVEMENT) +- After receiving the pt binary, test it using `pt_tester.py` + - Can also run `onnx_tester.py`, but I don't remember if this works +- On the device that is actually running the model (should be Jetson Orin): + - Run: `/usr/src/tensorrt/bin/trtexec --onnx=<model>.onnx --saveEngine=<model>.engine --fp16` + - Either `claude_validation.py` or `test.py` or `simple_test.py` or `old_model_tester.py` (all of them are sketchy and need testing) + + - Note: The validators often perform differently on onnx/pt (running using simple YOLO python functions) vs .engine (running in C++). 
class CLIPReranker:
    """Second-opinion classifier that scores detection crops with CLIP.

    A crop is compared against one positive caption (built from
    ``constants.OBJECT``) and a fixed set of distractor captions. The crop is
    kept when the positive caption scores at least as high as the best
    distractor.
    """

    # Distractor captions. Kept as an explicit tuple with one entry per line:
    # a missing comma between two adjacent string literals silently merges
    # them into a single prompt.
    NEGATIVE_PROMPTS = (
        "a space heater",
        "a rock",
        "a stone",
        "a metal cylinder",
        "a round household object",
        "a tree",
    )

    def __init__(self, device="cuda"):
        """Load the CLIP model and pre-compute the text embeddings.

        Args:
            device: Torch device string. When "cuda" is requested but no CUDA
                device is available, the model falls back to "cpu".
        """
        if device == "cuda" and not torch.cuda.is_available():
            device = "cpu"
        self.device = device

        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            "ViT-B-32", pretrained="openai"
        )
        self.model.to(self.device)
        self.model.eval()

        self.positive_prompts = ["a " + constants.OBJECT]
        self.negative_prompts = list(self.NEGATIVE_PROMPTS)

        # Text features never change, so encode them once up front.
        # Positive prompts come first; score_crop relies on that ordering.
        with torch.no_grad():
            self.text_features = self._encode_text(
                self.positive_prompts + self.negative_prompts
            )

    def _encode_text(self, prompts):
        """Return L2-normalized CLIP text embeddings for ``prompts``."""
        tokens = open_clip.tokenize(prompts).to(self.device)
        text_features = self.model.encode_text(tokens)
        return text_features / text_features.norm(dim=-1, keepdim=True)

    def score_crop(self, image_crop: "Image.Image"):
        """Score one crop against all prompts.

        Args:
            image_crop: PIL image of the detection crop.

        Returns:
            ``(pos_score, neg_score)``: the highest softmax probability among
            the positive prompts and among the negative prompts.
        """
        image_tensor = self.preprocess(image_crop).unsqueeze(0).to(self.device)

        with torch.no_grad():
            image_features = self.model.encode_image(image_tensor)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            # Cosine similarity between the crop and every prompt. Softmax is
            # monotonic, so the pos/neg comparison depends only on ordering.
            logits = (image_features @ self.text_features.T).squeeze(0)
            probs = logits.softmax(dim=0)

        pos_count = len(self.positive_prompts)
        pos_score = probs[:pos_count].max().item()
        neg_score = probs[pos_count:].max().item()

        return pos_score, neg_score

    def is_positive(self, image_crop):
        """Return True when the positive score is non-zero and beats every distractor."""
        pos, neg = self.score_crop(image_crop)
        return pos > 0 and pos >= neg
# ---------------------------------------------------------------------------
# Count-based sanity check of an NMS-enabled ONNX detector on the validation
# split. For each image the number of predicted boxes is compared with the
# number of ground-truth boxes; no IoU matching is done, so the reported
# precision/recall are only a rough indicator.
# ---------------------------------------------------------------------------

# Config
# data.yaml sits in the dataset root. (Appending "data.yaml" to MODEL_PATH,
# which is the path of the .onnx file itself, never names a real file.)
YAML_PATH = Path(constants.DATASET) / "data.yaml"
# Rows with confidence at or below this value are ignored. 0.0 only drops the
# zero-filled rows a fixed-size NMS output is padded with.
MIN_CONFIDENCE = 0.0

if YAML_PATH.exists():
    with open(YAML_PATH) as f:
        data = yaml.safe_load(f)
else:
    # The parsed metadata is not needed for the counts below.
    print(f"WARNING: {YAML_PATH} not found, continuing without dataset metadata")
    data = {}

val_path = Path(constants.DATASET + "valid/images")
img_files = sorted(val_path.glob('*.jpg')) + sorted(val_path.glob('*.png'))

print(f"YAML path: {YAML_PATH}")
print(f"Val path: {val_path}")
print(f"Val path exists: {val_path.exists()}")
print(f"Images found: {len(img_files)}")
if not img_files:
    print("ERROR: No images found!")
    raise SystemExit(1)
print()

session = ort.InferenceSession(constants.MODEL_PATH)
model_input = session.get_inputs()[0]
input_name = model_input.name
# Index 2 of the input shape is used as the square side length.
img_size = model_input.shape[2]

print(f"Input shape:{img_size}")

# Metrics
tp = fp = fn = 0

for img_file in img_files:
    # Load image; cv2.imread returns None instead of raising on failure.
    img = cv2.imread(str(img_file))
    if img is None:
        print(f"Skipping unreadable image: {img_file}")
        continue

    img_resized = cv2.resize(img, (img_size, img_size))
    # OpenCV decodes to BGR; model_tester.py feeds the same model RGB, so
    # convert here as well to keep both validators consistent.
    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    img_norm = img_rgb.astype(np.float32) / 255.0
    img_input = np.transpose(img_norm, (2, 0, 1))[None, ...]

    # Inference
    outputs = session.run(None, {input_name: img_input})[0]
    print(f"Output shape: {outputs.shape}")

    # outputs[0] drops the batch dimension; each remaining row is unpacked as
    # (x1, y1, x2, y2, conf, cls), the layout model_tester.py also assumes.
    # len(outputs) would count the batch size, not the detections.
    detections = [det for det in outputs[0] if det[4] > MIN_CONFIDENCE]
    num_preds = len(detections)

    img_draw = img_resized.copy()
    for det in detections:
        print(f"Detection: {det}")
        x1, y1, x2, y2 = int(det[0]), int(det[1]), int(det[2]), int(det[3])
        cv2.rectangle(img_draw, (x1, y1), (x2, y2), (0, 255, 0), 2)
    # One window refresh / key press per image.
    cv2.imshow('Detections', img_draw)
    cv2.waitKey(0)

    # Load ground truth
    label_file = img_file.parent.parent / 'labels' / img_file.name
    label_file = label_file.with_suffix('.txt')
    if not label_file.exists():
        print(f"Label not found: {label_file}")
        raise SystemExit(1)
    with open(label_file) as f:
        # Blank lines are not annotations.
        num_gt = sum(1 for line in f if line.strip())

    # Count matches (assume model output is correct)
    matched = min(num_preds, num_gt)
    tp += matched
    fp += num_preds - matched
    fn += num_gt - matched

cv2.destroyAllWindows()

print(f"True Positives: {tp}")
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")
print(f"True Negatives: N/A (object detection)")
print(f"\nPrecision: {tp/(tp+fp) if tp+fp > 0 else 0:.3f}")
print(f"Recall: {tp/(tp+fn) if tp+fn > 0 else 0:.3f}")
def show_comparison(original_img, transformed_img):
    """Open a matplotlib window with the source image and its recolored version.

    The two images are drawn next to each other in a single figure. If an
    ImportError is raised (matplotlib is imported lazily here), a hint is
    printed instead and nothing is shown.
    """
    try:
        import matplotlib.pyplot as plt

        _, axes = plt.subplots(1, 2, figsize=(12, 6))
        panels = (('Original', original_img), ('Transformed', transformed_img))

        # Left panel first, then right panel.
        for axis, (caption, picture) in zip(axes, panels):
            axis.imshow(picture)
            axis.set_title(caption, fontsize=14, fontweight='bold')
            axis.axis('off')

        plt.tight_layout()
        plt.show()

    except ImportError:
        print("\nWarning: matplotlib not installed. Cannot display comparison.")
        print("Install with: pip install matplotlib")
def replace_color(img_array, src_rgb, target_rgb, tolerance=30):
    """
    Replace shades of source color with corresponding shades of target color.

    Every pixel is converted to HSV. Pixels whose hue lies within
    ``tolerance`` of the source hue (and whose saturation is above 0.1) get
    the target hue, and their saturation is rescaled by the target/source
    saturation ratio. Brightness (V) is never changed, so shading is kept.
    When the source color is itself nearly gray (saturation < 0.1), all
    nearly gray pixels are selected instead. When the target is nearly gray,
    selected pixels are fully desaturated.

    Args:
        img_array: numpy array of image (H, W, 3 or 4); not modified
        src_rgb: source color as RGB tuple (0-255)
        target_rgb: target color as RGB tuple (0-255)
        tolerance: hue matching tolerance in degrees on the 360-degree hue
            circle (it is divided by 360 internally); higher = more
            permissive, 0 matches nothing

    Returns:
        New uint8 array of the same shape. Channels beyond the first three
        (e.g. alpha) are passed through unchanged.
    """
    # Convert to float for processing
    result = img_array.astype(float)

    # Hue/saturation of source and target; colorsys works on 0-1 floats and
    # returns the hue in 0-1 as well.
    src_h, src_s, _ = colorsys.rgb_to_hsv(*(c / 255 for c in src_rgb))
    target_h, target_s, _ = colorsys.rgb_to_hsv(*(c / 255 for c in target_rgb))

    # Vectorized RGB to HSV conversion
    rgb_img = result[:, :, :3] / 255.0
    r, g, b = rgb_img[:, :, 0], rgb_img[:, :, 1], rgb_img[:, :, 2]
    maxc = np.maximum(np.maximum(r, g), b)
    minc = np.minimum(np.minimum(r, g), b)
    v = maxc

    deltac = maxc - minc
    # Saturation is 0 for black pixels; dividing only where maxc != 0 avoids
    # the 0/0 runtime warning.
    s = np.divide(deltac, maxc, out=np.zeros_like(maxc), where=maxc != 0)

    # Hue in sixths of the circle, depending on which channel is the maximum
    h = np.zeros_like(maxc)

    mask_r = (maxc == r) & (deltac != 0)
    h[mask_r] = ((g[mask_r] - b[mask_r]) / deltac[mask_r]) % 6

    mask_g = (maxc == g) & (deltac != 0)
    h[mask_g] = ((b[mask_g] - r[mask_g]) / deltac[mask_g]) + 2

    mask_b = (maxc == b) & (deltac != 0)
    h[mask_b] = ((r[mask_b] - g[mask_b]) / deltac[mask_b]) + 4

    h = h / 6.0  # Normalize to 0-1

    # Select the pixels to recolor. Hue is circular, so use the shorter arc.
    hue_diff = np.minimum(np.abs(h - src_h), 1 - np.abs(h - src_h))
    hue_tolerance = tolerance / 360.0

    if src_s < 0.1:
        # Grayscale source has no meaningful hue: match other grayscale pixels
        mask = s < 0.1
    else:
        mask = (hue_diff < hue_tolerance) & (s > 0.1)

    if target_s < 0.1:  # Target is grayscale
        s[mask] = 0
        h[mask] = 0
    else:
        h[mask] = target_h
        # Scale saturation towards the target; max() guards the division
        s[mask] = np.clip(s[mask] * (target_s / max(src_s, 0.1)), 0, 1)

    # Vectorized HSV to RGB conversion
    sector = (h * 6.0).astype(int)
    f = h * 6.0 - sector
    p = v * (1.0 - s)
    q = v * (1.0 - f * s)
    t = v * (1.0 - (1.0 - f) * s)
    sector = sector % 6

    # Standard HSV sector table: which of v/p/q/t feeds each channel
    r_out = np.choose(sector, [v, q, p, p, t, v])
    g_out = np.choose(sector, [t, v, v, q, p, p])
    b_out = np.choose(sector, [p, p, t, v, v, q])

    # Round to the nearest level before the uint8 cast. A plain cast
    # truncates, so a round-trip error of 1e-13 (e.g. 199.9999999)
    # would darken an untouched pixel by one level.
    rgb_out = np.stack([r_out, g_out, b_out], axis=-1) * 255.0
    result[:, :, :3] = np.clip(np.rint(rgb_out), 0, 255)

    return result.astype(np.uint8)
def process_yolo_dataset(input_dir, output_dir, src_color, target_color, tolerance):
    """Process a YOLO format dataset, preserving structure and copying labels.

    Expected layout: ``<input_dir>/data.yaml`` plus one folder per split, each
    with ``images/`` and ``labels/`` sub-folders. Every image is recolored via
    ``process_image`` and written as ``<stem>.png`` into the matching output
    split. Label files are copied unchanged, and so are loose files in the
    dataset root.

    Args:
        input_dir: Root of the source dataset.
        output_dir: Root of the dataset to create.
        src_color: Source color as RGB tuple (0-255).
        target_color: Target color as RGB tuple (0-255).
        tolerance: Color matching tolerance forwarded to ``process_image``.

    Returns:
        False when ``data.yaml`` is missing (the caller then falls back to
        plain directory mode), True otherwise.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    # Check for data.yaml
    data_yaml = input_path / 'data.yaml'
    if not data_yaml.exists():
        print(f"Warning: data.yaml not found in {input_path}")
        print("Proceeding as a standard directory...")
        return False

    # Create output directory
    output_path.mkdir(parents=True, exist_ok=True)

    # Copy data.yaml
    shutil.copy2(data_yaml, output_path / 'data.yaml')
    print(f"Copied: data.yaml")

    # Parse data.yaml so a malformed file fails here; the parsed content is
    # not used further.
    with open(data_yaml, 'r') as f:
        yaml.safe_load(f)

    # The other scripts in this toolkit name the validation split "valid";
    # "val" is kept as well so datasets using that name still work.
    splits = ['train', 'valid', 'val', 'test']
    image_suffixes = {'.jpg', '.jpeg', '.png'}
    total_images = 0
    processed_images = 0

    for split in splits:
        split_path = input_path / split
        if not split_path.exists():
            continue

        print(f"\nProcessing {split} split...")

        # Create output split directories
        output_split_images = output_path / split / 'images'
        output_split_labels = output_path / split / 'labels'
        output_split_images.mkdir(parents=True, exist_ok=True)
        output_split_labels.mkdir(parents=True, exist_ok=True)

        images_dir = split_path / 'images'
        labels_dir = split_path / 'labels'

        if not images_dir.exists():
            continue

        # Match on the lower-cased suffix rather than several case-variant
        # globs: on a case-insensitive filesystem '*.jpg' and '*.JPG' return
        # the same file twice.
        image_files = sorted(
            p for p in images_dir.iterdir()
            if p.is_file() and p.suffix.lower() in image_suffixes
        )
        total_images += len(image_files)

        for img_file in image_files:
            # Keep the full original name, just change extension to .png
            output_img = output_split_images / (img_file.stem + '.png')

            if process_image(img_file, output_img, src_color, target_color, tolerance):
                processed_images += 1

            # Labels are matched by the original image stem
            label_file = labels_dir / f"{img_file.stem}.txt"
            if label_file.exists():
                shutil.copy2(label_file, output_split_labels / label_file.name)

    # Copy any additional files in the root directory
    for item in input_path.iterdir():
        if item.is_file() and item.name != 'data.yaml':
            shutil.copy2(item, output_path / item.name)
            print(f"Copied: {item.name}")

    print(f"\n{'='*60}")
    print(f"YOLO Dataset Processing Complete!")
    print(f"Total images processed: {processed_images}/{total_images}")
    print(f"Output saved to: {output_path}")
    print(f"{'='*60}")

    return True
help='Color matching tolerance 0-100 (default: 30)') + parser.add_argument('--compare', '-c', action='store_true', + help='Show original and transformed images side-by-side (only for single images)') + parser.add_argument('--yolo', action='store_true', + help='Process as YOLO dataset (preserves structure and copies labels)') + parser.add_argument('--extensions', nargs='+', + default=['.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff'], + help='Image file extensions to process in directory mode (default: .png .jpg .jpeg .bmp .gif .tiff)') + + args = parser.parse_args() + + # Parse colors + try: + src_color = parse_color(args.src) + target_color = parse_color(args.target) + except ValueError as e: + print(f"Error: {e}") + return 1 + + print(f"Replacing {args.src} {src_color} with {args.target} {target_color}...") + + input_path = Path(args.input) + + # Check if input is a directory + if input_path.is_dir(): + # Check if YOLO mode is requested or if it looks like a YOLO dataset + if args.yolo or (input_path / 'data.yaml').exists(): + output_dir = Path(args.output) if args.output else Path(f'{input_path.name}_colorshifted') + if process_yolo_dataset(input_path, output_dir, src_color, target_color, args.tolerance): + return 0 + # If YOLO processing failed, fall through to regular directory mode + + # Regular directory mode + output_dir = Path(args.output) if args.output else Path('output_dir') + output_dir.mkdir(parents=True, exist_ok=True) + + # Find all image files + image_files = [] + for ext in args.extensions: + image_files.extend(input_path.glob(f'*{ext}')) + image_files.extend(input_path.glob(f'*{ext.upper()}')) + + if not image_files: + print(f"No image files found in {input_path}") + print(f"Looking for extensions: {args.extensions}") + return 1 + + print(f"Found {len(image_files)} image(s) to process") + + success_count = 0 + for img_file in image_files: + output_file = output_dir / img_file.stem + output_file = output_file.with_suffix('.png') + if 
# Shared settings for the model testing/validation scripts.

# Path of the exported ONNX detector; passed straight to
# onnxruntime.InferenceSession by the scripts that import this module.
MODEL_PATH="../runs/detect/rebuilt/weights/best.onnx"
# Dataset root. Scripts build split paths by plain string concatenation
# (e.g. DATASET + "train/images"), so the trailing slash is required.
DATASET="datasets/rebuilt_balls/"
def load_yolo_labels(label_path):
    """
    Reads a YOLO-format label file.

    Each valid line holds five whitespace-separated numbers:
    ``class_id x_center y_center width height``. Lines with a different
    number of tokens, or with tokens that are not numbers, are skipped so a
    single bad annotation does not abort the whole viewer. A class id written
    as a float (e.g. ``1.0``) is accepted and converted to int.

    Args:
        label_path: Path of the ``.txt`` label file.

    Returns:
        A list of (class_id, x_center, y_center, width, height); empty when
        the file does not exist.
    """
    boxes = []
    if not os.path.exists(label_path):
        return boxes

    with open(label_path, "r") as f:
        for line in f:
            parts = line.split()
            if len(parts) != 5:
                continue
            try:
                # int(float(...)) accepts both "1" and "1.0"
                class_id = int(float(parts[0]))
                x, y, w, h = map(float, parts[1:])
            except (ValueError, OverflowError):
                # Non-numeric token, or nan/inf as class id: skip the line
                continue
            boxes.append((class_id, x, y, w, h))

    return boxes
def main():
    """Command-line entry point: parse the options and open the box viewer.

    ``--dataset`` (required) is the dataset root and ``--split`` selects one
    of train/valid/test (default: train). Both are handed to
    ``visualize_split``.
    """
    cli = argparse.ArgumentParser(description="Visualize YOLO bounding boxes with OpenCV")

    # Flag -> add_argument settings, registered in this order.
    options = {
        "--dataset": dict(
            required=True,
            help="Path to dataset root (contains train/valid/test folders)",
        ),
        "--split": dict(
            default="train",
            choices=["train", "valid", "test"],
            help="Dataset split to visualize",
        ),
    }
    for flag, settings in options.items():
        cli.add_argument(flag, **settings)

    parsed = cli.parse_args()
    visualize_split(parsed.dataset, parsed.split)
class ONNXDetector:
    """ONNX-based object detection library.

    Wraps an onnxruntime session for a detector whose output rows are
    ``(x1, y1, x2, y2, confidence, class_id)`` in letterboxed input
    coordinates, and maps the boxes back to the original image.
    """

    def __init__(self, model_path: str):
        """
        Initialize the ONNX detector.

        Args:
            model_path: Path to the ONNX model file
        """
        self.model_path = model_path

        # Load ONNX session
        self.session = ort.InferenceSession(model_path)
        model_input = self.session.get_inputs()[0]
        self.input_name = model_input.name
        # Index 2 of the input shape is used as the square side length.
        self.img_size = model_input.shape[2]

    def preprocess_image(self, img_rgb: np.ndarray) -> Tuple[np.ndarray, np.ndarray, dict]:
        """
        Preprocess image with letterbox resizing.

        The image is scaled to fit inside an ``img_size`` square without
        changing its aspect ratio and centered on a gray (114) canvas.

        Args:
            img_rgb: Input image in RGB format

        Returns:
            Tuple of (input tensor of shape (1, 3, img_size, img_size) with
            values in 0-1, padded uint8 image, preprocessing metadata). The
            metadata holds ``scale``, ``top``, ``left``, ``orig_h`` and
            ``orig_w``.
        """
        orig_h, orig_w = img_rgb.shape[:2]

        # Calculate scaling factor
        scale = min(self.img_size / orig_h, self.img_size / orig_w)
        new_h, new_w = int(orig_h * scale), int(orig_w * scale)

        # Resize image
        img_resized = cv2.resize(img_rgb, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Create padded image (letterbox)
        img_padded = np.full((self.img_size, self.img_size, 3), 114, dtype=np.uint8)

        # Calculate padding offsets
        top = (self.img_size - new_h) // 2
        left = (self.img_size - new_w) // 2

        # Place resized image in center
        img_padded[top:top+new_h, left:left+new_w] = img_resized

        # Normalize and transpose HWC -> CHW, then add the batch dimension
        img_norm = img_padded.astype(np.float32) / 255.0
        img_input = np.transpose(img_norm, (2, 0, 1))[None, ...]

        # Store metadata for postprocessing
        metadata = {
            'scale': scale,
            'top': top,
            'left': left,
            'orig_h': orig_h,
            'orig_w': orig_w
        }

        return img_input, img_padded, metadata

    def detect(self, img_rgb: np.ndarray) -> List[Tuple[int, int, int, int, float, int]]:
        """
        Run detection on an image.

        Args:
            img_rgb: Input image in RGB format

        Returns:
            List of detections, each as (x1, y1, x2, y2, confidence, class_id)
        """
        # Preprocess (the padded image is only needed for debugging)
        img_input, _, metadata = self.preprocess_image(img_rgb)

        # Inference
        outputs = self.session.run(None, {self.input_name: img_input})[0]

        # Postprocess
        return self._postprocess(outputs, metadata)

    def _postprocess(self, outputs: np.ndarray, metadata: dict) -> List[Tuple[int, int, int, int, float, int]]:
        """
        Postprocess model outputs to get final detections.

        Rows with a confidence of 0 or less are dropped. This removes the
        zero-filled padding rows before coordinates are un-letterboxed,
        because afterwards a padding row no longer has x1 == x2 == 0 whenever
        the image was padded on the left.

        Args:
            outputs: Raw model outputs, indexed as [batch][row][6]
            metadata: Preprocessing metadata

        Returns:
            List of detections in original image coordinates
        """
        detections = outputs[0]  # Remove batch dimension

        valid_detections = []
        scale = metadata['scale']
        top = metadata['top']
        left = metadata['left']

        for det in detections:
            x1, y1, x2, y2, conf, cls = det

            if conf <= 0:
                continue

            # Convert coordinates back to original image space
            x1 = (x1 - left) / scale
            y1 = (y1 - top) / scale
            x2 = (x2 - left) / scale
            y2 = (y2 - top) / scale

            valid_detections.append(
                (int(x1), int(y1), int(x2), int(y2), float(conf), int(cls))
            )

        return valid_detections

    def draw_detections(self, img_bgr: np.ndarray, detections: List[Tuple],
                       color: Tuple[int, int, int] = (0, 255, 0),
                       thickness: int = 3):
        """
        Draw bounding boxes on image.

        The boxes are drawn in place on ``img_bgr``, each annotated with its
        confidence.

        Args:
            img_bgr: Input image in BGR format (for cv2)
            detections: List of detections from detect()
            color: BGR color tuple for boxes
            thickness: Line thickness for boxes

        Returns:
            Image with drawn detections (the same array as ``img_bgr``)
        """
        for x1, y1, x2, y2, conf, cls in detections:
            # Kept for callers that pass unfiltered rows
            if x1 == 0 and x2 == 0:
                continue
            cv2.rectangle(img_bgr, (x1, y1), (x2, y2), color, thickness)

            label = f"{conf:.2f}"
            cv2.putText(img_bgr, label, (x1, max(y1-10, 0)),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        return img_bgr
def move_split(source_root: Path, target_root: Path, split: str):
    """Move the labeled images of one split from the source into the target.

    Looks in ``<source_root>/<split>/images`` for files whose extension is in
    ``IMAGE_EXTS`` and moves each one, together with its
    ``<source_root>/<split>/labels/<stem>.txt`` label, into the matching
    folders under ``target_root`` (created if needed).

    Images without a label are skipped. A pair is also skipped when the target
    already holds an image or label of the same name, because ``shutil.move``
    would otherwise replace the existing file without warning.

    Args:
        source_root: Root of the dataset to take files from.
        target_root: Root of the dataset to move files into.
        split: Split folder name, e.g. ``"train"``.
    """
    src_img_dir = source_root / split / "images"
    src_lbl_dir = source_root / split / "labels"

    tgt_img_dir = target_root / split / "images"
    tgt_lbl_dir = target_root / split / "labels"

    if not src_img_dir.exists():
        print(f"[WARN] Source split missing: {src_img_dir}")
        return

    tgt_img_dir.mkdir(parents=True, exist_ok=True)
    tgt_lbl_dir.mkdir(parents=True, exist_ok=True)

    # Snapshot the listing first: files are moved out of this folder as we go.
    for img_path in sorted(src_img_dir.iterdir()):
        if img_path.suffix.lower() not in IMAGE_EXTS:
            continue

        label_path = src_lbl_dir / (img_path.stem + ".txt")

        if not label_path.exists():
            print(f"[SKIP] Missing label for {img_path.name}")
            continue

        tgt_img_path = tgt_img_dir / img_path.name
        tgt_lbl_path = tgt_lbl_dir / label_path.name

        # Never clobber data that is already part of the target dataset.
        if tgt_img_path.exists() or tgt_lbl_path.exists():
            print(f"[SKIP] {img_path.name} already exists in target, not overwriting")
            continue

        shutil.move(str(img_path), str(tgt_img_path))
        shutil.move(str(label_path), str(tgt_lbl_path))
@dataclass
class Track:
    """Box history and simple motion estimate for one tracked object.

    Velocities and acceleration are in pixels per frame, derived from the
    centers of the two most recent boxes in ``history``.
    """

    # Most recent boxes of this object, oldest first
    history: Deque["BoundingBox"]
    # Frames since a detection was last appended to ``history``
    missed_frames: int = 0
    # -1 is only a placeholder until update_physics() has seen two boxes
    x_vel: float = -1
    y_vel: float = -1
    y_accel: float = -1
    # Prediction cached by update_physics(); None until two boxes exist
    predicted_next: Optional["BoundingBox"] = None

    @property
    def last(self) -> "BoundingBox":
        """Return the most recently recorded box."""
        return self.history[-1]

    @property
    def cls(self) -> int:
        """Return the class id of the most recent box."""
        return self.last.cls

    def predicted_next_pos(self) -> Optional["BoundingBox"]:
        """Extrapolate the last box to where the object should be next.

        Returns:
            The last box shifted by the current velocity estimate over
            ``missed_frames + 1`` frames, or None when fewer than two boxes
            have been recorded (no velocity estimate exists yet).
        """
        # The original tested the bound method itself, which is always truthy.
        if len(self.history) < 2:
            return None
        dt = self.missed_frames + 1
        y_vel_pred = self.y_vel + self.y_accel * self.missed_frames
        shift_x = int(self.x_vel * dt)
        shift_y = int(y_vel_pred * dt)
        box = self.last
        return BoundingBox(
            box.x1 + shift_x,
            box.y1 + shift_y,
            box.x2 + shift_x,
            box.y2 + shift_y,
            self.cls,
        )

    def update_physics(self):
        """Refresh velocity, acceleration and the cached next-box prediction.

        Does nothing until at least two boxes are recorded.
        """
        samples = len(self.history)
        if samples < 2:
            return

        newest, previous = self.history[-1].center, self.history[-2].center
        measured_x = newest[0] - previous[0]
        measured_y = newest[1] - previous[1]

        if samples == 2:
            # First measurement: take it as-is instead of blending it with the
            # -1 placeholders, which would also fake an initial acceleration.
            self.x_vel = float(measured_x)
            self.y_vel = float(measured_y)
            self.y_accel = 0.0
        else:
            # Exponential smoothing; the weight of the old estimate grows with
            # the history length up to the cap.
            prev_vel_weight_x: float = min(0.5, (samples - 1) / 20)  # TODO tune
            # Cap of 0.0 means y velocity is currently not smoothed at all
            prev_vel_weight_y: float = min(0.0, (samples - 1) / 20)  # TODO tune
            old_y_vel = self.y_vel
            self.x_vel = prev_vel_weight_x * self.x_vel + (1 - prev_vel_weight_x) * measured_x
            self.y_vel = prev_vel_weight_y * self.y_vel + (1 - prev_vel_weight_y) * measured_y
            prev_accel_weight: float = min(0.5, (samples - 2) / 20)
            self.y_accel = prev_accel_weight * self.y_accel + (1 - prev_accel_weight) * (self.y_vel - old_y_vel)

        self.predicted_next = self.predicted_next_pos()
def run_tracking():
    """Track detections through a video clip and show each annotated frame.

    Per frame: run the detector, match every detection to an existing track of
    the same class, start a new track for unmatched detections, drop tracks
    that missed more than three frames, then draw the raw detections, the
    predicted boxes (red) and each track's center trail (green).

    Matching rule for a detection:
      * tracks that have a predicted box compete on center distance to that
        prediction, which must be below ``MIN_DIST``;
      * tracks without a prediction compete on IoU with their last box, which
        must exceed ``MIN_IOU``;
      * a distance match wins over an IoU match.

    Press any key to advance one frame, ``q`` to stop.
    """
    # Upper bound on center distance for a distance match.
    # NOTE(review): -1 can never be satisfied, so distance matching is
    # effectively off until this is tuned.
    MIN_DIST = -1  # TODO tune
    MIN_IOU = 0.1  # TODO tune

    detector = ONNXDetector(constants.MODEL_PATH)

    cap = cv2.VideoCapture("datasets/videos/480psnipped.mp4")

    object_position_histories: list[Track] = []

    counter = 0

    try:
        while True:
            counter += 1
            ok, frame = cap.read()
            if not ok:
                print("Done")
                break
            print(f"Frame {counter}\tObjects: {len(object_position_histories)}")

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            raw_detections = detector.detect(rgb)
            detections = parse_detections(raw_detections)

            # Only tracks that existed before this frame can be matched
            num_possible_objects = len(object_position_histories)
            claimed = [False] * num_possible_objects

            # Show where each track was expected to be in this frame
            for object_pos_history in object_position_histories:
                next_pos_pred = object_pos_history.predicted_next
                if not next_pos_pred:
                    continue
                cv2.rectangle(frame, (next_pos_pred.x1, next_pos_pred.y1),
                              (next_pos_pred.x2, next_pos_pred.y2), (0, 0, 255), 4)

            # Assume a miss; a successful match below resets the counter
            for tracked in object_position_histories:
                tracked.missed_frames += 1

            for box in detections:
                if box.area == 0:
                    continue

                closest_obj_index_by_dist: Optional[int] = None
                closest_obj_index_by_iou: Optional[int] = None
                largest_iou = MIN_IOU
                lowest_dist = 100000
                for i in range(num_possible_objects):
                    candidate = object_position_histories[i]
                    if claimed[i] or candidate.cls != box.cls:
                        continue
                    predicted_next = candidate.predicted_next
                    if not predicted_next:
                        iou = compute_iou(box, candidate.last)
                        if iou > largest_iou:
                            largest_iou = iou
                            closest_obj_index_by_iou = i
                    else:
                        distance = math.hypot(box.center[0] - predicted_next.center[0],
                                              box.center[1] - predicted_next.center[1])
                        if distance < MIN_DIST and distance < lowest_dist:
                            lowest_dist = distance
                            closest_obj_index_by_dist = i

                if closest_obj_index_by_dist is not None:
                    closest_obj_index = closest_obj_index_by_dist
                else:
                    closest_obj_index = closest_obj_index_by_iou

                if closest_obj_index is not None:
                    matched = object_position_histories[closest_obj_index]
                    matched.history.append(box)
                    matched.missed_frames = 0
                    claimed[closest_obj_index] = True
                else:
                    # No existing track fits: start a new one from this box
                    object_position_histories.append(Track(deque(maxlen=15), 0))
                    object_position_histories[-1].history.append(box)
                    claimed.append(True)

            detector.draw_detections(frame, raw_detections)

            # Forget tracks that have gone unmatched for too long
            object_position_histories = [x for x in object_position_histories if x.missed_frames <= 3]

            for object_pos_history in object_position_histories:
                object_pos_history.update_physics()
                centers = [b.center for b in object_pos_history.history]
                # Trail through the recorded centers (nothing to draw for one point)
                for old_center, new_center in zip(centers, centers[1:]):
                    cv2.line(frame, old_center, new_center, (0, 255, 0), 4)

            cv2.imshow("Test", frame)
            if cv2.waitKey(0) == ord('q'):
                break
    finally:
        # Release the video handle and the window even if a frame fails
        cap.release()
        cv2.destroyAllWindows()
interpolation=cv2.INTER_LINEAR) + + # Create padded image (letterbox) + img_padded = np.full((img_size, img_size, 3), 114, dtype=np.uint8) + + # Calculate padding offsets + top = (img_size - new_h) // 2 + left = (img_size - new_w) // 2 + + # Place resized image in center + img_padded[top:top+new_h, left:left+new_w] = img_resized + + # Normalize and transpose + img_norm = img_padded.astype(np.float32) / 255.0 + img_input = np.transpose(img_norm, (2, 0, 1))[None, ...] + + # Inference + outputs = session.run(None, {input_name: img_input})[0] + + # Prepare image for drawing (use BGR for cv2.imshow) + img_draw = img_bgr.copy() + + # Remove batch dimension + detections = outputs[0] # Now shape is (300, 6) + + num_valid = 0 + for det in detections: + x1, y1, x2, y2, conf, cls = det + + # Filter by confidence threshold + if conf > CONF_THRESHOLD: + num_valid += 1 + + # Convert coordinates back to original image space + # Remove padding offset + x1 = (x1 - left) / scale + y1 = (y1 - top) / scale + x2 = (x2 - left) / scale + y2 = (y2 - top) / scale + + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + + cv2.rectangle(img_draw, (x1, y1), (x2, y2), (0, 255, 0), 6) + + label = f"{conf:.2f}" + cv2.putText(img_draw, label, (x1, max(y1-10, 0)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Resize for display while maintaining aspect ratio + display_h, display_w = img_draw.shape[:2] + display_scale = min(DISPLAY_SIZE / display_h, DISPLAY_SIZE / display_w) + + new_display_w = int(display_w * display_scale) + new_display_h = int(display_h * display_scale) + + img_display = cv2.resize(img_draw, (new_display_w, new_display_h), interpolation=cv2.INTER_LINEAR) + + cv2.imshow('Detections', img_display) + key = cv2.waitKey(0) + if key == ord('q'): + break + +cv2.destroyAllWindows() diff --git a/scripts/gamepiece/onnx_exporter.py b/scripts/gamepiece/onnx_exporter.py new file mode 100644 index 00000000..1a3e8cee --- /dev/null +++ b/scripts/gamepiece/onnx_exporter.py @@ -0,0 
def main():
    """Export the trained ``best.pt`` checkpoint to ONNX with NMS enabled."""
    # Checkpoint produced by the training run
    weights_file = "/home/yasen/runs/detect/rebuilt/weights/best.pt"

    export_options = {
        "format": "onnx",
        "simplify": True,        # simplify the exported graph
        "dynamic": False,        # fixed input size; True allows variable sizes
        "imgsz": 640,            # must match the training size
        "device": 0,             # first GPU
        "name": "rebuilt.onnx",
        "nms": True,             # export with NMS enabled
    }

    detector = YOLO(weights_file)
    export_path = detector.export(**export_options)

    print(f"ONNX model exported to: {export_path}")
# Config
MODEL_PATH = "best.onnx"
IMAGE_PATH = "YellowBall.jpg"  # Change this to your image path
CONF_THRESHOLD = 0.25  # Detections at or below this confidence are ignored

# Load model
session = ort.InferenceSession(MODEL_PATH)
input_name = session.get_inputs()[0].name
input_shape = session.get_inputs()[0].shape
img_size = input_shape[2]

print(f"Input shape: {img_size}")

# Load image
img_file = Path(IMAGE_PATH)
if not img_file.exists():
    print(f"ERROR: Image not found at {IMAGE_PATH}")
    exit(1)

img = cv2.imread(str(img_file))
if img is None:
    # imread returns None instead of raising when the file cannot be decoded
    print(f"ERROR: Could not read image at {IMAGE_PATH}")
    exit(1)

# Plain (non-letterbox) resize: boxes come back in resized-image pixels,
# which is also the image they are drawn on below.
img_resized = cv2.resize(img, (img_size, img_size))
# OpenCV loads BGR; feed the model RGB
img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
img_norm = img_rgb.astype(np.float32) / 255.0
img_input = np.transpose(img_norm, (2, 0, 1))[None, ...]

# Inference
outputs = session.run(None, {input_name: img_input})[0]

# Flatten the batch axis so every row is one candidate detection.
# NOTE(review): assumes rows are (x1, y1, x2, y2, conf, cls) as produced by
# an NMS-enabled export - confirm for the model under test.
detections = outputs.reshape(-1, outputs.shape[-1])
kept = [det for det in detections if det[4] > CONF_THRESHOLD]
# Count real detections, not the batch dimension
num_preds = len(kept)

img_draw = img_resized.copy()

print(f"Output shape: {outputs.shape}")
print(f"Number of predictions: {num_preds}")
print()

for det in kept:
    print(f"Detection: {det}")
    x1, y1, x2, y2 = int(det[0]), int(det[1]), int(det[2]), int(det[3])
    cv2.rectangle(img_draw, (x1, y1), (x2, y2), (0, 255, 0), 2)

# Display result
cv2.imshow('Detections', img_draw)
print("\nPress any key to close the window...")
cv2.waitKey(0)
cv2.destroyAllWindows()

# Load ground truth: ../labels/<image stem>.txt, one object per line
label_file = img_file.parent.parent / 'labels' / img_file.name
label_file = label_file.with_suffix('.txt')
num_gt = 0
if label_file.exists():
    with open(label_file) as f:
        num_gt = len(f.readlines())
    print(f"\nGround truth objects: {num_gt}")
    print(f"Predicted objects: {num_preds}")
else:
    print(f"\nWarning: Label file not found at {label_file}")
def postprocess(output, ratio, padding, orig_shape, conf_thresh, iou_thresh=0.5):
    """Convert raw model output to NMS-filtered, normalized corner boxes.

    Args:
        output: Model output, either ``(1, N, 6)``, ``(N, 6)`` or the
            transposed ``(6, N)``. Any other layout yields ``[]``.
        ratio: Letterbox scale factor applied to the original image.
        padding: ``(dw, dh)`` letterbox padding in input pixels.
        orig_shape: ``(height, width)`` of the original image.
        conf_thresh: Predictions with a lower confidence are dropped.
        iou_thresh: IoU above which a lower-scoring box of the same class is
            suppressed.

    Returns:
        List of ``[x1, y1, x2, y2, conf, cls]`` with coordinates divided by
        the original width/height, so they lie in ``[0, 1]``.
    """
    dw, dh = padding
    h_orig, w_orig = orig_shape

    if len(output.shape) == 3:
        output = output[0]  # drop the batch axis
    if output.shape[0] == 6 and output.shape[1] != 6:
        output = output.T  # (6, N) -> (N, 6)

    if output.shape[0] == 0 or output.shape[1] != 6:
        return []

    # These accumulators were never initialised before, which raised NameError
    boxes, scores, classes = [], [], []

    for pred in output:
        # NOTE(review): rows are read as (cx, cy, w, h, cls, conf). Models
        # exported with NMS enabled emit (x1, y1, x2, y2, conf, cls) instead -
        # confirm which layout the model under test produces.
        cx, cy, w, h, cls, conf = pred

        if conf < conf_thresh:
            continue

        # Convert from center format to corner format
        x1 = cx - w / 2
        y1 = cy - h / 2
        x2 = cx + w / 2
        y2 = cy + h / 2

        # Undo the letterbox and clamp to the original image
        x1 = np.clip((x1 - dw) / ratio, 0, w_orig)
        y1 = np.clip((y1 - dh) / ratio, 0, h_orig)
        x2 = np.clip((x2 - dw) / ratio, 0, w_orig)
        y2 = np.clip((y2 - dh) / ratio, 0, h_orig)

        boxes.append([x1, y1, x2, y2])
        scores.append(float(conf))
        classes.append(int(cls))

    if not boxes:
        return []

    # Apply NMS per class (sorted so the output order is deterministic)
    final_preds = []
    for c in sorted(set(classes)):
        cls_idxs = [i for i, cl in enumerate(classes) if cl == c]
        cls_boxes = [boxes[i] for i in cls_idxs]
        cls_scores = [scores[i] for i in cls_idxs]

        keep = nms(cls_boxes, cls_scores, iou_thresh)
        for i in keep:
            x1, y1, x2, y2 = cls_boxes[i]
            conf = cls_scores[i]
            final_preds.append([
                x1 / w_orig, y1 / h_orig, x2 / w_orig, y2 / h_orig, conf, c
            ])

    return final_preds
def main():
    """Continue training an existing YOLO checkpoint on the rebuilt_balls dataset."""
    # Dataset description file
    data_yaml = "/home/yasen/Training/datasets/rebuilt_balls/data.yaml"
    device_num = 0

    # Start from the weights of a previous run
    model = YOLO("/home/yasen/runs/detect/even_larger_yellow/weights/best.pt")

    banner = "=" * 60
    print(banner)
    print("Starting YOLO11n Training")
    print(banner)

    # Augmentation is set explicitly (the defaults might be too aggressive)
    augmentation = {
        "degrees": 10.0,      # rotation (set to 0 if orientation matters)
        "shear": 10.0,
        "translate": 0.5,     # translation
        "scale": 0.5,         # scale variation
        "fliplr": 0.5,        # horizontal flip probability
        "mosaic": 1.0,        # mosaic augmentation
        "close_mosaic": 10,
    }

    optimization = {
        "optimizer": 'auto',  # or 'SGD', 'Adam', 'AdamW'
        "lr0": 0.01,          # initial learning rate
        "weight_decay": 0.0005,
    }

    bookkeeping = {
        "val": True,          # validate during training
        "plots": True,        # save training plots
        "save": True,         # save checkpoints
        "save_period": 10,    # checkpoint every N epochs
        "name": 'rebuilt',    # run name
    }

    # Train the model
    model.train(
        data=data_yaml,
        epochs=40,
        imgsz=640,
        batch=16,
        device=device_num,
        **augmentation,
        **optimization,
        **bookkeeping,
    )

    print("\n" + banner)
    print("Training Complete!")
    print(banner)
False, +} + +with YoutubeDL(ydl_opts) as ydl: + ydl.download([URL])