From f97206b0c38b7b73c95d2eee1037411b79c2f9cb Mon Sep 17 00:00:00 2001 From: Josafat-Mattias Burmeister Date: Tue, 2 Sep 2025 20:45:21 +0200 Subject: [PATCH] add notebooks for data analysis in paper --- .gitignore | 1 + .../baseline/run_baseline_evaluation.bat | 9 + .../1_table_dataset_overview.ipynb | 168 ++++++++++++ .../2_example_annotations.ipynb | 256 ++++++++++++++++++ .../3_figure_label_projection.ipynb | 148 ++++++++++ .../3d-geoinfo-2025/4_get_best_runs.ipynb | 86 ++++++ .../5_paper_data_analysis.ipynb | 190 +++++++++++++ notebooks/3d-geoinfo-2025/README.md | 1 + notebooks/mapping_plots.md | 15 + 9 files changed, 874 insertions(+) create mode 100644 configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat create mode 100644 notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb create mode 100644 notebooks/3d-geoinfo-2025/2_example_annotations.ipynb create mode 100644 notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb create mode 100644 notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb create mode 100644 notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb create mode 100644 notebooks/3d-geoinfo-2025/README.md create mode 100644 notebooks/mapping_plots.md diff --git a/.gitignore b/.gitignore index 3291233..5a3edd6 100644 --- a/.gitignore +++ b/.gitignore @@ -160,6 +160,7 @@ data *.csv *.pl +**.laz **.tif **.log **.wandb diff --git a/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat b/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat new file mode 100644 index 0000000..e2d04db --- /dev/null +++ b/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat @@ -0,0 +1,9 @@ +python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_2_5_cm.toml +python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_5_cm.toml +python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_7_5_cm.toml +python .\scripts\prediction.py --config-path 
.\configs\baseline\predict_without_finetuning_10_cm.toml + +python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_2_5_cm.toml +python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_5_cm.toml +python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_7_5_cm.toml +python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_10_cm.toml \ No newline at end of file diff --git a/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb b/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb new file mode 100644 index 0000000..dfa6261 --- /dev/null +++ b/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Table 1: Dataset Overview\n", + "\n", + "This notebook can be used to create the table providing an overview of the datasets used in this study." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# adjust base directory if needed\n", + "base_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/labels\")\n", + "csv_output_file_path = \"./datasets.csv\"\n", + "\n", + "datasets = [\n", + " # small plot that was labeled using all three labeling approaches\n", + " {\n", + " \"LabelFile\": \"./manual_labeling/s1_p1_small_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.0\",\n", + " \"Size\": \"50 x 50\",\n", + " \"Labeling\": \"ML\",\n", + " \"Usage\": \"train (small)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./manual_correction/s1_p1_small_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.0\",\n", + " \"Size\": \"50 x 50\",\n", + " \"Labeling\": \"MC\",\n", + " 
\"Usage\": \"train (small)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./automatic_labeling/s1_p1_small_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.0\",\n", + " \"Size\": \"50 x 50\",\n", + " \"Labeling\": \"AL\",\n", + " \"Usage\": \"train (small)\"\n", + " },\n", + " # additional manually labeled data\n", + " {\n", + " \"LabelFile\": \"./manual_labeling/s1_p1_ext_ml_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.1\",\n", + " \"Size\": \"100 x 100\",\n", + " \"Labeling\": \"ML\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " # additional manually corrected data\n", + " {\n", + " \"LabelFile\": \"./manual_correction/s1_p1_ext_mc_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.2\",\n", + " \"Size\": \"120 x 80\",\n", + " \"Labeling\": \"MC\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./manual_correction/s1_p2_mc_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 2\",\n", + " \"Size\": \"120 x 120\",\n", + " \"Labeling\": \"MC\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./manual_correction/s2_p1_mc_coco.json\",\n", + " \"Site\": 2,\n", + " \"Plot\": \"Plot 1\",\n", + " \"Size\": \"120 x 120\",\n", + " \"Labeling\": \"MC\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " # additional automatically labeled data\n", + " {\n", + " \"LabelFile\": \"./automatic_labeling/s1_p1_ext_al_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 1.3\",\n", + " \"Size\": \"200 x 100\",\n", + " \"Labeling\": \"AL\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./automatic_labeling/s1_p2_al_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 2\",\n", + " \"Size\": \"150 x 100\",\n", + " \"Labeling\": \"AL\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " {\n", + " \"LabelFile\": \"./automatic_labeling/s1_p3_al_coco.json\",\n", + " \"Site\": 1,\n", + " \"Plot\": \"Plot 3\",\n", + 
" \"Size\": \"100 x 100\",\n", + " \"Labeling\": \"AL\",\n", + " \"Usage\": \"train (ext.)\"\n", + " },\n", + " # test set\n", + " {\n", + " \"LabelFile\": \"./manual_labeling/s3_p1_coco.json\",\n", + " \"Site\": 3,\n", + " \"Plot\": \"Plot 1\",\n", + " \"Size\": \"50 x 50\",\n", + " \"Labeling\": \"ML\",\n", + " \"Usage\": \"test\"\n", + " },\n", + "]\n", + "\n", + "dataset_metadata_list = []\n", + "\n", + "for dataset in datasets:\n", + " label_file_path = base_dir / dataset[\"LabelFile\"]\n", + " assert label_file_path.exists(), f\"{label_file_path} does not exist.\"\n", + " with open(label_file_path, \"r\", encoding=\"utf-8\") as f:\n", + " labels = json.load(f)\n", + " dataset[\"NumTrees\"] = len(labels[\"annotations\"])\n", + " dataset[\"Plot\"] = dataset[\"Plot\"].replace(\"_\", r\"\\_\")\n", + " dataset[\"Size\"] = dataset[\"Size\"].replace(\"x\", r\"$\\times$\")\n", + " dataset_metadata_list.append(dataset)\n", + "\n", + "dataset_metadata = pd.DataFrame(dataset_metadata_list)\n", + "dataset_metadata.to_csv(csv_output_file_path, index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepforest-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb b/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb new file mode 100644 index 0000000..090e7b1 --- /dev/null +++ b/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualize Annotations and Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"import json\n", + "from pathlib import Path\n", + "import os\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "from matplotlib.patches import Polygon\n", + "import numpy as np\n", + "from PIL import Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_coco_annotated_image(json_path, image_dir, output_path,\n", + " draw='bbox',\n", + " line_width=2,\n", + " line_color='r',\n", + " output_size=(10, 8),\n", + " dpi=100):\n", + " \"\"\"\n", + " Saves an image annotated with COCO-format labels (bounding boxes or polygons) to a file.\n", + "\n", + " Args:\n", + " json_path (str): Path to the COCO-format JSON file (with annotations for one image).\n", + " image_dir (str): Directory where the image is stored.\n", + " output_path (str): Path to save the annotated image.\n", + " draw (str): What to draw — 'bbox' for bounding boxes or 'segmentation' for polygons.\n", + " line_width (int): Width of the annotation lines.\n", + " line_color (str or tuple): Color of the annotation lines, e.g., 'blue' or (1, 0, 0) for red in RGB.\n", + " output_size (tuple): Output image size in inches (width, height) for matplotlib.\n", + " dpi (int): Dots per inch (resolution) of the saved image.\n", + " \"\"\"\n", + " # Load JSON\n", + " with open(json_path, 'r') as f:\n", + " coco_data = json.load(f)\n", + "\n", + " # Extract image info and annotations\n", + " image_info = coco_data['images'][0]\n", + " annotations = coco_data['annotations']\n", + " image_path = os.path.join(image_dir, image_info['file_name'])\n", + "\n", + " # Open image\n", + " image = Image.open(image_path)\n", + " fig, ax = plt.subplots(figsize=output_size, dpi=dpi)\n", + " ax.imshow(image)\n", + "\n", + " for ann in annotations:\n", + " if draw == 'bbox':\n", + " x, y, w, h = ann['bbox']\n", + " rect = patches.Rectangle((x, y), w, h,\n", + " linewidth=line_width,\n", + " 
edgecolor=line_color,\n", + " facecolor='none')\n", + " ax.add_patch(rect)\n", + " elif draw == 'segmentation' and ann['segmentation']:\n", + " for seg in ann['segmentation']:\n", + " poly = Polygon([(seg[i], seg[i+1]) for i in range(0, len(seg), 2)],\n", + " linewidth=line_width,\n", + " edgecolor=line_color,\n", + " facecolor='none')\n", + " ax.add_patch(poly)\n", + "\n", + " ax.axis('off')\n", + " plt.savefig(output_path, bbox_inches='tight', pad_inches=0)\n", + " plt.close(fig)\n", + "\n", + "def plot_csv_annotated_image(csv_path, image_dir, output_path,\n", + " line_width=2,\n", + " line_color='r',\n", + " output_size=(10, 8),\n", + " dpi=100):\n", + " labels = pd.read_csv(csv_path)\n", + "\n", + " if len(labels[\"image_path\"].unique()) > 1:\n", + " raise ValueError(\"Predictions contain multiple images.\")\n", + "\n", + " image_path = os.path.join(image_dir, labels[\"image_path\"].iloc[0])\n", + "\n", + " # Open image\n", + " image = Image.open(image_path)\n", + " fig, ax = plt.subplots(figsize=output_size, dpi=dpi)\n", + " ax.imshow(image)\n", + "\n", + " for _, row in labels.iterrows():\n", + " x_min = row[\"xmin\"]\n", + " x_max = row[\"xmax\"]\n", + " y_min = row[\"ymin\"]\n", + " y_max = row[\"ymax\"]\n", + "\n", + " width = x_max - x_min\n", + " height = y_max - y_min\n", + " rect = patches.Rectangle((row[\"xmin\"], row[\"ymin\"]), width, height,\n", + " linewidth=line_width,\n", + " edgecolor=line_color,\n", + " facecolor='none')\n", + " ax.add_patch(rect)\n", + "\n", + " ax.axis('off')\n", + " if not isinstance(output_path, Path):\n", + " output_path = Path(output_path)\n", + " output_path.parent.mkdir(exist_ok=True, parents=True)\n", + "\n", + " plt.savefig(output_path, bbox_inches='tight', pad_inches=0)\n", + " plt.close(fig)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "line_color = tuple(np.array([255, 89, 0, 255]) / 255)\n", + "output_size = (8, 8)\n", + "\n", + "image_dir = 
Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n", + "kwargs = {\"line_width\": 3, \"line_color\": line_color, \"output_size\": output_size, \"dpi\": 80}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Create images for Fig. 1 - Example Annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "annotations_path = \"C:/Users/cgs/Downloads/20230720_Sauen_3512a1_8901_115852_crown_polygons_coco.json\"\n", + "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling_polygons.png\", draw=\"segmentation\", **kwargs)\n", + "\n", + "image_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n", + "\n", + "annotations_path = image_dir.parent / \"labels/manual_labeling/s1_p1_small_coco.json\"\n", + "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling.png\", **kwargs)\n", + "\n", + "annotations_path = image_dir.parent / \"labels/manual_correction/s1_p1_small_coco.json\"\n", + "plot_coco_annotated_image(annotations_path, image_dir, \"manual_correction.png\", **kwargs)\n", + "\n", + "annotations_path = image_dir.parent / \"labels/automatic_labeling/s1_p1_small_coco.json\"\n", + "plot_coco_annotated_image(annotations_path, image_dir, \"automatic_labeling.png\", **kwargs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Create images for Fig. 5 and Fig. 
7 - Example Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n", + "\n", + "annotations_path = image_dir.parent / \"labels/manual_labeling/s3_p1_coco.json\"\n", + "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling_test_set.png\", **kwargs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/2_5_cm/images\"\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/baseline/test_predictions.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_2_5_cm.png\", **kwargs)\n", + "\n", + "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/5_cm/images\"\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/5_cm/baseline/test_predictions.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_5_cm.png\", **kwargs)\n", + "\n", + "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/7_5_cm/images\"\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/7_5_cm/baseline/test_predictions.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_7_5_cm.png\", **kwargs)\n", + "\n", + "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/10_cm/images\"\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/10_cm/baseline/test_predictions.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_10_cm.png\", **kwargs)\n", + "\n", + "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/2_5_cm/images\"\n", + "\n", + "predictions_path = f\"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_labeling_small/9_epochs/test_predictions_seed_2.csv\"\n", + 
"plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_labeling_small.png\", **kwargs)\n", + "\n", + "predictions_path = f\"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_labeling_ext/7_epochs/test_predictions_seed_0.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_labeling_ext.png\", **kwargs)\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_correction_small/6_epochs/test_predictions_seed_0.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_correction_small.png\", **kwargs)\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_correction_ext/9_epochs/test_predictions_seed_0.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_correction_ext.png\", **kwargs)\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/automatic_labeling_small/9_epochs/test_predictions_seed_4.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/automatic_labeling_small.png\", **kwargs)\n", + "\n", + "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/automatic_labeling_ext/8_epochs/test_predictions_seed_1.csv\"\n", + "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/automatic_labeling_ext.png\", **kwargs)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepforest-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb b/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb new file mode 100644 index 
0000000..ba2653d --- /dev/null +++ b/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb @@ -0,0 +1,148 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fig. 2: Label Projection from 3D Point Clouds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "from PIL import Image\n", + "import rasterio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "color_palette = [\n", + " [209, 64, 129, 255], # other purple\n", + " [128, 255, 144, 255], # light green\n", + " [219, 254, 135, 255], # mindaro\n", + " [62, 137, 20, 255], # india green\n", + " [255, 252, 49, 255], # yellow\n", + " [140, 192, 132, 255], # dark sea green # [245, 93, 62, 255], # orange\n", + " [105, 116, 124, 255], # slate gray\n", + " [19, 70, 17, 255], # forest green\n", + " [125, 97, 103, 255], # deep taupe\n", + " [255, 180, 0, 255], # yellow\n", + " [6, 167, 125, 255], # green\n", + " [129, 164, 205, 255], # iceberg\n", + " [75, 29, 63, 255], # purple\n", + " [247, 197, 72, 255], # maize crayola\n", + " [62, 124, 177, 255], # steel blue\n", + " [102, 99, 112, 255], # burnished brown\n", + " [117, 92, 27, 255], # field drab\n", + " [115, 191, 184, 255], # turquoise\n", + " [73, 67, 49, 255], # olive drab\n", + " [163, 0, 0, 255], # rufous\n", + " [163, 147, 191, 255], # glossy grape\n", + " [111, 115, 210, 255], # violet blue crayola\n", + " [179, 106, 94, 255], # copper penny\n", + " [0, 178, 202, 255], # pacific blue\n", + " [118, 117, 34, 255], # spanish bistre\n", + " [238, 215, 197, 255], # champagne pink\n", + " [86, 53, 30, 255], # van dyke brown\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Visualize label image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "base_dir = Path(\"G:/3D-GeoInfo-2025/data/\")\n", + "image_path = base_dir / \"1_base_data/labels/automatic_labeling/s1_p1_small_labels.tif\"\n", + "\n", + "with rasterio.open(image_path) as image:\n", + " image_array = image.read()\n", + "\n", + "rgb_image_array = np.full((image_array.shape[1], image_array.shape[2], 3), fill_value=240, dtype=np.uint8)\n", + "\n", + "for idx, instance_id in enumerate(np.unique(image_array)):\n", + " if instance_id == 0:\n", + " continue\n", + " instance_mask = (image_array == instance_id).reshape((image_array.shape[1], image_array.shape[2]))\n", + " color = color_palette[idx % len(color_palette)]\n", + " rgb_image_array[instance_mask] = color[:3]\n", + "\n", + "image = Image.fromarray(rgb_image_array, mode=\"RGB\")\n", + "image.save(\"./label_projection.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Colorize point cloud" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pointtorch import read\n", + "from pointtorch.operations.numpy import make_labels_consecutive\n", + "\n", + "point_cloud_path = base_dir / \"1_base_data/point_clouds/s1_p1_small.laz\"\n", + "point_cloud = read(point_cloud_path)\n", + "instance_ids = make_labels_consecutive(point_cloud[\"instance_id_prediction\"].to_numpy(), ignore_id=0)\n", + "\n", + "point_cloud[\"red\"] = 240\n", + "point_cloud[\"green\"] = 240\n", + "point_cloud[\"blue\"] = 240\n", + "\n", + "for idx, instance_id in enumerate(np.unique(instance_ids)):\n", + " if instance_id == 0:\n", + " continue\n", + " instance_mask = instance_ids == instance_id\n", + " color = color_palette[idx % len(color_palette)]\n", + " point_cloud.loc[instance_mask, [\"red\", \"green\", \"blue\"]] = np.array(color)[:3]\n", + "\n", + "point_cloud.to(\"./point_cloud_colored.laz\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepforest-env", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb b/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb new file mode 100644 index 0000000..294e298 --- /dev/null +++ b/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b13901d6", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "383e48fa", + "metadata": {}, + "source": [ + "### Retrieve best run from each experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c4790f06", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "manual_labeling small\n", + "Best run: epoch 9, seed 2.0\n", + "manual_correction small\n", + "Best run: epoch 6, seed 0.0\n", + "automatic_labeling small\n", + "Best run: epoch 9, seed 4.0\n", + "manual_labeling ext\n", + "Best run: epoch 7, seed 0.0\n", + "manual_correction ext\n", + "Best run: epoch 9, seed 0.0\n", + "automatic_labeling ext\n", + "Best run: epoch 8, seed 1.0\n" + ] + } + ], + "source": [ + "for training_set in [\"small\", \"ext\"]:\n", + " for labeling_approach in [\"manual_labeling\", \"manual_correction\", \"automatic_labeling\"]:\n", + " print(labeling_approach, training_set)\n", + " best_f1_score = 0\n", + " best_epoch = None\n", + " best_seed = None\n", + " for epoch in range(10):\n", + " data = pd.read_csv(f\"G:/3D-GeoInfo-2025/data/5_evaluation/2_5_cm_{labeling_approach}_{training_set}_{epoch + 1}_epochs_boxplot.csv\")\n", + " for idx, row in data.iterrows():\n", + " if row[\"TestFScore\"] > best_f1_score:\n", + " 
best_f1_score = row[\"TestFScore\"]\n", + " best_epoch = epoch + 1  # file and directory names use 1-based epoch counts\n", + " best_seed = row[\"Seed\"]\n", + "\n", + " print(f\"Best run: epoch {best_epoch}, seed {best_seed}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepforest-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb b/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb new file mode 100644 index 0000000..b3592ed --- /dev/null +++ b/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Export Metrics for Visualization in tikz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "base_folder = Path(\"G:/3D-GeoInfo-2025/data/4_predictions\")\n", + "output_folder = Path(\"G:/3D-GeoInfo-2025/data/5_evaluation\")\n", + "output_folder.mkdir(parents=True, exist_ok=True)\n", + "\n", + "seeds = np.arange(5)\n", + "epochs = np.arange(20) + 1\n", + "resolutions = [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]\n", + "experiments = [\"manual_labeling_small\",\n", + " \"manual_labeling_ext\",\n", + " \"manual_correction_small\",\n", + " \"manual_correction_ext\",\n", + " \"automatic_labeling_small\",\n", + " \"automatic_labeling_ext\"]\n", + "experiment_abbreviations = {\n", + " \"manual_labeling_small\": \"MLS\",\n", + " \"manual_labeling_ext\": \"MLE\",\n", + " \"manual_correction_small\": \"MCS\",\n", + " \"manual_correction_ext\": \"MCE\",\n", + 
\"automatic_labeling_small\": \"ALS\",\n", + " \"automatic_labeling_ext\": \"ALE\",\n", + "}\n", + "\n", + "for resolution in resolutions:\n", + " metrics_barchart = []\n", + " for idx, experiment in enumerate(experiments):\n", + " best_f1_score = -1\n", + " best_epoch = -1\n", + " best_seed = -1\n", + "\n", + " experiment_metrics = []\n", + " metrics_line_chart = []\n", + " for epoch in epochs:\n", + " metrics = []\n", + " for seed in seeds:\n", + " current_metrics = []\n", + " for subset in [\"train\", \"test\"]:\n", + " subset_name = subset.capitalize()\n", + " metrics_file = base_folder / resolution / experiment / f\"{epoch}_epochs\" / f\"{subset}_metrics_seed_{seed}.csv\"\n", + " metr = pd.read_csv(metrics_file)\n", + " metr = pd.DataFrame([\n", + " metr[\"score\"].to_numpy()],\n", + " columns=[f\"{subset_name}{metric_name.capitalize()}\" for metric_name in metr[\"metric\"].to_list()]\n", + " )\n", + " metr.rename({f\"{subset_name}F1\": f\"{subset_name}FScore\"}, inplace=True, axis=1)\n", + " if subset == \"train\":\n", + " metr[\"Epochs\"] = epoch\n", + " metr[\"Seed\"] = seed\n", + "\n", + " if subset == \"test\" and metr[\"TestFScore\"].iloc[0] > best_f1_score:\n", + " best_f1_score = metr[\"TestFScore\"].iloc[0]\n", + " best_epoch = epoch\n", + " best_seed = seed\n", + " current_metrics.append(metr)\n", + "\n", + " metrics.append(pd.concat(current_metrics, axis=1))\n", + "\n", + " metrics_boxplot = pd.concat(metrics)\n", + " metrics_boxplot.to_csv(output_folder / f\"{resolution}_{experiment}_{epoch}_epochs_boxplot.csv\", index=False)\n", + "\n", + " metr = {\n", + " \"Epochs\": epoch,\n", + " }\n", + " for metric_name in [\"FScore\", \"Precision\", \"Recall\"]:\n", + " for subset_name in [\"Train\", \"Test\"]:\n", + " metr[f\"{experiment_abbreviations[experiment]}{subset_name}{metric_name}\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().mean()\n", + " metr[f\"{experiment_abbreviations[experiment]}{subset_name}{metric_name}Std\"] = 
metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().std()\n", + "\n", + " metrics_line_chart.append(metr)\n", + "\n", + " if epoch == epochs.max():\n", + " metr = {\n", + " \"Index\": idx,\n", + " \"Experiment\": experiment,\n", + " \"Epochs\": epoch,\n", + " }\n", + "\n", + " for metric_name in [\"FScore\", \"Precision\", \"Recall\"]:\n", + " for subset_name in [\"Train\", \"Test\"]:\n", + " metr[f\"{subset_name}{metric_name}\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().mean()\n", + " metr[f\"{subset_name}{metric_name}Std\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().std()\n", + " metrics_barchart.append(metr)\n", + "\n", + " if idx == 0:\n", + " metrics_line_chart_df = pd.DataFrame(metrics_line_chart)\n", + " else:\n", + " metrics_line_chart_df = pd.concat((metrics_line_chart_df, pd.DataFrame(metrics_line_chart)), axis=1)\n", + "\n", + " print(\"----------------\")\n", + " print(experiment, resolution)\n", + " print(\"best F1-score\", best_f1_score)\n", + " print(\"best epoch:\", best_epoch)\n", + " print(\"best seed:\", best_seed)\n", + " print(\"----------------\")\n", + "\n", + " for subset in [\"test\"]:\n", + " subset_name = subset.capitalize()\n", + " baseline_metrics_file = base_folder / resolution / \"baseline\" / f\"{subset}_metrics.csv\"\n", + " baseline_metrics = pd.read_csv(baseline_metrics_file)\n", + " baseline_metrics = pd.DataFrame([\n", + " baseline_metrics[\"score\"].to_numpy()],\n", + " columns=baseline_metrics[\"metric\"].to_list()\n", + " )\n", + " metrics_line_chart_df[f\"NoFT{subset_name}FScore\"] = baseline_metrics[\"f1\"].iloc[0]\n", + " metrics_line_chart_df[f\"NoFT{subset_name}Precision\"] = baseline_metrics[\"precision\"].iloc[0]\n", + " metrics_line_chart_df[f\"NoFT{subset_name}Recall\"] = baseline_metrics[\"recall\"].iloc[0]\n", + "\n", + " metrics_line_chart_df.to_csv(output_folder / f\"linechart_{resolution}.csv\", index=False)\n", + "\n", + " metrics_barchart_df = 
pd.DataFrame(metrics_barchart)\n", + " metrics_barchart_df.to_csv(output_folder / f\"barchart_{resolution}.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n", + " metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n", + " print(\"baseline:\", np.round(metrics[\"NoFTTestFScore\"].unique(), 2))\n", + " print(\"metrics\", np.round(metrics[[\"MLSTestFScore\", \"MCSTestFScore\", \"ALSTestFScore\"]].max(), 2))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n", + " metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n", + " print(\"metrics\", np.round(metrics[[\"MLETestFScore\", \"MCETestFScore\", \"ALETestFScore\"]].max(), 2))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n", + " metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n", + " print(\"metrics\", np.round(metrics[[\"MLSTrainFScore\", \"MCSTrainFScore\", \"ALSTrainFScore\"]].max(), 2))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepforest-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/3d-geoinfo-2025/README.md b/notebooks/3d-geoinfo-2025/README.md new file mode 100644 index 0000000..b23ad30 --- /dev/null +++ b/notebooks/3d-geoinfo-2025/README.md @@ -0,0 +1 @@ +This folder 
contains several Jupyter notebooks that were used to create the tables and figures in the 3D GeoInfo paper. \ No newline at end of file diff --git a/notebooks/mapping_plots.md b/notebooks/mapping_plots.md new file mode 100644 index 0000000..25cfa2b --- /dev/null +++ b/notebooks/mapping_plots.md @@ -0,0 +1,15 @@ + + +s1_p1_small - 20230720_Sauen_3512a1_8901_115852 + +s1_p1_ext_ml - 20240820_Sauen_3512a1_labeled_merged_rectangular + +s1_p1_ext_mc - 20230720_Sauen_3512a1_2x3_tile +s1_p2_mc - 20230720_Sauen_3512a1_tile +s2_p1_mc - 20230809_Sauen_3510b3_tile + +s1_p1_ext_al - 20230720_Sauen_3512a1_8898_8901_115852_115853 +s1_p2_al - 20230720_Sauen_3512a1_8902_8904_115851_115852 +s1_p3_al - 20230720_Sauen_3512a1_8904_8906_115851_115852 + +s3_p1 - 20230809_Sauen_3515b1_8901_115843_aligned