From f97206b0c38b7b73c95d2eee1037411b79c2f9cb Mon Sep 17 00:00:00 2001
From: Josafat-Mattias Burmeister <josafat-mattias.burm@hpi.de>
Date: Tue, 2 Sep 2025 20:45:21 +0200
Subject: [PATCH] add notebooks for data analysis in paper

---
 .gitignore                                    |   1 +
 .../baseline/run_baseline_evaluation.bat      |   9 +
 .../1_table_dataset_overview.ipynb            | 168 ++++++++++++
 .../2_example_annotations.ipynb               | 256 ++++++++++++++++++
 .../3_figure_label_projection.ipynb           | 148 ++++++++++
 .../3d-geoinfo-2025/4_get_best_runs.ipynb     |  86 ++++++
 .../5_paper_data_analysis.ipynb               | 190 +++++++++++++
 notebooks/3d-geoinfo-2025/README.md           |   1 +
 notebooks/mapping_plots.md                    |  15 +
 9 files changed, 874 insertions(+)
 create mode 100644 configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat
 create mode 100644 notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb
 create mode 100644 notebooks/3d-geoinfo-2025/2_example_annotations.ipynb
 create mode 100644 notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb
 create mode 100644 notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb
 create mode 100644 notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb
 create mode 100644 notebooks/3d-geoinfo-2025/README.md
 create mode 100644 notebooks/mapping_plots.md

diff --git a/.gitignore b/.gitignore
index 3291233..5a3edd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,6 +160,7 @@ data
 *.csv
 *.pl
 
+**.laz
 **.tif
 **.log
 **.wandb
diff --git a/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat b/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat
new file mode 100644
index 0000000..e2d04db
--- /dev/null
+++ b/configs/3d-geoinfo-2025/baseline/run_baseline_evaluation.bat
@@ -0,0 +1,9 @@
+python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_2_5_cm.toml
+python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_5_cm.toml
+python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_7_5_cm.toml
+python .\scripts\prediction.py --config-path .\configs\baseline\predict_without_finetuning_10_cm.toml
+
+python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_2_5_cm.toml
+python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_5_cm.toml
+python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_7_5_cm.toml
+python .\scripts\evaluate.py --config-path .\configs\baseline\evaluate_without_finetuning_10_cm.toml
\ No newline at end of file
diff --git a/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb b/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb
new file mode 100644
index 0000000..dfa6261
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/1_table_dataset_overview.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Table 1: Dataset Overview\n",
+    "\n",
+    "This notebook can be used to create the table providing an overview of the datasets used in this study."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adjust base directory if needed\n",
+    "base_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/labels\")\n",
+    "csv_output_file_path = \"./datasets.csv\"\n",
+    "\n",
+    "datasets = [\n",
+    "    # small plot that was labeled using all three labeling approaches\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_labeling/s1_p1_small_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.0\",\n",
+    "        \"Size\": \"50 x 50\",\n",
+    "        \"Labeling\": \"ML\",\n",
+    "        \"Usage\": \"train (small)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_correction/s1_p1_small_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.0\",\n",
+    "        \"Size\": \"50 x 50\",\n",
+    "        \"Labeling\": \"MC\",\n",
+    "        \"Usage\": \"train (small)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./automatic_labeling/s1_p1_small_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.0\",\n",
+    "        \"Size\": \"50 x 50\",\n",
+    "        \"Labeling\": \"AL\",\n",
+    "        \"Usage\": \"train (small)\"\n",
+    "    },\n",
+    "    # additional manually labeled data\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_labeling/s1_p1_ext_ml_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.1\",\n",
+    "        \"Size\": \"100 x 100\",\n",
+    "        \"Labeling\": \"ML\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    # additional manually corrected data\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_correction/s1_p1_ext_mc_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.2\",\n",
+    "        \"Size\": \"120 x 80\",\n",
+    "        \"Labeling\": \"MC\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_correction/s1_p2_mc_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 2\",\n",
+    "        \"Size\": \"120 x 120\",\n",
+    "        \"Labeling\": \"MC\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_correction/s2_p1_mc_coco.json\",\n",
+    "        \"Site\": 2,\n",
+    "        \"Plot\": \"Plot 1\",\n",
+    "        \"Size\": \"120 x 120\",\n",
+    "        \"Labeling\": \"MC\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    # additional automatically labeled data\n",
+    "    {\n",
+    "        \"LabelFile\": \"./automatic_labeling/s1_p1_ext_al_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 1.3\",\n",
+    "        \"Size\": \"200 x 100\",\n",
+    "        \"Labeling\": \"AL\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./automatic_labeling/s1_p2_al_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 2\",\n",
+    "        \"Size\": \"150 x 100\",\n",
+    "        \"Labeling\": \"AL\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    {\n",
+    "        \"LabelFile\": \"./automatic_labeling/s1_p3_al_coco.json\",\n",
+    "        \"Site\": 1,\n",
+    "        \"Plot\": \"Plot 3\",\n",
+    "        \"Size\": \"100 x 100\",\n",
+    "        \"Labeling\": \"AL\",\n",
+    "        \"Usage\": \"train (ext.)\"\n",
+    "    },\n",
+    "    # test set\n",
+    "    {\n",
+    "        \"LabelFile\": \"./manual_labeling/s3_p1_coco.json\",\n",
+    "        \"Site\": 3,\n",
+    "        \"Plot\": \"Plot 1\",\n",
+    "        \"Size\": \"50 x 50\",\n",
+    "        \"Labeling\": \"ML\",\n",
+    "        \"Usage\": \"test\"\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "dataset_metadata_list = []\n",
+    "\n",
+    "for dataset in datasets:\n",
+    "    label_file_path = base_dir / dataset[\"LabelFile\"]\n",
+    "    assert label_file_path.exists(), f\"{label_file_path} does not exist.\"\n",
+    "    with open(label_file_path, \"r\", encoding=\"utf-8\") as f:\n",
+    "        labels = json.load(f)\n",
+    "    dataset[\"NumTrees\"] = len(labels[\"annotations\"])\n",
+    "    dataset[\"Plot\"] = dataset[\"Plot\"].replace(\"_\", r\"\\_\")\n",
+    "    dataset[\"Size\"] = dataset[\"Size\"].replace(\"x\", r\"$\\times$\")\n",
+    "    dataset_metadata_list.append(dataset)\n",
+    "\n",
+    "dataset_metadata = pd.DataFrame(dataset_metadata_list)\n",
+    "dataset_metadata.to_csv(csv_output_file_path, index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "deepforest-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb b/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb
new file mode 100644
index 0000000..090e7b1
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/2_example_annotations.ipynb
@@ -0,0 +1,256 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Visualize Annotations and Predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "from pathlib import Path\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.patches as patches\n",
+    "from matplotlib.patches import Polygon\n",
+    "import numpy as np\n",
+    "from PIL import Image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_coco_annotated_image(json_path, image_dir, output_path,\n",
+    "                              draw='bbox',\n",
+    "                              line_width=2,\n",
+    "                              line_color='r',\n",
+    "                              output_size=(10, 8),\n",
+    "                              dpi=100):\n",
+    "    \"\"\"\n",
+    "    Saves an image annotated with COCO-format labels (bounding boxes or polygons) to a file.\n",
+    "\n",
+    "    Args:\n",
+    "        json_path (str): Path to the COCO-format JSON file (with annotations for one image).\n",
+    "        image_dir (str): Directory where the image is stored.\n",
+    "        output_path (str): Path to save the annotated image.\n",
+    "        draw (str): What to draw — 'bbox' for bounding boxes or 'segmentation' for polygons.\n",
+    "        line_width (int): Width of the annotation lines.\n",
+    "        line_color (str or tuple): Color of the annotation lines, e.g., 'blue' or (1, 0, 0) for red in RGB.\n",
+    "        output_size (tuple): Output image size in inches (width, height) for matplotlib.\n",
+    "        dpi (int): Dots per inch (resolution) of the saved image.\n",
+    "    \"\"\"\n",
+    "    # Load JSON\n",
+    "    with open(json_path, 'r') as f:\n",
+    "        coco_data = json.load(f)\n",
+    "\n",
+    "    # Extract image info and annotations\n",
+    "    image_info = coco_data['images'][0]\n",
+    "    annotations = coco_data['annotations']\n",
+    "    image_path = os.path.join(image_dir, image_info['file_name'])\n",
+    "\n",
+    "    # Open image\n",
+    "    image = Image.open(image_path)\n",
+    "    fig, ax = plt.subplots(figsize=output_size, dpi=dpi)\n",
+    "    ax.imshow(image)\n",
+    "\n",
+    "    for ann in annotations:\n",
+    "        if draw == 'bbox':\n",
+    "            x, y, w, h = ann['bbox']\n",
+    "            rect = patches.Rectangle((x, y), w, h,\n",
+    "                                     linewidth=line_width,\n",
+    "                                     edgecolor=line_color,\n",
+    "                                     facecolor='none')\n",
+    "            ax.add_patch(rect)\n",
+    "        elif draw == 'segmentation' and ann['segmentation']:\n",
+    "            for seg in ann['segmentation']:\n",
+    "                poly = Polygon([(seg[i], seg[i+1]) for i in range(0, len(seg), 2)],\n",
+    "                               linewidth=line_width,\n",
+    "                               edgecolor=line_color,\n",
+    "                               facecolor='none')\n",
+    "                ax.add_patch(poly)\n",
+    "\n",
+    "    ax.axis('off')\n",
+    "    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)\n",
+    "    plt.close(fig)\n",
+    "\n",
+    "def plot_csv_annotated_image(csv_path, image_dir, output_path,\n",
+    "                              line_width=2,\n",
+    "                              line_color='r',\n",
+    "                              output_size=(10, 8),\n",
+    "                              dpi=100):\n",
+    "    labels = pd.read_csv(csv_path)\n",
+    "\n",
+    "    if len(labels[\"image_path\"].unique()) > 1:\n",
+    "        raise ValueError(\"Predictions contain multiple images.\")\n",
+    "\n",
+    "    image_path = os.path.join(image_dir, labels[\"image_path\"].iloc[0])\n",
+    "\n",
+    "    # Open image\n",
+    "    image = Image.open(image_path)\n",
+    "    fig, ax = plt.subplots(figsize=output_size, dpi=dpi)\n",
+    "    ax.imshow(image)\n",
+    "\n",
+    "    for _, row in labels.iterrows():\n",
+    "        x_min = row[\"xmin\"]\n",
+    "        x_max = row[\"xmax\"]\n",
+    "        y_min = row[\"ymin\"]\n",
+    "        y_max = row[\"ymax\"]\n",
+    "\n",
+    "        width = x_max - x_min\n",
+    "        height = y_max - y_min\n",
+    "        rect = patches.Rectangle((row[\"xmin\"], row[\"ymin\"]), width, height,\n",
+    "                                linewidth=line_width,\n",
+    "                                edgecolor=line_color,\n",
+    "                                facecolor='none')\n",
+    "        ax.add_patch(rect)\n",
+    "\n",
+    "    ax.axis('off')\n",
+    "    if not isinstance(output_path, Path):\n",
+    "        output_path = Path(output_path)\n",
+    "    output_path.parent.mkdir(exist_ok=True, parents=True)\n",
+    "\n",
+    "    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)\n",
+    "    plt.close(fig)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "line_color = tuple(np.array([255, 89, 0, 255]) / 255)\n",
+    "output_size = (8, 8)\n",
+    "\n",
+    "image_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n",
+    "kwargs = {\"line_width\": 3, \"line_color\": line_color, \"output_size\": output_size, \"dpi\": 80}\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Create images for Fig. 1 - Example Annotations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "annotations_path = \"C:/Users/cgs/Downloads/20230720_Sauen_3512a1_8901_115852_crown_polygons_coco.json\"\n",
+    "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling_polygons.png\", draw=\"segmentation\", **kwargs)\n",
+    "\n",
+    "image_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n",
+    "\n",
+    "annotations_path = image_dir.parent / \"labels/manual_labeling/s1_p1_small_coco.json\"\n",
+    "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling.png\", **kwargs)\n",
+    "\n",
+    "annotations_path = image_dir.parent / \"labels/manual_correction/s1_p1_small_coco.json\"\n",
+    "plot_coco_annotated_image(annotations_path, image_dir, \"manual_correction.png\", **kwargs)\n",
+    "\n",
+    "annotations_path = image_dir.parent / \"labels/automatic_labeling/s1_p1_small_coco.json\"\n",
+    "plot_coco_annotated_image(annotations_path, image_dir, \"automatic_labeling.png\", **kwargs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Create images for Fig. 5 and Fig. 7 - Example Predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "image_dir = Path(\"G:/3D-GeoInfo-2025/data/1_base_data/images/\")\n",
+    "\n",
+    "annotations_path = image_dir.parent / \"labels/manual_labeling/s3_p1_coco.json\"\n",
+    "plot_coco_annotated_image(annotations_path, image_dir, \"manual_labeling_test_set.png\", **kwargs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/2_5_cm/images\"\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/baseline/test_predictions.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_2_5_cm.png\", **kwargs)\n",
+    "\n",
+    "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/5_cm/images\"\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/5_cm/baseline/test_predictions.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_5_cm.png\", **kwargs)\n",
+    "\n",
+    "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/7_5_cm/images\"\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/7_5_cm/baseline/test_predictions.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_7_5_cm.png\", **kwargs)\n",
+    "\n",
+    "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/10_cm/images\"\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/10_cm/baseline/test_predictions.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/baseline_10_cm.png\", **kwargs)\n",
+    "\n",
+    "image_dir = \"G:/3D-GeoInfo-2025/data/2_rescaled_data/2_5_cm/images\"\n",
+    "\n",
+    "predictions_path = f\"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_labeling_small/9_epochs/test_predictions_seed_2.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_labeling_small.png\", **kwargs)\n",
+    "\n",
+    "predictions_path = f\"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_labeling_ext/7_epochs/test_predictions_seed_0.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_labeling_ext.png\", **kwargs)\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_correction_small/6_epochs/test_predictions_seed_0.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_correction_small.png\", **kwargs)\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/manual_correction_ext/9_epochs/test_predictions_seed_0.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/manual_correction_ext.png\", **kwargs)\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/automatic_labeling_small/9_epochs/test_predictions_seed_4.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/automatic_labeling_small.png\", **kwargs)\n",
+    "\n",
+    "predictions_path = \"G:/3D-GeoInfo-2025/data/4_predictions/2_5_cm/automatic_labeling_ext/8_epochs/test_predictions_seed_1.csv\"\n",
+    "plot_csv_annotated_image(predictions_path, image_dir, \"predictions/automatic_labeling_ext.png\", **kwargs)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "deepforest-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb b/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb
new file mode 100644
index 0000000..ba2653d
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/3_figure_label_projection.ipynb
@@ -0,0 +1,148 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Fig. 2: Label Projection from 3D Point Clouds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
+    "import rasterio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "color_palette = [\n",
+    "    [209, 64, 129, 255],  # other purple\n",
+    "    [128, 255, 144, 255],  # light green\n",
+    "    [219, 254, 135, 255],  # mindaro\n",
+    "    [62, 137, 20, 255],  # india green\n",
+    "    [255, 252, 49, 255],  # yellow\n",
+    "    [140, 192, 132, 255],  # dark sea green # [245, 93, 62, 255],  # orange\n",
+    "    [105, 116, 124, 255],  # slate gray\n",
+    "    [19, 70, 17, 255],  # forest green\n",
+    "    [125, 97, 103, 255],  # deep taupe\n",
+    "    [255, 180, 0, 255],  # yellow\n",
+    "    [6, 167, 125, 255],  # green\n",
+    "    [129, 164, 205, 255],  # iceberg\n",
+    "    [75, 29, 63, 255],  # purple\n",
+    "    [247, 197, 72, 255],  # maize crayola\n",
+    "    [62, 124, 177, 255],  # steel blue\n",
+    "    [102, 99, 112, 255],  # burnished brown\n",
+    "    [117, 92, 27, 255],  # field drab\n",
+    "    [115, 191, 184, 255],  # turquoise\n",
+    "    [73, 67, 49, 255],  # olive drab\n",
+    "    [163, 0, 0, 255],  # rufous\n",
+    "    [163, 147, 191, 255],  # glossy grape\n",
+    "    [111, 115, 210, 255],  # violet blue crayola\n",
+    "    [179, 106, 94, 255],  # copper penny\n",
+    "    [0, 178, 202, 255],  # pacific blue\n",
+    "    [118, 117, 34, 255],  # spanish bistre\n",
+    "    [238, 215, 197, 255],  # champagne pink\n",
+    "    [86, 53, 30, 255],  # van dyke brown\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Visualize label image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "base_dir = Path(\"G:/3D-GeoInfo-2025/data/\")\n",
+    "image_path = base_dir / \"1_base_data/labels/automatic_labeling/s1_p1_small_labels.tif\"\n",
+    "\n",
+    "with rasterio.open(image_path) as image:\n",
+    "    image_array = image.read()\n",
+    "\n",
+    "rgb_image_array = np.full((image_array.shape[1], image_array.shape[2], 3), fill_value=240, dtype=np.uint8)\n",
+    "\n",
+    "for idx, instance_id in enumerate(np.unique(image_array)):\n",
+    "    if instance_id == 0:\n",
+    "        continue\n",
+    "    instance_mask = (image_array == instance_id).reshape((image_array.shape[1], image_array.shape[2]))\n",
+    "    color = color_palette[idx % len(color_palette)]\n",
+    "    rgb_image_array[instance_mask] = color[:3]\n",
+    "\n",
+    "image = Image.fromarray(rgb_image_array, mode=\"RGB\")\n",
+    "image.save(\"./label_projection.png\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Colorize point cloud"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pointtorch import read\n",
+    "from pointtorch.operations.numpy import make_labels_consecutive\n",
+    "\n",
+    "point_cloud_path = base_dir / \"1_base_data/point_clouds/s1_p1_small.laz\"\n",
+    "point_cloud = read(point_cloud_path)\n",
+    "instance_ids = make_labels_consecutive(point_cloud[\"instance_id_prediction\"].to_numpy(), ignore_id=0)\n",
+    "\n",
+    "point_cloud[\"red\"] = 240\n",
+    "point_cloud[\"green\"] = 240\n",
+    "point_cloud[\"blue\"] = 240\n",
+    "\n",
+    "for idx, instance_id in enumerate(np.unique(instance_ids)):\n",
+    "    if instance_id == 0:\n",
+    "        continue\n",
+    "    instance_mask = instance_ids == instance_id\n",
+    "    color = color_palette[idx % len(color_palette)]\n",
+    "    point_cloud.loc[instance_mask, [\"red\", \"green\", \"blue\"]] = np.array(color)[:3]\n",
+    "\n",
+    "point_cloud.to(\"./point_cloud_colored.laz\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "deepforest-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb b/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb
new file mode 100644
index 0000000..294e298
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/4_get_best_runs.ipynb
@@ -0,0 +1,86 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b13901d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "383e48fa",
+   "metadata": {},
+   "source": [
+    "### Retrieve best run from each experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "c4790f06",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "manual_labeling small\n",
+      "Best run: epoch 9, seed 2.0\n",
+      "manual_correction small\n",
+      "Best run: epoch 6, seed 0.0\n",
+      "automatic_labeling small\n",
+      "Best run: epoch 9, seed 4.0\n",
+      "manual_labeling ext\n",
+      "Best run: epoch 7, seed 0.0\n",
+      "manual_correction ext\n",
+      "Best run: epoch 9, seed 0.0\n",
+      "automatic_labeling ext\n",
+      "Best run: epoch 8, seed 1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "for training_set in [\"small\", \"ext\"]:  # report the run with the highest TestFScore for each experiment\n",
+    "    for labeling_approach in [\"manual_labeling\", \"manual_correction\", \"automatic_labeling\"]:\n",
+    "        print(labeling_approach, training_set)\n",
+    "        best_f1_score = 0\n",
+    "        best_epoch = None\n",
+    "        best_seed = None\n",
+    "        for epoch in range(10):  # 0-based loop index; the CSV files are numbered from 1 (epoch + 1)\n",
+    "            data = pd.read_csv(f\"G:/3D-GeoInfo-2025/data/5_evaluation/2_5_cm_{labeling_approach}_{training_set}_{epoch + 1}_epochs_boxplot.csv\")\n",
+    "            for _, row in data.iterrows():\n",
+    "                if row[\"TestFScore\"] > best_f1_score:\n",
+    "                    best_f1_score = row[\"TestFScore\"]\n",
+    "                    best_epoch = epoch + 1  # 1-based, i.e. the same number as in the N_epochs file name that was read\n",
+    "                    best_seed = row[\"Seed\"]\n",
+    "\n",
+    "        print(f\"Best run: epoch {best_epoch}, seed {best_seed}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "deepforest-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb b/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb
new file mode 100644
index 0000000..b3592ed
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/5_paper_data_analysis.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Export Metrics for Visualization in TikZ"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "base_folder = Path(\"G:/3D-GeoInfo-2025/data/4_predictions\")\n",
+    "output_folder = Path(\"G:/3D-GeoInfo-2025/data/5_evaluation\")\n",
+    "output_folder.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "seeds = np.arange(5)\n",
+    "epochs = np.arange(20) + 1\n",
+    "resolutions = [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]\n",
+    "experiments = [\"manual_labeling_small\",\n",
+    "               \"manual_labeling_ext\",\n",
+    "               \"manual_correction_small\",\n",
+    "               \"manual_correction_ext\",\n",
+    "               \"automatic_labeling_small\",\n",
+    "               \"automatic_labeling_ext\"]\n",
+    "experiment_abbreviations = {\n",
+    "    \"manual_labeling_small\": \"MLS\",\n",
+    "    \"manual_labeling_ext\": \"MLE\",\n",
+    "    \"manual_correction_small\": \"MCS\",\n",
+    "    \"manual_correction_ext\": \"MCE\",\n",
+    "    \"automatic_labeling_small\": \"ALS\",\n",
+    "    \"automatic_labeling_ext\": \"ALE\",\n",
+    "}\n",
+    "\n",
+    "for resolution in resolutions:\n",
+    "    metrics_barchart = []\n",
+    "    for idx, experiment in enumerate(experiments):\n",
+    "        best_f1_score = -1\n",
+    "        best_epoch = -1\n",
+    "        best_seed = -1\n",
+    "\n",
+    "        experiment_metrics = []\n",
+    "        metrics_line_chart = []\n",
+    "        for epoch in epochs:\n",
+    "            metrics = []\n",
+    "            for seed in seeds:\n",
+    "                current_metrics = []\n",
+    "                for subset in [\"train\", \"test\"]:\n",
+    "                    subset_name = subset.capitalize()\n",
+    "                    metrics_file = base_folder / resolution / experiment / f\"{epoch}_epochs\" / f\"{subset}_metrics_seed_{seed}.csv\"\n",
+    "                    metr = pd.read_csv(metrics_file)\n",
+    "                    metr = pd.DataFrame([\n",
+    "                        metr[\"score\"].to_numpy()],\n",
+    "                        columns=[f\"{subset_name}{metric_name.capitalize()}\" for metric_name in metr[\"metric\"].to_list()]\n",
+    "                    )\n",
+    "                    metr.rename({f\"{subset_name}F1\": f\"{subset_name}FScore\"}, inplace=True, axis=1)\n",
+    "                    if subset == \"train\":\n",
+    "                        metr[\"Epochs\"] = epoch\n",
+    "                        metr[\"Seed\"] = seed\n",
+    "\n",
+    "                    if subset == \"test\" and metr[\"TestFScore\"].iloc[0] > best_f1_score:\n",
+    "                        best_f1_score = metr[\"TestFScore\"].iloc[0]\n",
+    "                        best_epoch = epoch\n",
+    "                        best_seed = seed\n",
+    "                    current_metrics.append(metr)\n",
+    "\n",
+    "                metrics.append(pd.concat(current_metrics, axis=1))\n",
+    "\n",
+    "            metrics_boxplot = pd.concat(metrics)\n",
+    "            metrics_boxplot.to_csv(output_folder / f\"{resolution}_{experiment}_{epoch}_epochs_boxplot.csv\", index=False)\n",
+    "\n",
+    "            metr = {\n",
+    "                \"Epochs\": epoch,\n",
+    "            }\n",
+    "            for metric_name in [\"FScore\", \"Precision\", \"Recall\"]:\n",
+    "                for subset_name in [\"Train\", \"Test\"]:\n",
+    "                    metr[f\"{experiment_abbreviations[experiment]}{subset_name}{metric_name}\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().mean()\n",
+    "                    metr[f\"{experiment_abbreviations[experiment]}{subset_name}{metric_name}Std\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().std()\n",
+    "\n",
+    "            metrics_line_chart.append(metr)\n",
+    "\n",
+    "            if epoch == epochs.max():\n",
+    "                metr = {\n",
+    "                    \"Index\": idx,\n",
+    "                    \"Experiment\": experiment,\n",
+    "                    \"Epochs\": epoch,\n",
+    "                }\n",
+    "\n",
+    "                for metric_name in [\"FScore\", \"Precision\", \"Recall\"]:\n",
+    "                    for subset_name in [\"Train\", \"Test\"]:\n",
+    "                        metr[f\"{subset_name}{metric_name}\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().mean()\n",
+    "                        metr[f\"{subset_name}{metric_name}Std\"] = metrics_boxplot[f\"{subset_name}{metric_name}\"].to_numpy().std()\n",
+    "                metrics_barchart.append(metr)\n",
+    "\n",
+    "        if idx == 0:\n",
+    "            metrics_line_chart_df = pd.DataFrame(metrics_line_chart)\n",
+    "        else:\n",
+    "            metrics_line_chart_df = pd.concat((metrics_line_chart_df, pd.DataFrame(metrics_line_chart)), axis=1)\n",
+    "\n",
+    "        print(\"----------------\")\n",
+    "        print(experiment, resolution)\n",
+    "        print(\"best F1-score\", best_f1_score)\n",
+    "        print(\"best epoch:\", best_epoch)\n",
+    "        print(\"best seed:\", best_seed)\n",
+    "        print(\"----------------\")\n",
+    "\n",
+    "    for subset in [\"test\"]:\n",
+    "        subset_name = subset.capitalize()\n",
+    "        baseline_metrics_file = base_folder / resolution / \"baseline\" / f\"{subset}_metrics.csv\"\n",
+    "        baseline_metrics = pd.read_csv(baseline_metrics_file)\n",
+    "        baseline_metrics = pd.DataFrame([\n",
+    "            baseline_metrics[\"score\"].to_numpy()],\n",
+    "            columns=baseline_metrics[\"metric\"].to_list()\n",
+    "        )\n",
+    "        metrics_line_chart_df[f\"NoFT{subset_name}FScore\"] = baseline_metrics[\"f1\"].iloc[0]\n",
+    "        metrics_line_chart_df[f\"NoFT{subset_name}Precision\"] = baseline_metrics[\"precision\"].iloc[0]\n",
+    "        metrics_line_chart_df[f\"NoFT{subset_name}Recall\"] = baseline_metrics[\"recall\"].iloc[0]\n",
+    "\n",
+    "    metrics_line_chart_df.to_csv(output_folder / f\"linechart_{resolution}.csv\", index=False)\n",
+    "\n",
+    "    metrics_barchart_df = pd.DataFrame(metrics_barchart)\n",
+    "    metrics_barchart_df.to_csv(output_folder / f\"barchart_{resolution}.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n",
+    "    metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n",
+    "    print(\"baseline:\", np.round(metrics[\"NoFTTestFScore\"].unique(), 2))\n",
+    "    print(\"metrics\", np.round(metrics[[\"MLSTestFScore\", \"MCSTestFScore\", \"ALSTestFScore\"]].max(), 2))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n",
+    "    metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n",
+    "    print(\"metrics\", np.round(metrics[[\"MLETestFScore\", \"MCETestFScore\", \"ALETestFScore\"]].max(), 2))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for resolution in [\"2_5_cm\", \"5_cm\", \"7_5_cm\", \"10_cm\"]:\n",
+    "    metrics = pd.read_csv(output_folder / f\"linechart_{resolution}.csv\")\n",
+    "    print(\"metrics\", np.round(metrics[[\"MLSTrainFScore\", \"MCSTrainFScore\", \"ALSTrainFScore\"]].max(), 2))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "deepforest-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/3d-geoinfo-2025/README.md b/notebooks/3d-geoinfo-2025/README.md
new file mode 100644
index 0000000..b23ad30
--- /dev/null
+++ b/notebooks/3d-geoinfo-2025/README.md
@@ -0,0 +1 @@
+This folder contains several Jupyter notebooks that were used to create the tables and figures in the 3D GeoInfo 2025 paper.
\ No newline at end of file
diff --git a/notebooks/mapping_plots.md b/notebooks/mapping_plots.md
new file mode 100644
index 0000000..25cfa2b
--- /dev/null
+++ b/notebooks/mapping_plots.md
@@ -0,0 +1,15 @@
+
+
+s1_p1_small - 20230720_Sauen_3512a1_8901_115852
+
+s1_p1_ext_ml - 20240820_Sauen_3512a1_labeled_merged_rectangular
+
+s1_p1_ext_mc - 20230720_Sauen_3512a1_2x3_tile
+s1_p2_mc - 20230720_Sauen_3512a1_tile
+s2_p1_mc - 20230809_Sauen_3510b3_tile
+
+s1_p1_ext_al - 20230720_Sauen_3512a1_8898_8901_115852_115853
+s1_p2_al - 20230720_Sauen_3512a1_8902_8904_115851_115852
+s1_p3_al - 20230720_Sauen_3512a1_8904_8906_115851_115852
+
+s3_p1 - 20230809_Sauen_3515b1_8901_115843_aligned