HHNK · Overmeen · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025 · Oct 9, 2025
diff --git a/docs/guides/release.md b/docs/guides/release.md
@@ -0,0 +1,51 @@
+# Release
+
+Deze handleiding volgt op de handleiding `contribute` en beschrijft de release-procedure vanuit de main branch.
+
+## 1. Verander het versienummer van de module
+
+Kies het nieuwe versienummer volgens _semantic versioning_:
+
+- **MAJOR**: brekende wijzigingen (bijv. `1.0.0` → `2.0.0`)
+- **MINOR**: nieuwe functionaliteit, achterwaarts compatibel (bijv. `1.3.0` → `1.4.0`)
+- **PATCH**: bugfixes, kleine wijzigingen (bijv. `1.3.2` → `1.3.3`)
+
+Voorbeeld: huidige versie is `1.4.1`, je kiest `1.4.2` omdat we bestaande functionaliteit hebben verbeterd.
+
+Zet het versienummer in `hydamo_validation/__init__.py` achter [`__version__`](https://github.com/HetWaterschapshuis/HyDAMOValidatieModule/blob/main/hydamo_validation/__init__.py#L4).
+
+Zorg dat alle wijzigingen via Pull Requests zijn gecommit in de main branch
+
+
+## 2. Testen
+Nadat álle code voor een nieuwe release is gecommit in de main branch, draai nog één keer alle tests met pytest. In de dev-environment (zie `envs/dev_environment.yml`) is pytest beschikbaar. Run:
+
+`pytest --cov=hydamo_validation tests/`
+
+Publiceer alleen als alle tests slagen:
+![](images/test.png "pytest")
+
+## 3. Aanmaken van een release
+Zorg dat alle wijzigingen via Pull Requests zijn gecommit in de main branch
+
+1. Ga naar de GitHub-pagina van de repository.
+2. Klik op Releases (rechts in de sidebar of onder het tabje “Code”).
+3. Klik op `Draft a new release` (of New release).
+4. Kies `Tag: Select tag` en kies `Create new tag`. Maak een annotated tag gelijk aan het versienummer, dus `vX.Y.Z`, bijvoorbeeld `v1.4.2`.
+5. Laat `Target: main`
+6. Zet de tag ook in de `Release title`: weer `vX.Y.Z`, dus `v1.4.2` in het voorbeeld hierboven.
+7. `Release notes`, klik eventueel `Generate release notes` en/of beschrijf de belangrijkste wijzigingen:
+    * Nieuwe features
+    * Bugfixes
+    * Breaking changes
+8. Klik op `Publish release`
+
+Je ziet nu een nieuwe release beschikbaar in GitHub met een `afdruk` van de code uit de main branch
+
+## 4. Publiceren op PyPI
+
+Publiceren doe je in 2 stappen:
+1. Bouw distributies met `python setup.py sdist` 
+2. Upload naar PyPI met `twine upload dist/* -p jouw_eigen_twine_password`
+
+De laatste release moet nu ook beschikbaar zijn op https://pypi.org/project/hydamo-validation/ en wordt vanaf nu geïnstalleerd met `pip install hydamo-validation`.
diff --git a/envs/dev_environment.yml b/envs/dev_environment.yml
@@ -22,4 +22,5 @@ dependencies:
   - rasterstats==0.19.0
   - rtree
   - shapely==2.0.2
+  - tqdm
   - twine
diff --git a/envs/environment.yml b/envs/environment.yml
@@ -14,4 +14,5 @@ dependencies:
   - rasterio==1.3.9
   - rasterstats==0.19.0
   - shapely==2.0.2
-  - rtree
+  - rtree
+  - tqdm
diff --git a/hydamo_validation/__init__.py b/hydamo_validation/__init__.py
@@ -1,14 +1,13 @@
 __author__ = ["Het Waterschapshuis", "D2HYDRO", "HKV", "HydroConsult"]
 __copyright__ = "Copyright 2021, HyDAMO ValidatieTool"
 __credits__ = ["D2HYDRO", "HKV", "HydroConsult"]
-__version__ = "1.4.0"
+__version__ = "1.4.2"
 
 __license__ = "MIT"
 __maintainer__ = "Daniel Tollenaar"
 __email__ = "daniel@d2hydro.nl"
 
-import fiona  # top-level import to avoid fiona import issue: https://github.com/conda-forge/fiona-feedstock/issues/213
-from pathlib import Path
+#import fiona  # top-level import to avoid fiona import issue: https://github.com/conda-forge/fiona-feedstock/issues/213
 from hydamo_validation.functions import topologic as topologic_functions
 from hydamo_validation.functions import logic as logic_functions
 from hydamo_validation.functions import general as general_functions

diff --git a/hydamo_validation/functions/topologic.py b/hydamo_validation/functions/topologic.py
@@ -5,6 +5,9 @@
 import pandas as pd
 from shapely.geometry import Point
 from hydamo_validation import geometry
+from tqdm import tqdm
+
+tqdm.pandas(desc="Processing rows")
 
 """ 
 In this block we define supporting functions. Supporting functions are ignored
@@ -117,37 +120,13 @@ def _not_overlapping_point(row, gdf, sindex, tolerance, exclude_row=True):
     return not_overlapping
 
 
-def _snap_nodes(row, series, tolerance):
-    series_selec = series.loc[~(series.index == row.name)]
-    indices = series_selec.loc[
-        (series_selec.distance(row["geometry"]) < tolerance)
-    ].index.to_list()
-    geom = None
-    if indices:
-        indices.sort()
-        if indices[0] < row.name:
-            geom = series.loc[indices[0]]
-    if geom is None:
-        geom = row["geometry"]
-
-    return geom
-
-
-def _get_nodes(gdf, tolerance):
+def _get_nodes(gdf):
     # start and end-nodes to GeoSeries
     nodes_series = gdf["geometry"].apply(lambda x: Point(x.coords[0]))
     nodes_series = pd.concat(
         [nodes_series, gdf["geometry"].apply(lambda x: Point(x.coords[-1]))]
     ).reset_index(drop=True)
 
-    # snap nodes within tolerance: nodes within tolerance get the coordinate
-    # of the first node.
-    nodes_series = gpd.GeoSeries(
-        gpd.GeoDataFrame(nodes_series, columns=["geometry"]).apply(
-            lambda x: _snap_nodes(x, nodes_series, tolerance), axis=1
-        )
-    )
-
     # as all is snapped we can filter unique points
     nodes_series = gpd.GeoSeries(nodes_series.unique())
 
@@ -156,9 +135,9 @@ def _get_nodes(gdf, tolerance):
 
 def _only_end_nodes(row, series, sindex, tolerance):
     geometry = row["geometry"]
-    indices = list(sindex.intersection(geometry.bounds))
+    indices = list(sindex.query(geometry.buffer(tolerance), predicate="intersects"))
     if indices:
-        series_select = series.loc[indices]
+        series_select = series.iloc[indices]
         only_end_nodes = all(
             _point_not_overlapping_line(i, geometry, tolerance) for i in series_select
         )
@@ -424,11 +403,11 @@ def not_overlapping(gdf, datamodel, tolerance):
     """
     sindex = gdf.sindex
     if (gdf.geom_type == "LineString").all():
-        return gdf.apply(
+        return gdf.progress_apply(
             lambda x: _not_overlapping_line(x, gdf, sindex, tolerance), axis=1
         )
     elif (gdf.geom_type == "Point").all():
-        return gdf.apply(
+        return gdf.progress_apply(
             lambda x: _not_overlapping_point(x, gdf, sindex, tolerance), axis=1
         )
     else:
@@ -456,12 +435,13 @@ def splitted_at_junction(gdf, datamodel, tolerance):
 
     """
     # get the nodes of the hydroobjects within tolerance
-    nodes_series = _get_nodes(gdf, tolerance)
+    # nodes_series = _get_nodes(gdf, tolerance=None)
+    nodes_series = _get_nodes(gdf)
 
     # check for lines if there are nodes on segment outside tolerance of
-    # the start-node and end-node.
+    # the line's start-node and end-node.
     sindex = nodes_series.sindex
-    return gdf.apply(
+    return gdf.progress_apply(
         (lambda x: _only_end_nodes(x, nodes_series, sindex, tolerance)), axis=1
     )
 
@@ -493,7 +473,7 @@ def structures_at_intersections(gdf, datamodel, structures, tolerance):
     sindex = gdf.sindex
     struc_sindex = struc_series.sindex
     # return result
-    return gdf.apply(
+    return gdf.progress_apply(
         lambda x: _structures_at_intersections(
             x, gdf, sindex, struc_series, struc_sindex, tolerance
         ),
@@ -520,11 +500,11 @@ def no_dangling_node(gdf, datamodel, tolerance):
         Default dtype is bool
 
     """
-    end_nodes_series = gdf["geometry"].apply(lambda x: Point(x.coords[-1]))
-    series = gdf["geometry"].apply(lambda x: Point(x.coords[0]))
+    end_nodes_series = gdf["geometry"].progress_apply(lambda x: Point(x.coords[-1]))
+    series = gdf["geometry"].progress_apply(lambda x: Point(x.coords[0]))
     sindex = series.sindex
 
-    return end_nodes_series.apply(
+    return end_nodes_series.progress_apply(
         lambda x: _intersects_end_node(x, series, sindex, tolerance)
     )
 
@@ -561,7 +541,7 @@ def structures_at_boundaries(gdf, datamodel, areas, structures, tolerance, dista
     struc_series = _layers_from_datamodel(structures, datamodel)
     struc_sindex = struc_series.sindex
 
-    return gdf.apply(
+    return gdf.progress_apply(
         lambda x: _structures_at_boundaries(
             x, areas_gdf, areas_sindex, struc_series, struc_sindex, tolerance, distance
         ),
@@ -591,7 +571,9 @@ def distant_to_others(gdf, datamodel, distance):
 
     sindex = gdf.sindex
 
-    return gdf.apply(lambda x: _distant_to_others(x, gdf, sindex, distance), axis=1)
+    return gdf.progress_apply(
+        lambda x: _distant_to_others(x, gdf, sindex, distance), axis=1
+    )
 
 
 def structures_at_nodes(gdf, datamodel, structures, tolerance):
@@ -618,7 +600,7 @@ def structures_at_nodes(gdf, datamodel, structures, tolerance):
     struc_series = _layers_from_datamodel(structures, datamodel)
     struc_sindex = struc_series.sindex
 
-    return gdf["geometry"].apply(
+    return gdf["geometry"].progress_apply(
         lambda x: _no_struc_on_line(x, struc_series, struc_sindex, tolerance)
     )
 
@@ -663,7 +645,7 @@ def compare_longitudinal(
     )
 
     geometry.find_nearest_branch(branches=branches, geometries=gdf, method="overall")
-    return gdf.apply(
+    return gdf.progress_apply(
         lambda x: _compare_longitudinal(
             x, parameter, compare_gdf, compare_parameter, direction, logical_operator
         ),

diff --git a/hydamo_validation/summaries.py b/hydamo_validation/summaries.py
@@ -1,4 +1,3 @@
-import logging
 import geopandas as gpd
 import pandas as pd
 from pathlib import Path
@@ -10,11 +9,10 @@
 
 
 class LayersSummary:
-    def __init__(self, log_level="DEBUG", date_check=pd.Timestamp.now().isoformat()):
+    def __init__(self, logger, date_check=pd.Timestamp.now().isoformat()):
         self.geo_types = {}
         self.date_check = date_check
-        self.logger = logging.getLogger(__name__)
-        self.logger.setLevel(getattr(logging, log_level))
+        self.logger = logger
 
     def _get_properties(self, gdf):
         properties = {"nen3610id": "str"}

diff --git a/hydamo_validation/validator.py b/hydamo_validation/validator.py
@@ -19,14 +19,17 @@
     missing_layers,
     fields_syntax,
 )
+import sys
 import traceback
 
+
 OUTPUT_TYPES = ["geopackage", "geojson", "csv"]
 LOG_LEVELS = Literal["INFO", "DEBUG"]
-INCLUDE_COLUMNS = ["nen3610id", "code"]
+INCLUDE_COLUMNS = ["nen3610id", "code", "categorieoppwaterlichaam"]
 SCHEMAS_PATH = Path(__file__).parent.joinpath(r"./schemas")
 HYDAMO_SCHEMAS_PATH = SCHEMAS_PATH.joinpath("hydamo")
 RULES_SCHEMAS_PATH = SCHEMAS_PATH.joinpath("rules")
+LOGGING_FORMAT = "%(asctime)s %(levelname)s %(name)s - %(message)s"
 
 
 def _read_schema(version, schemas_path):
@@ -38,17 +41,23 @@ def _read_schema(version, schemas_path):
 
 def _init_logger(log_level):
     """Init logger for validator."""
+
+    # Set up logging to console
+    logging.basicConfig(
+        level=getattr(logging, log_level),
+        format=LOGGING_FORMAT,
+        handlers=[logging.StreamHandler(sys.stdout)],
+        force=True,
+    )
+    # Get logger
     logger = logging.getLogger(__name__)
-    logger.setLevel(getattr(logging, log_level))
     return logger
 
 
 def _add_log_file(logger, log_file):
     """Add log-file to existing logger"""
     fh = logging.FileHandler(log_file)
-    fh.setFormatter(
-        logging.Formatter("%(asctime)s %(name)s %(levelname)s - %(message)s")
-    )
+    fh.setFormatter(logging.Formatter(LOGGING_FORMAT))
     fh.setLevel(logging.DEBUG)
     logger.addHandler(fh)
     return logger
@@ -190,7 +199,7 @@ def _validator(
         logger.info("init validatie")
         date_check = pd.Timestamp.now().isoformat()
         result_summary = ResultSummary(date_check=date_check)
-        layers_summary = LayersSummary(date_check=date_check)
+        layers_summary = LayersSummary(logger=logger, date_check=date_check)
         # check if all files are present
         # create a results_path
         permission_error = False
@@ -219,8 +228,8 @@ def _validator(
             if not path.exists():
                 missing_paths += [str(path)]
         if missing_paths:
-            result_summary.error += [f'missing_paths: {",".join(missing_paths)}']
-            raise FileNotFoundError(f'missing_paths: {",".join(missing_paths)}')
+            result_summary.error += [f"missing_paths: {','.join(missing_paths)}"]
+            raise FileNotFoundError(f"missing_paths: {','.join(missing_paths)}")
         else:
             validation_rules_sets = read_validation_rules(
                 validation_rules_json, result_summary
@@ -232,7 +241,7 @@ def _validator(
         ]
         if unsupported_output_types:
             error_message = (
-                r"unsupported output types: " f'{",".join(unsupported_output_types)}'
+                r"unsupported output types: " f"{','.join(unsupported_output_types)}"
             )
             result_summary.error += [error_message]
             raise TypeError(error_message)

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -11,6 +11,7 @@ nav:
     - Guides:
         - Get Started: guides/get_started.md
         - Contribute: guides/contribute.md
+        - Release: guides/release.md
     - Code Reference:
         - DataSets: reference/datasets.md
         - HyDAMO: reference/hydamo.md

diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     "rasterio",
     "shapely>=2",
     "rasterstats",
+    "tqdm"
 ]
 
 dynamic = ["version"]

diff --git a/tests/test_dommelerwaard.py b/tests/test_dommelerwaard.py
@@ -30,6 +30,7 @@ def test_hydroobjects():
         [
             "nen3610id",
             "code",
+            "categorieoppwaterlichaam",
             "geometry",
             "syntax_breedte",
             "syntax_categorieoppwaterlichaam",

diff --git a/tests/test_summaries.py b/tests/test_summaries.py
@@ -3,6 +3,7 @@
 from hydamo_validation.datasets import DataSets
 from pathlib import Path
 import shutil
+import logging
 
 try:
     from .config import DATA_DIR
@@ -15,7 +16,7 @@
 exports_dir.mkdir(exist_ok=True)
 
 datasets = DataSets(dataset_path)
-layers_summary = LayersSummary()
+layers_summary = LayersSummary(logger=logging.getLogger("test_logger"))
 result_summary = ResultSummary()
 result_summary.dataset_layers = datasets.layers
-Original file line number
+Diff line change
@@ Expand Up / @@ -22,4 +22,5 @@ dependencies: @@
       - rasterstats==0.19.0
       - rtree
       - shapely==2.0.2
+      - tqdm
       - twine