From 65a7b86ae94c44c098aceb17b13bf3fa624716b9 Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Tue, 2 Dec 2025 10:48:56 +1100 Subject: [PATCH 01/18] fix: change to no default arguments and add unit_name_column as a required --- map2loop/sorter.py | 58 ++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/map2loop/sorter.py b/map2loop/sorter.py index 7d1a7672..738cfd25 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -65,12 +65,14 @@ class SorterUseNetworkX(Sorter): Sorter class which returns a sorted list of units based on the unit relationships using a topological graph sorting algorithm """ required_arguments: List[str] = [ - 'geology_data' + 'geology_data', + 'unit_name_column' ] def __init__( self, *, + unit_name_column:Optional[str]='name', unit_relationships: Optional[pandas.DataFrame] = None, geology_data: Optional[geopandas.GeoDataFrame] = None, ): @@ -82,6 +84,7 @@ def __init__( """ super().__init__() self.sorter_label = "SorterUseNetworkX" + self.unit_name_column = unit_name_column if geology_data is not None: self.set_geology_data(geology_data) elif unit_relationships is not None: @@ -97,7 +100,7 @@ def set_geology_data(self, geology_data: geopandas.GeoDataFrame): """ self._calculate_topology(geology_data) def _calculate_topology(self, geology_data: geopandas.GeoDataFrame): - if not geology_data: + if geology_data is None: raise ValueError("geology_data is required") if isinstance(geology_data, geopandas.GeoDataFrame) is False: @@ -166,12 +169,13 @@ class SorterAgeBased(Sorter): """ Sorter class which returns a sorted list of units based on the min and max ages of the units """ - required_arguments = ['min_age_column','max_age_column'] - def __init__(self,*, min_age_column:Optional[str]='minAge', max_age_column:Optional[str]='maxAge'): + required_arguments = ['min_age_column','max_age_column','unit_name_column'] + def __init__(self,*, unit_name_column:Optional[str]='name',min_age_column:Optional[str]='minAge', max_age_column:Optional[str]='maxAge'): """ Initialiser for age based sorter """ super().__init__() + self.unit_name_column = unit_name_column self.min_age_column = min_age_column self.max_age_column = max_age_column self.sorter_label = "SorterAgeBased" @@ -204,9 +208,9 @@ def sort(self, units: pandas.DataFrame) -> list: sorted_units = sorted_units.sort_values(by=["meanAge"]) logger.info("Stratigraphic order calculated using age based sorting") for _i, row in sorted_units.iterrows(): - logger.info(f"{row['name']} - {row['minAge']} - {row['maxAge']}") + logger.info(f"{row[self.unit_name_column]} - {row[self.min_age_column]} - {row[self.max_age_column]}") - return list(sorted_units["name"]) + return list(sorted_units[self.unit_name_column]) class SorterAlpha(Sorter): @@ -214,11 +218,12 @@ class SorterAlpha(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the units with lower number of contacting units """ - required_arguments = ['contacts'] + required_arguments = ['contacts', 'unit_name_column'] def __init__( self, *, contacts: Optional[geopandas.GeoDataFrame] = None, + unit_name_column:Optional[str]='name', ): """ Initialiser for adjacency based sorter @@ -228,6 +233,7 @@ def __init__( """ super().__init__() self.contacts = contacts + self.unit_name_column = unit_name_column self.sorter_label = "SorterAlpha" if 'UNITNAME_1' not in contacts.columns or 'UNITNAME_2' not in contacts.columns or 'length' not in contacts.columns: raise ValueError("contacts 
GeoDataFrame must contain 'UNITNAME_1', 'UNITNAME_2' and 'length' columns") @@ -301,11 +307,14 @@ class SorterMaximiseContacts(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the maximum length of each contact """ - required_arguments = ['contacts'] + required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column'] def __init__( self, *, contacts: Optional[geopandas.GeoDataFrame] = None, + unit_name_column: str = 'name', + unitname1_column: str = 'UNITNAME_1', + unitname2_column: str = 'UNITNAME_2', ): """ Initialiser for adjacency based sorter @@ -320,8 +329,11 @@ def __init__( self.route = None self.directed_graph = None self.contacts = contacts - if 'UNITNAME_1' not in contacts.columns or 'UNITNAME_2' not in contacts.columns or 'length' not in contacts.columns: - raise ValueError("contacts GeoDataFrame must contain 'UNITNAME_1', 'UNITNAME_2' and 'length' columns") + self.unit_name_column = unit_name_column + self.unitname1_column = unitname1_column + self.unitname2_column = unitname2_column + if self.unitname1_column not in contacts.columns or self.unitname2_column not in contacts.columns or 'length' not in contacts.columns: + raise ValueError(f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns") def sort(self, units: pandas.DataFrame) -> list: """ @@ -339,13 +351,13 @@ def sort(self, units: pandas.DataFrame) -> list: raise ValueError("SorterMaximiseContacts requires 'contacts' argument") sorted_contacts = self.contacts.sort_values(by="length", ascending=False) self.graph = nx.Graph() - unit_names = list(units["name"].unique()) + unit_names = list(units[self.unit_name_column].unique()) for unit in unit_names: ## some units may not have any contacts e.g. if they are intrusives or sills. 
If we leave this then the ## sorter crashes if ( - unit not in sorted_contacts['UNITNAME_1'].values - or unit not in sorted_contacts['UNITNAME_2'].values + unit not in sorted_contacts[self.unitname1_column].values + or unit not in sorted_contacts[self.unitname2_column].values ): continue self.graph.add_node(unit, name=unit) @@ -353,7 +365,7 @@ def sort(self, units: pandas.DataFrame) -> list: max_weight = max(list(sorted_contacts["length"])) + 1 sorted_contacts['length'] /= max_weight for _, row in sorted_contacts.iterrows(): - self.graph.add_edge(row["UNITNAME_1"], row["UNITNAME_2"], weight=(1 - row["length"])) + self.graph.add_edge(row[self.unitname1_column], row[self.unitname2_column], weight=(1 - row["length"])) self.route = nx_app.traveling_salesman_problem(self.graph) edge_list = list(nx.utils.pairwise(self.route)) @@ -384,10 +396,13 @@ class SorterObservationProjections(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units using the direction of observations to predict which unit is adjacent to the current one """ - required_arguments = ['contacts', 'geology_data', 'structure_data', 'dtm_data'] + required_arguments = ['contacts', 'geology_data', 'structure_data', 'dtm_data', 'unit_name_column', 'unitname1_column', 'unitname2_column'] def __init__( self, *, + unitname1_column: Optional[str] = 'UNITNAME_1', + unitname2_column: Optional[str] = 'UNITNAME_2', + unit_name_column: Optional[str] = 'name', contacts: Optional[geopandas.GeoDataFrame] = None, geology_data: Optional[geopandas.GeoDataFrame] = None, structure_data: Optional[geopandas.GeoDataFrame] = None, @@ -409,9 +424,12 @@ def __init__( self.geology_data = geology_data self.structure_data = structure_data self.dtm_data = dtm_data + self.unit_name_column = unit_name_column self.sorter_label = "SorterObservationProjections" self.length = length self.lines = [] + self.unit1name_column = unitname1_column + self.unit2name_column = unitname2_column def sort(self, units: pandas.DataFrame) -> list: """ @@ -543,13 +561,13 @@ def sort(self, units: pandas.DataFrame) -> list: g_undirected = g.to_undirected() for unit in unit_names: if len(list(g_undirected.neighbors(unit))) < 1: - mask1 = self.contacts["UNITNAME_1"] == unit - mask2 = self.contacts["UNITNAME_2"] == unit + mask1 = self.contacts[self.unit1name_column] == unit + mask2 = self.contacts[self.unit2name_column] == unit for _, row in self.contacts[mask1 | mask2].iterrows(): - if unit == row["UNITNAME_1"]: - g.add_edge(row["UNITNAME_2"], unit, weight=max_value * 10) + if unit == row[self.unit1name_column]: + g.add_edge(row[self.unit2name_column], unit, weight=max_value * 10) else: - g.add_edge(row["UNITNAME_1"], unit, weight=max_value * 10) + g.add_edge(row[self.unit1name_column], unit, weight=max_value * 10) # Run travelling salesman using the observation evidence as weighting route = nx_app.traveling_salesman_problem(g.to_undirected()) From 5226b49cfeda5592d878fc07e5ff960ba125f51e Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Tue, 2 Dec 2025 10:49:09 +1100 Subject: [PATCH 02/18] fix: set age based sorter args --- map2loop/project.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/map2loop/project.py b/map2loop/project.py index ec7f10f5..3cf355fd 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -614,6 +614,11 @@ def calculate_stratigraphic_order(self, take_best=False): self.sorter.structure_data = self.map_data.get_map_data(Datatype.STRUCTURE) if hasattr(self.sorter, 'dtm_data') and self.sorter.dtm_data is None: 
self.sorter.dtm_data = self.map_data.get_map_data(Datatype.DTM) + if hasattr(self.sorter, 'min_age_column') and self.sorter.min_age_column is None: + self.sorter.min_age_column = self.stratigraphic_column.get_min_age_column() + if hasattr(self.sorter, 'max_age_column') and self.sorter.max_age_column is None: + self.sorter.max_age_column = self.stratigraphic_column.get_max_age_column() + self.stratigraphic_column.column = self.sorter.sort( self.stratigraphic_column.stratigraphicUnits, ) From 6eb151882369e3e7a062f60722aac9b7e3436736 Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Tue, 2 Dec 2025 10:50:10 +1100 Subject: [PATCH 03/18] fix: project import for test --- tests/project/test_thickness_calculations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/project/test_thickness_calculations.py b/tests/project/test_thickness_calculations.py index 961e1a9c..7f601c84 100644 --- a/tests/project/test_thickness_calculations.py +++ b/tests/project/test_thickness_calculations.py @@ -5,7 +5,7 @@ from map2loop.utils import load_map2loop_data from map2loop.thickness_calculator import InterpolatedStructure, StructuralPoint -from map2loop import Project +from map2loop.project import Project # 1. self.stratigraphic_column.stratigraphicUnits, st_units = pandas.DataFrame( From b0f12b68cfea2e0d16935720f01240230d912595 Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Tue, 2 Dec 2025 11:14:20 +1100 Subject: [PATCH 04/18] fix: updating requirements for alpha sorter plus add check for empty contacts --- map2loop/sorter.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/map2loop/sorter.py b/map2loop/sorter.py index 738cfd25..b4aca705 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -9,6 +9,7 @@ from osgeo import gdal from map2loop.utils import value_from_raster from .logging import getLogger +import networkx as nx logger = getLogger(__name__) @@ -218,12 +219,14 @@ class SorterAlpha(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the units with lower number of contacting units """ - required_arguments = ['contacts', 'unit_name_column'] + required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column'] def __init__( self, *, contacts: Optional[geopandas.GeoDataFrame] = None, unit_name_column:Optional[str]='name', + unitname1_column:Optional[str]='UNITNAME_1', + unitname2_column:Optional[str]='UNITNAME_2', ): """ Initialiser for adjacency based sorter @@ -235,9 +238,11 @@ def __init__( self.contacts = contacts self.unit_name_column = unit_name_column self.sorter_label = "SorterAlpha" - if 'UNITNAME_1' not in contacts.columns or 'UNITNAME_2' not in contacts.columns or 'length' not in contacts.columns: - raise ValueError("contacts GeoDataFrame must contain 'UNITNAME_1', 'UNITNAME_2' and 'length' columns") - + self.unitname1_column = unitname1_column + self.unitname2_column = unitname2_column + if self.unitname1_column not in contacts.columns or self.unitname2_column not in contacts.columns or 'length' not in contacts.columns: + raise ValueError(f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns") + def sort(self, units: pandas.DataFrame) -> list: """ Execute sorter method takes unit data and returns the sorted unit names based on this algorithm. 
@@ -250,20 +255,22 @@ def sort(self, units: pandas.DataFrame) -> list: """ if self.contacts is None: raise ValueError("contacts must be set (not None) before calling sort() in SorterAlpha.") - import networkx as nx - if self.contacts is None: - raise ValueError("SorterAlpha requires 'contacts' argument") + if len(self.contacts) == 0: + raise ValueError("contacts GeoDataFrame is empty in SorterAlpha.") + if 'length' not in self.contacts.columns: + self.contacts['length'] = self.contacts.geometry.length + self.contacts['length'] = self.contacts['length'].astype(float) sorted_contacts = self.contacts.sort_values(by="length", ascending=False)[ - ["UNITNAME_1", "UNITNAME_2", "length"] + [self.unitname1_column, self.unitname2_column, "length"] ] - unit_names = list(units["name"].unique()) + unit_names = list(units[self.unit_name_column].unique()) graph = nx.Graph() for unit in unit_names: graph.add_node(unit, name=unit) max_weight = max(list(sorted_contacts["length"])) + 1 for _, row in sorted_contacts.iterrows(): graph.add_edge( - row["UNITNAME_1"], row["UNITNAME_2"], weight=int(max_weight - row["length"]) + row[self.unitname1_column], row[self.unitname2_column], weight=int(max_weight - row["length"]) ) cnode = None @@ -534,6 +541,7 @@ def sort(self, units: pandas.DataFrame) -> list: df = pandas.DataFrame(0, index=unit_names, columns=unit_names) for younger, older in ordered_unit_observations: df.loc[younger, older] += 1 + print(df, df.max()) max_value = max(df.max()) # Using the older/younger matrix create a directed graph From f2cf211ce61e9547ac9a00e4d70f00cba24f3d4d Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Tue, 2 Dec 2025 11:15:38 +1100 Subject: [PATCH 05/18] fix: update maximise contacts to compute length and check for empty contacts --- map2loop/sorter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/map2loop/sorter.py b/map2loop/sorter.py index b4aca705..02343e3a 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -356,6 +356,11 @@ def sort(self, units: pandas.DataFrame) -> list: import networkx.algorithms.approximation as nx_app if self.contacts is None: raise ValueError("SorterMaximiseContacts requires 'contacts' argument") + if len(self.contacts) == 0: + raise ValueError("contacts GeoDataFrame is empty in SorterMaximiseContacts.") + if "length" not in self.contacts.columns: + self.contacts['length'] = self.contacts.geometry.length + self.contacts['length'] = self.contacts['length'].astype(float) sorted_contacts = self.contacts.sort_values(by="length", ascending=False) self.graph = nx.Graph() unit_names = list(units[self.unit_name_column].unique()) From 5fb0720d795a7123094c39f171c3f497f53eda07 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Tue, 16 Dec 2025 10:52:42 +1100 Subject: [PATCH 06/18] fix: add option to set logger level and handler. Makes passing handler from qgis to python possible --- map2loop/logging.py | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/map2loop/logging.py b/map2loop/logging.py index 816ac45e..b7e41b3f 100644 --- a/map2loop/logging.py +++ b/map2loop/logging.py @@ -66,3 +66,53 @@ def set_level(level: str): logger = logging.getLogger(name) logger.setLevel(level) logger.info(f"Logging level set to {level}") + + +logger = getLogger(__name__) +logger.info("Imported LoopStructural") + + +def setLogging(level="info", handler=None): + """Set the logging parameters for log file or custom handler. 
+ + Parameters + ---------- + level : str, optional + Logging level to set, by default "info" + Valid options: 'info', 'warning', 'error', 'debug' + handler : logging.Handler, optional + A logging handler to use instead of the default StreamHandler, by default None + + Examples + -------- + >>> from map2loop.logging import setLogging + >>> setLogging('debug') + >>> setLogging('info', logging.FileHandler('loop.log')) + """ + levels = get_levels() + level_value = levels.get(level, logging.WARNING) + + # Create default handler if none provided + if handler is None: + handler = logging.StreamHandler() + + formatter = logging.Formatter( + "%(levelname)s: %(asctime)s: %(filename)s:%(lineno)d -- %(message)s" + ) + handler.setFormatter(formatter) + handler.setLevel(level_value) + + # Replace handlers in all known loggers + for name in loggers: + logger = logging.getLogger(name) + logger.handlers = [] + logger.addHandler(handler) + logger.setLevel(level_value) + + # Also apply to main module logger + main_logger = logging.getLogger(__name__) + main_logger.handlers = [] + main_logger.addHandler(handler) + main_logger.setLevel(level_value) + + main_logger.info(f"Set logging to {level}") From 71f473cd7e18d984b474de3a1b4bfd91bac98d81 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Tue, 16 Dec 2025 10:53:19 +1100 Subject: [PATCH 07/18] fix: updating incorrect type hints --- map2loop/utils/utility_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/map2loop/utils/utility_functions.py b/map2loop/utils/utility_functions.py index 7c864fc0..cabb6896 100644 --- a/map2loop/utils/utility_functions.py +++ b/map2loop/utils/utility_functions.py @@ -184,7 +184,7 @@ def find_segment_strike_from_pt( @beartype.beartype def calculate_endpoints( - start_point: shapely.geometry.Point, azimuth_deg: float, distance: int, bbox: pandas.DataFrame + start_point: shapely.geometry.Point, azimuth_deg: float, distance: Union[float,int], bbox: pandas.DataFrame ) -> shapely.geometry.LineString: """ Calculate the endpoints of a line segment given a start point, azimuth angle, distance, and bounding box. From a675be2d6cef5646a682dbcd5c706681830d1ad9 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Tue, 16 Dec 2025 10:53:54 +1100 Subject: [PATCH 08/18] fix: adding safeguarding and fixing merge --- map2loop/thickness_calculator.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index 13a1b806..c8949bbb 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -107,7 +107,7 @@ def _check_thickness_percentage_calculations(self, thicknesses: pandas.DataFrame f"have a calculated thickness of -1. This may indicate that {self.thickness_calculator_label} " f"is not suitable for this dataset." 
) - + class ThicknessCalculatorAlpha(ThicknessCalculator): """ ThicknessCalculator class which estimates unit thickness based on units, basal_contacts and stratigraphic order @@ -446,8 +446,10 @@ def compute( ) # Combine all location_tracking DataFrames into a single DataFrame - combined_location_tracking = pandas.concat(_location_tracking, ignore_index=True) - + if _location_tracking and len(_location_tracking) > 0: + combined_location_tracking = pandas.concat(_location_tracking, ignore_index=True) + else: + combined_location_tracking = pandas.DataFrame() # Save the combined DataFrame as an attribute of the class self.location_tracking = combined_location_tracking @@ -486,7 +488,6 @@ def __init__( self.strike_allowance = 30 self.lines = None - @beartype.beartype def compute( self, @@ -546,7 +547,9 @@ def compute( crs=basal_contacts.crs, ) # add unitname to the sampled structures - sampled_structures['unit_name'] = geopandas.sjoin(sampled_structures, geology)['UNITNAME'] + sampled_structures['unit_name'] = geopandas.sjoin( + sampled_structures.drop(columns=['UNITNAME']), geology, how='inner', predicate='within' + )['UNITNAME'] # remove nans from sampled structures # this happens when there are strati measurements within intrusions. If intrusions are removed from the geology map, unit_name will then return a nan @@ -677,7 +680,7 @@ def compute( if not (b_s[0] < strike1 < b_s[1] and b_s[0] < strike2 < b_s[1]): continue - #build the debug info + # build the debug info line = shapely.geometry.LineString([int_pt1, int_pt2]) _lines.append(line) _dip.append(measurement['DIP']) @@ -688,17 +691,17 @@ def compute( # if length is higher than max_line_length, skip if self.max_line_length is not None and L > self.max_line_length: continue - + # calculate thickness thickness = L * math.sin(math.radians(measurement['DIP'])) thicknesses.append(thickness) lis.append(litho_in) - + # create the debug gdf self.lines = geopandas.GeoDataFrame(geometry=_lines, crs=basal_contacts.crs) self.lines["DIP"] = _dip - + # create a DataFrame of the thicknesses median and standard deviation by lithology result = pandas.DataFrame({'unit': lis, 'thickness': thicknesses}) result = result.groupby('unit')['thickness'].agg(['median', 'mean', 'std']).reset_index() @@ -709,7 +712,7 @@ def compute( output_units['ThicknessMedian'] = numpy.full(len(output_units), numpy.nan) output_units['ThicknessMean'] = numpy.full(len(output_units), numpy.nan) output_units['ThicknessStdDev'] = numpy.full(len(output_units), numpy.nan) - + # find which units have no thickness calculated names_not_in_result = units[~units['name'].isin(result['unit'])]['name'].to_list() # assign the thicknesses to the each unit @@ -718,12 +721,11 @@ def compute( output_units.loc[idx, 'ThicknessMedian'] = unit['median'] output_units.loc[idx, 'ThicknessMean'] = unit['mean'] output_units.loc[idx, 'ThicknessStdDev'] = unit['std'] - + output_units["ThicknessMean"] = output_units["ThicknessMean"].fillna(-1) output_units["ThicknessMedian"] = output_units["ThicknessMedian"].fillna(-1) output_units["ThicknessStdDev"] = output_units["ThicknessStdDev"].fillna(-1) - - + # handle the units that have no thickness for unit in names_not_in_result: # if no thickness has been calculated for the unit @@ -754,5 +756,5 @@ def compute( output_units.loc[output_units["name"] == unit, "ThicknessStdDev"] = -1 self._check_thickness_percentage_calculations(output_units) - + return output_units From 17e14cae5cd466e8e3185e2569c311a611e0bed4 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Tue, 
16 Dec 2025 13:33:32 +1100 Subject: [PATCH 09/18] fix: save location tracking as a line geodataframe for qgis compatibility --- map2loop/thickness_calculator.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index c8949bbb..172a1665 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -23,6 +23,7 @@ import pandas import geopandas import shapely +from shapely.geometry import LineString import math from osgeo import gdal from shapely.errors import UnsupportedGEOSVersionError @@ -451,8 +452,17 @@ def compute( else: combined_location_tracking = pandas.DataFrame() # Save the combined DataFrame as an attribute of the class - self.location_tracking = combined_location_tracking - + # self.location_tracking = combined_location_tracking + combined_location_tracking['geometry'] = combined_location_tracking.apply( + lambda row: LineString([ + (row['p1_x'], row['p1_y'], row['p1_z']), + (row['p2_x'], row['p2_y'], row['p2_z']) + ]), + axis=1 + ) + + # Convert to GeoDataFrame and set CRS to match basal_contacts + self.location_tracking = geopandas.GeoDataFrame(combined_location_tracking, geometry='geometry', crs=basal_contacts.crs) # Create GeoDataFrame for lines self.lines = geopandas.GeoDataFrame(geometry=_lines, crs=basal_contacts.crs) self.lines['dip'] = _dips From 46fade8bc8a673c0b8c067115970da70fed209bc Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Tue, 16 Dec 2025 13:55:25 +1100 Subject: [PATCH 10/18] fix: only store geodataframe if layer is not empty --- map2loop/thickness_calculator.py | 48 +++++++++++++++++--------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index 172a1665..31596e4a 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -233,7 +233,6 @@ def __init__( super().__init__(dtm_data, bounding_box, max_line_length, is_strike) self.thickness_calculator_label = "InterpolatedStructure" self.lines = None - @beartype.beartype def compute( @@ -292,7 +291,7 @@ def compute( # set the crs of the contacts to the crs of the units contacts = contacts.set_crs(crs=basal_contacts.crs) if self.dtm_data is not None: - # get the elevation Z of the contacts + # get the elevation Z of the contacts contacts = set_z_values_from_raster_df(self.dtm_data, contacts) # update the geometry of the contact points to include the Z value contacts["geometry"] = contacts.apply( @@ -341,11 +340,11 @@ def compute( interpolated_orientations = interpolated_orientations[ ["geometry", "dip", "UNITNAME"] ].copy() - + _lines = [] _dips = [] _location_tracking = [] - + for i in range(0, len(stratigraphic_order) - 1): if ( stratigraphic_order[i] in basal_unit_list @@ -367,21 +366,21 @@ def compute( dip = interpolated_orientations.loc[ interpolated_orientations["UNITNAME"] == stratigraphic_order[i], "dip" ].to_numpy() - + _thickness = [] - + for _, row in basal_contact.iterrows(): # find the shortest line between the basal contact points and top contact points short_line = shapely.shortest_line(row.geometry, top_contact_geometry) _lines.append(short_line[0]) - - # check if the short line is + + # check if the short line is if self.max_line_length is not None and short_line.length > self.max_line_length: continue if self.dtm_data is not None: inv_geotransform = gdal.InvGeoTransform(self.dtm_data.GetGeoTransform()) data_array = 
numpy.array(self.dtm_data.GetRasterBand(1).ReadAsArray().T) - + # extract the end points of the shortest line p1 = numpy.zeros(3) p1[0] = numpy.asarray(short_line[0].coords[0][0]) @@ -409,7 +408,7 @@ def compute( _dips.append(_dip) # calculate the true thickness t = L * sin(dip) thickness = line_length * numpy.sin(_dip) - + # add location tracking location_tracking = pandas.DataFrame( { @@ -420,7 +419,7 @@ def compute( } ) _location_tracking.append(location_tracking) - + # Average thickness along the shortest line if all(numpy.isnan(thickness)): pass @@ -445,31 +444,36 @@ def compute( logger.warning( f"Thickness Calculator InterpolatedStructure: Cannot calculate thickness between {stratigraphic_order[i]} and {stratigraphic_order[i + 1]}\n" ) - + # Combine all location_tracking DataFrames into a single DataFrame if _location_tracking and len(_location_tracking) > 0: combined_location_tracking = pandas.concat(_location_tracking, ignore_index=True) + combined_location_tracking['geometry'] = combined_location_tracking.apply( + lambda row: LineString( + [ + (row['p1_x'], row['p1_y'], row['p1_z']), + (row['p2_x'], row['p2_y'], row['p2_z']), + ] + ), + axis=1, + ) else: combined_location_tracking = pandas.DataFrame() # Save the combined DataFrame as an attribute of the class # self.location_tracking = combined_location_tracking - combined_location_tracking['geometry'] = combined_location_tracking.apply( - lambda row: LineString([ - (row['p1_x'], row['p1_y'], row['p1_z']), - (row['p2_x'], row['p2_y'], row['p2_z']) - ]), - axis=1 - ) # Convert to GeoDataFrame and set CRS to match basal_contacts - self.location_tracking = geopandas.GeoDataFrame(combined_location_tracking, geometry='geometry', crs=basal_contacts.crs) + if not combined_location_tracking.empty: + self.location_tracking = geopandas.GeoDataFrame(combined_location_tracking, geometry='geometry', crs=basal_contacts.crs) + else: + self.location_tracking = geopandas.GeoDataFrame(columns=['p1_x', 'p1_y', 'p1_z', 'p2_x', 'p2_y', 'p2_z', 'thickness', 'unit', 'geometry'], crs=basal_contacts.crs) # Create GeoDataFrame for lines self.lines = geopandas.GeoDataFrame(geometry=_lines, crs=basal_contacts.crs) self.lines['dip'] = _dips - + # Check thickness calculation self._check_thickness_percentage_calculations(thicknesses) - + return thicknesses class StructuralPoint(ThicknessCalculator): From 86b658c582c8ca6102729e8f8fd7370f28a113f3 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Wed, 17 Dec 2025 10:13:16 +1100 Subject: [PATCH 11/18] fix: updating logic --- map2loop/thickness_calculator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index 31596e4a..ca3c5da1 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -560,9 +560,11 @@ def compute( geometry=geopandas.points_from_xy(sampled_structures.X, sampled_structures.Y), crs=basal_contacts.crs, ) + if 'UNITNAME' in sampled_structures.columns: + sampled_structures = sampled_structures.drop(columns=['UNITNAME']) # add unitname to the sampled structures sampled_structures['unit_name'] = geopandas.sjoin( - sampled_structures.drop(columns=['UNITNAME']), geology, how='inner', predicate='within' + sampled_structures, geology, how='inner', predicate='within' )['UNITNAME'] # remove nans from sampled structures From 6e2ee69f08d913ce3a55df3f08144ded01922829 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Thu, 18 Dec 2025 08:26:31 +1100 Subject: [PATCH 12/18] fix: beartype issues --- 
map2loop/thickness_calculator.py | 2 +- map2loop/utils/utility_functions.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index ca3c5da1..3db49c8f 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -624,7 +624,7 @@ def compute( # draw orthogonal line to the strike (default value 10Km), and clip it by the bounding box of the lithology if self.max_line_length is None: self.max_line_length = 10000 - B = calculate_endpoints(measurement_pt, strike, self.max_line_length, bbox_poly) + B = calculate_endpoints(measurement_pt, float(strike), float(self.max_line_length), bbox_poly) b = geopandas.GeoDataFrame({'geometry': [B]}).set_crs(basal_contacts.crs) # find all intersections diff --git a/map2loop/utils/utility_functions.py b/map2loop/utils/utility_functions.py index cabb6896..b4ebfb95 100644 --- a/map2loop/utils/utility_functions.py +++ b/map2loop/utils/utility_functions.py @@ -184,8 +184,8 @@ def find_segment_strike_from_pt( @beartype.beartype def calculate_endpoints( - start_point: shapely.geometry.Point, azimuth_deg: float, distance: Union[float,int], bbox: pandas.DataFrame -) -> shapely.geometry.LineString: + start_point: shapely.geometry.Point, azimuth_deg: Union[float,int], distance: Union[float,int], bbox: pandas.DataFrame +) -> Union[shapely.geometry.LineString,shapely.geometry.GeometryCollection]: """ Calculate the endpoints of a line segment given a start point, azimuth angle, distance, and bounding box. @@ -220,7 +220,7 @@ def calculate_endpoints( line = shapely.geometry.LineString([left_endpoint, right_endpoint]) new_line = shapely.ops.clip_by_rect(line, minx, miny, maxx, maxy) - + print(type(new_line)) return new_line From 92f754b2511a18f41c0f990ca6f6391774858ad1 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Thu, 18 Dec 2025 08:36:05 +1100 Subject: [PATCH 13/18] fix: add ability to specify fault field to topology --- map2loop/topology.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/map2loop/topology.py b/map2loop/topology.py index d7a90dc4..88b4e643 100644 --- a/map2loop/topology.py +++ b/map2loop/topology.py @@ -36,7 +36,8 @@ class Topology: def __init__( self, geology_data: geopandas.GeoDataFrame, fault_data: Optional[geopandas.GeoDataFrame] = None, - verbose_level: VerboseLevel = VerboseLevel.NONE + verbose_level: VerboseLevel = VerboseLevel.NONE, + id_field: str = 'ID', ): """ The initialiser for the map2model wrapper @@ -55,7 +56,7 @@ def __init__( self.fault_data = fault_data self.verbose_level = verbose_level self.buffer_radius = 500 - + self.fault_id_field = id_field @property def fault_fault_relationships(self): if self._fault_fault_relationships is None: From 3c0e0e075b9d91bc32341cc8e2f249a828b00ee4 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Thu, 18 Dec 2025 17:07:09 +1100 Subject: [PATCH 14/18] fix: add call methods to allow for generic debug exporting --- map2loop/sampler.py | 3 ++- map2loop/sorter.py | 4 +++- map2loop/thickness_calculator.py | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/map2loop/sampler.py b/map2loop/sampler.py index b4c7835c..0544f2f5 100644 --- a/map2loop/sampler.py +++ b/map2loop/sampler.py @@ -52,7 +52,8 @@ def sample( pandas.DataFrame: data frame containing samples """ pass - + def __call__(self, *args): + return self.sample(*args) class SamplerDecimator(Sampler): """ diff --git a/map2loop/sorter.py b/map2loop/sorter.py index 02343e3a..730b2f41 100644 ---
a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -59,7 +59,9 @@ def sort(self, units: pandas.DataFrame) -> list: list: sorted list of unit names """ pass - + + def __call__(self, *args): + return self.sort(*args) class SorterUseNetworkX(Sorter): """ diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index 3db49c8f..a5a67ae7 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -108,6 +108,8 @@ def _check_thickness_percentage_calculations(self, thicknesses: pandas.DataFrame f"have a calculated thickness of -1. This may indicate that {self.thickness_calculator_label} " f"is not suitable for this dataset." ) + def __call__(self, *args): + return self.compute(*args) class ThicknessCalculatorAlpha(ThicknessCalculator): """ From e1cb1e642ffc9a3c206d51a8ccf78a064d645b5f Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Mon, 19 Jan 2026 10:57:24 +1100 Subject: [PATCH 15/18] fix: replace args for kwargs --- map2loop/sampler.py | 4 ++-- map2loop/sorter.py | 4 ++-- map2loop/thickness_calculator.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/map2loop/sampler.py b/map2loop/sampler.py index 0544f2f5..6aad3550 100644 --- a/map2loop/sampler.py +++ b/map2loop/sampler.py @@ -52,8 +52,8 @@ def sample( pandas.DataFrame: data frame containing samples """ pass - def __call__(self, *args): - return self.sample(*args) + def __call__(self, **kwargs): + return self.sample(**kwargs) class SamplerDecimator(Sampler): """ diff --git a/map2loop/sorter.py b/map2loop/sorter.py index 730b2f41..4e162119 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -60,8 +60,8 @@ def sort(self, units: pandas.DataFrame) -> list: """ pass - def __call__(self, *args): - return self.sort(*args) + def __call__(self, **kwargs): + return self.sort(**kwargs) class SorterUseNetworkX(Sorter): """ diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index a5a67ae7..442162fd 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -108,8 +108,8 @@ def _check_thickness_percentage_calculations(self, thicknesses: pandas.DataFrame f"have a calculated thickness of -1. This may indicate that {self.thickness_calculator_label} " f"is not suitable for this dataset." 
) - def __call__(self, *args): - return self.compute(*args) + def __call__(self, **kwargs): + return self.compute(**kwargs) class ThicknessCalculatorAlpha(ThicknessCalculator): """ From 48711c3b241775151a2c447176ccfb537e3e06e3 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Mon, 19 Jan 2026 11:01:23 +1100 Subject: [PATCH 16/18] Enable auto-update for conda in workflow --- .github/workflows/linting_and_testing.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linting_and_testing.yml b/.github/workflows/linting_and_testing.yml index 0259ccb5..1af945f6 100644 --- a/.github/workflows/linting_and_testing.yml +++ b/.github/workflows/linting_and_testing.yml @@ -46,9 +46,8 @@ jobs: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: + auto-update-conda: true python-version: ${{ matrix.python-version }} - conda-remove-defaults: "true" - - name: Install dependencies for windows python 3.9 and 3.10 if: ${{ matrix.os == 'windows-latest' && (matrix.python-version == '3.9' || matrix.python-version == '3.10') }} run: | @@ -77,4 +76,4 @@ jobs: - name: Run tests run: | - conda run -n test pytest -v \ No newline at end of file + conda run -n test pytest -v From 40216402634b5b9349719b681fb48525080b48a8 Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Mon, 19 Jan 2026 11:08:42 +1100 Subject: [PATCH 17/18] fix: applying copilot suggestions --- map2loop/logging.py | 2 +- map2loop/utils/utility_functions.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/map2loop/logging.py b/map2loop/logging.py index b7e41b3f..c0fe217c 100644 --- a/map2loop/logging.py +++ b/map2loop/logging.py @@ -69,7 +69,7 @@ def set_level(level: str): logger = getLogger(__name__) -logger.info("Imported LoopStructural") +logger.info("Imported map2loop.logging module") def setLogging(level="info", handler=None): diff --git a/map2loop/utils/utility_functions.py b/map2loop/utils/utility_functions.py index b4ebfb95..cb0a7342 100644 --- a/map2loop/utils/utility_functions.py +++ b/map2loop/utils/utility_functions.py @@ -220,7 +220,6 @@ def calculate_endpoints( line = shapely.geometry.LineString([left_endpoint, right_endpoint]) new_line = shapely.ops.clip_by_rect(line, minx, miny, maxx, maxy) - print(type(new_line)) return new_line From 1febaa90f1d70c83ab3529ae071486b3da824f10 Mon Sep 17 00:00:00 2001 From: lachlangrose Date: Mon, 19 Jan 2026 11:11:09 +1100 Subject: [PATCH 18/18] style: formatting --- map2loop/sorter.py | 135 +++++++++++++++++++--------- map2loop/thickness_calculator.py | 1 - map2loop/utils/utility_functions.py | 42 +++++---- 3 files changed, 114 insertions(+), 64 deletions(-) diff --git a/map2loop/sorter.py b/map2loop/sorter.py index 4e162119..123d357c 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -22,11 +22,10 @@ class Sorter(ABC): ABC (ABC): Derived from Abstract Base Class """ - def __init__( - self): + def __init__(self): """ Initialiser for Sorter - + Args: unit_relationships (pandas.DataFrame): the relationships between units (columns must contain ["Index1", "Unitname1", "Index2", "Unitname2"]) contacts (pandas.DataFrame): unit contacts with length of the contacts in metres @@ -35,7 +34,6 @@ def __init__( dtm_data (gdal.Dataset): the dtm data """ self.sorter_label = "SorterBaseClass" - def type(self): """ @@ -59,29 +57,28 @@ def sort(self, units: pandas.DataFrame) -> list: list: sorted list of unit names """ pass - + def __call__(self, **kwargs): return self.sort(**kwargs) + class SorterUseNetworkX(Sorter): """ Sorter 
class which returns a sorted list of units based on the unit relationships using a topological graph sorting algorithm """ - required_arguments: List[str] = [ - 'geology_data', - 'unit_name_column' - ] + + required_arguments: List[str] = ['geology_data', 'unit_name_column'] def __init__( self, *, - unit_name_column:Optional[str]='name', + unit_name_column: Optional[str] = 'name', unit_relationships: Optional[pandas.DataFrame] = None, geology_data: Optional[geopandas.GeoDataFrame] = None, ): """ Initialiser for networkx graph sorter - + Args: unit_relationships (pandas.DataFrame): the relationships between units """ @@ -94,6 +91,7 @@ def __init__( self.unit_relationships = unit_relationships else: self.unit_relationships = None + def set_geology_data(self, geology_data: geopandas.GeoDataFrame): """ Set geology data and calculate topology and unit relationships @@ -102,19 +100,20 @@ def set_geology_data(self, geology_data: geopandas.GeoDataFrame): geology_data (geopandas.GeoDataFrame): the geology data """ self._calculate_topology(geology_data) + def _calculate_topology(self, geology_data: geopandas.GeoDataFrame): if geology_data is None: raise ValueError("geology_data is required") if isinstance(geology_data, geopandas.GeoDataFrame) is False: raise TypeError("geology_data must be a geopandas.GeoDataFrame") - + if 'UNITNAME' not in geology_data.columns: raise ValueError("geology_data must contain 'UNITNAME' column") - + self.topology = Topology(geology_data=geology_data) self.unit_relationships = self.topology.get_unit_unit_relationships() - + @beartype.beartype def sort(self, units: pandas.DataFrame) -> list: """ @@ -127,6 +126,7 @@ def sort(self, units: pandas.DataFrame) -> list: list: the sorted unit names """ import networkx as nx + if self.unit_relationships is None: raise ValueError("SorterUseNetworkX requires 'unit_relationships' argument") graph = nx.DiGraph() @@ -156,24 +156,26 @@ def sort(self, units: pandas.DataFrame) -> list: class SorterUseHint(SorterUseNetworkX): required_arguments: List[str] = ['unit_relationships'] - def __init__( - self, - *, - geology_data: Optional[geopandas.GeoDataFrame] = None, - ): - logger.warning( - "SorterUseHint is deprecated in v3.2. Using SorterUseNetworkX instead" - ) + + def __init__(self, *, geology_data: Optional[geopandas.GeoDataFrame] = None): + logger.warning("SorterUseHint is deprecated in v3.2. 
Using SorterUseNetworkX instead") super().__init__(geology_data=geology_data) - class SorterAgeBased(Sorter): """ Sorter class which returns a sorted list of units based on the min and max ages of the units """ - required_arguments = ['min_age_column','max_age_column','unit_name_column'] - def __init__(self,*, unit_name_column:Optional[str]='name',min_age_column:Optional[str]='minAge', max_age_column:Optional[str]='maxAge'): + + required_arguments = ['min_age_column', 'max_age_column', 'unit_name_column'] + + def __init__( + self, + *, + unit_name_column: Optional[str] = 'name', + min_age_column: Optional[str] = 'minAge', + max_age_column: Optional[str] = 'maxAge', + ): """ Initialiser for age based sorter """ @@ -202,16 +204,22 @@ def sort(self, units: pandas.DataFrame) -> list: lambda row: (row[self.min_age_column] + row[self.max_age_column]) / 2.0, axis=1 ) else: - logger.error(f"Columns {self.min_age_column} and {self.max_age_column} must be present in units DataFrame") + logger.error( + f"Columns {self.min_age_column} and {self.max_age_column} must be present in units DataFrame" + ) logger.error(f"Available columns are: {units.columns.tolist()}") - raise ValueError(f"Columns {self.min_age_column} and {self.max_age_column} must be present in units DataFrame") + raise ValueError( + f"Columns {self.min_age_column} and {self.max_age_column} must be present in units DataFrame" + ) if "group" in units.columns: sorted_units = sorted_units.sort_values(by=["group", "meanAge"]) else: sorted_units = sorted_units.sort_values(by=["meanAge"]) logger.info("Stratigraphic order calculated using age based sorting") for _i, row in sorted_units.iterrows(): - logger.info(f"{row[self.unit_name_column]} - {row[self.min_age_column]} - {row[self.max_age_column]}") + logger.info( + f"{row[self.unit_name_column]} - {row[self.min_age_column]} - {row[self.max_age_column]}" + ) return list(sorted_units[self.unit_name_column]) @@ -221,18 +229,20 @@ class SorterAlpha(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the units with lower number of contacting units """ + required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column'] + def __init__( self, *, contacts: Optional[geopandas.GeoDataFrame] = None, - unit_name_column:Optional[str]='name', - unitname1_column:Optional[str]='UNITNAME_1', - unitname2_column:Optional[str]='UNITNAME_2', + unit_name_column: Optional[str] = 'name', + unitname1_column: Optional[str] = 'UNITNAME_1', + unitname2_column: Optional[str] = 'UNITNAME_2', ): """ Initialiser for adjacency based sorter - + Args: contacts (geopandas.GeoDataFrame): unit contacts with length of the contacts in metres """ @@ -242,9 +252,15 @@ def __init__( self.sorter_label = "SorterAlpha" self.unitname1_column = unitname1_column self.unitname2_column = unitname2_column - if self.unitname1_column not in contacts.columns or self.unitname2_column not in contacts.columns or 'length' not in contacts.columns: - raise ValueError(f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns") - + if ( + self.unitname1_column not in contacts.columns + or self.unitname2_column not in contacts.columns + or 'length' not in contacts.columns + ): + raise ValueError( + f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns" + ) + def sort(self, units: pandas.DataFrame) -> list: """ Execute sorter method takes unit data and returns the sorted 
unit names based on this algorithm. @@ -256,7 +272,9 @@ def sort(self, units: pandas.DataFrame) -> list: list: the sorted unit names """ if self.contacts is None: - raise ValueError("contacts must be set (not None) before calling sort() in SorterAlpha.") + raise ValueError( + "contacts must be set (not None) before calling sort() in SorterAlpha." + ) if len(self.contacts) == 0: raise ValueError("contacts GeoDataFrame is empty in SorterAlpha.") if 'length' not in self.contacts.columns: @@ -272,7 +290,9 @@ def sort(self, units: pandas.DataFrame) -> list: max_weight = max(list(sorted_contacts["length"])) + 1 for _, row in sorted_contacts.iterrows(): graph.add_edge( - row[self.unitname1_column], row[self.unitname2_column], weight=int(max_weight - row["length"]) + row[self.unitname1_column], + row[self.unitname2_column], + weight=int(max_weight - row["length"]), ) cnode = None @@ -316,7 +336,9 @@ class SorterMaximiseContacts(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the maximum length of each contact """ + required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column'] + def __init__( self, *, @@ -327,7 +349,7 @@ def __init__( ): """ Initialiser for adjacency based sorter - + Args: contacts (pandas.DataFrame): unit contacts with length of the contacts in metres """ @@ -341,8 +363,14 @@ def __init__( self.unit_name_column = unit_name_column self.unitname1_column = unitname1_column self.unitname2_column = unitname2_column - if self.unitname1_column not in contacts.columns or self.unitname2_column not in contacts.columns or 'length' not in contacts.columns: - raise ValueError(f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns") + if ( + self.unitname1_column not in contacts.columns + or self.unitname2_column not in contacts.columns + or 'length' not in contacts.columns + ): + raise ValueError( + f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns" + ) def sort(self, units: pandas.DataFrame) -> list: """ @@ -356,6 +384,7 @@ def sort(self, units: pandas.DataFrame) -> list: """ import networkx as nx import networkx.algorithms.approximation as nx_app + if self.contacts is None: raise ValueError("SorterMaximiseContacts requires 'contacts' argument") if len(self.contacts) == 0: @@ -379,7 +408,9 @@ def sort(self, units: pandas.DataFrame) -> list: max_weight = max(list(sorted_contacts["length"])) + 1 sorted_contacts['length'] /= max_weight for _, row in sorted_contacts.iterrows(): - self.graph.add_edge(row[self.unitname1_column], row[self.unitname2_column], weight=(1 - row["length"])) + self.graph.add_edge( + row[self.unitname1_column], row[self.unitname2_column], weight=(1 - row["length"]) + ) self.route = nx_app.traveling_salesman_problem(self.graph) edge_list = list(nx.utils.pairwise(self.route)) @@ -410,7 +441,17 @@ class SorterObservationProjections(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units using the direction of observations to predict which unit is adjacent to the current one """ - required_arguments = ['contacts', 'geology_data', 'structure_data', 'dtm_data', 'unit_name_column', 'unitname1_column', 'unitname2_column'] + + required_arguments = [ + 'contacts', + 'geology_data', + 'structure_data', + 'dtm_data', + 'unit_name_column', + 'unitname1_column', + 'unitname2_column', + ] + def __init__( self, *, @@ -421,7 +462,7 @@ def __init__( 
geology_data: Optional[geopandas.GeoDataFrame] = None, structure_data: Optional[geopandas.GeoDataFrame] = None, dtm_data: Optional[gdal.Dataset] = None, - length: Union[float, int] = 1000 + length: Union[float, int] = 1000, ): """ Initialiser for adjacency based sorter @@ -458,6 +499,7 @@ def sort(self, units: pandas.DataFrame) -> list: import networkx as nx import networkx.algorithms.approximation as nx_app from shapely.geometry import LineString, Point + if self.contacts is None: raise ValueError("SorterObservationProjections requires 'contacts' argument") if self.geology_data is None: @@ -525,7 +567,12 @@ def sort(self, units: pandas.DataFrame) -> list: # Get heights for intersection point and start of ray height = value_from_raster(inv_geotransform, dtm_array, start.x, start.y) first_intersect_point = Point(start.x, start.y, height) - height = value_from_raster(inv_geotransform, dtm_array, second_intersect_point.x, second_intersect_point.y) + height = value_from_raster( + inv_geotransform, + dtm_array, + second_intersect_point.x, + second_intersect_point.y, + ) second_intersect_point = Point(second_intersect_point.x, start.y, height) # Check vertical difference between points and compare to projected dip angle diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py index 442162fd..b0346d5a 100644 --- a/map2loop/thickness_calculator.py +++ b/map2loop/thickness_calculator.py @@ -462,7 +462,6 @@ def compute( else: combined_location_tracking = pandas.DataFrame() # Save the combined DataFrame as an attribute of the class - # self.location_tracking = combined_location_tracking # Convert to GeoDataFrame and set CRS to match basal_contacts if not combined_location_tracking.empty: diff --git a/map2loop/utils/utility_functions.py b/map2loop/utils/utility_functions.py index cb0a7342..529fbefe 100644 --- a/map2loop/utils/utility_functions.py +++ b/map2loop/utils/utility_functions.py @@ -10,6 +10,7 @@ from osgeo import gdal from ..logging import getLogger + logger = getLogger(__name__) @@ -184,8 +185,11 @@ def find_segment_strike_from_pt( @beartype.beartype def calculate_endpoints( - start_point: shapely.geometry.Point, azimuth_deg: Union[float,int], distance: Union[float,int], bbox: pandas.DataFrame -) -> Union[shapely.geometry.LineString,shapely.geometry.GeometryCollection]: + start_point: shapely.geometry.Point, + azimuth_deg: Union[float, int], + distance: Union[float, int], + bbox: pandas.DataFrame, +) -> Union[shapely.geometry.LineString, shapely.geometry.GeometryCollection]: """ Calculate the endpoints of a line segment given a start point, azimuth angle, distance, and bounding box. @@ -225,7 +229,7 @@ def calculate_endpoints( @beartype.beartype def multiline_to_line( - geometry: Union[shapely.geometry.LineString, shapely.geometry.MultiLineString] + geometry: Union[shapely.geometry.LineString, shapely.geometry.MultiLineString], ) -> shapely.geometry.LineString: """ Converts a multiline geometry to a single line geometry. @@ -382,7 +386,6 @@ def hex_to_rgb(hex_color: str) -> tuple: def calculate_minimum_fault_length( bbox: dict[str, Union[int, float]], area_percentage: float ) -> float: - """ Calculate the minimum fault length based on the map bounding box and a given area percentage. 
@@ -440,11 +443,10 @@ def read_hjson_with_json(file_path: str) -> dict: except json.JSONDecodeError as e: raise ValueError(f"Failed to decode preprocessed HJSON as JSON: {e}") from e + @beartype.beartype def update_from_legacy_file( - filename: str, - json_save_path: Optional[str] = None, - lower: bool = False + filename: str, json_save_path: Optional[str] = None, lower: bool = False ) -> Optional[Dict[str, Dict]]: """ Update the config dictionary from the provided old version dictionary @@ -452,18 +454,19 @@ def update_from_legacy_file( filename (str): the path to the legacy file json_save_path (str, optional): the path to save the updated json file. Defaults to None. lower (bool, optional): whether to convert all strings to lowercase. Defaults to False. - + Returns: Dict[Dict]: the updated config dictionary - + Example: from map2loop.utils import update_from_legacy_file update_from_legacy_file(filename=r"./source_data/example.hjson") """ # only import config if needed from .config import Config + file_map = Config() - + code_mapping = { "otype": (file_map.structure_config, "orientation_type"), "dd": (file_map.structure_config, "dipdir_column"), @@ -505,7 +508,7 @@ def update_from_legacy_file( except Exception as e: logger.error(f"Error reading file {filename}: {e}") return - #map the keys + # map the keys file_map = file_map.to_dict() for legacy_key, new_mapping in code_mapping.items(): if legacy_key in parsed_data: @@ -514,21 +517,22 @@ def update_from_legacy_file( if lower and isinstance(value, str): value = value.lower() section[new_key] = value - + if "o" in parsed_data: object_id_value = parsed_data["o"] if lower and isinstance(object_id_value, str): object_id_value = object_id_value.lower() file_map['structure']["objectid_column"] = object_id_value file_map['geology']["objectid_column"] = object_id_value - file_map['fold']["objectid_column"] = object_id_value - + file_map['fold']["objectid_column"] = object_id_value + if json_save_path is not None: with open(json_save_path, "w") as f: json.dump(parsed_data, f, indent=4) - + return file_map + @beartype.beartype def value_from_raster(inv_geotransform, data, x: float, y: float): """ @@ -556,6 +560,7 @@ def value_from_raster(inv_geotransform, data, x: float, y: float): py = min(py, data.shape[1] - 1) return data[px][py] + @beartype.beartype def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame): """ @@ -573,7 +578,7 @@ def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame): if len(df) <= 0: df["Z"] = [] return df - + if dtm_data is None: logger.warning("Cannot get value from data as data is not loaded") return None @@ -582,8 +587,7 @@ def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame): data_array = numpy.array(dtm_data.GetRasterBand(1).ReadAsArray().T) df["Z"] = df.apply( - lambda row: value_from_raster(inv_geotransform, data_array, row["X"], row["Y"]), - axis=1, + lambda row: value_from_raster(inv_geotransform, data_array, row["X"], row["Y"]), axis=1 ) - return df \ No newline at end of file + return df