diff --git a/docs/source/pyissm.rst b/docs/source/pyissm.rst index bf774c95..26dc4c55 100644 --- a/docs/source/pyissm.rst +++ b/docs/source/pyissm.rst @@ -65,6 +65,19 @@ Utilities and helper functions for interacting with the ISSM ``Model()`` object. pyissm.tools +.. _inversion: + +Inversion +------------------- +Tools for performing inversions, including sensitivity analyses, on the ISSM ``Model()`` object. + +.. autosummary:: + :toctree: api + :recursive: + + pyissm.inversion + + .. _data: Data diff --git a/pyproject.toml b/pyproject.toml index d2201c0e..437ccb30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,8 @@ dependencies = [ "pandas", "matplotlib", "attrs", - "PyYAML" + "PyYAML", + "geopandas" ] [project.optional-dependencies] diff --git a/src/pyissm/__init__.py b/src/pyissm/__init__.py index bbf5eced..89a8d9d5 100644 --- a/src/pyissm/__init__.py +++ b/src/pyissm/__init__.py @@ -1,4 +1,4 @@ -from . import analysis, data, plot, model, tools +from . import analysis, data, plot, model, tools, inversion import importlib.metadata try: diff --git a/src/pyissm/inversion/__init__.py b/src/pyissm/inversion/__init__.py new file mode 100644 index 00000000..1028b37e --- /dev/null +++ b/src/pyissm/inversion/__init__.py @@ -0,0 +1,7 @@ +""" +Inversion module for pyISSM. + +This module contains tools for performing inversions, including sensitivity analyses and convergence diagnostics. +""" + +from . import metrics, plot, sensitivity \ No newline at end of file diff --git a/src/pyissm/inversion/metrics.py b/src/pyissm/inversion/metrics.py new file mode 100644 index 00000000..f43a3054 --- /dev/null +++ b/src/pyissm/inversion/metrics.py @@ -0,0 +1,238 @@ +""" +Inversion metrics for pyISSM. + +This module contains various functions for computing metrics related to inversion processes in ISSM models. +""" + +from .. import model, tools +import numpy as np +import pandas as pd +import itertools + + +def cost_function_values(md): + """ + Extract final inversion cost-function values. + + Parameters + ---------- + md : :class: `pyissm.Model` + ISSM model object containing inversion results. + + Returns + ------- + :class:`dict` + Dictionary containing final cost-function values for each cost function and total cost. + """ + + # Get cost function history from model results + J = md.results.StressbalanceSolution.J + + # Get final cost function values (last entry in J) + final_costs = J[-1] + + # Create output dictionary + output = {} + + # Loop over cost functions and add to output + for i, cf in enumerate(md.inversion.cost_functions): + + output[f"cost_{cf}"] = final_costs[i] + + # Get total cost (last entry in J) + output["cost_total"] = final_costs[-1] + + return output + +def cost_function_ratios(md): + """ + Compute ratios between all inversion cost functions. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object containing inversion results. + + Returns + ------- + :class:`dict` + Dictionary of pairwise cost-function ratios. + """ + + # Get individual cost values + costs = cost_function_values(md) + + # Remove total cost + cost_keys = [ + key for key in costs.keys() + if key != "cost_total" + ] + + # Create output dictionary + output = {} + + # Generate all unique cost-function pairs + for key1, key2 in itertools.combinations(cost_keys, 2): + + # Extract cost values for the pair + value1 = costs[key1] + value2 = costs[key2] + + # Extract numeric IDs (e.g. "cost_101" -> "101") for naming ratios + cf1 = key1.replace("cost_", "") + cf2 = key2.replace("cost_", "") + + # Construct ratio name (e.g. "ratio_101_103") + ratio_name = f"ratio_{cf1}_{cf2}" + + # Calculate ratio and avoid divide-by-zero + if value2 == 0: + output[ratio_name] = np.nan + else: + output[ratio_name] = value1 / value2 + + return output + +def extract_convergence_history(md): + """ + Extract convergence history of cost functions. + + Parameters + ---------- + md : :class:`pyissm.Model` + + Returns + ------- + :class:`pandas.DataFrame` + DataFrame containing cost function history. + """ + + # Get cost function history from model results + J = md.results.StressbalanceSolution.J + + # Create output dictionary + output = {} + + # Loop over cost functions and add to output + for i, cf in enumerate(md.inversion.cost_functions): + + # Add cost history for this cost function (column in J) to output dictionary + output[f"cost_{cf}"] = J[:, i] + + # Add total cost history (last column in J) + output["cost_total"] = J[:, -1] + + # Convert to DataFrame for easier analysis + output = pd.DataFrame(output) + + # Add iteration numbers as first column + output.insert(0, "iteration", np.arange(1, len(output) + 1)) + + return output + +def velocity_residual_area_metrics(md): + """ + Compute positive/negative residual area metrics. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object containing mesh and solution data. + + Returns + ------- + :class:`dict` + Dictionary containing the computed metrics. + """ + + # Compute velocity residuals at vertices + residuals_vertex = tools.diagnostics.velocity_residuals(md) + + # Convert residuals to elements + residuals_element = tools.interp.vertex_to_element(md, residuals_vertex) + + # Calculate elemtent areas + element_areas = model.mesh.get_element_areas_volumes(md.mesh.elements, md.mesh.x, md.mesh.y) + + # Create masks for positive and negative residuals + positive_mask = residuals_element > 0 + negative_mask = residuals_element < 0 + + # Calculate total area of positive and negative residuals + positive_area = np.sum(element_areas[positive_mask]) + negative_area = np.sum(element_areas[negative_mask]) + + # Calculate total area for normalization + total_area = np.sum(element_areas) + + # Calculate area fractions + positive_fraction = positive_area / total_area + negative_fraction = negative_area / total_area + + # Return metrics in a dictionary + return { + "positive_residual_area": positive_area, + "negative_residual_area": negative_area, + "positive_residual_fraction": positive_fraction, + "negative_residual_fraction": negative_fraction, + } + +def _find_control_parameter_field(md): + """ + Helper function to extract control parameter field from model results. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object containing inversion results. + + Returns + ------- + :class:`numpy.ndarray` + Control parameter field values defined on vertices. + """ + + if md.inversion.control_parameters == ['MaterialsRheologyBbar']: + return md.results.StressbalanceSolution.MaterialsRheologyBbar, "MaterialsRheologyBbar" + elif md.inversion.control_parameters == ['FrictionCoefficient']: + return md.results.StressbalanceSolution.FrictionCoefficient, "FrictionCoefficient" + elif md.inversion.control_parameters == ['FrictionC']: + return md.results.StressbalanceSolution.FrictionC, "FrictionC" + else: + raise ValueError("pyissm.inversion.metrics._find_control_parameter_field: Unsupported control parameter for smoothness metrics") + +def field_smoothness_metrics(md, + field = None): + """ + Compute field smoothness metrics. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object containing mesh and (optionally) solution data. + field : :class:`numpy.ndarray` + Field values, defined on vertices. If None, will attempt to extract from model results based on control parameter. + + Returns + ------- + :class:`dict` + Dictionary containing smoothness metrics. + """ + + # Extract field from model if not provided + if field is None: + field, _ = _find_control_parameter_field(md) + + # Compute slope (magnitude of gradient) + _, _, slope = tools.geometry.slope(md, field) + + # Compute area-weighted mean slope + area_weighted_mean_slope = tools.diagnostics.weighted_mean(md, slope) + + # Compute area-weighted RMSE of slope + area_weighted_rmse_slope = tools.diagnostics.weighted_rmse(md, slope) + + return { + "mean_gradient_magnitude": area_weighted_mean_slope, + "rmse_gradient_magnitude": area_weighted_rmse_slope, + } \ No newline at end of file diff --git a/src/pyissm/inversion/plot.py b/src/pyissm/inversion/plot.py new file mode 100644 index 00000000..6011e776 --- /dev/null +++ b/src/pyissm/inversion/plot.py @@ -0,0 +1,649 @@ +import numpy as np +import matplotlib.pyplot as plt +import textwrap +from . import metrics +from .. import tools, plot +from matplotlib.backends.backend_pdf import PdfPages + +def plot_convergence_history(convergence_history, + metrics = None, + log = True, + ax = None, + figsize = (6.4, 4.8), + constrained_layout = True): + """ + Plot optimization convergence metrics over iterations. + + Parameters + ---------- + convergence_history : :class:`pandas.DataFrame` + Table containing an ``"iteration"`` column and one or more metric + columns to plot against iteration. + + metrics : :class:`list` of :class:`str` or ``None``, optional + Metric column names to plot. If ``None`` (default), all columns except + ``"iteration"`` are plotted. + + log : :class:`bool`, optional + If ``True`` (default), use a logarithmic scale for the y-axis. + + ax : matplotlib.axes.Axes, optional + Axes object to draw on. If None, a new figure and axes are created. + + figsize : tuple of float, optional + Size of the figure in inches (width, height). Default is (6.4, 4.8). + + constrained_layout : bool, optional + Whether to use constrained layout in figure. Default is True. + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + The created Matplotlib figure. Returned only if a new figure was created. + + ax : :class:`matplotlib.axes.Axes` + The axes containing the heatmap. If an axes was passed in, this is the same object; if no axes was passed, this is the newly created axes. + + Notes + ----- + This function does not validate metric names. A missing metric column will + raise a pandas ``KeyError`` during plotting. + """ + + ## Is an ax passed? + ax_defined = ax is not None + + ## If no ax is defined, create new figure + ## otherwise, plot on defined ax + if ax is None: + fig, ax = plt.subplots(figsize = figsize, constrained_layout = constrained_layout) + else: + fig = ax.get_figure() + + # If metrics is not provided, plot all metrics in convergence history (except iteration) + if metrics is None: + + metrics = [ + col for col in convergence_history.columns + if col != "iteration" + ] + + # Loop over metrics and plot each one + for metric in metrics: + + # Use dashed linestyle for total cost, solid for individual cost components + linestyle = "--" if metric == "cost_total" else "-" + + # Plot metric vs iteration + ax.plot(convergence_history["iteration"], + convergence_history[metric], + label = metric, + linestyle = linestyle + ) + + # Set log scale if requested + if log: + ax.set_yscale('log') + + # Set labels and legend + ax.set_xlabel("Iteration") + ax.set_ylabel("Metric Value") + ax.legend(title = "Metric") + + if not ax_defined: + return fig, ax + else: + return ax + +def plot_sensitivity_heatmap(diagnostics, + x, + y, + value, + cmap = "viridis", + ax = None, + figsize = (6.4, 4.8), + constrained_layout = True, + **kwargs): + """ + Plot a sensitivity heatmap from tabular diagnostics data. + + Parameters + ---------- + diagnostics : :class:`pandas.DataFrame` + Input table containing at least the columns specified by ``x``, ``y``, + and ``value``. + + x : :class:`str` + Column name used for heatmap x-axis categories. + + y : :class:`str` + Column name used for heatmap y-axis categories. + + value : :class:`str` + Column name whose values populate the heatmap cells. + + cmap : :class:`str` or :class:`matplotlib.colors.Colormap`, optional + Colormap used to render the heatmap. Defaults to ``"viridis"``. + + ax : matplotlib.axes.Axes, optional + Axes object to draw on. If None, a new figure and axes are created. + + figsize : :class:`tuple` of :class:`float`, optional + Figure size in inches as ``(width, height)``. Defaults to + ``(6.4, 4.8)``. + + constrained_layout : :class:`bool`, optional + Whether to use Matplotlib constrained layout. Defaults to ``True``. + + **kwargs + Additional keyword arguments forwarded to :meth:`matplotlib.axes.Axes.imshow`. + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + The created Matplotlib figure. Returned only if a new figure was created. + + ax : :class:`matplotlib.axes.Axes` + The axes containing the heatmap. If an axes was passed in, this is the same object; if no axes was passed, this is the newly created axes. + + Notes + ----- + The heatmap is generated via :meth:`pandas.DataFrame.pivot_table` using + ``y`` as the index, ``x`` as the columns, and ``value`` as cell values. + """ + + ## Is an ax passed? + ax_defined = ax is not None + + ## If no ax is defined, create new figure + ## otherwise, plot on defined ax + if ax is None: + fig, ax = plt.subplots(figsize = figsize, constrained_layout = constrained_layout) + else: + fig = ax.get_figure() + + # Pivot table + heatmap = diagnostics.pivot_table(index = y, columns = x, values = value) + + # Populate heatmap plot + im = ax.imshow( + heatmap.values, + aspect = "auto", + origin = "lower", + cmap = cmap, + **kwargs + ) + + # Tick labels + ax.set_xticks(range(len(heatmap.columns))) + ax.set_xticklabels(heatmap.columns) + + ax.set_yticks(range(len(heatmap.index))) + ax.set_yticklabels(heatmap.index) + + # Labels and title + ax.set_xlabel(x) + ax.set_ylabel(y) + ax.set_title(value) + + # Add colorbar + plt.colorbar(im, ax = ax, label=value) + + if not ax_defined: + return fig, ax + else: + return ax + +def plot_run_summary(md, + output_pdf, + diagnostics = None, + plot_kwargs = None): + """ + Generate PDF summary report for inversion run. + + Saves a multi-page PDF report to the specified path, containing visualizations and diagnostics of the inversion run. + The first page includes spatial plots of observed velocity, modelled velocity, velocity residuals, and the inverted + control parameter field. The second page includes a convergence history plot, histograms of velocity residuals and + inverted field slopes, and a text summary of key diagnostics. Plot appearance can be customized via the `plot_kwargs` parameter. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object containing inversion results. + output_pdf : :class:`str` + Path to output PDF file. + diagnostics : :class:`dict`, optional + Diagnostics configuration. Default is None. + plot_kwargs : :class:`dict`, optional + Plot keyword arguments. Default is None. + + Returns + ------- + None + """ + + # Compute velocity residuals for diagnostics and plotting + residuals = tools.diagnostics.velocity_residuals(md) + + # Determine if velocity residuals exceed colorbar limits, and set colorbar extension accordingly + extend_min = False + extend_max = False + if np.max(residuals) > 50: + extend_max = True + extend = 'max' + if np.min(residuals) < -50: + extend_min = True + extend = 'min' + if extend_max and extend_min: + extend = 'both' + else: + extend = 'neither' + + # Extract control parameter field and name for plotting + inversion_field, inversion_field_name = metrics._find_control_parameter_field(md) + + # Define default plot kwargs for each plot type, which can be overridden by user-provided plot_kwargs + default_plot_kwargs = { + + # PAGE 1 - SPATIAL FIELDS + ## Observed velocity + "observed": { + "show_cbar": True, + "xlabel": "", + "cbar_kwargs": { + "label": "Observed velocity (m/yr)", + }, + }, + ## Modelled velocity + "modelled": { + "show_cbar": True, + "xlabel": "", + "ylabel": "", + "cbar_kwargs": { + "label": "Modelled velocity (m/yr)", + }, + }, + ## Velocity residuals + "residuals": { + "cmap": "RdBu_r", + "show_cbar": True, + "vmin": -50, + "vmax": 50, + "cbar_kwargs": { + "label": "Velocity residual (m/yr)", + "extend": extend, + }, + }, + ## Inversion field + "inversion": { + "show_cbar": True, + "ylabel": "", + "cbar_kwargs": { + "label": f"Inverted {inversion_field_name}", + }, + }, + + # PAGE 2 - CONVERGENCE, DISTRIBUTIONS & TEXT SUMMARY + ## Convergence history + "convergence": { + "log": True, + }, + ## Residual histogram + "residual_hist": { + "bins": 100, + }, + ## Inversion field slope histogram + "slope_hist": { + "bins": 100, + }, + + # FIGURE SETTINGS (PER PAGE) + ## Page 1 - spatial fields + "page1_figure": { + "figsize": (15, 10), + "sharex": True, + "sharey": True, + "constrained_layout": True, + }, + ## Page 2 - convergence, distributions & text summary + "page2_figure": { + "figsize": (15, 10), + "constrained_layout": True, + } + } + + # Merge user-provided plot kwargs with defaults + plot_kwargs = {} if plot_kwargs is None else plot_kwargs + + merged_plot_kwargs = {} + + for key, defaults in default_plot_kwargs.items(): + merged_plot_kwargs[key] = { + **defaults, + **plot_kwargs.get(key, {}), + } + + # Generate PDF report with two pages: (1) spatial fields and (2) convergence, distributions, and text summary + with PdfPages(output_pdf) as pdf: + + # -------------------------------------------------- + # PAGE 1 — SPATIAL FIELDS + # -------------------------------------------------- + + # Set up 2x2 figure for spatial plots + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, **merged_plot_kwargs["page1_figure"]) + + # Observed velocity + plot.plot_model_field( + md, + md.inversion.vel_obs, + ax = ax1, + **merged_plot_kwargs["observed"] + ) + ax1.set_title("Observed velocity") + + # Modelled velocity + plot.plot_model_field( + md, + md.results.StressbalanceSolution.Vel, + ax = ax2, + **merged_plot_kwargs["modelled"] + ) + ax2.set_title("Modelled velocity") + + # Residuals + plot.plot_model_field( + md, + residuals, + ax = ax3, + **merged_plot_kwargs["residuals"] + ) + ax3.set_title("Velocity residual (mod - obs)") + + # Inversion field + plot.plot_model_field( + md, + inversion_field, + ax = ax4, + **merged_plot_kwargs["inversion"] + ) + ax4.set_title(f"Inverted {inversion_field_name}") + + # Save page 1 to PDF + pdf.savefig(fig) + plt.close(fig) + + # -------------------------------------------------- + # PAGE 2 — CONVERGENCE, DISTRIBUTIONS & TEXT SUMMARY + # -------------------------------------------------- + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, **merged_plot_kwargs["page2_figure"]) + + # Convergence history + convergence_history = metrics.extract_convergence_history(md) + + plot_convergence_history(convergence_history, + ax = ax1, + **merged_plot_kwargs["convergence"] + ) + ax1.set_title("Cost function convergence history") + ax1.grid() + + # Residual histogram + finite = residuals[np.isfinite(residuals)] + + ax2.hist(finite, + **merged_plot_kwargs["residual_hist"] + ) + + ax2.set_title("Velocity residual distribution") + ax2.set_xlabel("Velocity residual (m/yr)") + ax2.set_ylabel("Count") + + # Slope histogram + _, _, slope = tools.geometry.slope(md, inversion_field) + + finite = slope[np.isfinite(slope)] + + ax3.hist(finite, + **merged_plot_kwargs["slope_hist"] + ) + + ax3.set_title(f"Inverted {inversion_field_name} distribution") + ax3.set_xlabel(f"{inversion_field_name} slope") + ax3.set_ylabel("Count") + + # Text Summary + ## If diagnostics are not provided, compute a default set of diagnostics to display + if diagnostics is None: + diagnostics = { + "vel_rmse": tools.diagnostics.velocity_rmse(md), + + # Expand dictionaries into flat metrics + **metrics.cost_function_values(md), + **metrics.cost_function_ratios(md), + **metrics.velocity_residual_area_metrics(md), + **metrics.field_smoothness_metrics(md) + } + ## Turn off axis + ax4.axis("off") + + ## Format diagnostics into lines of text for display + lines = [] + + for key, value in diagnostics.items(): + + if isinstance(value, (np.integer, int)): + text = f"{key}: {value}" + + elif isinstance(value, (np.floating, float)): + text = f"{key}: {value:.3g}" + + else: + text = f"{key}: {value}" + + # Wrap long lines + wrapped = textwrap.wrap(text, + width = 80, + subsequent_indent = " " + ) + + # Add wrapped lines + lines.extend(wrapped) + + ## Display diagnostics as text in the axis + ax4.text( + 0.01, + 0.99, + "\n".join(lines), + va = "top", + family = "monospace", + ) + + # Save page 2 to PDF + pdf.savefig(fig) + plt.close(fig) + +def plot_lcurve(diagnostics, + alpha_col = None, + misfit_cols = None, + regularization_col = None, + ax = None, + annotate = True, + sort_alpha = True, + marker = "s", + figsize = (7, 6), + constrained_layout = True, + **kwargs): + """ + Plot inversion L-curve. + + Parameters + ---------- + diagnostics : :class:`pandas.DataFrame` + Diagnostics table generated from sensitivity analysis. + + alpha_col : :class:`str`, optional + Column containing regularization coefficient values. + + If None, automatically inferred from regularization term: + cost_501 -> cf501 + cost_502 -> cf502 + cost_503 -> cf503 + + misfit_cols : list of :class:`str`, optional + Cost-function columns contributing to model-data misfit. + + If None, automatically inferred as all cost-function columns except total cost and regularization term. + + regularization_col : :class:`str`, optional + Regularization cost-function column. + + If None: + - uses cost_501 if present + - uses cost_502 if present + - uses cost_503 if present + - raises error if both exist + - raises error if neither exist + + ax : :class:`matplotlib.axes.Axes`, optional + Existing axis. + + annotate : :class:`bool`, default = True + Label points with alpha values. + + sort_alpha : :class:`bool`, default = True + Sort curve by alpha before plotting. + + marker : :class:`str`, default = "s" + Marker style. + + figsize : :class:`tuple`, default = (7, 6) + Figure size when creating a new figure. + + constrained_layout : :class:`bool`, default = True + Whether to use constrained layout when creating a new figure. + + **kwargs + Passed to matplotlib plotting function. + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + The created Matplotlib figure. Returned only if a new figure was created. + + ax : :class:`matplotlib.axes.Axes` + The axes containing the plot. If an axes was passed in, this is the same object; if no axes was passed, this is the newly created axes. + """ + + ## Is an ax passed? + ax_defined = ax is not None + + ## If no ax is defined, create new figure + ## otherwise, plot on defined ax + if ax is None: + fig, ax = plt.subplots(figsize = figsize, constrained_layout = constrained_layout) + else: + fig = ax.get_figure() + + # Infer regularization term + if regularization_col is None: + + candidates = [] + + if "cost_501" in diagnostics.columns: + candidates.append("cost_501") + + if "cost_502" in diagnostics.columns: + candidates.append("cost_502") + + if "cost_503" in diagnostics.columns: + candidates.append("cost_503") + + if len(candidates) == 0: + raise ValueError( + "pyissm.inversion.plot.plot_lcurve: No regularization cost function found (expected cost_501, cost_502 or cost_503).") + + elif len(candidates) > 1: + raise ValueError( + "pyissm.inversion.plot.plot_lcurve: Multiple regularization terms found: " + f"{candidates}. " + "Please specify regularization_col." + ) + + regularization_col = candidates[0] + + print(f'Using regularization_col: {regularization_col}') + + # Infer alpha column + if alpha_col is None: + + cf = regularization_col.replace("cost_", "") + + alpha_col = f"cf{cf}" + + if alpha_col not in diagnostics.columns: + raise ValueError(f"pyissm.inversion.plot.plot_lcurve: Could not infer alpha column '{alpha_col}'.") + + print(f'Using alpha_col: {alpha_col}') + + # Default misfit terms + if misfit_cols is None: + + misfit_cols = [] + + for col in diagnostics.columns: + + if not col.startswith("cost_"): + continue + + if col in ["cost_total", regularization_col]: + continue + + misfit_cols.append(col) + + if len(misfit_cols) == 0: + raise ValueError("pyissm.inversion.plot.plot_lcurve: No misfit cost-function columns found.") + print(f'Using misfit_cols: {misfit_cols}') + + # Compute plotting terms + df = diagnostics.copy() + + # Total misfit + df["misfit"] = df[misfit_cols].sum(axis = 1) + + # Regularization term + df["regularization"] = (df[regularization_col] / df[alpha_col]) + + # Sort by alpha + if sort_alpha: + df = df.sort_values(alpha_col) + + # Plot + ax.loglog( + df["misfit"], + df["regularization"], + f"-{marker}", + **kwargs + ) + + ax.set_xlabel(r'$\log(\mathrm{Data\ Misfit})$') + ax.set_ylabel(r'$\log(\mathrm{Regularization})$') + + ax.grid(True, which = "both") + + # Annotate alpha values + if annotate: + + for _, row in df.iterrows(): + + ax.text( + row["misfit"] * 1.02, + row["regularization"] * 1.02, + f"{row[alpha_col]:.0e}", + fontsize = 9, + ) + + if not ax_defined: + return fig, ax + else: + return ax \ No newline at end of file diff --git a/src/pyissm/inversion/sensitivity.py b/src/pyissm/inversion/sensitivity.py new file mode 100644 index 00000000..9e29ca13 --- /dev/null +++ b/src/pyissm/inversion/sensitivity.py @@ -0,0 +1,557 @@ +""" +Sensitivity analysis tools for ISSM inversions. + +This module provides functions to systematically vary inversion parameters, execute runs, and compute diagnostics for analyzing sensitivity of inversion results to parameter choices. +""" + +import copy +import itertools +from pathlib import Path +import numpy as np +import pandas as pd +from .. import model, tools +from . import metrics, plot + +def build_parameter_grid(coefficients, initial_run_id = 1): + """ + Build parameter sensitivity combinations. + + Parameters + ---------- + coefficients : :class:`dict` + Dictionary mapping ISSM cost function IDs to coefficient lists. + + Example + ------- + {101: [0.1, 1, 10], + 103: [1, 10], + 502: [1e-19]} + + initial_run_id : :class:`int`, default = 1 + Starting index for run_id assignment. Subsequent runs will have IDs incremented by 1 from this value. + Useful for tracking runs across multiple calls to this function without overwriting previous run_ids. + + Returns + ------- + :class:`pandas.DataFrame` + Parameter combination table. + + Notes + ----- + Returned DataFrame contains: + - run_id + - cfXXX columns for each cost function + - run_name + """ + + # Sort cost functions for consistent ordering + cost_functions = sorted(coefficients.keys()) + + # Build cartesian product of parameter combinations + combinations = list( + itertools.product( + *[coefficients[cf] for cf in cost_functions] + ) + ) + + # Store output rows before converting to DataFrame + records = [] + + # Loop through all parameter combinations + for run_id, combo in enumerate(combinations, start = initial_run_id): + + # Initialize record with run_id + record = { + "run_id": run_id, + } + + # Add coefficient columns + for cf, value in zip(cost_functions, combo): + record[f"cf{cf}"] = value + + # Generate unique run name based on parameter values + pair_string = "_".join( + [f"{v:g}" for v in combo] + ) + + # Assign run_name to record + record["run_name"] = f"run_{run_id:03d}_{pair_string}" + + # Append record to list + records.append(record) + + # Convert list of records to DataFrame + return pd.DataFrame(records) + + +def assign_cost_functions(md, + coefficients, + global_mask = None, + coeff_masks = None): + """ + Assign inversion cost function coefficients. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object. + + coefficients : :class:`dict` + Dictionary mapping cost function IDs to scalar coefficients. + + Example + ------- + {101: 1000, + 103: 25, + 502: 1e-19} + + global_mask : :class:`numpy.ndarray` of :class:`bool`, optional + Global boolean mask applied to all cost functions. + + True -> coefficient active + False -> coefficient set to zero + + coeff_masks : :class:`dict`, optional + Dictionary mapping cost-function IDs to boolean masks. + Masks are applied in the same way as the global_mask, but only affect their respective cost functions. + + Example + ------- + {101: vel_mask, + 103: grounded_mask} + + Returns + ------- + md : :class:`pyissm.Model` + Updated ISSM model. + """ + + # Sort cost functions for consistent ordering + cost_functions = sorted(coefficients.keys()) + + # Determine number of vertices + nverts = md.mesh.numberofvertices + + # Allocate coefficient matrix (default = 0 [inactive]) + coeff_matrix = np.zeros((nverts, len(cost_functions)), dtype=float) + + # Validate global_mask + if global_mask is None: + global_mask = np.ones(nverts, dtype = bool) + else: + + # Ensure global_mask is a boolean array of the correct shape. Enforce boolean to prevent potential unintentional partial weighting with floats + global_mask = np.asarray(global_mask) + + if global_mask.dtype != bool: + raise TypeError("pyissm.inversion.sensitivity.assign_cost_functions: global_mask must be boolean.") + + if global_mask.shape != (nverts,): + raise ValueError(f"pyissm.inversion.sensitivity.assign_cost_functions: global_mask must have shape ({nverts},)") + + + # Validate coeff_masks + if coeff_masks is not None: + + # Ensure coeff_masks is a dictionary mapping cost function IDs to boolean masks + for cf, cf_mask in coeff_masks.items(): + + cf_mask = np.asarray(cf_mask) + + # Ensure cf_mask is a boolean array of the correct shape. Enforce boolean to prevent potential unintentional partial weighting with floats + if cf_mask.dtype != bool: + raise TypeError(f"pyissm.inversion.sensitivity.assign_cost_functions: Mask for cost function {cf} must be boolean.") + + if cf_mask.shape != (nverts,): + raise ValueError(f"pyissm.inversion.sensitivity.assign_cost_functions: Mask for cost function {cf} must have shape ({nverts},)") + + coeff_masks[cf] = cf_mask + + # Populate coefficient matrix + for i, cf in enumerate(cost_functions): + + # Start from global mask + active = global_mask.copy() + + # Apply per-cost-function mask + if coeff_masks is not None and cf in coeff_masks: + active &= coeff_masks[cf] + + # Assign coefficient only where active + coeff_matrix[active, i] = coefficients[cf] + + # Assign to ISSM model + md.inversion.cost_functions = cost_functions + md.inversion.cost_functions_coefficients = coeff_matrix + + return md + +def parameter_sensitivity(md, + parameter_grid, + output_dir, + run = True, + load_only = False, + global_mask = None, + coeff_masks = None): + """ + Run inversion sensitivity experiments. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model object. + + parameter_grid : :class:`pandas.DataFrame` + Output from :func:`pyissm.inversion.sensitivity.build_parameter_grid`. + + output_dir : :class:`str` or :class:`pathlib.Path` + Directory used to store sensitivity outputs, manifests, and saved models. + + **NOTE:** + This is intentionally separate from: md.cluster.executionpath: + - md.cluster.executionpath controls where ISSM stages runtime files for execution. + - output_dir controls where pyISSM stores organized experiment outputs. + + run : :class:`bool`, default = True + Submit inversion runs. + + load_only : :class:`bool`, default = False + Load completed inversion results. + + global_mask : :class:`numpy.ndarray` of :class:`bool`, optional + Global boolean mask applied to all cost functions. Passed to :func:`assign_cost_functions`. + + True -> coefficient active + False -> coefficient set to zero + + coeff_masks : :class:`dict`, optional + Dictionary mapping cost-function IDs to boolean masks. Passed to :func:`assign_cost_functions`. + + Example + ------- + {101: vel_mask, + 103: grounded_mask} + + Returns + ------- + :class:`pandas.DataFrame` + Experiment manifest. + """ + + # Prevent ambiguous execution mode + if run and load_only: + raise ValueError("pyissm.inversion.sensitivity.parameter_sensitivity: Cannot set both run and load_only to True.") + + if not run and not load_only: + raise ValueError("pyissm.inversion.sensitivity.parameter_sensitivity: Either run or load_only must be True.") + + # Create output directory for sensitivity study + output_dir = Path(output_dir) + output_dir.mkdir(parents = True, exist_ok = True) + + # Initialize manifest records + manifest = [] + + # Identify cost-function columns (e.g., cf101, cf102, cf502) + cf_columns = [ + col for col in parameter_grid.columns + if col.startswith("cf") + ] + + # Loop through all parameter combinations + for _, row in parameter_grid.iterrows(): + + # Extract run metadata + run_id = int(row["run_id"]) + run_name = row["run_name"] + + print(f"Running sensitivity experiment: {run_name}") + + # Create independent model copy + mdi = copy.deepcopy(md) + + # Build coefficient dictionary + coefficients = {} + + # Extract coefficient values from the parameter grid row + for col in cf_columns: + + # Extract cost function ID from column name (e.g., cf101 -> 101) + cf = int(col.replace("cf", "")) + + # Store coefficient value for this cost function + coefficients[cf] = row[col] + + # Assign inversion coefficients + mdi = assign_cost_functions(mdi, coefficients, global_mask = global_mask, coeff_masks = coeff_masks) + + # Assign unique ISSM runtime name + mdi.miscellaneous.name = run_name + + # Create dedicated run directory to store saved model and convergence history for this run + run_dir = output_dir / run_name + run_dir.mkdir(exist_ok = True) + + # Default status + status = "pending" + + # Try to execute or load the run, and catch any exceptions to record failure in the manifest + try: + + # Submit inversion run + if run: + + mdi = model.execute.solve(mdi, "Stressbalance", runtime_name = False, load_only = False) + + # Distinguish between submitted vs completed based on waitonlock setting + if mdi.settings.waitonlock == 0: + # If waitonlock is 0 the run is submitted, but we do not wait for completion, so we mark as "submitted" + status = "submitted" + else: + # If waitonlock is not 0, we wait for the run to complete before proceeding, so we can mark as "completed" + status = "completed" + + # Save completed model + model.io.save_model(mdi, run_dir / f"{run_name}.nc") + + # Load previously completed results + elif load_only: + + # If the model file already exists, we can assume the run was completed and just load the model + if (run_dir / f"{run_name}.nc").exists(): + print(f"Model already exists for {run_name}, loading model.") + status = "completed" + else: + print(f"Model does not exist for {run_name}, attempting to load model from execution path.") + + # Load the model from the execution path (this will raise an error if the model is not found, which we catch to mark the run as failed) + mdi = model.execute.solve(mdi, "Stressbalance", runtime_name = False, load_only = True) + + # Save loaded model + model.io.save_model(mdi, run_dir / f"{run_name}.nc") + + status = "completed" + + # If exceptions occur during run submission or loading, catch them and record failure in the manifest + except Exception as exc: + + print(f"FAILED: {run_name}") + print(exc) + + status = "failed" + + # Build manifest entry + manifest_entry = { + "run_id": run_id, + "run_name": run_name, + "run_dir": str(run_dir), + "execution_path": str(mdi.cluster.executionpath), + "run_status": status, + } + + # Store coefficient metadata (e.g. cf101, cf103) in manifest for easy reference when analyzing diagnostics later + manifest_entry.update({f"cf{cf}": value for cf, value in coefficients.items()}) + + # Append manifest entry + manifest.append(manifest_entry) + + # Convert manifest to DataFrame + manifest = pd.DataFrame(manifest) + + # Save manifest + manifest.to_csv(output_dir / "manifest.csv", index = False) + + return manifest + +def load_parameter_sensitivity_run(manifest_row): + """ + Load a parameter sensitivity run. + + Parameters + ---------- + manifest_row : :class:`pandas.Series` + + Returns + ------- + md : :class:`pyissm.Model` + Loaded ISSM model for the specified run. + """ + + # Extract run directory and name from manifest row + run_dir = Path(manifest_row["run_dir"]) + run_name = manifest_row["run_name"] + + # Construct model path + model_path = run_dir / f"{run_name}.nc" + + # Check if model file exists + if not model_path.exists(): + raise FileNotFoundError(f"pyissm.inversion.sensitivity.load_parameter_sensitivity_run: Model file not found: {model_path}") + + # Load model + md = model.io.load_model(model_path) + + return md + +def compute_sensitivity_diagnostics(manifest, + output_dir = None, + plot_run_summaries = True): + """ + Compute diagnostics for parameter sensitivity runs. + + Parameters + ---------- + manifest : :class:`pandas.DataFrame` + Output from :func:`pyissm.inversion.sensitivity.parameter_sensitivity`. + + Returns + ------- + :class:`pandas.DataFrame` + Diagnostics table. + """ + + # Copy manifest so coefficient metadata is preserved + diagnostics = manifest.copy() + + # Loop through all runs + for idx, row in manifest.iterrows(): + + # Extract run name for logging + run_name = row["run_name"] + print(f"Processing diagnostics: {run_name}") + + # Try to load the model and compute diagnostics, and catch any exceptions to record failure in the diagnostics table + try: + + # Load model + mdi = load_parameter_sensitivity_run(row) + + # ----------------------------------------- + # Compute diagnostics + # ----------------------------------------- + + # Compute all diagnostics for this run and store in a dictionary + run_metrics = { + "vel_rmse": tools.diagnostics.velocity_rmse(mdi), + + # Expand dictionaries into flat metrics + **metrics.cost_function_values(mdi), + **metrics.cost_function_ratios(mdi), + **metrics.velocity_residual_area_metrics(mdi), + **metrics.field_smoothness_metrics(mdi) + } + + # Dynamically assign all metrics to the diagnostics DataFrame for this run + for key, value in run_metrics.items(): + + diagnostics.at[idx, key] = value + + diagnostics.at[idx, "diagnostics_status"] = "complete" + + if output_dir is not None: + # Save intermediate diagnostics to CSV after each run in case of long-running computations and to preserve results even if some runs fail + diagnostics.to_csv(Path(output_dir) / "diagnostics.csv", index = False) + + # ----------------------------------------- + # Extract & save convergence history + # ----------------------------------------- + + # Extract convergence history DataFrame from model results + convergence_history = metrics.extract_convergence_history(mdi) + + # Save convergence history to CSV in the run directory + convergence_history.to_csv(Path(row["run_dir"]) / f"{run_name}_convergence_history.csv", index = False) + + # ----------------------------------------- + # Generate summary PDF for this run + # ----------------------------------------- + if plot_run_summaries: + plot.plot_run_summary(mdi, + output_pdf = Path(row["run_dir"]) / f"{run_name}_summary.pdf", + diagnostics = diagnostics.loc[idx]) + + # If exceptions occur during loading or diagnostics computation, catch them and record failure in the diagnostics table + except Exception as exc: + + # Record failure in diagnostics table & print error message + diagnostics.at[idx, "diagnostics_status"] = "failed" + print(f"Error processing run {run_name}: {exc}") + + return diagnostics + +def normalize_diagnostics(diagnostics, + columns = None, + higher_is_better = None, + suffix = "_norm"): + """ + Normalize diagnostic columns to [0, 1]. + + Parameters + ---------- + diagnostics : :class:`pandas.DataFrame` + DataFrame containing model diagnostics. + + columns : :class:`list` of :class:`str`, optional + Columns to normalize. Defaults to all numeric columns. + + higher_is_better : :class:`dict`, optional + Dictionary mapping column name -> bool. + + True = higher values are better + False = lower values are better + + Example + ------- + {"velocity_rmse": False, + "thickness_rmse": False, + "correlation": True} + + Any unspecified column defaults to False. + + suffix : :class:`str`, optional + Suffix for normalized columns. + + Returns + ------- + :class:`pandas.DataFrame` + Copy of dataframe with normalized columns added. + """ + + # Create a copy of the diagnostics DataFrame to avoid modifying the original + out = diagnostics.copy() + + # If columns to normalize are not specified, default to all numeric columns + if columns is None: + columns = out.select_dtypes(include=[np.number]).columns.tolist() + + # If higher_is_better is not provided, default to all False (lower is better) + if higher_is_better is None: + higher_is_better = {} + + # Loop through specified columns and normalize each one + for col in columns: + + # Extract values and compute min/max for normalization + values = out[col].astype(float) + + vmin = values.min() + vmax = values.max() + + # Avoid divide-by-zero if all values identical + if np.isclose(vmin, vmax): + out[f"{col}{suffix}"] = 1.0 + continue + + # Normalize based on whether higher values are better or not + if higher_is_better.get(col, False): + # Higher is better + norm = (values - vmin) / (vmax - vmin) + + else: + # Lower is better + norm = (vmax - values) / (vmax - vmin) + + # Assign normalized values to new column in output DataFrame + out[f"{col}{suffix}"] = norm + + return out \ No newline at end of file diff --git a/src/pyissm/model/Model.py b/src/pyissm/model/Model.py index c24c204c..331e7fb7 100644 --- a/src/pyissm/model/Model.py +++ b/src/pyissm/model/Model.py @@ -975,9 +975,6 @@ def collapse(self): elif isinstance(md.friction, classes.friction.weertman): md.friction.C = mesh._project_2d(md, md.friction.C, 1) md.friction.m = mesh._project_2d(md, md.friction.m, 1) - elif isinstance(md.friction, classes.friction.weertmantemp): - md.friction.C = mesh._project_2d(md, md.friction.C, 1) - md.friction.m = mesh._project_2d(md, md.friction.m, 1) else: raise Exception('pyissm.model.Model.collapse: Friction type not supported for collapse.') diff --git a/src/pyissm/model/execute.py b/src/pyissm/model/execute.py index f13d2a8b..b8f52ed3 100644 --- a/src/pyissm/model/execute.py +++ b/src/pyissm/model/execute.py @@ -1368,6 +1368,12 @@ def wait_on_lock(md): # Determine whether the cluster is the local machine is_local = (md.cluster.name.lower() == tools.config.get_hostname().lower()) + # In interactive mode the queue script runs synchronously (no '&' at end of + # the mpiexec command), so launch_queue_job already blocked until the job + # finished before we get here. There is nothing left to wait for. + if getattr(md.cluster, 'interactive', False): + return True + elapsed = 0 last_print = -60 # ensures a message is printed on the first verbose tick @@ -1499,8 +1505,11 @@ def load_results_from_cluster(md, if hostname.lower() == md.cluster.name.lower(): ## Remove bin and toolkits files - os.remove(md.miscellaneous.name + '.bin') - os.remove(md.miscellaneous.name + '.toolkits') + try: + os.remove(md.miscellaneous.name + '.bin') + os.remove(md.miscellaneous.name + '.toolkits') + except FileNotFoundError: + pass ## If using dakota, remove dakota input file if md.qmu.isdakota and os.path.exists('qmu.in'): diff --git a/src/pyissm/model/param.py b/src/pyissm/model/param.py index 90a14ccd..1a71d44f 100644 --- a/src/pyissm/model/param.py +++ b/src/pyissm/model/param.py @@ -3,6 +3,7 @@ """ import numpy as np +from collections import deque import os from pathlib import Path from datetime import datetime @@ -781,4 +782,327 @@ def reinitialize_levelset(md, levelset): else: levelsetnew[pos] = -levelsetnew[pos] - return levelsetnew \ No newline at end of file + return levelsetnew + +def remove_isolated_ice(md, mode = "floating", min_size = None): + """ + Remove isolated ice patches. + + This function identifies and removes isolated patches of ice from the model's ice_levelset field. + Depending on the specified mode, it can target floating ice (icebergs), grounded ice, or any disconnected + ice patches regardless of grounding. The function uses a connectivity-based approach to determine which + ice vertices are considered isolated and should be removed by setting their ice_levelset values to +1 (non-ice). + + The default mode if "floating" (with no min_size specified). This is equivalent to the `kill_icebergs` function. The "grounded" mode can be + used to remove grounded ice patches that are not connected to floating ice, which may be useful for certain applications. + The "any" mode is more general and removes any disconnected ice patches regardless of grounding, which can help clean + up the model in cases where there are small isolated ice regions. + + Parameters + ---------- + md : :class:`pyissm.model.Model` + ISSM model object. + + mode : :class:`str`, default="floating" + Type of isolated ice to remove. + + - "floating": remove floating ice not connected to grounded ice. + - "grounded": remove grounded ice not connected to floating ice. + - "any": remove disconnected ice patches regardless of grounding. + + min_size : :class:`int` or None, default=None + Minimum number of vertices a disconnected patch must contain to + be retained. + + - None: remove all isolated patches. + - N: remove only isolated patches containing fewer than N vertices. + + Returns + ------- + :class:`numpy.ndarray` + Modified ice_levelset array. + """ + + # Check mode validity + if mode not in {"floating", "grounded", "any"}: + raise ValueError( f"Invalid mode '{mode}'. Expected 'floating', 'grounded', or 'any'.") + + # Get mesh and mask information + elements = md.mesh.elements - 1 + ice_ls = md.mask.ice_levelset + ocean_ls = md.mask.ocean_levelset + + nverts = md.mesh.numberofvertices + nelems = md.mesh.numberofelements + + # ------------------------------------------------------------------ + # MODE = "ANY" + # ------------------------------------------------------------------ + + # In this mode, we look for any disconnected ice patches regardless of grounding. + # We identify connected components of ice vertices and remove those that are not the + # largest (main ice body) or that are smaller than min_size if specified. + if mode == "any": + + print("Looking for isolated ice patches") + + # Identify ice vertices + ice_vertices = np.where(ice_ls < 0)[0] + + if ice_vertices.size == 0: + print("No ice found!") + return ice_ls.copy() + + # Build adjacency graph + adjacency = [set() for _ in range(nverts)] + + # For each element, add edges between its vertices in the adjacency graph + for tri in elements: + i, j, k = tri + + adjacency[i].update((j, k)) + adjacency[j].update((i, k)) + adjacency[k].update((i, j)) + + # Restrict adjacency to ice vertices only for efficiency + ice_set = set(ice_vertices) + + visited = np.zeros(nverts, dtype=bool) + components = [] + + # Find connected ice components + for start in ice_vertices: + + if visited[start]: + continue + + component = [] + queue = deque([start]) + visited[start] = True + + while queue: + + v = queue.popleft() + component.append(v) + + for nbr in adjacency[v]: + + if ( + nbr in ice_set + and not visited[nbr] + ): + visited[nbr] = True + queue.append(nbr) + + components.append(component) + + if len(components) == 1: + print("No isolated ice patches found!") + return ice_ls.copy() + + # Largest component = main ice body + largest = max(len(c) for c in components) + + remove_vertices = [] + + # Remove components that are not the largest or smaller than min_size if specified + for component in components: + + if len(component) == largest: + continue + + if min_size is None: + remove_vertices.extend(component) + elif len(component) < min_size: + remove_vertices.extend(component) + + remove_vertices = np.asarray(remove_vertices, dtype=int) + + if remove_vertices.size == 0: + print( + f"No isolated ice patches smaller than " + f"{min_size} vertices found!" + ) + return ice_ls.copy() + + print( + f"REMOVING {remove_vertices.size} " + f"vertex{'es' if remove_vertices.size != 1 else ''} " + f"from isolated ice patches" + ) + + # Set removed ice vertices to +1 (non-ice) + new_ice_ls = ice_ls.copy() + new_ice_ls[remove_vertices] = 1 + + return new_ice_ls + + # ------------------------------------------------------------------ + # MODES = "FLOATING" OR "GROUNDED" + # ------------------------------------------------------------------ + + # In these modes, we look for isolated patches of either floating or grounded ice. + # We use a flood-fill algorithm starting from the "seed" elements + # (grounded for floating mode, floating for grounded mode) to mark connected ice. + # Any ice vertices that remain unmarked after the flood-fill are considered isolated and removed. + # If min_size is specified, we further check the size of each isolated patch before removing it. + + # Initialize mask and element flags + mask = np.zeros(nverts, dtype=np.int8) + element_flag = np.zeros(nelems, dtype=np.int8) + + isice = np.min(ice_ls[elements], axis=1) < 0 + isgrounded = np.sum(ocean_ls[elements] > 0, axis=1) > 2 + isfloating = isice & ~isgrounded + + element_flag[~isice] = 1 + + if mode == "floating": + + print("Looking for isolated floating ice patches (icebergs)") + + seed_idx = np.where(isgrounded)[0] + + label = "iceberg" + + candidate_mask = ( + (mask == 0) + & (ice_ls < 0) + & (ocean_ls <= 0) + ) + + else: + + print("Looking for isolated grounded ice patches") + + seed_idx = np.where(isfloating)[0] + + label = "grounded ice island" + + candidate_mask = ( + (mask == 0) + & (ice_ls < 0) + & (ocean_ls > 0) + ) + + if seed_idx.size: + mask[elements[seed_idx].ravel()] = 1 + + mask[ice_ls >= 0] = 0 + + iteration = 1 + more = True + + while more: + + print(f" -- iteration {iteration}") + + more = False + + for elem in np.where(element_flag == 0)[0]: + + verts = elements[elem] + + if np.sum(mask[verts]) > 1: + element_flag[elem] = 1 + mask[verts] = 1 + more = True + + iteration += 1 + + if mode == "floating": + candidates = ( + (mask == 0) + & (ice_ls < 0) + & (ocean_ls <= 0) + ) + else: + candidates = ( + (mask == 0) + & (ice_ls < 0) + & (ocean_ls > 0) + ) + + candidate_vertices = np.where(candidates)[0] + + if candidate_vertices.size == 0: + print(f"No isolated {label} found!") + return ice_ls.copy() + + if min_size is None: + + new_ice_ls = ice_ls.copy() + new_ice_ls[candidate_vertices] = 1 + + print( + f"REMOVING {candidate_vertices.size} " + f"vertex{'es' if candidate_vertices.size != 1 else ''} " + f"on {label}s" + ) + + return new_ice_ls + + # Build adjacency graph + adjacency = [set() for _ in range(nverts)] + + for tri in elements: + i, j, k = tri + + adjacency[i].update((j, k)) + adjacency[j].update((i, k)) + adjacency[k].update((i, j)) + + candidate_set = set(candidate_vertices) + + visited = np.zeros(nverts, dtype=bool) + remove_vertices = [] + + for start in candidate_vertices: + + if visited[start]: + continue + + component = [] + queue = deque([start]) + visited[start] = True + + while queue: + + v = queue.popleft() + component.append(v) + + for nbr in adjacency[v]: + + if ( + nbr in candidate_set + and not visited[nbr] + ): + visited[nbr] = True + queue.append(nbr) + + if len(component) < min_size: + remove_vertices.extend(component) + + remove_vertices = np.asarray(remove_vertices, dtype=int) + + if remove_vertices.size == 0: + + print( + f"No isolated {label} smaller than " + f"{min_size} vertices found!" + ) + + return ice_ls.copy() + + print( + f"REMOVING {remove_vertices.size} " + f"vertex{'es' if remove_vertices.size != 1 else ''} " + f"from isolated {label} patches " + f"smaller than {min_size} vertices" + ) + + # Set removed ice vertices to +1 (non-ice) + new_ice_ls = ice_ls.copy() + new_ice_ls[remove_vertices] = 1 + + return new_ice_ls \ No newline at end of file diff --git a/src/pyissm/tools/__init__.py b/src/pyissm/tools/__init__.py index 4af01188..57b1e406 100644 --- a/src/pyissm/tools/__init__.py +++ b/src/pyissm/tools/__init__.py @@ -2,4 +2,4 @@ Universal tools for interacting with ISSM Model objects. """ -from pyissm.tools import config, exp, general, wrappers, materials, geometry, interp, archive +from pyissm.tools import archive, config, diagnostics, exp, general, geometry, interp, materials, wrappers diff --git a/src/pyissm/tools/diagnostics.py b/src/pyissm/tools/diagnostics.py new file mode 100644 index 00000000..5f2307aa --- /dev/null +++ b/src/pyissm/tools/diagnostics.py @@ -0,0 +1,151 @@ +""" +Diagnostic functions for ISSM models + +This module contains various functions that support diagnostic analysis of ISSM models. +""" + +import numpy as np +from . import interp +from .. import model + +def weighted_mean(md, + values, + weights = None): + """ + Compute weighted mean. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model. + + values : :class:`numpy.ndarray` + Values to compute weighted mean. Either vertex-based or element-based. + + weights : :class:`numpy.ndarray`, optional + Custom weights. Default = None, in which case element areas will be used. + + Returns + ------- + :class:`float` + Weighted mean value. + """ + + # Ensure values is a numpy array + values = np.asarray(values) + + # If values are vertex-based, convert to element-based + if values.shape[0] == md.mesh.numberofvertices: + values = interp.vertex_to_element(md, values) + + # If weights are not provided, use element areas + if weights is None: + weights = model.mesh.get_element_areas_volumes(md.mesh.elements, md.mesh.x, md.mesh.y) + + # Ensure weights is a numpy array + weights = np.asarray(weights) + + # Mask out non-finite values and corresponding weights + mask = np.isfinite(values) & np.isfinite(weights) + + # Compute area-weighted mean + wm = np.sum(values[mask] * weights[mask]) / np.sum(weights[mask]) + + return wm + +def weighted_rmse(md, + values, + weights = None): + """ + Compute weighted RMSE. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model. + + values : :class:`numpy.ndarray` + Values to compute weighted RMSE. + + weights : :class:`numpy.ndarray`, optional. + Custom weights. Default = None, in which case element areas will be used. + + Returns + ------- + :class:`float` + Weighted RMSE value. + """ + + # Ensure values is a numpy array + values = np.asarray(values) + + # Compute weighted mean square error + wmse = weighted_mean(md, values**2, weights = weights) + + # Return RMSE + return np.sqrt(wmse) + +def velocity_residuals(md, + obs_vel = None, + model_vel = None): + """ + Compute velocity residuals. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model. + + obs_vel : :class:`numpy.ndarray`, optional + Observed velocities. If None, will use md.inversion.vel_obs. + + model_vel : :class:`numpy.ndarray`, optional + Model velocities. If None, will use md.results.StressbalanceSolution.Vel + + Returns + ------- + :class:`numpy.ndarray` + Velocity residuals. + """ + + # If obs_vel is not provided, extract from model inversion data + if obs_vel is None: + obs_vel = md.inversion.vel_obs + + # If model_vel is not provided, extract from model results + if model_vel is None: + model_vel = md.results.StressbalanceSolution.Vel + + # Calculate residuals + residuals = model_vel - obs_vel + + return residuals + +def velocity_rmse(md, + obs_vel = None, + model_vel = None): + """ + Compute weighted velocity RMSE. + + Parameters + ---------- + md : :class:`pyissm.Model` + ISSM model. + + obs_vel : :class:`numpy.ndarray`, optional + Observed velocities. If None, will use md.inversion.vel_obs. + + model_vel : :class:`numpy.ndarray`, optional + Model velocities. If None, will use md.results.StressbalanceSolution.Vel + + Returns + ------- + :class:`float` + Weighted velocity RMSE. + """ + + # Compute velocity residuals + residuals = velocity_residuals(md, obs_vel, model_vel) + + # Compute and return weighted RMSE of velocity residuals + return weighted_rmse(md, residuals) \ No newline at end of file diff --git a/src/pyissm/tools/exp.py b/src/pyissm/tools/exp.py index 1b51f79e..6f989ce3 100644 --- a/src/pyissm/tools/exp.py +++ b/src/pyissm/tools/exp.py @@ -5,6 +5,9 @@ import numpy as np import collections import os +from pathlib import Path +import geopandas as gpd +from shapely.geometry import LineString, MultiLineString, Point, Polygon, MultiPolygon def exp_write(contours, filename): """ @@ -600,4 +603,255 @@ def _find_edge(edge_pairs, edge_id): return contours, edges_tria # default: "struct" - return contours, edges_tria \ No newline at end of file + return contours, edges_tria + +def gdf_to_exp(gdf, exp_filename): + """ + Convert a GeoDataFrame to an Argus .exp file. + + The function reads geometries from the input GeoDataFrame and converts them to contours in the exp file format. + Supported geometry types include Point, LineString, MultiLineString, Polygon, and MultiPolygon. Each contour in the exp file + will include coordinate data and optional attributes such as name and density. The function performs error checks + on file existence and extensions before processing. + + + Parameters + ---------- + gdf : GeoDataFrame + Input GeoDataFrame containing geometries to convert to exp format. + exp_filename : str or Path + Output exp filename. Must have .exp extension. + + Returns + ------- + None + """ + + # Convert to Path object + exp_filename = Path(exp_filename) + + # Error checks + if exp_filename.suffix.lower() != ".exp": + raise ValueError(f"pyissm.tools.exp.gdf_to_exp: Exp file {exp_filename} does not have extension '.exp'") + + # Accept GeoSeries OR GeoDataFrame + if isinstance(gdf, gpd.GeoDataFrame): + geoms = gdf.geometry + attrs = gdf + elif isinstance(gdf, gpd.GeoSeries): + geoms = gdf + attrs = None + else: + raise TypeError(f"pyissm.tools.exp.gdf_to_exp: Input must be a GeoDataFrame or GeoSeries") + + # Define helper function to extract name + def _get_name(i): + if attrs is None: + return "unknown" + + row = attrs.iloc[i] + + for key in ("id", "NAME", "SUBREGION1"): + if key in row and row[key] not in (None, ""): + return str(row[key]) + + return "unknown" + + contours = [] + + + # Loop over geometries in the geoms object and convert to exp contours + for i, geom in enumerate(geoms): + + ## Extract name + name = _get_name(i) + + # Polygon + if isinstance(geom, Polygon): + + coords = np.asarray(geom.exterior.coords) + + contours.append({ + "x": coords[:, 0], + "y": coords[:, 1], + "nods": len(coords), + "density": 1, + "closed": 1, + "name": name, + }) + + # Point + elif isinstance(geom, Point): + + contours.append({ + "x": geom.x, + "y": geom.y, + "nods": 1, + "density": 1, + "closed": 0, + "name": name, + }) + + # LineString + elif isinstance(geom, LineString): + + coords = np.asarray(geom.coords) + + contours.append({ + "x": coords[:, 0], + "y": coords[:, 1], + "nods": len(coords), + "density": 1, + "closed": 0, + "name": name, + }) + + # MultiLineString + elif isinstance(geom, MultiLineString): + + for line in geom.geoms: + + coords = np.asarray(line.coords) + + contours.append({ + "x": coords[:, 0], + "y": coords[:, 1], + "nods": len(coords), + "density": 1, + "closed": 0, + "name": name, + }) + + # MultiPolygon + elif isinstance(geom, MultiPolygon): + + for poly in geom.geoms: + + coords = np.asarray(poly.exterior.coords) + + contours.append({ + "x": coords[:, 0], + "y": coords[:, 1], + "nods": len(coords), + "density": 1, + "closed": 1, + "name": name, + }) + + # Write contours to exp file + exp_write(contours, exp_filename) + +def shp_to_exp(shp_filename, exp_filename): + """ + Convert a shapefile (.shp) to an Argus (.exp) file. + + The function reads geometries from the input shapefile and converts them to contours in the exp file format. + + Parameters + ---------- + shp_filename : str or Path + Input shapefile. + exp_filename : str or Path + Output exp filename. + + Returns + ------- + None + """ + + # Convert to Path objects + shp_filename = Path(shp_filename) + exp_filename = Path(exp_filename) + + # Error checks + if shp_filename.suffix.lower() != ".shp": + raise ValueError(f"pyissm.tools.exp.shp_to_exp: Shapefile {shp_filename} does not have extension '.shp'") + + if exp_filename.suffix.lower() != ".exp": + raise ValueError(f"pyissm.tools.exp.shp_to_exp: Exp file {exp_filename} does not have extension '.exp'") + + if not shp_filename.exists(): + raise FileNotFoundError(f"pyissm.tools.exp.shp_to_exp: Shapefile {shp_filename} does not exist") + + # Read shapefile + gdf = gpd.read_file(shp_filename) + + # Convert GeoDataFrame to exp contours and write to file + gdf_to_exp(gdf, exp_filename) + + +def exp_to_shp(exp_filename, shp_filename): + """ + Convert an Argus (.exp) file to a shapefile (.shp). + + The function reads contours from the input exp file and converts them to geometries in a shapefile format. + Supported geometry types include Point, LineString, and Polygon based on the number of points and closed flag + in the exp contours. Each geometry in the shapefile will include attributes such as name and density from the + exp file. The function performs error checks on file existence and extensions before processing. + + Parameters + ---------- + exp_filename : str or Path + Input exp file. + shp_filename : str or Path + Output shapefile. + + Returns + ------- + None + """ + + # Convert to Path objects + exp_filename = Path(exp_filename) + shp_filename = Path(shp_filename) + + # Error checks + if exp_filename.suffix.lower() != ".exp": + raise ValueError(f"pyissm.tools.exp.exp_to_shp: Exp file {exp_filename} does not have extension '.exp'") + + if shp_filename.suffix.lower() != ".shp": + raise ValueError(f"pyissm.tools.exp.exp_to_shp: Shapefile {shp_filename} does not have extension '.shp'") + + if not exp_filename.exists(): + raise FileNotFoundError(f"pyissm.tools.exp.exp_to_shp: Exp file {exp_filename} does not exist") + + # Read exp contours + contours = exp_read(exp_filename) + + records = [] + + # Loop over contours and convert to shapely geometries + for contour in contours: + + x = contour["x"] + y = contour["y"] + + coords = list(zip(x, y)) + + # Determine geometry type based on number of points and closed flag + if len(coords) == 1: + + geometry = Point(coords[0]) + + elif contour.get("closed", False): + + geometry = Polygon(coords) + + else: + + geometry = LineString(coords) + + # Store attributes + records.append({ + "geometry": geometry, + "name": contour.get("name", ""), + "density": contour.get("density", 1), + "nods": contour.get("nods", len(coords)), + "closed": contour.get("closed", False), + }) + + # Create GeoDataFrame and write shapefile + gdf = gpd.GeoDataFrame(records) + + # Save to shapefile + gdf.to_file(shp_filename) \ No newline at end of file diff --git a/src/pyissm/tools/interp.py b/src/pyissm/tools/interp.py index b7a82322..f5139304 100644 --- a/src/pyissm/tools/interp.py +++ b/src/pyissm/tools/interp.py @@ -147,4 +147,97 @@ def averaging(md, data, iterations, layer = 0): # Return output as a full matrix (C code does not like sparse matrices) average = np.expand_dims(np.asarray(average_node.todense()).reshape(-1, ),axis=1) - return average \ No newline at end of file + return average + +def vertex_to_element(md, + values, + method = "mean"): + """ + Transfer vertex-defined values to mesh elements. + + Parameters + ---------- + md : :class:`pyissm.model` + ISSM model object containing mesh connectivity in + ``md.mesh.elements`` and vertex count in + ``md.mesh.numberofvertices``. + + values : :class:`array_like` + Vertex-defined field of shape ``(md.mesh.numberofvertices,)``. + + method : :class:`str`, optional + Aggregation method used to combine vertex values within each element. + Available options: ``"mean"``, ``"min"``, ``"max"``. Default is ``"mean"``. + - ``"mean"``: Compute the average of vertex values for each element. + - ``"min"``: Take the minimum vertex value for each element. + - ``"max"``: Take the maximum vertex value for each element. + + Returns + ------- + numpy.ndarray + Element-defined field of shape ``(md.mesh.numberofelements,)``. + + Raises + ------ + ValueError + If ``values`` is not vertex-defined for the mesh or if ``method`` is + unsupported. + """ + + # Convert input to numpy array + values = np.asarray(values) + + # Get element connectivity (adjusting for 0-based indexing) + elements = md.mesh.elements - 1 + + # Check that values are vertex-defined + if values.shape[0] != md.mesh.numberofvertices: + raise ValueError(f"pyissm.tools.interp.vertex_to_element: values must be vertex-defined") + + # Gather vertex values for each element + element_values = values[elements] + + # Aggregate vertex values to element values using the specified method + if method == "mean": + return np.mean(element_values, axis = 1) + + elif method == "min": + return np.min(element_values, axis = 1) + + elif method == "max": + return np.max(element_values, axis = 1) + + # Unsupported method + else: + raise ValueError(f"pyissm.tools.interp.vertex_to_element: Unsupported method: {method}") + +def element_to_vertex(md, + values, + layer = 0): + """ + Transfer element-defined values to vertices. + + Parameters + ---------- + md : :class:`pyissm.model` + ISSM model object containing mesh connectivity in + ``md.mesh.elements`` and vertex count in + ``md.mesh.numberofvertices``. + + values : :class:`numpy.ndarray` + Element-defined field of shape ``(md.mesh.numberofelements,)``. + + layer : :class:`int`, optional + Layer index to extract when working with a 3-D mesh. Default is 0, + meaning operate on the full mesh. If non-zero, it must satisfy + 1 <= layer <= md.mesh.numberoflayers and the function will operate on + the corresponding 2-D layer (using mesh.elements2d, x2d, y2d, etc.). + + Returns + ------- + :class:`numpy.ndarray` + Vertex-defined field of shape ``(md.mesh.numberofvertices,)``. + """ + + # Return the area/weighted average of element values at each vertex + return averaging(md, values, iterations = 0, layer = layer).flatten() \ No newline at end of file