LeMaterial · andaero · May 27, 2025 · May 27, 2025 · Jun 2, 2025 · Jun 2, 2025
diff --git a/.gitignore b/.gitignore
@@ -170,3 +170,5 @@ cython_debug/
 lightning_logs/
 src/lematerial_forgebench/.DS_Store
 
+# Ignore .gz
+*.gz
diff --git a/pyproject.toml b/pyproject.toml
@@ -57,3 +57,4 @@ material-hasher = { git = "https://github.com/lematerial/material-hasher.git" }
 
 [tool.ruff.lint]
 extend-select = ["I"]
+ignore = ["F401"]
diff --git a/src/lematerial_forgebench/benchmarks/stability_benchmark.py b/src/lematerial_forgebench/benchmarks/stability_benchmark.py
@@ -0,0 +1,147 @@
+"""Stability benchmark for material structures.
+
+This module implements a benchmark that evaluates the stability of
+generated material structures using various relaxation methods.
+"""
+
+from typing import Any, Dict
+
+from lematerial_forgebench.benchmarks.base import BaseBenchmark
+from lematerial_forgebench.evaluator import EvaluationResult, EvaluatorConfig
+from lematerial_forgebench.metrics.stability_metrics import (
+    MetastabilityMetric,
+    StabilityMetric,
+)
+
+
+class StabilityBenchmark(BaseBenchmark):
+    """Benchmark for evaluating the stability of generated material structures."""
+
+    def __init__(
+        self,
+        name: str = "StabilityBenchmark",
+        description: str | None = None,
+        metadata: Dict[str, Any] | None = None,
+    ):
+        """Initialize the stability benchmark.
+
+        Registers two evaluators, "stability" and "metastability", each
+        wrapping a single metric with weight 1.0 and "weighted_mean"
+        aggregation.
+
+        Parameters
+        ----------
+        name : str
+            Name of the benchmark.
+        description : str, optional
+            Description of the benchmark. If None, a default description
+            is used.
+        metadata : dict, optional
+            Extra metadata merged over the default "version" and
+            "category" entries; keys given here override the defaults.
+        """
+        if description is None:
+            description = (
+                "Evaluates the stability and metastability of crystal structures"
+            )
+
+        # Metric behind the "stability" evaluator (built with default arguments)
+        stability_metric = StabilityMetric()
+
+        # Evaluator configs, keyed by the names aggregate_evaluator_results reads
+        evaluator_configs = {
+            "stability": EvaluatorConfig(
+                name="Stability",
+                description="Evaluates structure stability",
+                metrics={"stability": stability_metric},
+                weights={"stability": 1.0},
+                aggregation_method="weighted_mean",
+            ),
+        }
+
+        # The metastability evaluator is always added; there is no opt-out flag
+        metastability_metric = MetastabilityMetric()
+        evaluator_configs["metastability"] = EvaluatorConfig(
+            name="Metastability Analysis",
+            description="Evaluates metastability from precomputed e_above_hull values",
+            metrics={"metastability": metastability_metric},
+            weights={"metastability": 1.0},
+            aggregation_method="weighted_mean",
+        )
+
+        # Caller-supplied metadata is unpacked last, so it overrides the defaults
+        benchmark_metadata = {
+            "version": "0.1.0",
+            "category": "stability",
+            **(metadata or {}),
+        }
+
+        super().__init__(
+            name=name,
+            description=description,
+            evaluator_configs=evaluator_configs,
+            metadata=benchmark_metadata,
+        )
+
+    def aggregate_evaluator_results(
+        self, evaluator_results: Dict[str, EvaluationResult]
+    ) -> Dict[str, float]:
+        """Aggregate results from multiple evaluators into final scores.
+
+        Parameters
+        ----------
+        evaluator_results : dict[str, EvaluationResult]
+            Results keyed by evaluator name; each is read via ``.get`` lookups.
+
+        Returns
+        -------
+        dict[str, float]
+            "stable_ratio", "metastable_ratio", "mean_e_above_hull" (0.0 if absent).
+        """
+        import math
+
+        def safe_float(value, default=0.0):
+            """Return float(value); fall back to default on None, NaN or errors."""
+            if value is None:
+                return default
+            try:
+                float_val = float(value)
+                if math.isnan(float_val):
+                    return default
+                return float_val
+            except (TypeError, ValueError):
+                return default
+
+        final_scores = {
+            "stable_ratio": 0.0,
+            "metastable_ratio": 0.0,
+            "mean_e_above_hull": 0.0,
+        }
+
+        # A missing or falsy "stability" entry leaves the 0.0 defaults in place
+        stability_results = evaluator_results.get("stability")
+        if stability_results:
+            # The evaluator's combined value is reported as the stable ratio
+            final_scores["stable_ratio"] = safe_float(
+                stability_results.get("combined_value")
+            )
+
+            # Drill into metric_results -> "stability" -> "metrics"; {} if missing
+            stability_metric_results = stability_results.get("metric_results", {}).get(
+                "stability", {}
+            )
+            stability_metrics = stability_metric_results.get("metrics", {})
+
+            final_scores["mean_e_above_hull"] = safe_float(
+                stability_metrics.get("mean_e_above_hull")
+            )
+
+        # Same treatment for the "metastability" entry, when present
+        metastability_results = evaluator_results.get("metastability")
+        if metastability_results:
+            # The evaluator's combined value is reported as the metastable ratio
+            final_scores["metastable_ratio"] = safe_float(
+                metastability_results.get("combined_value")
+            )
+
+        return final_scores
diff --git a/src/lematerial_forgebench/cli.py b/src/lematerial_forgebench/cli.py
@@ -11,6 +11,7 @@
 import yaml
 
 from lematerial_forgebench.benchmarks.example import ExampleBenchmark
+from lematerial_forgebench.benchmarks.stability_benchmark import StabilityBenchmark
 from lematerial_forgebench.benchmarks.validity_benchmark import ValidityBenchmark
 from lematerial_forgebench.data.structure import format_structures
 from lematerial_forgebench.metrics.validity_metrics import (
@@ -19,6 +20,9 @@
     MinimumInteratomicDistanceMetric,
     PhysicalPlausibilityMetric,
 )
+from lematerial_forgebench.preprocess.stability_preprocess import (
+    StabilityPreprocessor,
+)
 from lematerial_forgebench.utils.logging import logger
 
 CONFIGS_DIR = Path(__file__).parent.parent / "config"
@@ -169,13 +173,9 @@ def main(input: str, config_name: str, output: str):
             coord_tolerance = coord_config.get("tolerance", 0.2)
 
             # Create custom metrics with configuration
-            ChargeNeutralityMetric(
-                tolerance=charge_tolerance, strict=charge_strict
-            )
+            ChargeNeutralityMetric(tolerance=charge_tolerance, strict=charge_strict)
 
-            MinimumInteratomicDistanceMetric(
-                scaling_factor=distance_scaling
-            )
+            MinimumInteratomicDistanceMetric(scaling_factor=distance_scaling)
 
             CoordinationEnvironmentMetric(
                 nn_method=coord_nn_method, tolerance=coord_tolerance
@@ -196,6 +196,14 @@ def main(input: str, config_name: str, output: str):
                     "metric_configs": metric_configs,
                 },
             )
+        elif benchmark_type == "stability":
+            # before running the benchmark, we need to preprocess the structures
+            stability_preprocessor = StabilityPreprocessor()
+            # Use the preprocessor to process structures
+            preprocessor_result = stability_preprocessor(structures)
+            structures = preprocessor_result.processed_structures
+
+            benchmark = StabilityBenchmark()
         else:
             raise ValueError(f"Unknown benchmark type: {benchmark_type}")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -57,3 +57,4 @@ material-hasher = { git = "https://github.com/lematerial/material-hasher.git" }

		[tool.ruff.lint]
		extend-select = ["I"]
		ignore = ["F401"]