diff --git a/src/pyfia/constants/defaults.py b/src/pyfia/constants/defaults.py index 464c40ce..50584576 100644 --- a/src/pyfia/constants/defaults.py +++ b/src/pyfia/constants/defaults.py @@ -61,45 +61,6 @@ class ValidationRanges: MAX_PLOTS = 1_000_000 -class EVALIDYearParsing: - """Constants for parsing years from EVALID codes. - - EVALID format: SSYYTT where: - - SS = State FIPS code (2 digits) - - YY = Year (2 digits, requires Y2K windowing) - - TT = Evaluation type code (2 digits) - - FIA uses Y2K windowing to interpret 2-digit years: - - Years 00-30 are interpreted as 2000-2030 - - Years 31-99 are interpreted as 1931-1999 - - Note: The FIA program began annual inventory in 1999, so valid - evaluation years are typically 1999-present. Earlier years may - appear in legacy data. - - References: - FIA Database User Guide, Appendix B: EVALID Construction - """ - - # Y2K windowing threshold: years <= this value are 20xx, > are 19xx - Y2K_WINDOW_THRESHOLD = 30 - - # Century bases for Y2K windowing - CENTURY_2000 = 2000 - CENTURY_1900 = 1900 - - # Alternative threshold used in some contexts (90-99 are 1990s) - LEGACY_THRESHOLD = 90 - - # Valid year range for FIA evaluations - # FIA annual inventory began in 1999; evaluations extend through near-future - MIN_VALID_YEAR = 1990 # Allow some pre-annual inventory data - MAX_VALID_YEAR = 2050 # Reasonable future limit - - # Default year offset when year cannot be determined - # FIA data typically has ~2 year processing lag - DEFAULT_YEAR_OFFSET = 2 - class ErrorMessages: """Standard error messages.""" diff --git a/src/pyfia/estimation/base.py b/src/pyfia/estimation/base.py index e169c9d5..13741dee 100644 --- a/src/pyfia/estimation/base.py +++ b/src/pyfia/estimation/base.py @@ -13,7 +13,6 @@ import polars as pl -from ..constants.defaults import EVALIDYearParsing from ..core import FIA from ..filtering import ( apply_area_filters, @@ -574,10 +573,10 @@ def _preserve_plot_tree_data( def _extract_evaluation_year(self) -> int: """ - Extract evaluation year from EVALID or INVYR. + Extract evaluation year from POP_EVAL.END_INVYR or PLOT.INVYR. - The year extraction follows FIA conventions: - 1. Primary: Extract from EVALID (SSYYTT format where YY is year) + The year extraction follows this priority: + 1. Primary: END_INVYR from POP_EVAL table (unambiguous 4-digit year) 2. Fallback: Use max INVYR from PLOT table 3. Default: Current year minus 2 (typical FIA processing lag) @@ -588,60 +587,39 @@ def _extract_evaluation_year(self) -> int: """ year = None - # Primary source: EVALID encodes the evaluation reference year - # EVALIDs are 6-digit codes: SSYYTT where YY is the evaluation year - if hasattr(self.db, "evalids") and self.db.evalids: - evalid = self.db.evalids[0] # Use first EVALID - evalid_str = str(evalid) - - # Validate EVALID format: must be exactly 6 digits (SSYYTT) - if len(evalid_str) != 6: - logger.debug( - f"Invalid EVALID format: '{evalid_str}' has {len(evalid_str)} " - f"characters, expected 6 (SSYYTT format)" - ) - elif not evalid_str.isdigit(): - logger.debug( - f"Invalid EVALID format: '{evalid_str}' contains non-digit " - f"characters, expected 6 digits (SSYYTT format)" + # Primary source: END_INVYR from POP_EVAL for the current EVALID + if hasattr(self.db, "evalid") and self.db.evalid: + try: + if "POP_EVAL" not in self.db.tables: + self.db.load_table("POP_EVAL") + pop_eval_data = self.db.tables["POP_EVAL"] + pop_eval_df: pl.DataFrame = ( + pop_eval_data.collect() + if isinstance(pop_eval_data, pl.LazyFrame) + else pop_eval_data ) - else: - try: - year_part = int(evalid_str[2:4]) # Extract YY portion - - # Handle century using Y2K windowing - # Years >= 90 are 1990s, years < 90 are 2000s - if year_part >= EVALIDYearParsing.LEGACY_THRESHOLD: - year = EVALIDYearParsing.CENTURY_1900 + year_part - else: - year = EVALIDYearParsing.CENTURY_2000 + year_part - - # Validate year is within reasonable range - if ( - year < EVALIDYearParsing.MIN_VALID_YEAR - or year > EVALIDYearParsing.MAX_VALID_YEAR - ): - logger.debug( - f"EVALID year {year} outside valid range " - f"({EVALIDYearParsing.MIN_VALID_YEAR}-" - f"{EVALIDYearParsing.MAX_VALID_YEAR}), using fallback" - ) - year = None # Fall back to other methods - except ValueError as e: - logger.debug( - f"Could not parse year from EVALID '{evalid_str}': {e}" + if "END_INVYR" in pop_eval_df.columns: + filtered = pop_eval_df.filter( + pl.col("EVALID").is_in(self.db.evalid) ) + if not filtered.is_empty(): + max_year = filtered["END_INVYR"].max() + if max_year is not None: + year = int(max_year) # type: ignore[arg-type] + except Exception as e: + logger.debug(f"Could not extract year from POP_EVAL: {e}") - # Fallback: If no EVALID, use most recent INVYR as approximation + # Fallback: use most recent INVYR from PLOT table if year is None and "PLOT" in self.db.tables: try: plot_data = self.db.tables["PLOT"] - if isinstance(plot_data, pl.LazyFrame): - plot_years = plot_data.select("INVYR").collect() - else: - plot_years = plot_data.select("INVYR") + plot_df: pl.DataFrame = ( + plot_data.collect() + if isinstance(plot_data, pl.LazyFrame) + else plot_data + ) + plot_years = plot_df.select("INVYR") if not plot_years.is_empty(): - # Use max year as it best represents the evaluation period max_year = plot_years["INVYR"].max() if max_year is not None: year = int(max_year) # type: ignore[arg-type] @@ -652,7 +630,7 @@ def _extract_evaluation_year(self) -> int: if year is None: from datetime import datetime - year = datetime.now().year - EVALIDYearParsing.DEFAULT_YEAR_OFFSET + year = datetime.now().year - 2 return year diff --git a/tests/unit/test_defaults.py b/tests/unit/test_defaults.py index e2bd5fe2..baa876f4 100644 --- a/tests/unit/test_defaults.py +++ b/tests/unit/test_defaults.py @@ -4,7 +4,6 @@ from pyfia.constants.defaults import ( Defaults, ErrorMessages, - EVALIDYearParsing, MathConstants, ValidationRanges, ) @@ -82,85 +81,6 @@ def test_plot_count_range(self): assert ValidationRanges.MIN_PLOTS < ValidationRanges.MAX_PLOTS -class TestEVALIDYearParsing: - """Tests for EVALID year parsing constants.""" - - def test_y2k_window_threshold(self): - """Test Y2K windowing threshold.""" - # Years 00-30 should be interpreted as 2000-2030 - assert EVALIDYearParsing.Y2K_WINDOW_THRESHOLD == 30 - - def test_century_constants(self): - """Test century base constants.""" - assert EVALIDYearParsing.CENTURY_2000 == 2000 - assert EVALIDYearParsing.CENTURY_1900 == 1900 - - def test_legacy_threshold(self): - """Test legacy threshold for 1990s detection.""" - # Years 90-99 should be interpreted as 1990-1999 - assert EVALIDYearParsing.LEGACY_THRESHOLD == 90 - - def test_valid_year_range(self): - """Test valid year range for FIA evaluations.""" - assert EVALIDYearParsing.MIN_VALID_YEAR == 1990 - assert EVALIDYearParsing.MAX_VALID_YEAR == 2050 - assert EVALIDYearParsing.MIN_VALID_YEAR < EVALIDYearParsing.MAX_VALID_YEAR - - def test_default_year_offset(self): - """Test default year offset for processing lag.""" - assert EVALIDYearParsing.DEFAULT_YEAR_OFFSET == 2 - - def test_y2k_windowing_logic(self): - """Test that constants support correct Y2K windowing logic.""" - - # Simulate Y2K windowing as used in code - def parse_evalid_year(year_part: int) -> int: - if year_part <= EVALIDYearParsing.Y2K_WINDOW_THRESHOLD: - return EVALIDYearParsing.CENTURY_2000 + year_part - else: - return EVALIDYearParsing.CENTURY_1900 + year_part - - # Test boundary cases - assert parse_evalid_year(0) == 2000 - assert parse_evalid_year(30) == 2030 - assert parse_evalid_year(31) == 1931 - assert parse_evalid_year(99) == 1999 - - def test_legacy_year_parsing_logic(self): - """Test legacy year parsing logic used in base estimator.""" - - # Simulate legacy year parsing as used in _infer_evaluation_year - def parse_legacy_year(year_part: int) -> int: - if year_part >= EVALIDYearParsing.LEGACY_THRESHOLD: - return EVALIDYearParsing.CENTURY_1900 + year_part - else: - return EVALIDYearParsing.CENTURY_2000 + year_part - - # Test boundary cases - assert parse_legacy_year(89) == 2089 # Below threshold - assert parse_legacy_year(90) == 1990 # At threshold - assert parse_legacy_year(99) == 1999 # 1990s - - def test_year_validation_logic(self): - """Test that year validation logic works with constants.""" - - def is_valid_year(year: int) -> bool: - return ( - EVALIDYearParsing.MIN_VALID_YEAR - <= year - <= EVALIDYearParsing.MAX_VALID_YEAR - ) - - # Valid years - assert is_valid_year(1990) is True - assert is_valid_year(2000) is True - assert is_valid_year(2024) is True - assert is_valid_year(2050) is True - - # Invalid years - assert is_valid_year(1989) is False - assert is_valid_year(2051) is False - class TestErrorMessages: """Tests for standard error messages.""" @@ -205,27 +125,6 @@ def test_invalid_domain_message_format(self): class TestConstantsIntegration: """Integration tests verifying constants work together.""" - def test_evalid_year_range_covers_valid_years(self): - """Test that EVALID parsing produces years within valid range.""" - # All years from Y2K windowing should be validatable - # Years 00-30 -> 2000-2030 (all within MIN_VALID_YEAR to MAX_VALID_YEAR) - for year_part in range(0, EVALIDYearParsing.Y2K_WINDOW_THRESHOLD + 1): - year = EVALIDYearParsing.CENTURY_2000 + year_part - assert ( - EVALIDYearParsing.MIN_VALID_YEAR - <= year - <= EVALIDYearParsing.MAX_VALID_YEAR - ), f"Year {year} from year_part {year_part} is outside valid range" - - # Years 90-99 -> 1990-1999 (all within MIN_VALID_YEAR to MAX_VALID_YEAR) - for year_part in range(EVALIDYearParsing.LEGACY_THRESHOLD, 100): - year = EVALIDYearParsing.CENTURY_1900 + year_part - assert ( - EVALIDYearParsing.MIN_VALID_YEAR - <= year - <= EVALIDYearParsing.MAX_VALID_YEAR - ), f"Year {year} from year_part {year_part} is outside valid range" - def test_defaults_are_reasonable(self): """Test that default values are reasonable for FIA analysis.""" # Adjustment and expansion factors default to 1 (no adjustment)