Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions src/pyfia/constants/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,45 +61,6 @@ class ValidationRanges:
MAX_PLOTS = 1_000_000


class EVALIDYearParsing:
    """Numeric constants for deriving a calendar year from an EVALID code.

    An EVALID is laid out as ``SSYYTT``:
        - ``SS``: state FIPS code (2 digits)
        - ``YY``: year (2 digits; must be expanded with Y2K windowing)
        - ``TT``: evaluation type code (2 digits)

    Y2K windowing as applied by FIA to the 2-digit year:
        - 00-30 are read as 2000-2030
        - 31-99 are read as 1931-1999

    Note: annual FIA inventory started in 1999, so evaluation years are
    normally 1999-present, although older years can occur in legacy data.

    References:
        FIA Database User Guide, Appendix B: EVALID Construction
    """

    # Base values added to the 2-digit year once its century is decided.
    CENTURY_1900 = 1900
    CENTURY_2000 = 2000

    # Windowing pivot: a 2-digit year <= this value is 20xx, otherwise 19xx.
    Y2K_WINDOW_THRESHOLD = 30

    # Alternative pivot used in some contexts: 90-99 are treated as the 1990s.
    LEGACY_THRESHOLD = 90

    # Accepted range of 4-digit evaluation years. Annual inventory began in
    # 1999; the bounds are deliberately wider on both sides.
    MIN_VALID_YEAR = 1990  # leaves room for some pre-annual-inventory data
    MAX_VALID_YEAR = 2050  # generous upper bound for future evaluations

    # Years subtracted from "now" when no year can be determined; FIA data
    # typically lags by roughly two years of processing.
    DEFAULT_YEAR_OFFSET = 2


class ErrorMessages:
"""Standard error messages."""
Expand Down
82 changes: 30 additions & 52 deletions src/pyfia/estimation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import polars as pl

from ..constants.defaults import EVALIDYearParsing
from ..core import FIA
from ..filtering import (
apply_area_filters,
Expand Down Expand Up @@ -574,10 +573,10 @@ def _preserve_plot_tree_data(

def _extract_evaluation_year(self) -> int:
"""
Extract evaluation year from EVALID or INVYR.
Extract evaluation year from POP_EVAL.END_INVYR or PLOT.INVYR.

The year extraction follows FIA conventions:
1. Primary: Extract from EVALID (SSYYTT format where YY is year)
The year extraction follows this priority:
1. Primary: END_INVYR from POP_EVAL table (unambiguous 4-digit year)
2. Fallback: Use max INVYR from PLOT table
3. Default: Current year minus 2 (typical FIA processing lag)

Expand All @@ -588,60 +587,39 @@ def _extract_evaluation_year(self) -> int:
"""
year = None

# Primary source: EVALID encodes the evaluation reference year
# EVALIDs are 6-digit codes: SSYYTT where YY is the evaluation year
if hasattr(self.db, "evalids") and self.db.evalids:
evalid = self.db.evalids[0] # Use first EVALID
evalid_str = str(evalid)

# Validate EVALID format: must be exactly 6 digits (SSYYTT)
if len(evalid_str) != 6:
logger.debug(
f"Invalid EVALID format: '{evalid_str}' has {len(evalid_str)} "
f"characters, expected 6 (SSYYTT format)"
)
elif not evalid_str.isdigit():
logger.debug(
f"Invalid EVALID format: '{evalid_str}' contains non-digit "
f"characters, expected 6 digits (SSYYTT format)"
# Primary source: END_INVYR from POP_EVAL for the current EVALID
if hasattr(self.db, "evalid") and self.db.evalid:
try:
if "POP_EVAL" not in self.db.tables:
self.db.load_table("POP_EVAL")
pop_eval_data = self.db.tables["POP_EVAL"]
pop_eval_df: pl.DataFrame = (
pop_eval_data.collect()
if isinstance(pop_eval_data, pl.LazyFrame)
else pop_eval_data
)
else:
try:
year_part = int(evalid_str[2:4]) # Extract YY portion

# Handle century using Y2K windowing
# Years >= 90 are 1990s, years < 90 are 2000s
if year_part >= EVALIDYearParsing.LEGACY_THRESHOLD:
year = EVALIDYearParsing.CENTURY_1900 + year_part
else:
year = EVALIDYearParsing.CENTURY_2000 + year_part

# Validate year is within reasonable range
if (
year < EVALIDYearParsing.MIN_VALID_YEAR
or year > EVALIDYearParsing.MAX_VALID_YEAR
):
logger.debug(
f"EVALID year {year} outside valid range "
f"({EVALIDYearParsing.MIN_VALID_YEAR}-"
f"{EVALIDYearParsing.MAX_VALID_YEAR}), using fallback"
)
year = None # Fall back to other methods
except ValueError as e:
logger.debug(
f"Could not parse year from EVALID '{evalid_str}': {e}"
if "END_INVYR" in pop_eval_df.columns:
filtered = pop_eval_df.filter(
pl.col("EVALID").is_in(self.db.evalid)
)
if not filtered.is_empty():
max_year = filtered["END_INVYR"].max()
if max_year is not None:
year = int(max_year) # type: ignore[arg-type]
except Exception as e:
logger.debug(f"Could not extract year from POP_EVAL: {e}")

# Fallback: If no EVALID, use most recent INVYR as approximation
# Fallback: use most recent INVYR from PLOT table
if year is None and "PLOT" in self.db.tables:
try:
plot_data = self.db.tables["PLOT"]
if isinstance(plot_data, pl.LazyFrame):
plot_years = plot_data.select("INVYR").collect()
else:
plot_years = plot_data.select("INVYR")
plot_df: pl.DataFrame = (
plot_data.collect()
if isinstance(plot_data, pl.LazyFrame)
else plot_data
)
plot_years = plot_df.select("INVYR")
if not plot_years.is_empty():
# Use max year as it best represents the evaluation period
max_year = plot_years["INVYR"].max()
if max_year is not None:
year = int(max_year) # type: ignore[arg-type]
Expand All @@ -652,7 +630,7 @@ def _extract_evaluation_year(self) -> int:
if year is None:
from datetime import datetime

year = datetime.now().year - EVALIDYearParsing.DEFAULT_YEAR_OFFSET
year = datetime.now().year - 2

return year

Expand Down
101 changes: 0 additions & 101 deletions tests/unit/test_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pyfia.constants.defaults import (
Defaults,
ErrorMessages,
EVALIDYearParsing,
MathConstants,
ValidationRanges,
)
Expand Down Expand Up @@ -82,85 +81,6 @@ def test_plot_count_range(self):
assert ValidationRanges.MIN_PLOTS < ValidationRanges.MAX_PLOTS


class TestEVALIDYearParsing:
    """Checks on the constants that drive EVALID year parsing."""

    def test_y2k_window_threshold(self):
        """The windowing pivot is 30, i.e. 00-30 map to 2000-2030."""
        assert EVALIDYearParsing.Y2K_WINDOW_THRESHOLD == 30

    def test_century_constants(self):
        """Both century bases carry their literal values."""
        assert EVALIDYearParsing.CENTURY_2000 == 2000
        assert EVALIDYearParsing.CENTURY_1900 == 1900

    def test_legacy_threshold(self):
        """The legacy pivot is 90, i.e. 90-99 map to 1990-1999."""
        assert EVALIDYearParsing.LEGACY_THRESHOLD == 90

    def test_valid_year_range(self):
        """The valid-year bounds are 1990 and 2050, in increasing order."""
        assert EVALIDYearParsing.MIN_VALID_YEAR == 1990
        assert EVALIDYearParsing.MAX_VALID_YEAR == 2050
        assert EVALIDYearParsing.MIN_VALID_YEAR < EVALIDYearParsing.MAX_VALID_YEAR

    def test_default_year_offset(self):
        """The processing-lag offset is two years."""
        assert EVALIDYearParsing.DEFAULT_YEAR_OFFSET == 2

    def test_y2k_windowing_logic(self):
        """The constants reproduce Y2K windowing at its boundaries."""

        # Local re-statement of the windowing rule built from the constants.
        def expand(two_digit: int) -> int:
            if two_digit > EVALIDYearParsing.Y2K_WINDOW_THRESHOLD:
                return EVALIDYearParsing.CENTURY_1900 + two_digit
            return EVALIDYearParsing.CENTURY_2000 + two_digit

        # Both edges of each window.
        boundary_cases = ((0, 2000), (30, 2030), (31, 1931), (99, 1999))
        for two_digit, full_year in boundary_cases:
            assert expand(two_digit) == full_year

    def test_legacy_year_parsing_logic(self):
        """The constants reproduce the legacy (pivot-at-90) year rule."""

        # Local re-statement of the legacy rule built from the constants.
        def expand_legacy(two_digit: int) -> int:
            if two_digit < EVALIDYearParsing.LEGACY_THRESHOLD:
                return EVALIDYearParsing.CENTURY_2000 + two_digit
            return EVALIDYearParsing.CENTURY_1900 + two_digit

        boundary_cases = (
            (89, 2089),  # just below the pivot
            (90, 1990),  # exactly at the pivot
            (99, 1999),  # top of the 1990s
        )
        for two_digit, full_year in boundary_cases:
            assert expand_legacy(two_digit) == full_year

    def test_year_validation_logic(self):
        """A range check built from the constants accepts 1990-2050 only."""

        def within_bounds(candidate: int) -> bool:
            lower = EVALIDYearParsing.MIN_VALID_YEAR
            upper = EVALIDYearParsing.MAX_VALID_YEAR
            return lower <= candidate <= upper

        # Inside the range, including both endpoints.
        for accepted in (1990, 2000, 2024, 2050):
            assert within_bounds(accepted) is True

        # One step outside each endpoint.
        for rejected in (1989, 2051):
            assert within_bounds(rejected) is False


class TestErrorMessages:
"""Tests for standard error messages."""
Expand Down Expand Up @@ -205,27 +125,6 @@ def test_invalid_domain_message_format(self):
class TestConstantsIntegration:
"""Integration tests verifying constants work together."""

def test_evalid_year_range_covers_valid_years(self):
    """Years produced by EVALID parsing must fall inside the valid range.

    Two windows are checked, in this order:
      - year parts 0..Y2K_WINDOW_THRESHOLD offset by CENTURY_2000
      - year parts LEGACY_THRESHOLD..99 offset by CENTURY_1900
    """
    lower = EVALIDYearParsing.MIN_VALID_YEAR
    upper = EVALIDYearParsing.MAX_VALID_YEAR

    # (century base, two-digit year parts that map onto that century)
    windows = (
        (
            EVALIDYearParsing.CENTURY_2000,
            range(0, EVALIDYearParsing.Y2K_WINDOW_THRESHOLD + 1),
        ),
        (
            EVALIDYearParsing.CENTURY_1900,
            range(EVALIDYearParsing.LEGACY_THRESHOLD, 100),
        ),
    )

    for century, year_parts in windows:
        for year_part in year_parts:
            year = century + year_part
            assert (
                lower <= year <= upper
            ), f"Year {year} from year_part {year_part} is outside valid range"

def test_defaults_are_reasonable(self):
"""Test that default values are reasonable for FIA analysis."""
# Adjustment and expansion factors default to 1 (no adjustment)
Expand Down
Loading