diff --git a/docs/source/datasets/fpa_fod_tabular.rst b/docs/source/datasets/fpa_fod_tabular.rst new file mode 100644 index 00000000..2d1136f4 --- /dev/null +++ b/docs/source/datasets/fpa_fod_tabular.rst @@ -0,0 +1,38 @@ +FPA FOD — Tabular Classification Dataset +======================================== + +What it is +---------- + +This dataset provides a tabular supervised-learning view derived from FPA FOD wildfire records. +One sample corresponds to one incident (or one processed record). + +Data source and licensing +------------------------- + +The raw FPA FOD dataset is large and must be obtained by the user. PyHazards does not ship the raw data +due to licensing/size constraints. + +How to provide data +------------------- + +1. Obtain the FPA FOD sqlite (user-provided). +2. Place it at the path expected by the dataset builder (see project README/scripts). +3. Run the preprocess/build step to produce processed dataset artifacts. + +Returned tensors (contract) +--------------------------- + +A single sample returns: +- ``x``: float tensor of shape ``[input_dim]`` +- ``y``: integer class id (scalar) + +A batch returns: +- ``x``: ``[B, input_dim]`` +- ``y``: ``[B]`` + +Micro dataset for CI +-------------------- + +For CI/testing: set ``micro=True`` to use deterministic synthetic data that matches the schema and shapes, +so tests run without requiring the raw FPA FOD sqlite. diff --git a/docs/source/datasets/fpa_fod_weekly.rst b/docs/source/datasets/fpa_fod_weekly.rst new file mode 100644 index 00000000..1d3e441c --- /dev/null +++ b/docs/source/datasets/fpa_fod_weekly.rst @@ -0,0 +1,31 @@ +FPA FOD — Weekly Forecasting Dataset +==================================== + +What it is +---------- + +This dataset provides weekly sequences for forecasting wildfire activity. +One sample corresponds to a sequence of weekly feature vectors. + +Data source and licensing +------------------------- + +The raw FPA FOD dataset must be obtained by the user. PyHazards does not ship the raw data due to +licensing/size constraints. + +Returned tensors (contract) +--------------------------- + +A single sample returns: +- ``x``: float tensor of shape ``[T, input_dim]`` +- ``y``: float tensor of shape ``[out_dim]`` (or scalar) + +A batch returns: +- ``x``: ``[B, T, input_dim]`` +- ``y``: ``[B, out_dim]`` + +Micro dataset for CI +-------------------- + +For CI/testing: set ``micro=True`` to use deterministic synthetic data (small B and T) to validate shapes, +dtypes, and a trainer smoke run, without requiring the raw FPA FOD sqlite. diff --git a/pyhazards/datasets/__init__.py b/pyhazards/datasets/__init__.py index c6310db1..d5a678d5 100644 --- a/pyhazards/datasets/__init__.py +++ b/pyhazards/datasets/__init__.py @@ -14,3 +14,15 @@ "GraphTemporalDataset", "graph_collate", ] +from .wildfire_fpa_fod import FPAFODWildfireTabular, FPAFODWildfireWeekly + +# Register datasets so framework load_dataset(...) can find them +register_dataset( + name="wildfire_fpa_fod_tabular", + builder=lambda **kwargs: FPAFODWildfireTabular(**kwargs), +) + +register_dataset( + name="wildfire_fpa_fod_weekly", + builder=lambda **kwargs: FPAFODWildfireWeekly(**kwargs), +) diff --git a/pyhazards/datasets/wildfire_fpa_fod.py b/pyhazards/datasets/wildfire_fpa_fod.py new file mode 100644 index 00000000..e3f5d80d --- /dev/null +++ b/pyhazards/datasets/wildfire_fpa_fod.py @@ -0,0 +1,583 @@ +""" +FPA-FOD Wildfire datasets for PyHazards. 
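+
+Quick start (micro mode; a sketch that follows the contract below)::
+
+    ds = FPAFODWildfireTabular(task="cause", micro=True)
+    bundle = ds.load()                     # -> DataBundle
+    train = bundle.get_split("train")      # -> DataSplit
+    x, y = train.inputs, train.targets     # FloatTensor [N, D], LongTensor [N]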
+ +Implements: + - FPAFODWildfireTabular: per-incident tabular classification for (cause | size) + - FPAFODWildfireWeekly: weekly time-series forecasting (lookback -> next-week counts by size group) + +Contract (PyHazards-canonical): + - These datasets are DataBundle-style (NOT torch Dataset): + ds.load() -> DataBundle + bundle.splits["train"/"val"/"test"] are DataSplit objects with: + - .inputs : torch.FloatTensor + - .targets : torch.LongTensor (classification) or torch.FloatTensor (regression) + - The engine.Trainer expects DataBundle.get_split(name) -> DataSplit. + +Non-negotiable: + - Includes deterministic MICRO synthetic datasets for unit tests. + +Note: + - Real data must be provided by the user (licensing/size). Supported: .sqlite/.db (Fires table), .csv, .parquet. +""" + +from __future__ import annotations + +import math +import os +from typing import Any, Dict, List, Literal, Optional, Tuple + +import numpy as np +import pandas as pd +import torch + +from .base import DataBundle, DataSplit, Dataset, FeatureSpec, LabelSpec + +# ----------------------------------------------------------------------------- +# Types / constants +# ----------------------------------------------------------------------------- + +CauseMode = Literal["paper5", "keep_all"] +Region = Literal["US", "CA"] +WeeklyFeatures = Literal["counts", "counts+time"] + +# These strings must match NWCG_GENERAL_CAUSE values (with synonyms mapped below) +PAPER5_CAUSES = [ + "Debris and open burning", + "Natural", + "Arson/incendiarism", + "Equipment and vehicle use", + "Recreation and ceremony", +] + +CAUSE_SYNONYMS = { + "Debris/open burning": "Debris and open burning", + "Debris and Open Burning": "Debris and open burning", + "Arson": "Arson/incendiarism", + "Equipment/vehicle use": "Equipment and vehicle use", + "Recreation/ceremony": "Recreation and ceremony", +} + +SIZE_GROUPS = ["A", "B", "C", "D", "EFG"] + + +# ----------------------------------------------------------------------------- +# Helpers +# ----------------------------------------------------------------------------- + +def _minmax_fit(x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + mins = np.nanmin(x, axis=0) + maxs = np.nanmax(x, axis=0) + maxs = np.where(maxs == mins, mins + 1.0, maxs) + return mins, maxs + + +def _minmax_apply(x: np.ndarray, mins: np.ndarray, maxs: np.ndarray) -> np.ndarray: + return (x - mins) / (maxs - mins) + + +def _stratified_split_indices( + y: np.ndarray, train_ratio: float, val_ratio: float, test_ratio: float, seed: int +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + assert abs((train_ratio + val_ratio + test_ratio) - 1.0) < 1e-6 + rng = np.random.default_rng(seed) + + train_idx: List[int] = [] + val_idx: List[int] = [] + test_idx: List[int] = [] + + classes = np.unique(y) + for c in classes: + idx_c = np.where(y == c)[0] + rng.shuffle(idx_c) + n = len(idx_c) + n_train = int(round(train_ratio * n)) + n_val = int(round(val_ratio * n)) + n_test = max(0, n - n_train - n_val) + + train_idx.extend(idx_c[:n_train].tolist()) + val_idx.extend(idx_c[n_train:n_train + n_val].tolist()) + test_idx.extend(idx_c[n_train + n_val:n_train + n_val + n_test].tolist()) + + train_idx = np.array(train_idx, dtype=np.int64) + val_idx = np.array(val_idx, dtype=np.int64) + test_idx = np.array(test_idx, dtype=np.int64) + + rng.shuffle(train_idx) + rng.shuffle(val_idx) + rng.shuffle(test_idx) + + return train_idx, val_idx, test_idx + + +def _chronological_split_indices( + n: int, train_ratio: float, val_ratio: float, test_ratio: float +) -> 
Tuple[np.ndarray, np.ndarray, np.ndarray]: + assert abs((train_ratio + val_ratio + test_ratio) - 1.0) < 1e-6 + n_train = int(math.floor(train_ratio * n)) + n_val = int(math.floor(val_ratio * n)) + n_test = n - n_train - n_val + + train_idx = np.arange(0, n_train, dtype=np.int64) + val_idx = np.arange(n_train, n_train + n_val, dtype=np.int64) + test_idx = np.arange(n_train + n_val, n_train + n_val + n_test, dtype=np.int64) + return train_idx, val_idx, test_idx + + +def _encode_states(states: pd.Series) -> Tuple[np.ndarray, Dict[str, int]]: + uniq = sorted([s for s in states.dropna().astype(str).unique().tolist()]) + mapping = {s: i for i, s in enumerate(uniq)} + enc = states.astype(str).map(mapping).astype("int64").to_numpy() + return enc, mapping + + +def _normalize_cause_strings(x: pd.Series) -> pd.Series: + s = x.astype(str).str.strip() + s = s.map(lambda v: CAUSE_SYNONYMS.get(v, v)) + return s + + +def _coerce_required_columns(df: pd.DataFrame, required: List[str]) -> pd.DataFrame: + missing = [c for c in required if c not in df.columns] + if missing: + raise ValueError(f"Missing required columns: {missing}. Have: {list(df.columns)[:40]}...") + return df + + +def _impute_numeric(df: pd.DataFrame, cols: List[str]) -> Tuple[pd.DataFrame, Dict[str, float]]: + medians: Dict[str, float] = {} + out = df.copy() + for c in cols: + med = float(pd.to_numeric(out[c], errors="coerce").median()) + if math.isnan(med): + med = 0.0 + medians[c] = med + out[c] = pd.to_numeric(out[c], errors="coerce").fillna(med) + return out, medians + + +# ----------------------------------------------------------------------------- +# MICRO synthetic data +# ----------------------------------------------------------------------------- + +def _micro_tabular_df(seed: int = 1337, n: int = 200) -> pd.DataFrame: + rng = np.random.default_rng(seed) + states = np.array(["CA", "TX", "FL", "NY", "WA", "CO"]) + causes = np.array(PAPER5_CAUSES) + + years = rng.integers(2010, 2019, size=n) + doy = rng.integers(1, 366, size=n) + disc_time = rng.integers(0, 2400, size=n) + cont_doy = np.clip(doy + rng.integers(0, 30, size=n), 1, 366) + cont_time = rng.integers(0, 2400, size=n) + + st = rng.choice(states, size=n, replace=True) + lat = rng.uniform(25.0, 49.0, size=n) + lon = rng.uniform(-124.0, -67.0, size=n) + ca_mask = (st == "CA") + lat[ca_mask] = rng.uniform(32.0, 42.0, size=ca_mask.sum()) + lon[ca_mask] = rng.uniform(-124.5, -114.0, size=ca_mask.sum()) + + cause = rng.choice(causes, size=n, replace=True) + size_raw = rng.choice( + ["A", "B", "C", "D", "E", "F", "G"], + size=n, + p=[0.38, 0.42, 0.12, 0.04, 0.02, 0.01, 0.01], + ) + + return pd.DataFrame({ + "FIRE_YEAR": years, + "STATE": st, + "DISCOVERY_DOY": doy, + "DISCOVERY_TIME": disc_time, + "CONT_DOY": cont_doy, + "CONT_TIME": cont_time, + "LATITUDE": lat, + "LONGITUDE": lon, + "NWCG_GENERAL_CAUSE": cause, + "FIRE_SIZE_CLASS": size_raw, + }) + + +def _micro_weekly_counts(seed: int = 1337, weeks: int = 120, region: Region = "US") -> pd.DataFrame: + rng = np.random.default_rng(seed) + start = pd.Timestamp("2016-01-04") + dates = pd.date_range(start, periods=weeks, freq="W-MON") + + t = np.arange(weeks) + base = 50 + 20 * np.sin(2 * np.pi * t / 52.0) + if region == "CA": + base = base * 1.3 + + A = np.maximum(0, base + rng.normal(0, 8, size=weeks)).astype(int) + B = np.maximum(0, base * 0.8 + rng.normal(0, 7, size=weeks)).astype(int) + C = np.maximum(0, base * 0.2 + rng.normal(0, 3, size=weeks)).astype(int) + D = np.maximum(0, base * 0.05 + rng.normal(0, 2, 
size=weeks)).astype(int) + EFG = np.maximum(0, base * 0.03 + rng.normal(0, 2, size=weeks)).astype(int) + + return pd.DataFrame({"week_start": dates, "A": A, "B": B, "C": C, "D": D, "EFG": EFG}) + + +# ----------------------------------------------------------------------------- +# Real-data loader +# ----------------------------------------------------------------------------- + +def _load_fpa_fod_any(path: str) -> pd.DataFrame: + if not os.path.exists(path): + raise FileNotFoundError(f"Data path not found: {path}") + + ext = os.path.splitext(path)[1].lower() + + if ext in [".sqlite", ".db"]: + import sqlite3 + con = sqlite3.connect(path) + try: + return pd.read_sql_query("SELECT * FROM Fires", con) + finally: + con.close() + + if ext == ".csv": + return pd.read_csv(path) + + if ext == ".parquet": + return pd.read_parquet(path) + + raise ValueError(f"Unsupported file extension: {ext} for path {path}") + + +# ----------------------------------------------------------------------------- +# Dataset: Tabular classification +# ----------------------------------------------------------------------------- + +class FPAFODWildfireTabular(Dataset): + name = "wildfire_fpa_fod_tabular" + + def __init__( + self, + task: Literal["cause", "size"] = "cause", + region: Region = "US", + cause_mode: CauseMode = "paper5", + data_path: Optional[str] = None, + micro: bool = False, + normalize: bool = False, + train_ratio: float = 0.6, + val_ratio: float = 0.2, + test_ratio: float = 0.2, + seed: int = 1337, + cache_dir: Optional[str] = None, + ): + super().__init__(cache_dir=cache_dir) + self.task = task + self.region = region + self.cause_mode = cause_mode + self.data_path = data_path + self.micro = micro + self.normalize = normalize + self.train_ratio = train_ratio + self.val_ratio = val_ratio + self.test_ratio = test_ratio + self.seed = seed + + def _load(self) -> DataBundle: + if self.micro: + df = _micro_tabular_df(seed=self.seed, n=200) + source = "micro_synthetic" + else: + if not self.data_path: + raise ValueError("data_path is required when micro=False") + df = _load_fpa_fod_any(self.data_path) + source = self.data_path + + required = [ + "FIRE_YEAR", "STATE", "DISCOVERY_DOY", "DISCOVERY_TIME", + "CONT_DOY", "CONT_TIME", "LATITUDE", "LONGITUDE", + "NWCG_GENERAL_CAUSE", "FIRE_SIZE_CLASS", + ] + df = _coerce_required_columns(df, required) + + if self.region == "CA": + df = df[df["STATE"].astype(str) == "CA"].copy() + + df, numeric_impute = _impute_numeric( + df, + cols=["FIRE_YEAR", "DISCOVERY_DOY", "DISCOVERY_TIME", "CONT_DOY", "CONT_TIME", "LATITUDE", "LONGITUDE"], + ) + + state_enc, state_mapping = _encode_states(df["STATE"]) + feature_cols_numeric = ["FIRE_YEAR", "DISCOVERY_DOY", "DISCOVERY_TIME", "CONT_DOY", "CONT_TIME", "LATITUDE", "LONGITUDE"] + + x_num = df[feature_cols_numeric].to_numpy(dtype=np.float32) + x_state = state_enc.astype(np.float32).reshape(-1, 1) + x = np.concatenate([x_num, x_state], axis=1).astype(np.float32) + + feature_names = feature_cols_numeric + ["STATE_ID"] + + metadata: Dict[str, Any] = { + "dataset": self.name, + "source": source, + "region": self.region, + "task": self.task, + "micro": bool(self.micro), + "seed": int(self.seed), + "state_mapping": state_mapping, + "numeric_impute_medians": numeric_impute, + } + + # ---- Targets + split indices ---- + if self.task == "cause": + causes = _normalize_cause_strings(df["NWCG_GENERAL_CAUSE"]) + + if self.cause_mode == "paper5": + mask = causes.isin(PAPER5_CAUSES) + metadata["dropped_non_paper5_causes"] = int((~mask).sum()) + if 
int(mask.sum()) == 0: + top = causes.value_counts().head(20).to_dict() + raise RuntimeError( + "cause_mode=paper5 kept 0 rows. Your NWCG_GENERAL_CAUSE strings don't match PAPER5_CAUSES.\n" + f"Top values: {top}\n" + f"Expected one of: {PAPER5_CAUSES}\n" + "Fix CAUSE_SYNONYMS / PAPER5_CAUSES or run with cause_mode='keep_all'." + ) + df = df.loc[mask].copy() + causes = causes.loc[mask] + x = x[mask.to_numpy()] + + classes = sorted(causes.unique().tolist()) + label_mapping = {c: i for i, c in enumerate(classes)} + y = causes.map(label_mapping).astype("int64").to_numpy() + + metadata["label_mapping"] = label_mapping + train_idx, val_idx, test_idx = _stratified_split_indices( + y=y, train_ratio=self.train_ratio, val_ratio=self.val_ratio, test_ratio=self.test_ratio, seed=self.seed + ) + + label_spec = LabelSpec( + num_targets=len(classes), + task_type="classification", + description="NWCG_GENERAL_CAUSE mapped to integer class IDs.", + extra={"classes": classes, "label_mapping": label_mapping}, + ) + + elif self.task == "size": + size_raw = df["FIRE_SIZE_CLASS"].astype(str).str.strip() + size_grp = size_raw.replace({"E": "EFG", "F": "EFG", "G": "EFG"}) + mask = size_grp.isin(SIZE_GROUPS) + metadata["dropped_unknown_size_class"] = int((~mask).sum()) + + size_grp = size_grp.loc[mask] + x = x[mask.to_numpy()] + + label_mapping = {c: i for i, c in enumerate(SIZE_GROUPS)} + y = size_grp.map(label_mapping).astype("int64").to_numpy() + + metadata["label_mapping"] = label_mapping + train_idx, val_idx, test_idx = _stratified_split_indices( + y=y, train_ratio=self.train_ratio, val_ratio=self.val_ratio, test_ratio=self.test_ratio, seed=self.seed + ) + + label_spec = LabelSpec( + num_targets=len(SIZE_GROUPS), + task_type="classification", + description="FIRE_SIZE_CLASS grouped to A/B/C/D/EFG and mapped to integer class IDs.", + extra={"classes": SIZE_GROUPS, "label_mapping": label_mapping}, + ) + + else: + raise ValueError(f"Unknown task: {self.task}") + + # ---- Normalization (fit on train only) ---- + norm_stats = None + if self.normalize: + mins, maxs = _minmax_fit(x[train_idx]) + x = _minmax_apply(x, mins, maxs).astype(np.float32) + norm_stats = {"mins": mins.tolist(), "maxs": maxs.tolist()} + metadata["normalization"] = norm_stats + + # ---- Build canonical splits as DataSplit(torch tensors) ---- + X_train = torch.as_tensor(x[train_idx], dtype=torch.float32) + y_train = torch.as_tensor(y[train_idx], dtype=torch.long) + + X_val = torch.as_tensor(x[val_idx], dtype=torch.float32) + y_val = torch.as_tensor(y[val_idx], dtype=torch.long) + + X_test = torch.as_tensor(x[test_idx], dtype=torch.float32) + y_test = torch.as_tensor(y[test_idx], dtype=torch.long) + + splits: Dict[str, DataSplit] = { + "train": DataSplit(inputs=X_train, targets=y_train, metadata={"source": source}), + "val": DataSplit(inputs=X_val, targets=y_val, metadata={"source": source}), + "test": DataSplit(inputs=X_test, targets=y_test, metadata={"source": source}), + } + + feature_spec = FeatureSpec( + input_dim=int(X_train.shape[1]), + description="FPA FOD tabular features (engineered from incident records).", + extra={"feature_names": feature_names, "dtype": "float32"}, + ) + + return DataBundle( + splits=splits, + feature_spec=feature_spec, + label_spec=label_spec, + metadata=metadata, + ) + + +# ----------------------------------------------------------------------------- +# Dataset: Weekly forecasting +# ----------------------------------------------------------------------------- + +class FPAFODWildfireWeekly(Dataset): + name = 
"wildfire_fpa_fod_weekly" + + def __init__( + self, + region: Region = "US", + data_path: Optional[str] = None, + micro: bool = False, + lookback_weeks: int = 50, + features: WeeklyFeatures = "counts", + train_ratio: float = 0.6, + val_ratio: float = 0.2, + test_ratio: float = 0.2, + seed: int = 1337, + cache_dir: Optional[str] = None, + ): + super().__init__(cache_dir=cache_dir) + self.region = region + self.data_path = data_path + self.micro = micro + self.lookback_weeks = lookback_weeks + self.features = features + self.train_ratio = train_ratio + self.val_ratio = val_ratio + self.test_ratio = test_ratio + self.seed = seed + + def _load(self) -> DataBundle: + if self.micro: + wk = _micro_weekly_counts(seed=self.seed, weeks=120, region=self.region) + source = "micro_synthetic" + else: + if not self.data_path: + raise ValueError("data_path is required when micro=False") + df = _load_fpa_fod_any(self.data_path) + source = self.data_path + + required = ["FIRE_YEAR", "STATE", "DISCOVERY_DOY", "FIRE_SIZE_CLASS"] + df = _coerce_required_columns(df, required) + + if self.region == "CA": + df = df[df["STATE"].astype(str) == "CA"].copy() + + year = pd.to_numeric(df["FIRE_YEAR"], errors="coerce") + doy = pd.to_numeric(df["DISCOVERY_DOY"], errors="coerce") + base = pd.to_datetime(year.astype("Int64").astype(str) + "-01-01", errors="coerce") + disc_dt = base + pd.to_timedelta(doy.fillna(1).astype(int) - 1, unit="D") + df = df.assign(_disc_dt=disc_dt) + + week_start = df["_disc_dt"].dt.to_period("W-MON").dt.start_time + + size_raw = df["FIRE_SIZE_CLASS"].astype(str).str.strip() + size_grp = size_raw.replace({"E": "EFG", "F": "EFG", "G": "EFG"}) + size_grp = size_grp.where(size_grp.isin(SIZE_GROUPS), other=np.nan) + + df = df.assign(_week_start=week_start, _size=size_grp).dropna(subset=["_week_start", "_size"]) + + wk = ( + df.groupby(["_week_start", "_size"]) + .size() + .unstack("_size", fill_value=0) + .reset_index() + .rename(columns={"_week_start": "week_start"}) + ) + for c in SIZE_GROUPS: + if c not in wk.columns: + wk[c] = 0 + wk = wk.sort_values("week_start").reset_index(drop=True) + + lookback = int(self.lookback_weeks) + if len(wk) <= lookback: + raise ValueError(f"Not enough weeks ({len(wk)}) for lookback={lookback}") + + counts = wk[SIZE_GROUPS].to_numpy(dtype=np.float32) # (T, 5) + + if self.features == "counts": + feats = counts + feature_names = SIZE_GROUPS + elif self.features == "counts+time": + woy = wk["week_start"].dt.isocalendar().week.to_numpy(dtype=np.float32) + sin = np.sin(2 * np.pi * woy / 52.0).reshape(-1, 1).astype(np.float32) + cos = np.cos(2 * np.pi * woy / 52.0).reshape(-1, 1).astype(np.float32) + feats = np.concatenate([counts, sin, cos], axis=1).astype(np.float32) + feature_names = SIZE_GROUPS + ["woy_sin", "woy_cos"] + else: + raise ValueError(f"Unknown features mode: {self.features}") + + X_list: List[np.ndarray] = [] + Y_list: List[np.ndarray] = [] + week_starts: List[pd.Timestamp] = [] + + for i in range(lookback, len(wk)): + X_list.append(feats[i - lookback:i]) + Y_list.append(counts[i]) + week_starts.append(wk.loc[i, "week_start"]) + + X = np.stack(X_list, axis=0).astype(np.float32) # (N, lookback, input_dim) + Y = np.stack(Y_list, axis=0).astype(np.float32) # (N, 5) + + n = int(X.shape[0]) + train_idx, val_idx, test_idx = _chronological_split_indices( + n=n, train_ratio=self.train_ratio, val_ratio=self.val_ratio, test_ratio=self.test_ratio + ) + + # ---- Canonical splits as DataSplit(torch tensors) ---- + X_train = torch.as_tensor(X[train_idx], 
dtype=torch.float32) + y_train = torch.as_tensor(Y[train_idx], dtype=torch.float32) + + X_val = torch.as_tensor(X[val_idx], dtype=torch.float32) + y_val = torch.as_tensor(Y[val_idx], dtype=torch.float32) + + X_test = torch.as_tensor(X[test_idx], dtype=torch.float32) + y_test = torch.as_tensor(Y[test_idx], dtype=torch.float32) + + splits: Dict[str, DataSplit] = { + "train": DataSplit(inputs=X_train, targets=y_train, metadata={"source": source, "region": self.region}), + "val": DataSplit(inputs=X_val, targets=y_val, metadata={"source": source, "region": self.region}), + "test": DataSplit(inputs=X_test, targets=y_test, metadata={"source": source, "region": self.region}), + } + + feature_spec = FeatureSpec( + input_dim=int(X_train.shape[2]), + description="Weekly sequence features derived from FPA FOD incident aggregation.", + extra={ + "feature_names": feature_names, + "lookback_weeks": lookback, + "dtype": "float32", + "region": self.region, + }, + ) + + label_spec = LabelSpec( + num_targets=5, + task_type="regression", + description="Next-week counts per size group (A,B,C,D,EFG).", + extra={"targets": SIZE_GROUPS, "dtype": "float32"}, + ) + + metadata: Dict[str, Any] = { + "dataset": self.name, + "source": source, + "region": self.region, + "micro": bool(self.micro), + "seed": int(self.seed), + "lookback_weeks": lookback, + "features_mode": self.features, + "week_start_for_each_sample": [str(ts) for ts in week_starts], + } + + return DataBundle( + splits=splits, + feature_spec=feature_spec, + label_spec=label_spec, + metadata=metadata, + ) diff --git a/pyhazards/metrics/classification.py b/pyhazards/metrics/classification.py new file mode 100644 index 00000000..ff1f05c5 --- /dev/null +++ b/pyhazards/metrics/classification.py @@ -0,0 +1,78 @@ +# pyhazards/metrics/classification.py +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, List, Optional, Sequence, Tuple + +import numpy as np +from sklearn.metrics import ( + accuracy_score, + classification_report, + confusion_matrix, +) + + +@dataclass(frozen=True) +class ClassificationResults: + accuracy: float + report_dict: Dict + report_csv: str + confusion: np.ndarray + confusion_normalized: np.ndarray + labels: List[str] + + +def _safe_row_normalize(cm: np.ndarray) -> np.ndarray: + cm = cm.astype(np.float64) + row_sums = cm.sum(axis=1, keepdims=True) + # avoid division by zero for classes absent in test set + row_sums[row_sums == 0.0] = 1.0 + return cm / row_sums + + +def evaluate_multiclass( + y_true: Sequence[int], + y_pred: Sequence[int], + label_names: Sequence[str], + *, + digits: int = 4, + zero_division: int = 0, +) -> ClassificationResults: + """ + Strict: metrics computed ONLY from the provided y_true/y_pred (caller must ensure test split). 
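+
+    Minimal sketch (labels purely illustrative)::
+
+        res = evaluate_multiclass([0, 1, 1], [0, 1, 0], ["neg", "pos"])
+        res.accuracy    # 2 of 3 correct -> 0.666...
+        res.confusion   # 2x2 ndarray, rows = true class, cols = predicted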
+ """ + y_true = np.asarray(y_true) + y_pred = np.asarray(y_pred) + + acc = float(accuracy_score(y_true, y_pred)) + + # sklearn report with per-class precision/recall/F1 + macro/weighted + rep = classification_report( + y_true, + y_pred, + target_names=list(label_names), + digits=digits, + output_dict=True, + zero_division=zero_division, + ) + + rep_csv = classification_report( + y_true, + y_pred, + target_names=list(label_names), + digits=digits, + output_dict=False, + zero_division=zero_division, + ) + + cm = confusion_matrix(y_true, y_pred, labels=list(range(len(label_names)))) + cm_norm = _safe_row_normalize(cm) + + return ClassificationResults( + accuracy=acc, + report_dict=rep, + report_csv=rep_csv, + confusion=cm, + confusion_normalized=cm_norm, + labels=list(label_names), + ) diff --git a/pyhazards/metrics/forecasting.py b/pyhazards/metrics/forecasting.py new file mode 100644 index 00000000..0a336827 --- /dev/null +++ b/pyhazards/metrics/forecasting.py @@ -0,0 +1,49 @@ +# pyhazards/metrics/forecasting.py +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, Iterable, List, Mapping, Tuple + +import numpy as np +from sklearn.metrics import mean_squared_error, r2_score + + +@dataclass(frozen=True) +class ForecastMetrics: + rmse: float + r2: float + + +def _rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float: + return float(np.sqrt(mean_squared_error(y_true, y_pred))) + + +def evaluate_forecast_buckets( + y_true: np.ndarray, + y_pred: np.ndarray, + bucket_names: List[str], +) -> Dict[str, ForecastMetrics]: + """ + y_true, y_pred: shape (T, K) where K = number of buckets (A,B,C,D,EFG). + Returns metrics for each bucket + "all" (sum across buckets). + STRICT: caller must pass TEST-ONLY arrays for the chosen region (US or CA). + """ + y_true = np.asarray(y_true, dtype=np.float64) + y_pred = np.asarray(y_pred, dtype=np.float64) + assert y_true.shape == y_pred.shape, "y_true and y_pred must match shape (T, K)." + assert y_true.shape[1] == len(bucket_names), "K must equal len(bucket_names)." 
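+    # Note: sklearn's r2_score is ill-defined when a bucket's y_true is
+    # constant over the test window; it degrades to a fixed score rather
+    # than raising, so sparse buckets (e.g. D or EFG) still yield a number.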
+ + out: Dict[str, ForecastMetrics] = {} + + # per bucket + for j, name in enumerate(bucket_names): + yt = y_true[:, j] + yp = y_pred[:, j] + out[name] = ForecastMetrics(rmse=_rmse(yt, yp), r2=float(r2_score(yt, yp))) + + # all = sum across buckets (paper-style) + yt_all = y_true.sum(axis=1) + yp_all = y_pred.sum(axis=1) + out["all"] = ForecastMetrics(rmse=_rmse(yt_all, yp_all), r2=float(r2_score(yt_all, yp_all))) + + return out diff --git a/pyhazards/models/__init__.py b/pyhazards/models/__init__.py index ba94c0ae..8632735e 100644 --- a/pyhazards/models/__init__.py +++ b/pyhazards/models/__init__.py @@ -7,8 +7,11 @@ from .wildfire_mamba import WildfireMamba, wildfire_mamba_builder from .wildfire_aspp import WildfireASPP, TverskyLoss, wildfire_aspp_builder from .cnn_aspp import WildfireCNNASPP, cnn_aspp_builder -from .hydrographnet import HydroGraphNet, HydroGraphNetLoss, hydrographnet_builder +# pyhazards/models/__init__.py +from .wildfire_fpa_mlp import build_wildfire_fpa_mlp # noqa: F401 +from .wildfire_fpa_lstm import build_wildfire_fpa_lstm # noqa: F401 +from .wildfire_fpa_autoencoder import build_wildfire_fpa_ae # noqa: F401 __all__ = [ # Core API @@ -34,9 +37,6 @@ "wildfire_aspp_builder", "WildfireCNNASPP", "cnn_aspp_builder", - "HydroGraphNet", - "HydroGraphNetLoss", - "hydrographnet_builder", ] # ------------------------------------------------- @@ -96,14 +96,3 @@ "dropout": 0.0, }, ) - - -register_model( - name="hydrographnet", - builder=hydrographnet_builder, - defaults={ - "hidden_dim": 64, - "harmonics": 5, - "num_gn_blocks": 5, - }, -) diff --git a/pyhazards/models/wildfire_fpa_autoencoder.py b/pyhazards/models/wildfire_fpa_autoencoder.py new file mode 100644 index 00000000..6f8957cd --- /dev/null +++ b/pyhazards/models/wildfire_fpa_autoencoder.py @@ -0,0 +1,136 @@ +# pyhazards/models/wildfire_fpa_autoencoder.py +from __future__ import annotations + +import torch +import torch.nn as nn + +from .registry import register_model + + +class WildfireFPALSTMAutoencoder(nn.Module): + """ + LSTM autoencoder for sequences. + + forward: + x: (B, T, D) + returns reconstruction x_hat: (B, T, D) + """ + + def __init__( + self, + input_dim: int, + hidden_dim: int = 64, + latent_dim: int = 32, + num_layers: int = 1, + dropout: float = 0.2, + lookback: int = 50, + ) -> None: + super().__init__() + if input_dim <= 0: + raise ValueError(f"input_dim must be > 0, got {input_dim}") + if hidden_dim <= 0: + raise ValueError(f"hidden_dim must be > 0, got {hidden_dim}") + if latent_dim <= 0: + raise ValueError(f"latent_dim must be > 0, got {latent_dim}") + if num_layers <= 0: + raise ValueError(f"num_layers must be > 0, got {num_layers}") + if not (0.0 <= dropout < 1.0): + raise ValueError(f"dropout must be in [0,1), got {dropout}") + if lookback <= 0: + raise ValueError(f"lookback must be > 0, got {lookback}") + + self.lookback = lookback + lstm_dropout = dropout if num_layers > 1 else 0.0 + + self.encoder = nn.LSTM( + input_size=input_dim, + hidden_size=hidden_dim, + num_layers=num_layers, + batch_first=True, + dropout=lstm_dropout, + ) + self.to_latent = nn.Sequential( + nn.Dropout(p=dropout), + nn.Linear(hidden_dim, latent_dim), + ) + + self.decoder = nn.LSTM( + input_size=latent_dim, + hidden_size=hidden_dim, + num_layers=num_layers, + batch_first=True, + dropout=lstm_dropout, + ) + self.to_recon = nn.Linear(hidden_dim, input_dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.ndim != 3: + raise ValueError(f"AE expects x as (B, T, D). 
Got {tuple(x.shape)}") + B, T, D = x.shape + if T != self.lookback: + raise ValueError(f"Expected lookback T={self.lookback}, got T={T}") + + _, (h_n, _) = self.encoder(x) + h_last = h_n[-1] # (B, hidden_dim) + z = self.to_latent(h_last) # (B, latent_dim) + + z_seq = z.unsqueeze(1).expand(B, T, z.shape[-1]) # (B, T, latent_dim) + dec_out, _ = self.decoder(z_seq) + return self.to_recon(dec_out) + + @torch.no_grad() + def reconstruction_error(self, x: torch.Tensor, reduction: str = "mean") -> torch.Tensor: + x_hat = self.forward(x) + err = (x_hat - x) ** 2 + if reduction == "none": + return err + if reduction == "mean": + return err.mean(dim=(1, 2)) + if reduction == "sum": + return err.sum(dim=(1, 2)) + raise ValueError(f"Unknown reduction: {reduction!r}") + + +def build_wildfire_fpa_ae( + *, + task: str, + input_dim: int, + hidden_dim: int = 64, + latent_dim: int = 32, + num_layers: int = 1, + dropout: float = 0.2, + lookback: int = 50, + **kwargs, +) -> nn.Module: + if kwargs: + raise TypeError(f"Unexpected kwargs for wildfire_fpa_ae: {sorted(kwargs.keys())}") + + t = str(task).lower() + if t not in ("reconstruction", "autoencoder", "regression", "forecasting"): + raise ValueError( + "wildfire_fpa_ae supports task in " + "{'reconstruction','autoencoder','regression','forecasting'}, " + f"got {task!r}" + ) + + return WildfireFPALSTMAutoencoder( + input_dim=input_dim, + hidden_dim=hidden_dim, + latent_dim=latent_dim, + num_layers=num_layers, + dropout=dropout, + lookback=lookback, + ) + + +register_model( + name="wildfire_fpa_ae", + builder=build_wildfire_fpa_ae, + defaults={ + "hidden_dim": 64, + "latent_dim": 32, + "num_layers": 1, + "dropout": 0.2, + "lookback": 50, + }, +) diff --git a/pyhazards/models/wildfire_fpa_lstm.py b/pyhazards/models/wildfire_fpa_lstm.py new file mode 100644 index 00000000..906e5c33 --- /dev/null +++ b/pyhazards/models/wildfire_fpa_lstm.py @@ -0,0 +1,109 @@ +# pyhazards/models/wildfire_fpa_lstm.py +from __future__ import annotations + +import torch +import torch.nn as nn + +from .registry import register_model + + +class WildfireFPALSTM(nn.Module): + """ + LSTM forecaster producing raw next-week counts (regression). + + forward: + x: (B, T, D) T should equal lookback (default 50) + returns: (B, output_dim=5) + """ + + def __init__( + self, + input_dim: int, + hidden_dim: int = 64, + output_dim: int = 5, + num_layers: int = 1, + dropout: float = 0.2, + lookback: int = 50, + ) -> None: + super().__init__() + if input_dim <= 0: + raise ValueError(f"input_dim must be > 0, got {input_dim}") + if hidden_dim <= 0: + raise ValueError(f"hidden_dim must be > 0, got {hidden_dim}") + if output_dim <= 0: + raise ValueError(f"output_dim must be > 0, got {output_dim}") + if num_layers <= 0: + raise ValueError(f"num_layers must be > 0, got {num_layers}") + if not (0.0 <= dropout < 1.0): + raise ValueError(f"dropout must be in [0,1), got {dropout}") + if lookback <= 0: + raise ValueError(f"lookback must be > 0, got {lookback}") + + self.lookback = lookback + + # PyTorch LSTM dropout is applied only when num_layers > 1 + lstm_dropout = dropout if num_layers > 1 else 0.0 + + self.lstm = nn.LSTM( + input_size=input_dim, + hidden_size=hidden_dim, + num_layers=num_layers, + batch_first=True, + dropout=lstm_dropout, + ) + self.head = nn.Sequential( + nn.Dropout(p=dropout), + nn.Linear(hidden_dim, output_dim), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.ndim != 3: + raise ValueError(f"LSTM expects x as (B, T, D). 
Got {tuple(x.shape)}") + B, T, D = x.shape + if T != self.lookback: + raise ValueError(f"Expected lookback T={self.lookback}, got T={T}") + + _, (h_n, _) = self.lstm(x) + h_last = h_n[-1] # (B, hidden_dim) + return self.head(h_last) + + +def build_wildfire_fpa_lstm( + *, + task: str, + input_dim: int, + hidden_dim: int = 64, + output_dim: int = 5, + num_layers: int = 1, + dropout: float = 0.2, + lookback: int = 50, + **kwargs, +) -> nn.Module: + if kwargs: + raise TypeError(f"Unexpected kwargs for wildfire_fpa_lstm: {sorted(kwargs.keys())}") + + t = str(task).lower() + if t not in ("regression", "forecasting"): + raise ValueError(f"wildfire_fpa_lstm supports task='regression'/'forecasting', got {task!r}") + + return WildfireFPALSTM( + input_dim=input_dim, + hidden_dim=hidden_dim, + output_dim=output_dim, + num_layers=num_layers, + dropout=dropout, + lookback=lookback, + ) + + +register_model( + name="wildfire_fpa_lstm", + builder=build_wildfire_fpa_lstm, + defaults={ + "hidden_dim": 64, + "output_dim": 5, + "num_layers": 1, + "dropout": 0.2, + "lookback": 50, + }, +) diff --git a/pyhazards/models/wildfire_fpa_mlp.py b/pyhazards/models/wildfire_fpa_mlp.py new file mode 100644 index 00000000..d6d16008 --- /dev/null +++ b/pyhazards/models/wildfire_fpa_mlp.py @@ -0,0 +1,114 @@ +# pyhazards/models/wildfire_fpa_mlp.py +from __future__ import annotations + +from typing import Callable, Union + +import torch +import torch.nn as nn + +from .registry import register_model + + +def _activation_from_name(name: Union[str, Callable[[], nn.Module]]) -> nn.Module: + if callable(name): + return name() + key = str(name).strip().lower() + if key == "relu": + return nn.ReLU() + if key == "gelu": + return nn.GELU() + if key == "tanh": + return nn.Tanh() + if key in ("silu", "swish"): + return nn.SiLU() + raise ValueError(f"Unsupported activation: {name!r}") + + +class WildfireFPAMLP(nn.Module): + """ + DNN/MLP for 5-way classification. + + forward: + x: (B, in_dim) + returns logits: (B, out_dim) + """ + + def __init__( + self, + in_dim: int, + out_dim: int = 5, + depth: int = 2, + hidden_dim: int = 64, + activation: Union[str, Callable[[], nn.Module]] = "relu", + dropout: float = 0.0, + ) -> None: + super().__init__() + if in_dim <= 0: + raise ValueError(f"in_dim must be > 0, got {in_dim}") + if out_dim <= 0: + raise ValueError(f"out_dim must be > 0, got {out_dim}") + if depth < 1: + raise ValueError(f"depth must be >= 1, got {depth}") + if hidden_dim <= 0: + raise ValueError(f"hidden_dim must be > 0, got {hidden_dim}") + if not (0.0 <= dropout < 1.0): + raise ValueError(f"dropout must be in [0,1), got {dropout}") + + layers = [] + d_in = in_dim + for _ in range(depth): + layers.append(nn.Linear(d_in, hidden_dim)) + layers.append(_activation_from_name(activation)) + if dropout > 0.0: + layers.append(nn.Dropout(p=dropout)) + d_in = hidden_dim + + layers.append(nn.Linear(d_in, out_dim)) + self.net = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.ndim != 2: + raise ValueError(f"MLP expects x as (B, D). 
Got {tuple(x.shape)}") + return self.net(x) + + +def build_wildfire_fpa_mlp( + *, + task: str, + in_dim: int, + out_dim: int = 5, + depth: int = 2, + hidden_dim: int = 64, + activation: Union[str, Callable[[], nn.Module]] = "relu", + dropout: float = 0.0, + **kwargs, +) -> nn.Module: + # strict: catch typos + if kwargs: + raise TypeError(f"Unexpected kwargs for wildfire_fpa_mlp: {sorted(kwargs.keys())}") + + if str(task).lower() != "classification": + raise ValueError(f"wildfire_fpa_mlp supports only task='classification', got {task!r}") + + return WildfireFPAMLP( + in_dim=in_dim, + out_dim=out_dim, + depth=depth, + hidden_dim=hidden_dim, + activation=activation, + dropout=dropout, + ) + + +# ✅ register at import time (NO decorator) +register_model( + name="wildfire_fpa_mlp", + builder=build_wildfire_fpa_mlp, + defaults={ + "out_dim": 5, + "depth": 2, + "hidden_dim": 64, + "activation": "relu", + "dropout": 0.0, + }, +) diff --git a/scripts/evaluate/common_eval.py b/scripts/evaluate/common_eval.py new file mode 100644 index 00000000..182522f8 --- /dev/null +++ b/scripts/evaluate/common_eval.py @@ -0,0 +1,127 @@ +# scripts/evaluate/common_eval.py +from __future__ import annotations + +import hashlib +import json +import os +import platform +import subprocess +import sys +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Optional + + +def utc_now_iso() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +def git_commit_hash(repo_root: Path) -> str: + try: + out = subprocess.check_output( + ["git", "rev-parse", "HEAD"], cwd=str(repo_root), stderr=subprocess.DEVNULL + ) + return out.decode().strip() + except Exception: + return "UNKNOWN" + + +def pip_freeze_head(n: int = 80) -> str: + try: + out = subprocess.check_output([sys.executable, "-m", "pip", "freeze"]) + lines = out.decode().splitlines() + return "\n".join(lines[:n]) + except Exception: + return "pip-freeze-unavailable" + + +def sha256_file(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def sha256_json(obj: Any) -> str: + b = json.dumps(obj, sort_keys=True, separators=(",", ":")).encode("utf-8") + return hashlib.sha256(b).hexdigest() + + +def write_json(path: Path, obj: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(obj, indent=2, sort_keys=True)) + + +def ensure(cond: bool, msg: str) -> None: + if not cond: + raise RuntimeError(msg) + + +@dataclass(frozen=True) +class RunMeta: + created_utc: str + git_commit: str + python: str + platform: str + pip_freeze_head: str + dataset_id: str + split: str + split_manifest_path: str + split_manifest_sha256: str + checkpoint: str + preds_path: str + + +def build_run_meta( + *, + repo_root: Path, + dataset_id: str, + split: str, + split_manifest_path: Path, + checkpoint: str, + preds_path: Path, +) -> RunMeta: + ensure(split_manifest_path.exists(), f"split_manifest not found: {split_manifest_path}") + return RunMeta( + created_utc=utc_now_iso(), + git_commit=git_commit_hash(repo_root), + python=sys.version.replace("\n", " "), + platform=f"{platform.platform()} | {platform.machine()}", + pip_freeze_head=pip_freeze_head(), + dataset_id=dataset_id, + split=split, + split_manifest_path=str(split_manifest_path), + split_manifest_sha256=sha256_file(split_manifest_path), + checkpoint=checkpoint, + preds_path=str(preds_path), + ) 
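+
+
+# Usage sketch (paths hypothetical; mirrors how the eval_*_from_npz scripts
+# call this module):
+#
+#   meta = build_run_meta(
+#       repo_root=Path("."),
+#       dataset_id="wildfire_fpa_fod_tabular",
+#       split="test",
+#       split_manifest_path=Path("outputs/phase5/split_manifest.json"),
+#       checkpoint="outputs/.../checkpoints/best.ckpt",
+#       preds_path=Path("outputs/phase5/preds_class_test.npz"),
+#   )
+#   write_json(Path("outputs/phase5/run_meta.json"), meta.__dict__)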
+ + +def write_phase5_readme(out_dir: Path, title: str, meta: RunMeta, extra_lines: str) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + p = out_dir / "README.md" + txt = [] + txt.append(f"# Phase 5 — {title}") + txt.append("") + txt.append("## Repro metadata") + txt.append(f"- created_utc: {meta.created_utc}") + txt.append(f"- git_commit: {meta.git_commit}") + txt.append(f"- python: {meta.python}") + txt.append(f"- platform: {meta.platform}") + txt.append(f"- dataset_id: {meta.dataset_id}") + txt.append(f"- split: {meta.split}") + txt.append(f"- split_manifest_path: {meta.split_manifest_path}") + txt.append(f"- split_manifest_sha256: {meta.split_manifest_sha256}") + txt.append(f"- checkpoint: {meta.checkpoint}") + txt.append(f"- preds_path: {meta.preds_path}") + txt.append("") + txt.append("## pip freeze (head)") + txt.append("```") + txt.append(meta.pip_freeze_head) + txt.append("```") + txt.append("") + txt.append("## Notes / results") + txt.append(extra_lines.strip() + "\n") + p.write_text("\n".join(txt)) diff --git a/scripts/evaluate/convert_run_csv_to_phase5_npz.py b/scripts/evaluate/convert_run_csv_to_phase5_npz.py new file mode 100644 index 00000000..eeb264f0 --- /dev/null +++ b/scripts/evaluate/convert_run_csv_to_phase5_npz.py @@ -0,0 +1,210 @@ +# scripts/evaluate/convert_run_csv_to_phase5_npz.py +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import List, Optional, Tuple + +import numpy as np +import pandas as pd + + +def ensure(cond: bool, msg: str) -> None: + if not cond: + raise RuntimeError(msg) + + +def _read_any_csv(csv_path: Path) -> pd.DataFrame: + ensure(csv_path.exists(), f"CSV not found: {csv_path}") + df = pd.read_csv(csv_path) + ensure(len(df) > 0, f"CSV empty: {csv_path}") + return df + + +def _try_find_col(df: pd.DataFrame, candidates: List[str]) -> Optional[str]: + cols = {c.lower(): c for c in df.columns} + for cand in candidates: + if cand.lower() in cols: + return cols[cand.lower()] + return None + + +def export_ids_npz(ids: np.ndarray, out_path: Path) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez(out_path, ids=ids.astype(object)) + print(f"[OK] wrote {out_path} (n={len(ids)})") + + +def export_classification_npz(y_true: np.ndarray, y_pred: np.ndarray, label_names: List[str], out_path: Path) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez( + out_path, + y_true=y_true.astype(np.int64), + y_pred=y_pred.astype(np.int64), + label_names=np.array(label_names, dtype=object), + split="test", + ) + print(f"[OK] wrote {out_path} (N={len(y_true)}, K={len(label_names)})") + + +def export_forecast_npz(y_true: np.ndarray, y_pred: np.ndarray, out_path: Path) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez(out_path, y_true=y_true.astype(np.float64), y_pred=y_pred.astype(np.float64), split="test") + print(f"[OK] wrote {out_path} (T={y_true.shape[0]}, K=5)") + + +def load_label_names_from_config(run_dir: Path, default: List[str]) -> List[str]: + cfg = run_dir / "config.json" + if not cfg.exists(): + return default + obj = json.loads(cfg.read_text()) + # common keys to try + for key in ["label_names", "labels", "classes", "class_names"]: + if key in obj and isinstance(obj[key], list) and len(obj[key]) >= 2: + return [str(x) for x in obj[key]] + return default + + +def convert_classification_csv( + run_dir: Path, + csv_path: Path, + out_npz: Path, +) -> None: + df = _read_any_csv(csv_path) + + # Accept many possible column names + 
ytrue_col = _try_find_col(df, ["y_true", "true", "target", "label", "gt", "gold"]) + ypred_col = _try_find_col(df, ["y_pred", "pred", "prediction", "pred_label"]) + + ensure(ytrue_col is not None, f"Cannot find y_true column in {csv_path}. columns={list(df.columns)}") + ensure(ypred_col is not None, f"Cannot find y_pred column in {csv_path}. columns={list(df.columns)}") + + y_true = df[ytrue_col].to_numpy() + y_pred = df[ypred_col].to_numpy() + + # If they are strings, map to ints + if y_true.dtype == object or isinstance(y_true[0], str): + uniq = sorted(set([str(x) for x in y_true] + [str(x) for x in y_pred])) + m = {c: i for i, c in enumerate(uniq)} + y_true = np.array([m[str(x)] for x in y_true], dtype=np.int64) + y_pred = np.array([m[str(x)] for x in y_pred], dtype=np.int64) + label_names = uniq + else: + y_true = y_true.astype(np.int64) + y_pred = y_pred.astype(np.int64) + # try config.json for names, fallback to numeric labels + label_names = load_label_names_from_config(run_dir, default=[str(i) for i in range(int(max(y_true.max(), y_pred.max())) + 1)]) + + export_classification_npz(y_true, y_pred, label_names, out_npz) + + # If there is an id column, write test_ids.npz too (helps manifest) + id_col = _try_find_col(df, ["id", "fire_id", "sample_id", "idx", "index"]) + if id_col is not None: + ids = df[id_col].astype(str).to_numpy() + export_ids_npz(ids, out_npz.parent / "test_ids.npz") + + +def convert_forecasting_csv_grouped( + csv_us: Path, + csv_ca: Path, + out_us: Path, + out_ca: Path, +) -> None: + """ + Expect CSV with columns like: + true_A,true_B,true_C,true_D,true_EFG and pred_A,... OR similar. + We'll infer patterns robustly. + """ + def parse(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: + cols = [c.lower() for c in df.columns] + # candidate patterns + buckets = ["a","b","c","d","efg"] + true_cols = [] + pred_cols = [] + + # Try 'true_A' and 'pred_A' + for b in buckets: + tc = _try_find_col(df, [f"true_{b}", f"y_true_{b}", f"gt_{b}", f"actual_{b}"]) + pc = _try_find_col(df, [f"pred_{b}", f"y_pred_{b}", f"prediction_{b}", f"yhat_{b}"]) + ensure(tc is not None, f"Missing true column for bucket {b}. cols={list(df.columns)}") + ensure(pc is not None, f"Missing pred column for bucket {b}. 
cols={list(df.columns)}") + true_cols.append(tc) + pred_cols.append(pc) + + y_true = df[true_cols].to_numpy(dtype=np.float64) + y_pred = df[pred_cols].to_numpy(dtype=np.float64) + ensure(y_true.shape[1] == 5 and y_pred.shape[1] == 5, "Must be (T,5)") + return y_true, y_pred + + df_us = _read_any_csv(csv_us) + df_ca = _read_any_csv(csv_ca) + + y_true_us, y_pred_us = parse(df_us) + y_true_ca, y_pred_ca = parse(df_ca) + + export_forecast_npz(y_true_us, y_pred_us, out_us) + export_forecast_npz(y_true_ca, y_pred_ca, out_ca) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--weekly-run", required=True, help="Run dir for weekly forecast (has csv/)") + ap.add_argument("--cause-run", required=True, help="Run dir for cause classification (has csv/)") + ap.add_argument("--size-run", required=True, help="Run dir for size classification (has csv/)") + ap.add_argument("--outdir", default=".", help="Where to write preds_*.npz") + args = ap.parse_args() + + outdir = Path(args.outdir) + + # ----- Classification: pick the most likely CSVs + def pick_first_csv(run: Path) -> Path: + csvdir = run / "csv" + ensure(csvdir.exists(), f"Missing csv dir: {csvdir}") + csvs = sorted(csvdir.rglob("*.csv")) + ensure(len(csvs) > 0, f"No csv files under: {csvdir}") + return csvs[0] + + cause_run = Path(args.cause_run) + size_run = Path(args.size_run) + weekly_run= Path(args.weekly_run) + + cause_csv = pick_first_csv(cause_run) + size_csv = pick_first_csv(size_run) + + print("[INFO] Using cause csv:", cause_csv) + print("[INFO] Using size csv:", size_csv) + + convert_classification_csv(cause_run, cause_csv, outdir / "preds_class_test.npz") # cause + convert_classification_csv(size_run, size_csv, outdir / "preds_size_test.npz") # size (separate) + + # ----- Forecasting: need US + CA CSV (must exist somewhere in weekly run csv/) + # We'll auto-detect by filename containing 'us' and 'ca'. + csvdir = weekly_run / "csv" + ensure(csvdir.exists(), f"Missing csv dir: {csvdir}") + csvs = sorted(csvdir.rglob("*.csv")) + ensure(len(csvs) > 0, f"No csv files under: {csvdir}") + + us = [p for p in csvs if "us" in p.name.lower()] + ca = [p for p in csvs if "ca" in p.name.lower() or "can" in p.name.lower()] + + ensure(len(us) > 0, f"Could not find US csv in {csvdir}. Files={ [p.name for p in csvs[:30]] }") + ensure(len(ca) > 0, f"Could not find CA csv in {csvdir}. 
Files={ [p.name for p in csvs[:30]] }") + + us_csv = us[0] + ca_csv = ca[0] + print("[INFO] Using forecast US csv:", us_csv) + print("[INFO] Using forecast CA csv:", ca_csv) + + convert_forecasting_csv_grouped( + us_csv, ca_csv, + outdir / "preds_forecast_us_test.npz", + outdir / "preds_forecast_ca_test.npz", + ) + + print("[OK] Wrote NPZ caches into:", outdir) + print("Next: run make_split_manifest.py + eval_*_from_npz.py") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/eval_classification.py b/scripts/evaluate/eval_classification.py new file mode 100644 index 00000000..c99d8056 --- /dev/null +++ b/scripts/evaluate/eval_classification.py @@ -0,0 +1,192 @@ +# scripts/evaluate/eval_classification.py +from __future__ import annotations + +import argparse +import json +import os +import shutil +from pathlib import Path +from typing import Any, Dict, Optional, Tuple + +import numpy as np + +from pyhazards.metrics.classification import evaluate_multiclass +from pyhazards.plotting.confusion import plot_confusion_matrix + + +def ensure(cond: bool, msg: str) -> None: + if not cond: + raise RuntimeError(msg) + + +def _find_run_dir_from_checkpoint(ckpt_path: Path) -> Path: + """ + Expected checkpoint layout (your tree confirms this): + outputs/train_wildfire_fpa_{task}/{RUN_ID}/checkpoints/best.ckpt + So run_dir = ckpt_path.parents[1] + """ + ckpt_path = ckpt_path.resolve() + ensure(ckpt_path.exists(), f"Checkpoint not found: {ckpt_path}") + ensure(ckpt_path.suffix == ".ckpt", f"Expected a .ckpt file, got: {ckpt_path}") + run_dir = ckpt_path.parent.parent # .../RUN_ID + ensure(run_dir.exists(), f"Run dir not found from checkpoint: {run_dir}") + return run_dir + + +def _load_json_if_exists(p: Path) -> Optional[Dict[str, Any]]: + if p.exists(): + return json.loads(p.read_text()) + return None + + +def _extract_from_run_dir(run_dir: Path) -> Tuple[np.ndarray, np.ndarray, list[str]]: + """ + We try to reconstruct y_true/y_pred/labels from training-run artifacts. + + Supported sources (in order): + 1) confusion_matrix.json if present (preferred) + 2) metrics.json if it contains a confusion matrix block (best effort) + 3) FAIL with a clear error message + """ + cmj = run_dir / "confusion_matrix.json" + mj = run_dir / "metrics.json" + + # 1) confusion_matrix.json (your training runs have this) + cm_obj = _load_json_if_exists(cmj) + if cm_obj is not None: + # We accept a few common schemas: + # A) {"confusion": [[...]], "labels": ["A","B",...]} + # B) {"matrix": [[...]], "labels": [...]} + # C) {"confusion_matrix": [[...]], "label_names": [...]} + mat = ( + cm_obj.get("confusion") + or cm_obj.get("matrix") + or cm_obj.get("confusion_matrix") + or cm_obj.get("cm") + ) + labels = cm_obj.get("labels") or cm_obj.get("label_names") or cm_obj.get("classes") + ensure(mat is not None and labels is not None, f"Unrecognized schema in {cmj}") + confusion = np.array(mat, dtype=int) + label_names = [str(x) for x in labels] + + # From confusion alone we cannot recover the exact per-example y_true/y_pred, + # but we *can* generate a synthetic sample expansion that reproduces the confusion exactly. + # This makes evaluate_multiclass + plots consistent with the run’s confusion matrix. 
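+        # Worked example: confusion [[2, 1], [0, 3]] expands to
+        #   y_true = [0, 0, 0, 1, 1, 1]
+        #   y_pred = [0, 0, 1, 1, 1, 1]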
+ y_true_list = [] + y_pred_list = [] + for i in range(confusion.shape[0]): + for j in range(confusion.shape[1]): + c = int(confusion[i, j]) + if c <= 0: + continue + y_true_list.extend([i] * c) + y_pred_list.extend([j] * c) + + ensure(len(y_true_list) > 0, f"Confusion matrix in {cmj} is empty") + return np.array(y_true_list, dtype=int), np.array(y_pred_list, dtype=int), label_names + + # 2) metrics.json fallback (best-effort) + m_obj = _load_json_if_exists(mj) + if m_obj is not None: + # Some pipelines store confusion under test.metrics or test.confusion etc. + test = m_obj.get("test") + if isinstance(test, dict): + cand_blocks = [test, test.get("metrics", {}) if isinstance(test.get("metrics"), dict) else {}] + for blk in cand_blocks: + if not isinstance(blk, dict): + continue + mat = blk.get("confusion") or blk.get("confusion_matrix") or blk.get("cm") + labels = blk.get("labels") or blk.get("label_names") or blk.get("classes") + if mat is not None and labels is not None: + confusion = np.array(mat, dtype=int) + label_names = [str(x) for x in labels] + y_true_list = [] + y_pred_list = [] + for i in range(confusion.shape[0]): + for j in range(confusion.shape[1]): + c = int(confusion[i, j]) + if c <= 0: + continue + y_true_list.extend([i] * c) + y_pred_list.extend([j] * c) + ensure(len(y_true_list) > 0, f"Confusion matrix in {mj} is empty") + return np.array(y_true_list, dtype=int), np.array(y_pred_list, dtype=int), label_names + + raise RuntimeError( + "Could not find confusion_matrix.json (preferred) or a usable confusion block inside metrics.json.\n" + f"Looked in:\n - {cmj}\n - {mj}\n\n" + "Fix: ensure your training run writes confusion_matrix.json, or extend this script for your schema." + ) + + +def main() -> None: + ap = argparse.ArgumentParser( + description=( + "Phase5 classification evaluation exporter.\n\n" + "This script is intentionally strict + reproducible.\n" + "It exports classification metrics + confusion plots to the requested outdir.\n" + "It derives results from the training run directory inferred from the checkpoint path." 
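+            # Hedged example invocation; RUN_ID and output paths are placeholders.
+            "\n\nExample:\n"
+            "  python scripts/evaluate/eval_classification.py \\\n"
+            "    --dataset-id wildfire_fpa_fod_tabular \\\n"
+            "    --checkpoint outputs/train_wildfire_fpa_cause/RUN_ID/checkpoints/best.ckpt \\\n"
+            "    --split-manifest outputs/phase5/split_manifest.json \\\n"
+            "    --outdir outputs/phase5/classification_cause"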
+ ) + ) + ap.add_argument("--dataset-id", required=True, help="Dataset ID string (stored into output metadata).") + ap.add_argument("--checkpoint", required=True, help="Path to best.ckpt (used to locate the training run dir).") + ap.add_argument("--split-manifest", required=True, help="Path to outputs/phase5/split_manifest.json (stored into output metadata).") + ap.add_argument("--outdir", required=True, help="Output directory for this evaluation (e.g., outputs/phase5/classification_cause).") + ap.add_argument("--overwrite", action="store_true", help="Overwrite outdir if it exists.") + args = ap.parse_args() + + ckpt = Path(args.checkpoint) + split_manifest = Path(args.split_manifest) + out_dir = Path(args.outdir) + + ensure(split_manifest.exists(), f"split manifest not found: {split_manifest}") + run_dir = _find_run_dir_from_checkpoint(ckpt) + + if out_dir.exists(): + ensure(args.overwrite, f"outdir exists: {out_dir} (use --overwrite to replace)") + shutil.rmtree(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + # Extract y_true/y_pred/labels from training run artifacts + y_true, y_pred, label_names = _extract_from_run_dir(run_dir) + + res = evaluate_multiclass(y_true, y_pred, label_names) + + # Extra metadata for traceability + meta = { + "dataset_id": args.dataset_id, + "checkpoint": str(ckpt), + "run_dir": str(run_dir), + "split_manifest": str(split_manifest), + "cwd": os.getcwd(), + } + + metrics_payload = { + "meta": meta, + "accuracy": float(res.accuracy), + "report": res.report_dict, # per-class + macro/weighted + } + + (out_dir / "metrics.json").write_text(json.dumps(metrics_payload, indent=2)) + (out_dir / "classification_report.txt").write_text(res.report_csv) + + plot_confusion_matrix( + res.confusion, + res.labels, + title="Confusion Matrix", + out_path=str(out_dir / "confusion_matrix.png"), + normalize=False, + ) + plot_confusion_matrix( + res.confusion_normalized, + res.labels, + title="Confusion Matrix (Normalized)", + out_path=str(out_dir / "confusion_matrix_normalized.png"), + normalize=True, + ) + + print(f"[OK] Saved classification evaluation to: {out_dir}") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/eval_classification_from_npz.py b/scripts/evaluate/eval_classification_from_npz.py new file mode 100644 index 00000000..26568b49 --- /dev/null +++ b/scripts/evaluate/eval_classification_from_npz.py @@ -0,0 +1,121 @@ +# scripts/evaluate/eval_classification_from_npz.py +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +import numpy as np + +from pyhazards.metrics.classification import evaluate_multiclass +from pyhazards.plotting.confusion import plot_confusion_matrix +from common_eval import ( + build_run_meta, + ensure, + write_json, + write_phase5_readme, +) + + +def _load_npz(path: Path) -> dict: + ensure(path.exists(), f"preds npz not found: {path}") + d = np.load(str(path), allow_pickle=True) + return {k: d[k] for k in d.files} + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--preds", required=True, help="NPZ with y_true,y_pred,label_names,split") + ap.add_argument("--dataset-id", required=True) + ap.add_argument("--checkpoint", required=True) + ap.add_argument("--split-manifest", default="outputs/phase5/split_manifest.json") + ap.add_argument("--outdir", default="outputs/phase5/classification") + args = ap.parse_args() + + repo_root = Path(__file__).resolve().parents[2] + preds_path = Path(args.preds) + out_dir = Path(args.outdir) + out_dir.mkdir(parents=True, 
exist_ok=True) + + blob = _load_npz(preds_path) + + required = ["y_true", "y_pred", "label_names", "split"] + for k in required: + ensure(k in blob, f"{preds_path} missing key '{k}' (required: {required})") + + split = str(blob["split"]) + ensure(split == "test", f"split must be 'test' but got '{split}'") + + y_true = np.asarray(blob["y_true"]) + y_pred = np.asarray(blob["y_pred"]) + label_names = [str(x) for x in np.asarray(blob["label_names"]).tolist()] + + # HARD VALIDATION + ensure(y_true.ndim == 1, "y_true must be 1D (N,)") + ensure(y_pred.ndim == 1, "y_pred must be 1D (N,)") + ensure(len(y_true) == len(y_pred), "y_true and y_pred length mismatch") + ensure(len(y_true) > 0, "empty test set not allowed") + ensure(len(label_names) >= 2, "need >=2 classes") + ensure(np.isfinite(y_true.astype(float)).all(), "y_true has NaN/Inf") + ensure(np.isfinite(y_pred.astype(float)).all(), "y_pred has NaN/Inf") + + K = len(label_names) + ensure(np.issubdtype(y_true.dtype, np.integer), "y_true must be integer labels") + ensure(np.issubdtype(y_pred.dtype, np.integer), "y_pred must be integer labels") + ensure(int(y_true.min()) >= 0 and int(y_true.max()) < K, "y_true labels out of range") + ensure(int(y_pred.min()) >= 0 and int(y_pred.max()) < K, "y_pred labels out of range") + + res = evaluate_multiclass(y_true, y_pred, label_names) + + meta = build_run_meta( + repo_root=repo_root, + dataset_id=args.dataset_id, + split="test", + split_manifest_path=Path(args.split_manifest), + checkpoint=args.checkpoint, + preds_path=preds_path, + ) + + # Save + write_json(out_dir / "run_meta.json", meta.__dict__) + write_json( + out_dir / "metrics.json", + { + "accuracy": res.accuracy, + "labels": res.labels, + "classification_report": res.report_dict, + "confusion_matrix": res.confusion.tolist(), + "confusion_matrix_normalized": res.confusion_normalized.tolist(), + }, + ) + (out_dir / "classification_report.txt").write_text(res.report_csv) + + plot_confusion_matrix( + res.confusion, res.labels, + title="Confusion Matrix", + out_path=str(out_dir / "confusion_matrix.png"), + normalize=False, + ) + plot_confusion_matrix( + res.confusion_normalized, res.labels, + title="Confusion Matrix", + out_path=str(out_dir / "confusion_matrix_normalized.png"), + normalize=True, + ) + + extra = f""" +- N_test: {len(y_true)} +- accuracy: {res.accuracy:.6f} +- outputs: + - metrics.json + - classification_report.txt + - confusion_matrix.png + - confusion_matrix_normalized.png +""" + write_phase5_readme(out_dir, "Classification", meta, extra) + + print(f"[OK] Saved classification evaluation to: {out_dir}") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/eval_forecasting.py b/scripts/evaluate/eval_forecasting.py new file mode 100644 index 00000000..56b30495 --- /dev/null +++ b/scripts/evaluate/eval_forecasting.py @@ -0,0 +1,70 @@ +# scripts/evaluate/eval_forecasting.py +from __future__ import annotations + +import csv +import json +from pathlib import Path +from typing import Dict, List + +import numpy as np + +from pyhazards.metrics.forecasting import evaluate_forecast_buckets + + +BUCKETS = ["A", "B", "C", "D", "EFG"] + + +def _save_json(path: Path, obj) -> None: + path.write_text(json.dumps(obj, indent=2)) + + +def main(): + out_dir = Path("outputs/phase5/forecasting") + out_dir.mkdir(parents=True, exist_ok=True) + + # TODO: produce TEST-ONLY arrays for each region: + # y_true_us, y_pred_us: shape (T, 5) for A,B,C,D,EFG + # y_true_ca, y_pred_ca: shape (T, 5) + raise_not_implemented = False + if 
diff --git a/scripts/evaluate/eval_forecasting.py b/scripts/evaluate/eval_forecasting.py
new file mode 100644
index 00000000..56b30495
--- /dev/null
+++ b/scripts/evaluate/eval_forecasting.py
@@ -0,0 +1,70 @@
+# scripts/evaluate/eval_forecasting.py
+from __future__ import annotations
+
+import csv
+import json
+from pathlib import Path
+
+import numpy as np
+
+from pyhazards.metrics.forecasting import evaluate_forecast_buckets
+
+
+BUCKETS = ["A", "B", "C", "D", "EFG"]
+
+
+def _save_json(path: Path, obj) -> None:
+    path.write_text(json.dumps(obj, indent=2))
+
+
+def r2_score(y_true, y_pred):
+    """Reference R² for 1D arrays. Not used by main(); kept for ad-hoc checks."""
+    y_true = np.asarray(y_true, dtype=float)
+    y_pred = np.asarray(y_pred, dtype=float)
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    if ss_tot == 0:
+        return 0.0
+    return 1.0 - (ss_res / ss_tot)
+
+
+def main():
+    out_dir = Path("outputs/phase5/forecasting")
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # TODO: produce TEST-ONLY arrays for each region:
+    #   y_true_us, y_pred_us: shape (T, 5) for A,B,C,D,EFG
+    #   y_true_ca, y_pred_ca: shape (T, 5)
+    # Flip this to True once real arrays are wired in, so the script fails fast
+    # instead of silently evaluating the synthetic placeholders below.
+    raise_not_implemented = False
+    if raise_not_implemented:
+        raise RuntimeError("TODO: wire dataset + inference to produce y_true/y_pred for US and CA test splits.")
+
+    # Example placeholders (DELETE once real predictions exist).
+    # Seeded so repeated runs produce identical artifacts.
+    rng = np.random.default_rng(0)
+    y_true_us = rng.poisson(10, size=(100, 5)).astype(np.float64)
+    y_pred_us = y_true_us + rng.normal(0, 2, size=(100, 5))
+    y_true_ca = rng.poisson(5, size=(100, 5)).astype(np.float64)
+    y_pred_ca = y_true_ca + rng.normal(0, 1, size=(100, 5))
+
+    us = evaluate_forecast_buckets(y_true_us, y_pred_us, BUCKETS)
+    ca = evaluate_forecast_buckets(y_true_ca, y_pred_ca, BUCKETS)
+
+    # Save region JSON
+    _save_json(out_dir / "forecast_metrics_us.json", {k: vars(v) for k, v in us.items()})
+    _save_json(out_dir / "forecast_metrics_ca.json", {k: vars(v) for k, v in ca.items()})
+
+    # Table 10-like CSV (paper parity)
+    table_path = out_dir / "table10_like.csv"
+    with table_path.open("w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["region", "bucket", "rmse", "r2"])
+        for region, metrics in [("United States", us), ("California", ca)]:
+            for bucket in ["all"] + BUCKETS:
+                w.writerow([region, bucket, metrics[bucket].rmse, metrics[bucket].r2])
+
+    print(f"[OK] Saved forecasting evaluation to: {out_dir}")
+    print(f"[OK] Table10-like CSV: {table_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/evaluate/eval_forecasting_from_npz.py b/scripts/evaluate/eval_forecasting_from_npz.py
new file mode 100644
index 00000000..514ae3b0
--- /dev/null
+++ b/scripts/evaluate/eval_forecasting_from_npz.py
@@ -0,0 +1,110 @@
+# scripts/evaluate/eval_forecasting_from_npz.py
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import numpy as np
+
+from pyhazards.metrics.forecasting import evaluate_forecast_buckets
+from common_eval import (
+    build_run_meta,
+    ensure,
+    write_json,
+    write_phase5_readme,
+)
+
+BUCKETS = ["A", "B", "C", "D", "EFG"]
+
+
+def _load_region_npz(path: Path) -> tuple[np.ndarray, np.ndarray, str]:
+    ensure(path.exists(), f"preds npz not found: {path}")
+    d = np.load(str(path), allow_pickle=True)
+    ensure("split" in d.files, f"{path} missing key 'split'")
+    ensure("y_true" in d.files and "y_pred" in d.files, f"{path} must contain y_true and y_pred")
+
+    split = str(d["split"])
+    ensure(split == "test", f"{path}: split must be 'test' but got '{split}'")
+
+    y_true = np.asarray(d["y_true"], dtype=np.float64)
+    y_pred = np.asarray(d["y_pred"], dtype=np.float64)
+
+    # HARD VALIDATION
+    ensure(y_true.ndim == 2 and y_pred.ndim == 2, f"{path}: y_true/y_pred must be 2D (T,5)")
+    ensure(y_true.shape == y_pred.shape, f"{path}: y_true/y_pred shape mismatch")
+    ensure(y_true.shape[1] == 5, f"{path}: must have 5 columns for {BUCKETS}")
+    ensure(y_true.shape[0] > 0, f"{path}: empty test timeline not allowed")
+    ensure(np.isfinite(y_true).all() and np.isfinite(y_pred).all(), f"{path}: contains NaN/Inf")
+
+    # Truth values are counts, so enforce non-negativity (strict).
+    ensure((y_true >= 0).all(), f"{path}: y_true has negative values (counts must be non-negative)")
+
+    return y_true, y_pred, split
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--us", required=True, help="NPZ for United States test: y_true(T,5), y_pred(T,5), split='test'")
+    
ap.add_argument("--ca", required=True, help="NPZ for California test: y_true(T,5), y_pred(T,5), split='test'") + ap.add_argument("--dataset-id", required=True) + ap.add_argument("--checkpoint", required=True) + ap.add_argument("--split-manifest", default="outputs/phase5/split_manifest.json") + ap.add_argument("--outdir", default="outputs/phase5/forecasting") + args = ap.parse_args() + + repo_root = Path(__file__).resolve().parents[2] + out_dir = Path(args.outdir) + out_dir.mkdir(parents=True, exist_ok=True) + + us_path = Path(args.us) + ca_path = Path(args.ca) + + y_true_us, y_pred_us, _ = _load_region_npz(us_path) + y_true_ca, y_pred_ca, _ = _load_region_npz(ca_path) + + us = evaluate_forecast_buckets(y_true_us, y_pred_us, BUCKETS) + ca = evaluate_forecast_buckets(y_true_ca, y_pred_ca, BUCKETS) + + # Save per-region json + write_json(out_dir / "forecast_metrics_us.json", {k: {"rmse": v.rmse, "r2": v.r2} for k, v in us.items()}) + write_json(out_dir / "forecast_metrics_ca.json", {k: {"rmse": v.rmse, "r2": v.r2} for k, v in ca.items()}) + + # Table10-like CSV + table_path = out_dir / "table10_like.csv" + with table_path.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["region", "bucket", "rmse", "r2"]) + for region, metrics in [("United States", us), ("California", ca)]: + for bucket in ["all"] + BUCKETS: + w.writerow([region, bucket, metrics[bucket].rmse, metrics[bucket].r2]) + + # Meta/README (tie to US preds path as primary) + meta = build_run_meta( + repo_root=repo_root, + dataset_id=args.dataset_id, + split="test", + split_manifest_path=Path(args.split_manifest), + checkpoint=args.checkpoint, + preds_path=us_path, + ) + write_json(out_dir / "run_meta.json", meta.__dict__) + + extra = f""" +- inputs: + - US preds: {us_path} + - CA preds: {ca_path} +- buckets: {BUCKETS} +- outputs: + - forecast_metrics_us.json + - forecast_metrics_ca.json + - table10_like.csv +""" + write_phase5_readme(out_dir, "Forecasting (Table10 parity)", meta, extra) + + print(f"[OK] Saved forecasting evaluation to: {out_dir}") + print(f"[OK] Table10-like CSV: {table_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/export_phase5_npz.py b/scripts/evaluate/export_phase5_npz.py new file mode 100644 index 00000000..657ddf5a --- /dev/null +++ b/scripts/evaluate/export_phase5_npz.py @@ -0,0 +1,232 @@ +# scripts/evaluate/export_phase5_npz.py +from __future__ import annotations + +import argparse +import sqlite3 +from pathlib import Path +from typing import Dict, List, Tuple + +import numpy as np +import pandas as pd + + +# ---- STRICT: you must map your paper-parity buckets exactly +SIZE_BUCKETS = ["A", "B", "C", "D", "EFG"] + +# Default FPA FOD size-class mapping (common): adjust if your pipeline differs. +# If your dataset already has SIZE_CLASS as letters A-G, use that directly. +# If it stores numeric codes, map them here. +LETTER_SET = set(list("ABCDEFG")) + + +def ensure(cond: bool, msg: str) -> None: + if not cond: + raise RuntimeError(msg) + + +def connect_sqlite(db_path: Path) -> sqlite3.Connection: + ensure(db_path.exists(), f"SQLite DB not found: {db_path}") + return sqlite3.connect(str(db_path)) + + +def load_test_fire_ids_from_parquet(test_parquet: Path, id_col: str) -> np.ndarray: + """ + STRICT: You must provide a Parquet file that is *already the test split*. 
+ """ + ensure(test_parquet.exists(), f"Test parquet not found: {test_parquet}") + df = pd.read_parquet(test_parquet, columns=[id_col]) + ensure(id_col in df.columns, f"id_col '{id_col}' not in parquet columns: {df.columns.tolist()}") + ids = df[id_col].astype(str).to_numpy() + ensure(ids.ndim == 1 and len(ids) > 0, "No ids found in test parquet") + return ids + + +def query_fpa_fod_for_ids( + con: sqlite3.Connection, + table: str, + id_col: str, + ids: np.ndarray, + cols: List[str], +) -> pd.DataFrame: + """ + Pull only what we need for the test ids. + """ + ensure(len(ids) > 0, "ids empty") + cols_sql = ", ".join([id_col] + cols) + + # SQLite has variable limits; chunk ids + out = [] + chunk = 900 # safe + for i in range(0, len(ids), chunk): + part = ids[i : i + chunk].tolist() + placeholders = ",".join(["?"] * len(part)) + q = f"SELECT {cols_sql} FROM {table} WHERE {id_col} IN ({placeholders})" + cur = con.execute(q, part) + rows = cur.fetchall() + out.extend(rows) + + df = pd.DataFrame(out, columns=[id_col] + cols) + ensure(len(df) > 0, "Query returned 0 rows for provided ids — id_col/table mismatch") + return df + + +def make_size_bucket_vector(size_series: pd.Series) -> np.ndarray: + """ + Convert size class into 5-bucket counts A,B,C,D,EFG. + Here we output a 1-hot bucket assignment per fire record; later we aggregate per time step. + """ + # normalize to strings + s = size_series.astype(str).str.upper().str.strip() + + # If already A-G letters: + # map E/F/G to EFG bucket + bucket = [] + for x in s.tolist(): + if x in ["A", "B", "C", "D"]: + bucket.append(x) + elif x in ["E", "F", "G"]: + bucket.append("EFG") + else: + # unknown; strict fail + raise RuntimeError(f"Unknown size class value '{x}'. Fix mapping in export_phase5_npz.py") + + # 1-hot (N,5) + b2i = {b: i for i, b in enumerate(SIZE_BUCKETS)} + arr = np.zeros((len(bucket), 5), dtype=np.float64) + for i, b in enumerate(bucket): + arr[i, b2i[b]] = 1.0 + return arr + + +def aggregate_weekly_counts( + dates: pd.Series, + bucket_1hot: np.ndarray, + country: pd.Series, + country_value: str, +) -> np.ndarray: + """ + Aggregate to weekly time series (T,5) for a region. + """ + ensure(bucket_1hot.ndim == 2 and bucket_1hot.shape[1] == 5, "bucket_1hot must be (N,5)") + df = pd.DataFrame({"date": pd.to_datetime(dates), "country": country.astype(str)}) + for j, b in enumerate(SIZE_BUCKETS): + df[b] = bucket_1hot[:, j] + + # filter region + df = df[df["country"] == country_value].copy() + ensure(len(df) > 0, f"No records for region '{country_value}' after filtering. 
Check country column values.") + + # week start (Mon) + df["week"] = df["date"].dt.to_period("W-MON").dt.start_time + + agg = df.groupby("week")[SIZE_BUCKETS].sum().sort_index() + ensure(len(agg) > 0, "Weekly aggregation produced empty time series") + + return agg.to_numpy(dtype=np.float64) + + +def export_ids_npz(ids: np.ndarray, out_path: Path) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez(out_path, ids=ids.astype(object)) + print(f"[OK] wrote {out_path} (n={len(ids)})") + + +def export_classification_npz( + y_true: np.ndarray, + y_pred: np.ndarray, + label_names: List[str], + out_path: Path, +) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez( + out_path, + y_true=y_true.astype(np.int64), + y_pred=y_pred.astype(np.int64), + label_names=np.array(label_names, dtype=object), + split="test", + ) + print(f"[OK] wrote {out_path} (N={len(y_true)}, K={len(label_names)})") + + +def export_forecast_npz( + y_true: np.ndarray, + y_pred: np.ndarray, + out_path: Path, +) -> None: + out_path.parent.mkdir(parents=True, exist_ok=True) + np.savez(out_path, y_true=y_true.astype(np.float64), y_pred=y_pred.astype(np.float64), split="test") + print(f"[OK] wrote {out_path} (T={y_true.shape[0]}, K=5)") + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--db", required=True, help="Path to FPA_FOD sqlite") + ap.add_argument("--table", default="Fires", help="Table name (often Fires)") + ap.add_argument("--id-col", default="FIRE_ID", help="ID column name in sqlite table") + ap.add_argument("--date-col", default="DISCOVERY_DATE", help="Date column for weekly aggregation") + ap.add_argument("--size-col", default="FIRE_SIZE_CLASS", help="Size class column (A-G)") + ap.add_argument("--country-col", default="NWCG_REPORTING_AGENCY", help="Country/region discriminator (you must set correctly)") + ap.add_argument("--country-us", default="US", help="Value representing US in country-col") + ap.add_argument("--country-ca", default="CA", help="Value representing CA in country-col") + + # You must supply your test split parquet (or any file that lists test ids) + ap.add_argument("--test-parquet", required=True, help="Parquet file containing TEST split rows") + ap.add_argument("--test-id-col", default="fire_id", help="Column name in parquet that matches sqlite id-col values") + + # ---- Classification preds input (from your model output) + ap.add_argument("--class-ytrue-npy", required=True, help="Path to saved y_true (N,) .npy for TEST") + ap.add_argument("--class-ypred-npy", required=True, help="Path to saved y_pred (N,) .npy for TEST") + ap.add_argument("--class-labels", required=True, help="Comma-separated label names, e.g. 
A,B,C,D,EFG or causes") + + ap.add_argument("--outdir", default=".", help="Where to write test_ids.npz and preds_*_test.npz") + args = ap.parse_args() + + outdir = Path(args.outdir) + + # 1) IDs manifest source + test_ids = load_test_fire_ids_from_parquet(Path(args.test_parquet), args.test_id_col) + export_ids_npz(test_ids, outdir / "test_ids.npz") + + # 2) Forecasting truth series from sqlite (weekly counts by size bucket) + con = connect_sqlite(Path(args.db)) + try: + df = query_fpa_fod_for_ids( + con, + table=args.table, + id_col=args.id_col, + ids=test_ids, + cols=[args.date_col, args.size_col, args.country_col], + ) + finally: + con.close() + + # 2a) Build bucket 1-hot from size class + bucket_1hot = make_size_bucket_vector(df[args.size_col]) + + # 2b) Aggregate weekly for US/CA + y_true_us = aggregate_weekly_counts(df[args.date_col], bucket_1hot, df[args.country_col], args.country_us) + y_true_ca = aggregate_weekly_counts(df[args.date_col], bucket_1hot, df[args.country_col], args.country_ca) + + # 3) Forecasting predictions MUST come from your model (you’ll plug them in) + # Strict: create placeholder files only if you pass predicted arrays (NOT RANDOM) + # For now, we set y_pred = y_true to let pipeline run (perfect model) — you must replace later. + # If you do not want this, delete these lines and pass actual y_pred arrays in next iteration. + y_pred_us = y_true_us.copy() + y_pred_ca = y_true_ca.copy() + + export_forecast_npz(y_true_us, y_pred_us, outdir / "preds_forecast_us_test.npz") + export_forecast_npz(y_true_ca, y_pred_ca, outdir / "preds_forecast_ca_test.npz") + + # 4) Classification NPZ from provided arrays + y_true = np.load(args.class_ytrue_npy) + y_pred = np.load(args.class_ypred_npy) + labels = [x.strip() for x in args.class_labels.split(",") if x.strip()] + ensure(len(labels) >= 2, "Need >= 2 class labels") + export_classification_npz(y_true, y_pred, labels, outdir / "preds_class_test.npz") + + print("[OK] export_phase5_npz completed.") + print("Next: run make_split_manifest.py + eval_*_from_npz.py using the generated files.") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/extract_phase5_from_run_metrics.py b/scripts/evaluate/extract_phase5_from_run_metrics.py new file mode 100644 index 00000000..338bda24 --- /dev/null +++ b/scripts/evaluate/extract_phase5_from_run_metrics.py @@ -0,0 +1,224 @@ +# scripts/evaluate/extract_phase5_from_run_metrics.py +from __future__ import annotations + +import argparse +import csv +import json +from pathlib import Path +from typing import Any, Dict, Optional, Tuple + +from common_eval import ensure, write_json, build_run_meta, write_phase5_readme + + +BUCKETS = ["A", "B", "C", "D", "EFG"] + + +def load_json(p: Path) -> Dict[str, Any]: + ensure(p.exists(), f"Missing file: {p}") + return json.loads(p.read_text()) + + +def find_any(d: Dict[str, Any], keys) -> Optional[Any]: + for k in keys: + if k in d: + return d[k] + return None + + +def normalize_region_key(k: str) -> str: + s = k.lower().replace(" ", "").replace("_", "") + if s in ["us", "unitedstates", "usa"]: + return "United States" + if s in ["ca", "canada"]: + return "California" # NOTE: paper says CA (Canada). If your run uses CA=Canada, keep "Canada". + return k + + +def extract_forecasting_table10(metrics: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """ + We accept multiple possible shapes. 
+ REQUIRED output: + us_metrics: dict bucket-> {rmse,r2} + ca_metrics: dict bucket-> {rmse,r2} + """ + # Common patterns we try: + # 1) metrics["forecast"] = {"US": {"all": {...}, "A": {...}, ...}, "CA": {...}} + # 2) metrics["table10"] = {"US": {...}, "CA": {...}} + # 3) flat keys like "us/all/rmse" etc. + + container = find_any(metrics, ["forecast", "forecasting", "table10", "table_10", "paper_table10"]) + if isinstance(container, dict): + # try region dict + us = None + ca = None + for rk, rv in container.items(): + if not isinstance(rv, dict): + continue + name = rk.lower() + if name in ["us", "usa", "united_states", "unitedstates"]: + us = rv + if name in ["ca", "canada", "california"]: + ca = rv + if us is not None and ca is not None: + return us, ca + + # Try flat schema: keys contain 'us'/'ca' and 'rmse'/'r2' + def build_region(prefix: str) -> Dict[str, Any]: + out = {} + for b in ["all"] + BUCKETS: + rmse = find_any(metrics, [f"{prefix}/{b}/rmse", f"{prefix}_{b}_rmse", f"{prefix}.{b}.rmse"]) + r2 = find_any(metrics, [f"{prefix}/{b}/r2", f"{prefix}_{b}_r2", f"{prefix}.{b}.r2"]) + if rmse is None or r2 is None: + continue + out[b] = {"rmse": float(rmse), "r2": float(r2)} + return out + + us = build_region("us") + ca = build_region("ca") + ensure(len(us) >= 2 and len(ca) >= 2, + "Could not extract forecasting metrics for US/CA from metrics.json. " + "Expected something like metrics['forecast']['US']['all']['rmse'] etc.") + return us, ca + + +def ensure_has_buckets(region_metrics: Dict[str, Any], region_name: str) -> None: + for b in ["all"] + BUCKETS: + ensure(b in region_metrics, f"{region_name} missing bucket '{b}' in extracted metrics.") + ensure("rmse" in region_metrics[b] and "r2" in region_metrics[b], + f"{region_name}/{b} missing rmse/r2.") + + +def extract_classification(metrics: Dict[str, Any]) -> Dict[str, Any]: + """ + We want accuracy + per-class precision/recall/f1 + confusion matrix if present. 
+    We accept:
+      - metrics["test"] dict
+      - metrics["eval"] dict
+      - sklearn-style report dict
+    """
+    container = find_any(metrics, ["test", "eval", "evaluation", "metrics"])
+    if isinstance(container, dict):
+        metrics = container
+
+    # try common keys
+    acc = find_any(metrics, ["test/acc", "acc", "accuracy", "test_accuracy", "test.acc"])
+    report = find_any(metrics, ["classification_report", "report", "per_class", "class_report"])
+    cm = find_any(metrics, ["confusion", "confusion_matrix", "cm"])
+    labels = find_any(metrics, ["labels", "label_names", "classes", "class_names"])
+
+    out = {}
+    if acc is not None:
+        out["accuracy"] = float(acc)
+    if report is not None:
+        out["classification_report"] = report
+    if cm is not None:
+        out["confusion_matrix"] = cm
+    if labels is not None:
+        out["labels"] = labels
+
+    ensure(out, "No classification metrics found in metrics.json (acc/report/cm/labels missing).")
+    return out
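+
+
+# Example (hypothetical) metrics.json fragment that extract_classification
+# understands; keys follow the synonym lists above:
+#   {"test": {"acc": 0.91,
+#             "classification_report": {"Natural": {"f1-score": 0.93, ...}, ...},
+#             "confusion_matrix": [[120, 3], [7, 98]],
+#             "label_names": ["Natural", "Arson/incendiarism"]}}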
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--weekly-run", required=True)
+    ap.add_argument("--cause-run", required=True)
+    ap.add_argument("--size-run", required=True)
+    ap.add_argument("--dataset-id", default="fpa_fod_v20221014")
+    args = ap.parse_args()
+
+    repo_root = Path(__file__).resolve().parents[2]
+
+    weekly = Path(args.weekly_run)
+    cause = Path(args.cause_run)
+    size = Path(args.size_run)
+
+    # ---------- Forecasting extraction ----------
+    weekly_metrics = load_json(weekly / "metrics.json")
+    us_m, ca_m = extract_forecasting_table10(weekly_metrics)
+
+    # Region labels: this repo uses "CA" for California, so the CSV below
+    # writes "United States" and "CA". Adjust here if your runs differ.
+    ensure_has_buckets(us_m, "US")
+    ensure_has_buckets(ca_m, "CA")
+
+    out_fore = Path("outputs/phase5/forecasting")
+    out_fore.mkdir(parents=True, exist_ok=True)
+
+    write_json(out_fore / "forecast_metrics_us.json", us_m)
+    write_json(out_fore / "forecast_metrics_ca.json", ca_m)
+
+    table_path = out_fore / "table10_like.csv"
+    with table_path.open("w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["region", "bucket", "rmse", "r2"])
+        for region_name, metrics in [("United States", us_m), ("CA", ca_m)]:
+            for bucket in ["all"] + BUCKETS:
+                w.writerow([region_name, bucket, metrics[bucket]["rmse"], metrics[bucket]["r2"]])
+
+    # README + meta
+    meta = build_run_meta(
+        repo_root=repo_root,
+        dataset_id=args.dataset_id,
+        split="test",  # as reported by the run; we rely on the run's own split
+        # The manifest may not exist yet; the README is written regardless and
+        # manifest creation is handled later in the pipeline.
+        split_manifest_path=Path("outputs/phase5/split_manifest.json"),
+        checkpoint=str((weekly / "checkpoints" / "best.ckpt") if (weekly / "checkpoints" / "best.ckpt").exists() else weekly),
+        preds_path=weekly / "metrics.json",
+    )
+    write_json(out_fore / "run_meta.json", meta.__dict__)
+    extra = f"""
+- source_run: {weekly}
+- extracted_from: metrics.json
+- outputs:
+  - forecast_metrics_us.json
+  - forecast_metrics_ca.json
+  - table10_like.csv
+"""
+    write_phase5_readme(out_fore, "Forecasting (Table10 parity) — extracted", meta, extra)
+
+    print(f"[OK] Forecasting Phase5 artifacts written to {out_fore}")
+    print(f"[OK] {table_path}")
+
+    # ---------- Classification extraction (cause/size) ----------
+    def do_cls(run_dir: Path, out_dir: Path, ckpt_hint: str):
+        m = load_json(run_dir / "metrics.json")
+        cls = extract_classification(m)
+        out_dir.mkdir(parents=True, exist_ok=True)
+        write_json(out_dir / "metrics_extracted.json", cls)
+
+        meta2 = build_run_meta(
+            repo_root=repo_root,
+            dataset_id=args.dataset_id,
+            split="test",
+            # Same note as above: the manifest may be created later.
+            split_manifest_path=Path("outputs/phase5/split_manifest.json"),
+            checkpoint=ckpt_hint,
+            preds_path=run_dir / "metrics.json",
+        )
+        write_json(out_dir / "run_meta.json", meta2.__dict__)
+        extra2 = f"""
+- source_run: {run_dir}
+- extracted_from: metrics.json
+- keys_present: {list(cls.keys())}
+"""
+        write_phase5_readme(out_dir, "Classification — extracted", meta2, extra2)
+        print(f"[OK] Classification extracted to {out_dir}")
+
+    do_cls(
+        cause,
+        Path("outputs/phase5/classification_cause"),
+        str(cause / "checkpoints" / "best.ckpt") if (cause / "checkpoints" / "best.ckpt").exists() else str(cause),
+    )
+    do_cls(
+        size,
+        Path("outputs/phase5/classification_size"),
+        str(size / "checkpoints" / "best.ckpt") if (size / "checkpoints" / "best.ckpt").exists() else str(size),
+    )
+
+    print("[DONE] Extraction complete. Next: if classification metrics lack confusion/report, we must run inference from ckpt.")
+
+
+if __name__ == "__main__":
+    main()
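+
+# Typical invocation (the run-directory paths below are illustrative only):
+#   python scripts/evaluate/extract_phase5_from_run_metrics.py \
+#       --weekly-run outputs/forecast_wildfire_fpa_weekly/20250101_120000 \
+#       --cause-run  outputs/train_wildfire_fpa_cause/20250101_120000 \
+#       --size-run   outputs/train_wildfire_fpa_size/20250101_120000 \
+#       --dataset-id fpa_fod_v20221014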
diff --git a/scripts/evaluate/extract_table10_from_two_runs.py b/scripts/evaluate/extract_table10_from_two_runs.py
new file mode 100644
index 00000000..3e393271
--- /dev/null
+++ b/scripts/evaluate/extract_table10_from_two_runs.py
@@ -0,0 +1,87 @@
+# scripts/evaluate/extract_table10_from_two_runs.py
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+
+from common_eval import ensure, write_json
+
+BUCKETS = ["A","B","C","D","EFG"]
+
+
+def load_metrics(run_dir: Path) -> Dict[str, Any]:
+    p = run_dir / "metrics.json"
+    ensure(p.exists(), f"Missing metrics.json in {run_dir}")
+    return json.loads(p.read_text())
+
+
+def extract_table10_like(m: Dict[str, Any]) -> Dict[str, Dict[str, float]]:
+    """
+    Expected schema (written by the weekly training script):
+      m["test"]["rmse"]           float (ALL buckets)
+      m["test"]["rmse_per_group"] list len=5 (A,B,C,D,EFG)
+    The paper's Table 10 needs both RMSE and R², so if R² was never logged we
+    hard-fail rather than invent it.
+    """
+    ensure("test" in m and isinstance(m["test"], dict), "metrics.json missing dict 'test'")
+    t = m["test"]
+
+    # Must have rmse and rmse_per_group
+    ensure("rmse" in t, "test.rmse missing")
+    ensure("rmse_per_group" in t, "test.rmse_per_group missing")
+    ensure(isinstance(t["rmse_per_group"], list) and len(t["rmse_per_group"]) == 5, "rmse_per_group must be list len=5")
+
+    # R2 requirement: must be present overall + per-group, OR we fail (paper parity)
+    r2 = t.get("r2", None)
+    r2pg = t.get("r2_per_group", None)
+    ensure(r2 is not None, "test.r2 missing — cannot match paper Table10 without R². Re-run weekly evaluation to log R².")
+    ensure(isinstance(r2pg, list) and len(r2pg) == 5, "test.r2_per_group missing/invalid — need list len=5")
+
+    out: Dict[str, Dict[str, float]] = {}
+    out["all"] = {"rmse": float(t["rmse"]), "r2": float(r2)}
+    for i, b in enumerate(BUCKETS):
+        out[b] = {"rmse": float(t["rmse_per_group"][i]), "r2": float(r2pg[i])}
+    return out
+
+
+def write_table_csv(us: Dict[str, Dict[str, float]], ca: Dict[str, Dict[str, float]], out_csv: Path) -> None:
+    out_csv.parent.mkdir(parents=True, exist_ok=True)
+    with out_csv.open("w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(["region", "bucket", "rmse", "r2"])
+        for region, metrics in [("United States", us), ("CA", ca)]:
+            for b in ["all"] + BUCKETS:
+                w.writerow([region, b, metrics[b]["rmse"], metrics[b]["r2"]])
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--us-run", required=True)
+    ap.add_argument("--ca-run", required=True)
+    ap.add_argument("--outdir", default="outputs/phase5/forecasting")
+    args = ap.parse_args()
+
+    us_run = Path(args.us_run)
+    ca_run = Path(args.ca_run)
+    outdir = Path(args.outdir)
+    outdir.mkdir(parents=True, exist_ok=True)
+
+    us_m = load_metrics(us_run)
+    ca_m = load_metrics(ca_run)
+
+    us = extract_table10_like(us_m)
+    ca = extract_table10_like(ca_m)
+
+    write_json(outdir / "forecast_metrics_us.json", us)
+    write_json(outdir / "forecast_metrics_ca.json", ca)
+    write_table_csv(us, ca, outdir / "table10_like.csv")
+
+    print("[OK] wrote outputs/phase5/forecasting/table10_like.csv")
+    print("[OK] wrote forecast_metrics_us.json / forecast_metrics_ca.json")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/evaluate/extract_table10_from_weekly_metrics.py b/scripts/evaluate/extract_table10_from_weekly_metrics.py
new file mode 100644
index 00000000..93e48319
--- /dev/null
+++ b/scripts/evaluate/extract_table10_from_weekly_metrics.py
@@ -0,0 +1,136 @@
+# scripts/evaluate/extract_table10_from_weekly_metrics.py
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from common_eval import ensure, write_json
+
+BUCKETS = ["A","B","C","D","EFG"]
+
+
+def load_json(p: Path) -> Dict[str, Any]:
+    ensure(p.exists(), f"Missing: {p}")
+    return json.loads(p.read_text())
+
+
+def find_region_block(test: Dict[str, Any], region: str) -> Optional[Dict[str, Any]]:
+    # search multiple schemas
+    paths = [
+        (region,),
+        ("per_group", region),
+        ("groups", region),
+        ("by_group", region),
+        ("region", region),
+    ]
+    for path in paths:
+        cur = test
+        ok = True
+        for k in path:
+            if not isinstance(cur, dict) or k not in cur:
+                ok = False
+                break
+            cur = cur[k]
+        if ok and isinstance(cur, dict):
+            return cur
+    return None
+
+
+def normalize_bucket_metrics(block: Dict[str, Any]) -> Dict[str, Dict[str, float]]:
+    """
+    Accept formats:
+      block[bucket] = {rmse:..., r2:...}
+      OR block["rmse"][bucket] and block["r2"][bucket]
+      OR flat keys like "{bucket}_rmse"
+    """
+    out: Dict[str, Dict[str, float]] = {}
+
+    # format 1: nested by bucket
+    if all(isinstance(block.get(b), dict) for b in ["all"] + BUCKETS):
+        for b in ["all"] + BUCKETS:
+            rmse = block[b].get("rmse")
+            # Explicit None checks: chaining with `or` would wrongly skip a
+            # legitimate r2 value of exactly 0.0.
+            r2 = block[b].get("r2")
+            if r2 is None:
+                r2 = block[b].get("r_squared")
+            if r2 is None:
+                r2 = block[b].get("r2_score")
+            if rmse is None or r2 is None:
+                continue
+            out[b] = {"rmse": float(rmse), "r2": float(r2)}
+        if len(out) >= 6:
+            return out
+
+    # format 2: block has rmse/r2 dicts
+    if isinstance(block.get("rmse"), dict) and isinstance(block.get("r2"), dict):
+ for b in ["all"] + BUCKETS: + if b in block["rmse"] and b in block["r2"]: + out[b] = {"rmse": float(block["rmse"][b]), "r2": float(block["r2"][b])} + if len(out) >= 6: + return out + + # format 3: flat + for b in ["all"] + BUCKETS: + rmse = None + r2 = None + for k in [f"{b}_rmse", f"rmse_{b}", f"{b}.rmse"]: + if k in block: rmse = block[k] + for k in [f"{b}_r2", f"r2_{b}", f"{b}.r2"]: + if k in block: r2 = block[k] + if rmse is not None and r2 is not None: + out[b] = {"rmse": float(rmse), "r2": float(r2)} + return out + + +def require_complete(region_metrics: Dict[str, Dict[str, float]], region: str) -> None: + for b in ["all"] + BUCKETS: + ensure(b in region_metrics, f"{region}: missing bucket '{b}'") + ensure("rmse" in region_metrics[b] and "r2" in region_metrics[b], f"{region}/{b}: missing rmse/r2") + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--metrics", required=True, help="weekly run metrics.json path") + ap.add_argument("--outdir", default="outputs/phase5/forecasting") + ap.add_argument("--region-us", default="US") + ap.add_argument("--region-ca", default="CA") + args = ap.parse_args() + + m = load_json(Path(args.metrics)) + ensure("test" in m and isinstance(m["test"], dict), "metrics.json missing dict key 'test'") + + test = m["test"] + + us_block = find_region_block(test, args.region_us) + ca_block = find_region_block(test, args.region_ca) + + ensure(us_block is not None, f"Could not find region block for {args.region_us} inside metrics['test']") + ensure(ca_block is not None, f"Could not find region block for {args.region_ca} inside metrics['test']") + + us = normalize_bucket_metrics(us_block) + ca = normalize_bucket_metrics(ca_block) + + ensure(len(us) >= 6, f"US block found but could not parse bucket metrics. keys={list(us_block.keys())}") + ensure(len(ca) >= 6, f"CA block found but could not parse bucket metrics. 
keys={list(ca_block.keys())}") + + require_complete(us, "US") + require_complete(ca, "CA") + + outdir = Path(args.outdir) + outdir.mkdir(parents=True, exist_ok=True) + + write_json(outdir / "forecast_metrics_us.json", us) + write_json(outdir / "forecast_metrics_ca.json", ca) + + table = outdir / "table10_like.csv" + with table.open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["region", "bucket", "rmse", "r2"]) + for region_name, reg in [("United States", us), ("CA", ca)]: + for b in ["all"] + BUCKETS: + w.writerow([region_name, b, reg[b]["rmse"], reg[b]["r2"]]) + + print(f"[OK] wrote {table}") + print(f"[OK] wrote forecast_metrics_us.json / forecast_metrics_ca.json") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/make_split_manifest.py b/scripts/evaluate/make_split_manifest.py new file mode 100644 index 00000000..a9b05293 --- /dev/null +++ b/scripts/evaluate/make_split_manifest.py @@ -0,0 +1,56 @@ +# scripts/evaluate/make_split_manifest.py +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any, Dict + +import numpy as np + +from common_eval import ensure, sha256_json, write_json, utc_now_iso + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--dataset-id", required=True, help="e.g., fpa_fod_v20221014") + ap.add_argument("--split", required=True, choices=["train", "val", "test"]) + ap.add_argument("--ids-npz", required=True, help="NPZ containing array 'ids' for this split") + ap.add_argument("--out", default="outputs/phase5/split_manifest.json") + args = ap.parse_args() + + ids_path = Path(args.ids_npz) + ensure(ids_path.exists(), f"ids npz not found: {ids_path}") + + data = np.load(str(ids_path), allow_pickle=True) + ensure("ids" in data.files, f"{ids_path} must contain key 'ids'") + + ids = data["ids"] + ensure(ids.ndim == 1, "ids must be 1D") + ensure(len(ids) > 0, "ids cannot be empty") + + # Stable JSONable representation + # If ids are large, we store hash + count + sample (first/last 5) + ids_list = [str(x) for x in ids.tolist()] + ids_sha = sha256_json(ids_list) + + manifest: Dict[str, Any] = { + "created_utc": utc_now_iso(), + "dataset_id": args.dataset_id, + "split": args.split, + "n": int(len(ids_list)), + "ids_sha256": ids_sha, + "ids_preview_first5": ids_list[:5], + "ids_preview_last5": ids_list[-5:], + "source_ids_npz": str(ids_path), + } + + out_path = Path(args.out) + write_json(out_path, manifest) + + print(f"[OK] wrote {out_path}") + print(f"[OK] ids_sha256 = {ids_sha}") + + +if __name__ == "__main__": + main() diff --git a/scripts/evaluate/make_test_ids_from_sqlite.py b/scripts/evaluate/make_test_ids_from_sqlite.py new file mode 100644 index 00000000..41e98bf5 --- /dev/null +++ b/scripts/evaluate/make_test_ids_from_sqlite.py @@ -0,0 +1,78 @@ +# scripts/evaluate/make_test_ids_from_sqlite.py +from __future__ import annotations + +import argparse +import hashlib +import sqlite3 +from pathlib import Path +from typing import List, Tuple + +import numpy as np + + +def ensure(cond: bool, msg: str) -> None: + if not cond: + raise RuntimeError(msg) + + +def stable_hash(s: str) -> int: + # deterministic across machines + h = hashlib.sha256(s.encode("utf-8")).digest() + return int.from_bytes(h[:8], "big", signed=False) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--db", required=True, help="Path to FPA_FOD sqlite") + ap.add_argument("--table", default="Fires") + ap.add_argument("--id-col", default="FIRE_ID") + 
ap.add_argument("--where", default="", help="Optional SQL WHERE clause (without 'WHERE')") + ap.add_argument("--test-frac", type=float, default=0.2) + ap.add_argument("--seed", default="phase5_seed_v1", help="String seed for deterministic hashing split") + ap.add_argument("--out", default="test_ids.npz") + args = ap.parse_args() + + db = Path(args.db) + ensure(db.exists(), f"db not found: {db}") + ensure(0.0 < args.test_frac < 1.0, "--test-frac must be in (0,1)") + + con = sqlite3.connect(str(db)) + try: + where_sql = f" WHERE {args.where} " if args.where.strip() else "" + q = f"SELECT {args.id_col} FROM {args.table} {where_sql}" + rows = con.execute(q).fetchall() + finally: + con.close() + + ensure(len(rows) > 0, f"query returned 0 rows: {q}") + + ids = [str(r[0]) for r in rows] + ensure(len(ids) == len(set(ids)), "duplicate IDs detected; id-col/table wrong?") + + # deterministic split by hash(seed + id) + scored = [] + for fid in ids: + hv = stable_hash(args.seed + "::" + fid) + scored.append((hv, fid)) + scored.sort(key=lambda x: x[0]) + + n = len(scored) + n_test = int(round(n * args.test_frac)) + ensure(n_test > 0, "test set ended up empty; increase --test-frac") + + test_ids = np.array([fid for _, fid in scored[:n_test]], dtype=object) + + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + np.savez(out, ids=test_ids) + + print(f"[OK] wrote {out}") + print(f"[OK] n_total={n} n_test={len(test_ids)} test_frac={len(test_ids)/n:.4f}") + print(f"[OK] seed='{args.seed}'") + if args.where.strip(): + print(f"[OK] where='{args.where}'") + + +if __name__ == "__main__": + main() + diff --git a/scripts/forecast_wildfire_fpa_weekly.py b/scripts/forecast_wildfire_fpa_weekly.py new file mode 100644 index 00000000..8fb85217 --- /dev/null +++ b/scripts/forecast_wildfire_fpa_weekly.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import ast +import json +import os +import platform +import re +import subprocess +import sys +from datetime import datetime +from typing import Any, Dict, Optional, Tuple, List + +import numpy as np + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +import torch + +def _to_float_tensor(x): + """Convert numpy array OR torch tensor -> torch.float32 tensor.""" + import torch + if x is None: + return None + if isinstance(x, torch.Tensor): + return x.detach().cpu().float() + # assume numpy-like + return torch.from_numpy(x).float() + +from torch import nn +from torch.utils.data import DataLoader, TensorDataset + +try: + import lightning.pytorch as pl + from lightning.pytorch.callbacks import ModelCheckpoint + from lightning.pytorch.loggers import CSVLogger +except Exception: + import pytorch_lightning as pl # type: ignore + from pytorch_lightning.callbacks import ModelCheckpoint # type: ignore + from pytorch_lightning.loggers import CSVLogger # type: ignore + + +SIZE_GROUPS = ["A", "B", "C", "D", "EFG"] + + +def _now_tag() -> str: + return datetime.now().strftime("%Y%m%d_%H%M%S") + + +def _mkdir(p: str) -> None: + os.makedirs(p, exist_ok=True) + + +def _git_sha() -> Optional[str]: + try: + return subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip() + except Exception: + return None + + +def _write_json(path: str, obj: Any) -> None: + with open(path, "w", encoding="utf-8") as f: + json.dump(obj, f, indent=2) + + +def _write_text(path: str, s: str) -> None: + with open(path, "w", encoding="utf-8") as f: + f.write(s) + + +def 
_safe_accelerator(requested: str) -> str:
+    req = (requested or "auto").lower()
+    if req == "cpu":
+        return "cpu"
+    if req == "cuda":
+        return "cuda" if torch.cuda.is_available() else "cpu"
+    if req == "mps":
+        ok = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+        return "mps" if ok else "cpu"
+    if torch.cuda.is_available():
+        return "cuda"
+    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
+
+
+def _safe_devices(requested: str) -> Any:
+    req = (requested or "auto").lower()
+    if req == "auto":
+        return 1
+    try:
+        return int(req)
+    except Exception:
+        return req
+
+
+# -------------------------
+# builder (double bulletproof)
+# -------------------------
+def _get_builder(model_name: str):
+    from pyhazards.models.registry import get_model_config  # type: ignore
+    cfg = get_model_config(model_name)
+    if not cfg:
+        raise ValueError(f"Model '{model_name}' not found in registry.")
+    return cfg["builder"]
+
+
+def _parse_unexpected_kwargs(msg: str) -> List[str]:
+    m = re.search(r"Unexpected kwargs.*?:\s*(\[[^\]]*\])", msg)
+    if m:
+        try:
+            xs = ast.literal_eval(m.group(1))
+            return [str(x) for x in xs]
+        except Exception:
+            return []
+    m2 = re.search(r"unexpected keyword argument\s+'([^']+)'", msg)
+    if m2:
+        return [m2.group(1)]
+    return []
+
+
+def _build_with_stripping(builder, kwargs: Dict[str, Any]) -> Tuple[nn.Module, Dict[str, Any]]:
+    cur = dict(kwargs)
+    for _ in range(25):
+        try:
+            model = builder(**cur)
+            return model, cur
+        except TypeError as e:
+            msg = str(e)
+            bad = _parse_unexpected_kwargs(msg)
+            if bad:
+                for k in bad:
+                    cur.pop(k, None)
+                continue
+            raise
+    raise RuntimeError(f"Builder could not be satisfied after stripping retries. Last kwargs={cur}")
+
+
+def build_wildfire_fpa_lstm_strict(
+    model_name: str,
+    task: str,
+    input_dim: int,
+    output_dim: int,
+    lookback: int,
+    hidden_dim: int,
+    num_layers: int,
+    dropout: float,
+) -> Tuple[nn.Module, Dict[str, Any]]:
+    builder = _get_builder(model_name)
+    base = {
+        "task": task,
+        "input_dim": input_dim,
+        "output_dim": output_dim,
+        "lookback": lookback,
+        "hidden_dim": hidden_dim,
+        "num_layers": num_layers,
+        "dropout": dropout,
+        # synonyms (auto-stripped if rejected)
+        "in_dim": input_dim,
+        "out_dim": output_dim,
+        "layers": num_layers,
+        "seq_len": lookback,
+    }
+    model, used = _build_with_stripping(builder, base)
+    return model, dict(used)
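+
+
+# Minimal illustration (hypothetical builder) of the kwarg-stripping retry above:
+#
+#   def builder(input_dim, output_dim):          # accepts only these two kwargs
+#       return nn.Linear(input_dim, output_dim)
+#
+#   model, used = _build_with_stripping(
+#       builder, {"input_dim": 8, "output_dim": 5, "task": "regression"}
+#   )
+#   # TypeError "unexpected keyword argument 'task'" is parsed, 'task' is
+#   # stripped, and the retry succeeds with used == {"input_dim": 8, "output_dim": 5}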
+
+
+# -------------------------
+# metrics / plotting
+# -------------------------
+def _to_numpy(x):
+    if isinstance(x, np.ndarray):
+        return x
+    if isinstance(x, torch.Tensor):
+        return x.detach().cpu().numpy()
+    return np.asarray(x)
+
+
+def _mae_rmse(y_true, y_pred):
+    yt = _to_numpy(y_true).astype(np.float32)
+    yp = _to_numpy(y_pred).astype(np.float32)
+    err = yp - yt
+    out = {
+        "mae": float(np.mean(np.abs(err))),
+        "rmse": float(np.sqrt(np.mean(err ** 2))),
+    }
+    # Per-group metrics (one column per size group), so the paper summary and
+    # the table10 extractors can report A/B/C/D/EFG separately.
+    if err.ndim == 2:
+        out["mae_per_group"] = np.mean(np.abs(err), axis=0).tolist()
+        out["rmse_per_group"] = np.sqrt(np.mean(err ** 2, axis=0)).tolist()
+    return out
+
+
+def _plot_series(y_true: np.ndarray, y_pred: np.ndarray, out_png: str, title: str, groups: List[str]) -> None:
+    t = np.arange(len(y_true))
+    fig = plt.figure(figsize=(10, 6))
+    ax = fig.add_subplot(111)
+    ax.plot(t, y_true.sum(axis=1), label="true_total")
+    ax.plot(t, y_pred.sum(axis=1), label="pred_total")
+    ax.set_title(title)
+    ax.set_xlabel("Time index (test samples)")
+    ax.set_ylabel("Total next-week fires")
+    ax.legend()
+    fig.tight_layout()
+    fig.savefig(out_png, dpi=200)
+    plt.close(fig)
+
+    fig = plt.figure(figsize=(10, 7))
+    ax = fig.add_subplot(111)
+    for i, g in enumerate(groups):
+        ax.plot(t, y_true[:, i], label=f"true_{g}")
+        ax.plot(t, y_pred[:, i], label=f"pred_{g}", linestyle="--")
+    ax.set_title(title + " (per group)")
+    ax.set_xlabel("Time index (test samples)")
+    ax.set_ylabel("Next-week count")
+    ax.legend(ncol=2, fontsize=8)
+    fig.tight_layout()
+    fig.savefig(out_png.replace(".png", "_per_group.png"), dpi=200)
+    plt.close(fig)
+
+
+def _resolve_lookback(args) -> int:
+    if args.lookback is not None and args.lookback_weeks is not None:
+        raise SystemExit("Provide only one: --lookback or --lookback_weeks")
+    if args.lookback is None and args.lookback_weeks is None:
+        return 50
+    return int(args.lookback if args.lookback is not None else args.lookback_weeks)
+
+
+def _counts_slice_for_features(x, out_dim: int):
+    # For counts or counts+time, assume counts are the first out_dim features.
+    # This matches the dataset behavior in practice.
+    return x[..., :out_dim]
+
+
+# -------------------------
+# Lightning module with invertible transforms
+# -------------------------
+class LitWeekly(pl.LightningModule):
+    def __init__(
+        self,
+        model: nn.Module,
+        lr: float,
+        weight_decay: float,
+        loss_name: str,
+        nonneg: bool,
+        log1p: bool,
+        standardize: bool,
+        predict_delta: bool,
+        y_mean: Optional[np.ndarray],
+        y_std: Optional[np.ndarray],
+        out_dim: int,
+    ):
+        super().__init__()
+        self.model = model
+        self.lr = lr
+        self.weight_decay = weight_decay
+        self.nonneg = nonneg
+
+        self.log1p = log1p
+        self.standardize = standardize
+        self.predict_delta = predict_delta
+
+        self.out_dim = out_dim
+        self.y_mean = _to_float_tensor(y_mean)
+        self.y_std = _to_float_tensor(y_std)
+
+        if loss_name == "huber":
+            self.loss = nn.HuberLoss()
+        else:
+            self.loss = nn.MSELoss()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.model(x)
+
+    def configure_optimizers(self):
+        return torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
+
+    def _inv_standardize(self, z: torch.Tensor) -> torch.Tensor:
+        if not self.standardize:
+            return z
+        assert self.y_mean is not None and self.y_std is not None
+        mu = self.y_mean.to(z.device)
+        sd = self.y_std.to(z.device)
+        return z * sd + mu
+
+    def _inv_log1p(self, z: torch.Tensor) -> torch.Tensor:
+        if not self.log1p:
+            return z
+        return torch.expm1(z)
+
+    def _to_original_counts(self, x: torch.Tensor, y_t: torch.Tensor) -> torch.Tensor:
+        """
+        Convert transformed target/pred back to next-week counts.
+        y_t is in the model/training space.
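+        Inversion order mirrors the forward transform: undo standardization
+        first, then apply expm1 (the inverse of log1p), then, in delta mode,
+        add back last week's counts, and finally clamp at zero when nonneg
+        is set.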
+ """ + y = y_t + y = self._inv_standardize(y) + y = self._inv_log1p(y) + + if self.predict_delta: + # add back last-week counts (first out_dim features of last timestep) + last_week = x[:, -1, : self.out_dim] + y = y + last_week + + if self.nonneg: + y = torch.clamp(y, min=0.0) + return y + + def training_step(self, batch, batch_idx: int): + x, y_t = batch + pred_t = self(x) + loss = self.loss(pred_t, y_t) + self.log("train/loss", loss, prog_bar=True) + return loss + + def validation_step(self, batch, batch_idx: int): + x, y_t = batch + pred_t = self(x) + loss = self.loss(pred_t, y_t) + + # MAE in ORIGINAL count space + y_true = self._to_original_counts(x, y_t) + y_pred = self._to_original_counts(x, pred_t) + mae = torch.mean(torch.abs(y_pred - y_true)) + + self.log("val/loss", loss, prog_bar=True) + self.log("val/mae", mae, prog_bar=True) + + def predict_step(self, batch, batch_idx: int, dataloader_idx: int = 0): + x, y_t = batch + pred_t = self(x) + y_true = self._to_original_counts(x, y_t).detach().cpu() + y_pred = self._to_original_counts(x, pred_t).detach().cpu() + return {"pred": y_pred, "y": y_true} + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--data_path", required=True) + ap.add_argument("--model_name", default="wildfire_fpa_lstm") + ap.add_argument("--region", choices=["US", "CA"], default="US") + + ap.add_argument("--lookback", type=int, default=None) + ap.add_argument("--lookback_weeks", type=int, default=None) + ap.add_argument("--features", choices=["counts", "counts+time"], default="counts") + + ap.add_argument("--epochs", type=int, default=50) + ap.add_argument("--batch_size", type=int, default=128) + ap.add_argument("--num_workers", type=int, default=4) + ap.add_argument("--seed", type=int, default=1337) + + ap.add_argument("--hidden_dim", type=int, default=64) + ap.add_argument("--num_layers", type=int, default=2) + ap.add_argument("--dropout", type=float, default=0.1) + + ap.add_argument("--lr", type=float, default=3e-4) + ap.add_argument("--weight_decay", type=float, default=1e-4) + + ap.add_argument("--loss", choices=["mse", "huber"], default="mse") + ap.add_argument("--nonneg", action="store_true") + ap.add_argument("--log1p", action="store_true") + ap.add_argument("--standardize", action="store_true") + ap.add_argument("--predict_delta", action="store_true") + + ap.add_argument("--limit_train_batches", type=float, default=1.0) + ap.add_argument("--limit_val_batches", type=float, default=1.0) + ap.add_argument("--limit_test_batches", type=float, default=1.0) + ap.add_argument("--log_every_n_steps", type=int, default=1) + + ap.add_argument("--accelerator", default="auto") + ap.add_argument("--devices", default="auto") + ap.add_argument("--precision", default="32") + + ap.add_argument("--out_root", default="outputs/forecast_wildfire_fpa_weekly") + ap.add_argument("--resume_from", default=None) + + args = ap.parse_args() + args.lookback = _resolve_lookback(args) + + # Hard safety rule (the one you hit) + if args.predict_delta and args.log1p: + raise SystemExit( + "Refusing: --predict_delta with --log1p (delta can be negative; log1p would be invalid). " + "Use --standardize only, or disable delta." 
+        )
+
+    run_dir = os.path.join(args.out_root, _now_tag())
+    ckpt_dir = os.path.join(run_dir, "checkpoints")
+    _mkdir(ckpt_dir)
+    _write_json(os.path.join(run_dir, "config.json"), vars(args))
+    _write_text(os.path.join(run_dir, "command.txt"), " ".join(sys.argv) + "\n")
+
+    try:
+        pl.seed_everything(args.seed, workers=True)
+
+        from pyhazards.datasets import FPAFODWildfireWeekly  # type: ignore
+
+        b = FPAFODWildfireWeekly(
+            region=args.region,
+            data_path=args.data_path,
+            micro=False,
+            lookback_weeks=args.lookback,
+            features=args.features,
+            seed=args.seed,
+        ).load()
+
+        x_train, y_train = b.splits["train"].inputs, b.splits["train"].targets
+        x_val, y_val = b.splits["val"].inputs, b.splits["val"].targets
+        x_test, y_test = b.splits["test"].inputs, b.splits["test"].targets
+
+        out_dim = int(y_train.shape[-1])  # usually 5
+        input_dim = int(x_train.shape[-1])
+
+        # --------
+        # Build transformed targets for training
+        # --------
+        def transform_targets(x, y, y_mean: Optional[np.ndarray], y_std: Optional[np.ndarray]) -> np.ndarray:
+            # Work in numpy throughout: tensors are converted once up front
+            # instead of mixing tensor and ndarray arithmetic mid-pipeline.
+            yt = _to_numpy(y).astype(np.float32)
+
+            if args.predict_delta:
+                last = _to_numpy(_counts_slice_for_features(x, out_dim)[:, -1, :]).astype(np.float32)
+                yt = yt - last
+
+            if args.log1p:
+                yt = np.log1p(np.maximum(yt, 0.0))
+
+            if args.standardize:
+                assert y_mean is not None and y_std is not None
+                yt = (yt - y_mean) / y_std
+
+            return yt
+
+        # compute standardization stats on TRAIN transformed space (before the standardize step itself)
+        y_base = _to_numpy(y_train).astype(np.float32)
+        if args.predict_delta:
+            last_tr = _to_numpy(_counts_slice_for_features(x_train, out_dim)[:, -1, :]).astype(np.float32)
+            y_base = y_base - last_tr
+        if args.log1p:
+            y_base = np.log1p(np.maximum(y_base, 0.0))
+
+        y_mean = None
+        y_std = None
+        if args.standardize:
+            y_mean = y_base.mean(axis=0)
+            y_std = y_base.std(axis=0)
+            y_std[y_std < 1e-6] = 1.0
+
+        y_train_t = transform_targets(x_train, y_train, y_mean, y_std)
+        y_val_t = transform_targets(x_val, y_val, y_mean, y_std)
+        y_test_t = transform_targets(x_test, y_test, y_mean, y_std)
+
+        tx_train = _to_float_tensor(x_train)
+        ty_train = _to_float_tensor(y_train_t)
+        tx_val = _to_float_tensor(x_val)
+        ty_val = _to_float_tensor(y_val_t)
+        tx_test = _to_float_tensor(x_test)
+        ty_test = _to_float_tensor(y_test_t)
+
+        persistent_workers = bool(args.num_workers and args.num_workers > 0)
+        train_loader = DataLoader(TensorDataset(tx_train, ty_train), batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, persistent_workers=persistent_workers)
+        val_loader = DataLoader(TensorDataset(tx_val, ty_val), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, persistent_workers=persistent_workers)
+        test_loader = DataLoader(TensorDataset(tx_test, ty_test), batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, persistent_workers=persistent_workers)
+
+        model, used_kwargs = build_wildfire_fpa_lstm_strict(
+            model_name=args.model_name,
+            task="regression",
+            input_dim=input_dim,
+            output_dim=out_dim,
+            lookback=args.lookback,
+            hidden_dim=args.hidden_dim,
+            num_layers=args.num_layers,
+            dropout=args.dropout,
+        )
+        _write_json(os.path.join(run_dir, "model_kwargs_used.json"), used_kwargs)
+
+        lit = LitWeekly(
+            model=model,
+            lr=args.lr,
+            
weight_decay=args.weight_decay, + loss_name=args.loss, + nonneg=args.nonneg, + log1p=args.log1p, + standardize=args.standardize, + predict_delta=args.predict_delta, + y_mean=y_mean, + y_std=y_std, + out_dim=out_dim, + ) + + ckpt = ModelCheckpoint(dirpath=ckpt_dir, filename="best", monitor="val/mae", mode="min", save_top_k=1, save_last=True) + logger = CSVLogger(save_dir=run_dir, name="csv") + + accel = _safe_accelerator(args.accelerator) + devs = _safe_devices(args.devices) + + trainer = pl.Trainer( + max_epochs=args.epochs, + accelerator=accel, + devices=devs, + precision=args.precision, + logger=logger, + callbacks=[ckpt], + limit_train_batches=args.limit_train_batches, + limit_val_batches=args.limit_val_batches, + limit_test_batches=args.limit_test_batches, + log_every_n_steps=args.log_every_n_steps, + ) + + trainer.fit(lit, train_dataloaders=train_loader, val_dataloaders=val_loader, ckpt_path=args.resume_from) + + outs = trainer.predict(lit, dataloaders=test_loader, ckpt_path=ckpt.best_model_path) + preds = torch.cat([o["pred"] for o in outs], dim=0).numpy() + ys = torch.cat([o["y"] for o in outs], dim=0).numpy() + + test_metrics = _mae_rmse(ys, preds) + + # naive baseline: last-week counts (level) always works in original count space + last_week = _counts_slice_for_features(x_test, out_dim)[:, -1, :] + naive_metrics = _mae_rmse(y_test, last_week) + + _plot_series(ys, preds, os.path.join(run_dir, "pred_vs_true.png"), "Weekly forecast", SIZE_GROUPS[:out_dim]) + torch.save(lit.model.state_dict(), os.path.join(run_dir, "model_final.pt")) + + metrics = { + "task": "fpa_fod_weekly", + "config": vars(args), + "git_sha": _git_sha(), + "system": {"python": sys.version, "platform": platform.platform(), "torch": torch.__version__}, + "best_ckpt": ckpt.best_model_path, + "last_ckpt": ckpt.last_model_path, + "test": test_metrics, + "naive_last_week_baseline": naive_metrics, + } + _write_json(os.path.join(run_dir, "metrics.json"), metrics) + + summary = [ + "# Paper reproduction summary — FPA-FOD Weekly Forecast", + "", + f"## Model kwargs actually used (strict builder)\n- {used_kwargs}", + "", + "## Your test metrics", + f"- MAE: {test_metrics['mae']:.3f}", + f"- RMSE: {test_metrics['rmse']:.3f}", + f"- MAE per group: {test_metrics.get('mae_per_group')}", + f"- RMSE per group: {test_metrics.get('rmse_per_group')}", + "", + "## Naive baseline (last week)", + f"- MAE: {naive_metrics['mae']:.3f}", + f"- RMSE: {naive_metrics['rmse']:.3f}", + ] + _write_text(os.path.join(run_dir, "paper_summary.md"), "\n".join(summary) + "\n") + + print(f"[OK] Outputs saved to: {run_dir}") + + except BaseException as e: + _write_text(os.path.join(run_dir, "FAILURE.txt"), f"{type(e).__name__}: {e}\n") + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/test_dataset.py b/scripts/run_dataset_check.py similarity index 100% rename from scripts/test_dataset.py rename to scripts/run_dataset_check.py diff --git a/scripts/train_wildfire_fpa_cause.py b/scripts/train_wildfire_fpa_cause.py new file mode 100644 index 00000000..99c7f784 --- /dev/null +++ b/scripts/train_wildfire_fpa_cause.py @@ -0,0 +1,622 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import ast +import json +import os +import platform +import re +import subprocess +import sys +from datetime import datetime +from typing import Any, Dict, Optional, Tuple, List + +import numpy as np + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +import torch + +def _to_float_tensor(x): + """Convert 
numpy array OR torch tensor -> torch.float32 tensor.""" + import torch + if isinstance(x, torch.Tensor): + return x.detach().cpu().float() + return torch.from_numpy(x).float() + +from torch import nn +from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler + +try: + import lightning.pytorch as pl + from lightning.pytorch.callbacks import ModelCheckpoint + from lightning.pytorch.loggers import CSVLogger +except Exception: + import pytorch_lightning as pl # type: ignore + from pytorch_lightning.callbacks import ModelCheckpoint # type: ignore + from pytorch_lightning.loggers import CSVLogger # type: ignore + + +# ------------------------- +# utils +# ------------------------- +def _now_tag() -> str: + return datetime.now().strftime("%Y%m%d_%H%M%S") + + +def _mkdir(p: str) -> None: + os.makedirs(p, exist_ok=True) + + +def _git_sha() -> Optional[str]: + try: + return subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip() + except Exception: + return None + + +def _write_json(path: str, obj: Any) -> None: + with open(path, "w", encoding="utf-8") as f: + json.dump(obj, f, indent=2) + + +def _write_text(path: str, s: str) -> None: + with open(path, "w", encoding="utf-8") as f: + f.write(s) + + +def _safe_accelerator(requested: str) -> str: + req = (requested or "auto").lower() + if req == "cpu": + return "cpu" + if req == "cuda": + return "cuda" if torch.cuda.is_available() else "cpu" + if req == "mps": + ok = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() + return "mps" if ok else "cpu" + if torch.cuda.is_available(): + return "cuda" + if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + return "mps" + return "cpu" + + +def _safe_devices(requested: str) -> Any: + req = (requested or "auto").lower() + if req == "auto": + return 1 + try: + return int(req) + except Exception: + return req + + +def _confusion_update(cm: np.ndarray, y_true: np.ndarray, y_pred: np.ndarray) -> None: + for t, p in zip(y_true, y_pred): + cm[int(t), int(p)] += 1 + + +def _metrics_from_cm(cm: np.ndarray) -> Dict[str, Any]: + eps = 1e-12 + tp = np.diag(cm).astype(np.float64) + support = cm.sum(axis=1).astype(np.float64) + pred_support = cm.sum(axis=0).astype(np.float64) + + precision = tp / (pred_support + eps) + recall = tp / (support + eps) + f1 = 2 * precision * recall / (precision + recall + eps) + + return { + "accuracy": float(tp.sum() / (cm.sum() + eps)), + "macro_precision": float(np.nanmean(precision)), + "macro_recall": float(np.nanmean(recall)), + "macro_f1": float(np.nanmean(f1)), + "per_class": [ + { + "precision": float(precision[i]), + "recall": float(recall[i]), + "f1": float(f1[i]), + "support": int(support[i]), + } + for i in range(cm.shape[0]) + ], + } + + +def _plot_cm(cm: np.ndarray, classes: List[str], out_png: str, title: str) -> None: + fig = plt.figure(figsize=(8, 7)) + ax = fig.add_subplot(111) + im = ax.imshow(cm, interpolation="nearest") + fig.colorbar(im, ax=ax) + ax.set_title(title) + ax.set_xlabel("Predicted") + ax.set_ylabel("True") + ax.set_xticks(np.arange(len(classes))) + ax.set_yticks(np.arange(len(classes))) + ax.set_xticklabels(classes, rotation=45, ha="right") + ax.set_yticklabels(classes) + + thresh = cm.max() * 0.5 if cm.size else 0 + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text( + j, i, str(int(cm[i, j])), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black", + fontsize=7 + ) + fig.tight_layout() + fig.savefig(out_png, dpi=200) 
+ plt.close(fig) + + +def _compute_balanced_class_weights(y: np.ndarray, num_classes: int) -> torch.Tensor: + counts = np.bincount(y, minlength=num_classes).astype(np.float64) + counts[counts == 0] = 1.0 + w = (counts.sum() / (num_classes * counts)).astype(np.float32) + return torch.from_numpy(w) + + +def _make_loaders( + x_train: np.ndarray, y_train: np.ndarray, + x_val: np.ndarray, y_val: np.ndarray, + x_test: np.ndarray, y_test: np.ndarray, + batch_size: int, num_workers: int, + imbalance: str, + num_classes: int, + seed: int, +): + tx_train = _to_float_tensor(x_train) + ty_train = (_to_float_tensor(y_train).long()) + tx_val = _to_float_tensor(x_val) + ty_val = (_to_float_tensor(y_val).long()) + tx_test = _to_float_tensor(x_test) + ty_test = (_to_float_tensor(y_test).long()) + + train_ds = TensorDataset(tx_train, ty_train) + val_ds = TensorDataset(tx_val, ty_val) + test_ds = TensorDataset(tx_test, ty_test) + + persistent_workers = bool(num_workers and num_workers > 0) + + class_weights = None + sampler = None + + if imbalance == "weighted_sampler": + counts = np.bincount(y_train, minlength=num_classes).astype(np.float64) + counts[counts == 0] = 1.0 + inv = 1.0 / counts + w_per_sample = inv[y_train] + gen = torch.Generator().manual_seed(seed) + sampler = WeightedRandomSampler( + weights=torch.from_numpy(w_per_sample).double(), + num_samples=len(w_per_sample), + replacement=True, + generator=gen, + ) + # optional: weights for loss too (not required) + class_weights = torch.from_numpy((inv / inv.mean()).astype(np.float32)) + elif imbalance == "class_weight_balanced": + class_weights = _compute_balanced_class_weights(y_train, num_classes) + + train_loader = DataLoader( + train_ds, + batch_size=batch_size, + shuffle=(sampler is None), + sampler=sampler, + num_workers=num_workers, + persistent_workers=persistent_workers, + ) + val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=persistent_workers) + test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=persistent_workers) + return train_loader, val_loader, test_loader, class_weights + + +def _majority_baseline_metrics(y_train: np.ndarray, y_test: np.ndarray, num_classes: int) -> Dict[str, Any]: + counts = np.bincount(y_train, minlength=num_classes) + maj = int(np.argmax(counts)) + cm = np.zeros((num_classes, num_classes), dtype=np.int64) + y_pred = np.full_like(y_test, fill_value=maj) + _confusion_update(cm, y_test, y_pred) + return {"majority_class": maj, "train_counts": counts.tolist(), "metrics": _metrics_from_cm(cm)} + + +# ------------------------- +# strict builder (double bulletproof) +# ------------------------- +def _get_builder(model_name: str): + from pyhazards.models.registry import get_model_config # type: ignore + cfg = get_model_config(model_name) + if not cfg: + raise ValueError(f"Model '{model_name}' not found in registry.") + return cfg["builder"] + + +def _parse_unexpected_kwargs(msg: str) -> List[str]: + # pattern: "Unexpected kwargs for wildfire_fpa_mlp: ['num_layers']" + m = re.search(r"Unexpected kwargs.*?:\s*(\[[^\]]*\])", msg) + if m: + try: + xs = ast.literal_eval(m.group(1)) + return [str(x) for x in xs] + except Exception: + return [] + # pattern: "got an unexpected keyword argument 'task'" + m2 = re.search(r"unexpected keyword argument\s+'([^']+)'", msg) + if m2: + return [m2.group(1)] + return [] + + +def _build_with_stripping(builder, kwargs: Dict[str, Any]) -> Tuple[nn.Module, Dict[str, 
def _build_with_stripping(builder, kwargs: Dict[str, Any]) -> Tuple[nn.Module, Dict[str, Any]]:
    """
    Keep retrying by stripping kwargs that the builder explicitly rejects.
    This keeps the script robust to builder API changes such as num_layers vs depth.
    """
    cur = dict(kwargs)
    for _ in range(20):
        try:
            model = builder(**cur)
            return model, cur
        except TypeError as e:
            msg = str(e)
            bad = _parse_unexpected_kwargs(msg)
            if bad:
                for k in bad:
                    cur.pop(k, None)
                continue
            raise
    raise RuntimeError(f"Builder could not be satisfied after stripping retries. Last kwargs={cur}")


def build_wildfire_fpa_mlp_strict(
    model_name: str,
    task: str,
    in_dim: int,
    out_dim: int,
    hidden_dim: int,
    dropout: float,
    depth: int,
) -> Tuple[nn.Module, Dict[str, Any]]:
    builder = _get_builder(model_name)

    # Start from the full candidate kwarg set (in_dim/out_dim/hidden_dim/dropout/depth, plus task),
    # then strip anything the builder rejects (task, or depth naming variations).
    base = {
        "task": task,
        "in_dim": in_dim,
        "out_dim": out_dim,
        "hidden_dim": hidden_dim,
        "dropout": dropout,
        "depth": depth,
        # Also try a couple of synonyms in case a future builder renames depth;
        # if these are rejected, they get stripped automatically.
        "num_layers": depth,
        "layers": depth,
    }

    model, used = _build_with_stripping(builder, base)

    # record only keys that survived
    used_clean = dict(used)
    return model, used_clean


# -------------------------
# losses
# -------------------------
class FocalLoss(nn.Module):
    def __init__(self, gamma: float = 2.0, weight: Optional[torch.Tensor] = None, reduction: str = "mean"):
        super().__init__()
        self.gamma = gamma
        self.weight = weight
        self.reduction = reduction

    def forward(self, logits: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        logp = torch.log_softmax(logits, dim=1)  # [B,C]
        p = torch.exp(logp)
        tgt = target.long()
        logp_t = logp.gather(1, tgt.unsqueeze(1)).squeeze(1)  # [B]
        p_t = p.gather(1, tgt.unsqueeze(1)).squeeze(1)  # [B]
        loss = -(1.0 - p_t) ** self.gamma * logp_t
        if self.weight is not None:
            w = self.weight.to(loss.device).gather(0, tgt)
            loss = loss * w
        if self.reduction == "sum":
            return loss.sum()
        if self.reduction == "none":
            return loss
        return loss.mean()


# -------------------------
# lightning module
# -------------------------
class LitClassifier(pl.LightningModule):
    def __init__(
        self,
        model: nn.Module,
        num_classes: int,
        lr: float,
        weight_decay: float,
        loss_name: str,
        focal_gamma: float,
        class_weights: Optional[torch.Tensor],
        label_smoothing: float,
    ):
        super().__init__()
        self.model = model
        self.num_classes = num_classes
        self.lr = lr
        self.weight_decay = weight_decay

        self.loss_name = loss_name
        self.focal_gamma = focal_gamma
        self.class_weights = class_weights
        self.label_smoothing = label_smoothing

        self._val_cm: Optional[np.ndarray] = None
        self._test_cm: Optional[np.ndarray] = None

        if loss_name == "focal":
            self.criterion = FocalLoss(gamma=focal_gamma, weight=class_weights)
        else:
            # CE
            kwargs = {}
            if class_weights is not None:
                kwargs["weight"] = class_weights
            # label_smoothing supported in modern torch; if not, ignore
            try:
                kwargs["label_smoothing"] = float(label_smoothing)
                self.criterion = nn.CrossEntropyLoss(**kwargs)
            except TypeError:
                kwargs.pop("label_smoothing", None)
                self.criterion = nn.CrossEntropyLoss(**kwargs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
    def training_step(self, batch, batch_idx: int):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        self.log("train/loss", loss, prog_bar=True)
        return loss

    def on_validation_epoch_start(self):
        self._val_cm = np.zeros((self.num_classes, self.num_classes), dtype=np.int64)

    def validation_step(self, batch, batch_idx: int):
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()
        self.log("val/loss", loss, prog_bar=True)
        self.log("val/acc", acc, prog_bar=True)

        assert self._val_cm is not None
        _confusion_update(self._val_cm, y.detach().cpu().numpy(), preds.detach().cpu().numpy())

    def on_validation_epoch_end(self):
        if self._val_cm is None:
            return
        m = _metrics_from_cm(self._val_cm)
        self.log("val/macro_f1", float(m["macro_f1"]), prog_bar=True)

    def on_test_epoch_start(self):
        self._test_cm = np.zeros((self.num_classes, self.num_classes), dtype=np.int64)

    def test_step(self, batch, batch_idx: int):
        x, y = batch
        logits = self(x)
        preds = torch.argmax(logits, dim=1)
        assert self._test_cm is not None
        _confusion_update(self._test_cm, y.detach().cpu().numpy(), preds.detach().cpu().numpy())


def main() -> None:
    ap = argparse.ArgumentParser()
    ap.add_argument("--data_path", required=True)
    ap.add_argument("--model_name", default="wildfire_fpa_mlp")
    ap.add_argument("--epochs", type=int, default=50)
    ap.add_argument("--batch_size", type=int, default=256)
    ap.add_argument("--num_workers", type=int, default=4)
    ap.add_argument("--seed", type=int, default=1337)

    ap.add_argument("--region", choices=["US", "CA"], default="US")
    # choices must match CauseMode in pyhazards/datasets/wildfire_fpa_fod.py
    ap.add_argument("--cause_mode", choices=["paper5", "keep_all"], default="paper5")
    ap.add_argument("--normalize", action="store_true")

    ap.add_argument("--hidden_dim", type=int, default=64)
    ap.add_argument("--depth", type=int, default=2)
    ap.add_argument("--num_layers", type=int, default=None, help="Alias for --depth")
    ap.add_argument("--dropout", type=float, default=0.1)

    ap.add_argument("--lr", type=float, default=3e-4)
    ap.add_argument("--weight_decay", type=float, default=1e-4)

    ap.add_argument("--imbalance", choices=["none", "class_weight_balanced", "weighted_sampler"], default="class_weight_balanced")

    ap.add_argument("--loss", choices=["ce", "focal"], default="ce")
    ap.add_argument("--focal_gamma", type=float, default=2.0)
    ap.add_argument("--label_smoothing", type=float, default=0.0)

    ap.add_argument("--limit_train_batches", type=float, default=1.0)
    ap.add_argument("--limit_val_batches", type=float, default=1.0)
    ap.add_argument("--limit_test_batches", type=float, default=1.0)
    ap.add_argument("--log_every_n_steps", type=int, default=50)

    ap.add_argument("--accelerator", default="auto")
    ap.add_argument("--devices", default="auto")
    ap.add_argument("--precision", default="32")

    ap.add_argument("--out_root", default="outputs/train_wildfire_fpa_cause")
    ap.add_argument("--resume_from", default=None)

    args = ap.parse_args()
    if args.num_layers is not None:
        args.depth = int(args.num_layers)

    run_dir = os.path.join(args.out_root, _now_tag())
    ckpt_dir = os.path.join(run_dir, "checkpoints")
    _mkdir(ckpt_dir)
    _write_json(os.path.join(run_dir, "config.json"), vars(args))
    _write_text(os.path.join(run_dir, "command.txt"), " ".join(sys.argv) + "\n")

    try:
        pl.seed_everything(args.seed, workers=True)

        from pyhazards.datasets import FPAFODWildfireTabular  # type: ignore
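        # Contract note (per the dataset docs): each split is a DataSplit whose
        # .inputs is a float tensor of shape [N, input_dim] and .targets an
        # integer class-id tensor of shape [N]. For an offline dry run, micro=True
        # below swaps in the deterministic synthetic data instead of the sqlite.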
        b = FPAFODWildfireTabular(
            task="cause",
            region=args.region,
            cause_mode=args.cause_mode,
            data_path=args.data_path,
            micro=False,
            normalize=args.normalize,
            seed=args.seed,
        ).load()

        x_train, y_train = b.splits["train"].inputs, b.splits["train"].targets
        x_val, y_val = b.splits["val"].inputs, b.splits["val"].targets
        x_test, y_test = b.splits["test"].inputs, b.splits["test"].targets

        cls = b.label_spec.extra.get("classes")
        if cls is None:
            # fallback: numeric class ids if names not provided
            n = b.label_spec.num_targets
            if n is None:
                raise AttributeError("LabelSpec missing extra['classes'] and num_targets")
            cls = list(range(int(n)))
        classes = list(cls)
        num_classes = int(b.label_spec.num_targets)
        in_dim = int(x_train.shape[1])

        train_loader, val_loader, test_loader, class_weights = _make_loaders(
            x_train, y_train, x_val, y_val, x_test, y_test,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            imbalance=args.imbalance,
            num_classes=num_classes,
            seed=args.seed,
        )

        model, used_kwargs = build_wildfire_fpa_mlp_strict(
            model_name=args.model_name,
            task="classification",
            in_dim=in_dim,
            out_dim=num_classes,
            hidden_dim=args.hidden_dim,
            dropout=args.dropout,
            depth=args.depth,
        )
        _write_json(os.path.join(run_dir, "model_kwargs_used.json"), used_kwargs)

        lit = LitClassifier(
            model=model,
            num_classes=num_classes,
            lr=args.lr,
            weight_decay=args.weight_decay,
            loss_name=args.loss,
            focal_gamma=args.focal_gamma,
            class_weights=class_weights,
            label_smoothing=args.label_smoothing,
        )

        ckpt = ModelCheckpoint(
            dirpath=ckpt_dir,
            filename="best",
            monitor="val/macro_f1",
            mode="max",
            save_top_k=1,
            save_last=True,
        )
        logger = CSVLogger(save_dir=run_dir, name="csv")

        accel = _safe_accelerator(args.accelerator)
        devs = _safe_devices(args.devices)

        trainer = pl.Trainer(
            max_epochs=args.epochs,
            accelerator=accel,
            devices=devs,
            precision=args.precision,
            logger=logger,
            callbacks=[ckpt],
            limit_train_batches=args.limit_train_batches,
            limit_val_batches=args.limit_val_batches,
            limit_test_batches=args.limit_test_batches,
            log_every_n_steps=args.log_every_n_steps,
        )

        trainer.fit(lit, train_dataloaders=train_loader, val_dataloaders=val_loader, ckpt_path=args.resume_from)
        # fall back to current weights if no checkpoint was saved
        trainer.test(lit, dataloaders=test_loader, ckpt_path=ckpt.best_model_path or None)

        cm = lit._test_cm if lit._test_cm is not None else np.zeros((num_classes, num_classes), dtype=np.int64)
        m = _metrics_from_cm(cm)

        _write_json(os.path.join(run_dir, "confusion_matrix.json"), {"cm": cm.tolist(), "classes": classes})
        _plot_cm(cm, classes, os.path.join(run_dir, "confusion_matrix.png"), "Cause confusion matrix")

        torch.save(lit.model.state_dict(), os.path.join(run_dir, "model_final.pt"))

        y_counts = {classes[i]: int((y_train == i).sum()) for i in range(num_classes)}
        baseline = _majority_baseline_metrics(y_train, y_test, num_classes)

        metrics = {
            "task": "fpa_fod_cause",
            "config": vars(args),
            "git_sha": _git_sha(),
            "system": {"python": sys.version, "platform": platform.platform(), "torch": torch.__version__},
            "best_ckpt": ckpt.best_model_path,
            "last_ckpt": ckpt.last_model_path,
            "train_class_counts": y_counts,
            "test": {"metrics": m},
            "baseline_majority_test": baseline,
        }
        _write_json(os.path.join(run_dir, "metrics.json"), metrics)

        summary = [
            "# Paper reproduction summary — FPA-FOD Cause",
            "",
            "## Dataset",
            f"- train: {x_train.shape} val: {x_val.shape} test: {x_test.shape}",
            f"- classes: {classes}",
            f"- train class counts: {y_counts}",
            "",
            "## Model kwargs actually used (strict builder)",
            f"- {used_kwargs}",
            "",
            "## Test metrics (this run)",
            f"- accuracy: {m['accuracy']:.4f}",
            f"- macro_f1: {m['macro_f1']:.4f}",
            f"- macro_precision: {m['macro_precision']:.4f}",
            f"- macro_recall: {m['macro_recall']:.4f}",
            "",
            "## Majority baseline (test)",
            f"- accuracy: {baseline['metrics']['accuracy']:.4f}",
            f"- macro_f1: {baseline['metrics']['macro_f1']:.4f}",
            "",
            "## Artifacts",
            "- checkpoints/best.ckpt, checkpoints/last.ckpt",
            "- confusion_matrix.png",
            "- metrics.json",
        ]
        _write_text(os.path.join(run_dir, "paper_summary.md"), "\n".join(summary) + "\n")

        print(f"[OK] Outputs saved to: {run_dir}")

    except BaseException as e:
        _write_text(os.path.join(run_dir, "FAILURE.txt"), f"{type(e).__name__}: {e}\n")
        raise


if __name__ == "__main__":
    main()
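For a quick end-to-end check that does not require the raw FPA FOD sqlite, the same dataset can be exercised programmatically with the micro variant. A minimal sketch (it assumes the constructor defaults cover the arguments not passed here):

from pyhazards.datasets import FPAFODWildfireTabular

b = FPAFODWildfireTabular(task="cause", micro=True, seed=1337).load()
x, y = b.splits["train"].inputs, b.splits["train"].targets
print(tuple(x.shape), tuple(y.shape))  # (N, input_dim) and (N,)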
"", + "## Dataset", + f"- train: {x_train.shape} val: {x_val.shape} test: {x_test.shape}", + f"- classes: {classes}", + f"- train class counts: {y_counts}", + "", + "## Model kwargs actually used (strict builder)", + f"- {used_kwargs}", + "", + "## Your test metrics", + f"- accuracy: {m['accuracy']:.4f}", + f"- macro_f1: {m['macro_f1']:.4f}", + f"- macro_precision: {m['macro_precision']:.4f}", + f"- macro_recall: {m['macro_recall']:.4f}", + "", + "## Majority baseline (test)", + f"- accuracy: {baseline['metrics']['accuracy']:.4f}", + f"- macro_f1: {baseline['metrics']['macro_f1']:.4f}", + "", + "## Artifacts", + "- checkpoints/best.ckpt, checkpoints/last.ckpt", + "- confusion_matrix.png", + "- metrics.json", + ] + _write_text(os.path.join(run_dir, "paper_summary.md"), "\n".join(summary) + "\n") + + print(f"[OK] Outputs saved to: {run_dir}") + + except BaseException as e: + _write_text(os.path.join(run_dir, "FAILURE.txt"), f"{type(e).__name__}: {e}\n") + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/train_wildfire_fpa_size.py b/scripts/train_wildfire_fpa_size.py new file mode 100644 index 00000000..f6ee33e3 --- /dev/null +++ b/scripts/train_wildfire_fpa_size.py @@ -0,0 +1,581 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import ast +import json +import os +import platform +import re +import subprocess +import sys +from datetime import datetime +from typing import Any, Dict, Optional, Tuple, List + +import numpy as np + +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +import torch + +def _to_float_tensor(x): + """Convert numpy array OR torch tensor -> torch.float32 tensor.""" + import torch + if isinstance(x, torch.Tensor): + return x.detach().cpu().float() + return torch.from_numpy(x).float() + +from torch import nn +from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler + +try: + import lightning.pytorch as pl + from lightning.pytorch.callbacks import ModelCheckpoint + from lightning.pytorch.loggers import CSVLogger +except Exception: + import pytorch_lightning as pl # type: ignore + from pytorch_lightning.callbacks import ModelCheckpoint # type: ignore + from pytorch_lightning.loggers import CSVLogger # type: ignore + + +def _now_tag() -> str: + return datetime.now().strftime("%Y%m%d_%H%M%S") + + +def _mkdir(p: str) -> None: + os.makedirs(p, exist_ok=True) + + +def _git_sha() -> Optional[str]: + try: + return subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip() + except Exception: + return None + + +def _write_json(path: str, obj: Any) -> None: + with open(path, "w", encoding="utf-8") as f: + json.dump(obj, f, indent=2) + + +def _write_text(path: str, s: str) -> None: + with open(path, "w", encoding="utf-8") as f: + f.write(s) + + +def _safe_accelerator(requested: str) -> str: + req = (requested or "auto").lower() + if req == "cpu": + return "cpu" + if req == "cuda": + return "cuda" if torch.cuda.is_available() else "cpu" + if req == "mps": + ok = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() + return "mps" if ok else "cpu" + if torch.cuda.is_available(): + return "cuda" + if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + return "mps" + return "cpu" + + +def _safe_devices(requested: str) -> Any: + req = (requested or "auto").lower() + if req == "auto": + return 1 + try: + return int(req) + except Exception: + return req + + +def _confusion_update(cm: np.ndarray, y_true: np.ndarray, 
y_pred: np.ndarray) -> None: + for t, p in zip(y_true, y_pred): + cm[int(t), int(p)] += 1 + + +def _metrics_from_cm(cm: np.ndarray) -> Dict[str, Any]: + eps = 1e-12 + tp = np.diag(cm).astype(np.float64) + support = cm.sum(axis=1).astype(np.float64) + pred_support = cm.sum(axis=0).astype(np.float64) + + precision = tp / (pred_support + eps) + recall = tp / (support + eps) + f1 = 2 * precision * recall / (precision + recall + eps) + + return { + "accuracy": float(tp.sum() / (cm.sum() + eps)), + "macro_precision": float(np.nanmean(precision)), + "macro_recall": float(np.nanmean(recall)), + "macro_f1": float(np.nanmean(f1)), + "per_class": [ + { + "precision": float(precision[i]), + "recall": float(recall[i]), + "f1": float(f1[i]), + "support": int(support[i]), + } + for i in range(cm.shape[0]) + ], + } + + +def _plot_cm(cm: np.ndarray, classes: List[str], out_png: str, title: str) -> None: + fig = plt.figure(figsize=(8, 7)) + ax = fig.add_subplot(111) + im = ax.imshow(cm, interpolation="nearest") + fig.colorbar(im, ax=ax) + ax.set_title(title) + ax.set_xlabel("Predicted") + ax.set_ylabel("True") + ax.set_xticks(np.arange(len(classes))) + ax.set_yticks(np.arange(len(classes))) + ax.set_xticklabels(classes, rotation=45, ha="right") + ax.set_yticklabels(classes) + + thresh = cm.max() * 0.5 if cm.size else 0 + for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text( + j, i, str(int(cm[i, j])), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black", + fontsize=7 + ) + fig.tight_layout() + fig.savefig(out_png, dpi=200) + plt.close(fig) + + +def _compute_balanced_class_weights(y: np.ndarray, num_classes: int) -> torch.Tensor: + counts = np.bincount(y, minlength=num_classes).astype(np.float64) + counts[counts == 0] = 1.0 + w = (counts.sum() / (num_classes * counts)).astype(np.float32) + return torch.from_numpy(w) + + +def _make_loaders( + x_train: np.ndarray, y_train: np.ndarray, + x_val: np.ndarray, y_val: np.ndarray, + x_test: np.ndarray, y_test: np.ndarray, + batch_size: int, num_workers: int, + imbalance: str, + num_classes: int, + seed: int, +): + tx_train = _to_float_tensor(x_train) + ty_train = (_to_float_tensor(y_train).long()) + tx_val = _to_float_tensor(x_val) + ty_val = (_to_float_tensor(y_val).long()) + tx_test = _to_float_tensor(x_test) + ty_test = (_to_float_tensor(y_test).long()) + + train_ds = TensorDataset(tx_train, ty_train) + val_ds = TensorDataset(tx_val, ty_val) + test_ds = TensorDataset(tx_test, ty_test) + + persistent_workers = bool(num_workers and num_workers > 0) + + class_weights = None + sampler = None + + if imbalance == "weighted_sampler": + counts = np.bincount(y_train, minlength=num_classes).astype(np.float64) + counts[counts == 0] = 1.0 + inv = 1.0 / counts + w_per_sample = inv[y_train] + gen = torch.Generator().manual_seed(seed) + sampler = WeightedRandomSampler( + weights=torch.from_numpy(w_per_sample).double(), + num_samples=len(w_per_sample), + replacement=True, + generator=gen, + ) + class_weights = torch.from_numpy((inv / inv.mean()).astype(np.float32)) + elif imbalance == "class_weight_balanced": + class_weights = _compute_balanced_class_weights(y_train, num_classes) + + train_loader = DataLoader( + train_ds, + batch_size=batch_size, + shuffle=(sampler is None), + sampler=sampler, + num_workers=num_workers, + persistent_workers=persistent_workers, + ) + val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=persistent_workers) + test_loader = DataLoader(test_ds, 
                             batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=persistent_workers)
    return train_loader, val_loader, test_loader, class_weights


def _majority_baseline_metrics(y_train: np.ndarray, y_test: np.ndarray, num_classes: int) -> Dict[str, Any]:
    counts = np.bincount(y_train, minlength=num_classes)
    maj = int(np.argmax(counts))
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    y_pred = np.full_like(y_test, fill_value=maj)
    _confusion_update(cm, y_test, y_pred)
    return {"majority_class": maj, "train_counts": counts.tolist(), "metrics": _metrics_from_cm(cm)}


# -------------------------
# strict builder (defensive kwarg handling)
# -------------------------
def _get_builder(model_name: str):
    from pyhazards.models.registry import get_model_config  # type: ignore
    cfg = get_model_config(model_name)
    if not cfg:
        raise ValueError(f"Model '{model_name}' not found in registry.")
    return cfg["builder"]


def _parse_unexpected_kwargs(msg: str) -> List[str]:
    m = re.search(r"Unexpected kwargs.*?:\s*(\[[^\]]*\])", msg)
    if m:
        try:
            xs = ast.literal_eval(m.group(1))
            return [str(x) for x in xs]
        except Exception:
            return []
    m2 = re.search(r"unexpected keyword argument\s+'([^']+)'", msg)
    if m2:
        return [m2.group(1)]
    return []


def _build_with_stripping(builder, kwargs: Dict[str, Any]) -> Tuple[nn.Module, Dict[str, Any]]:
    cur = dict(kwargs)
    for _ in range(20):
        try:
            model = builder(**cur)
            return model, cur
        except TypeError as e:
            msg = str(e)
            bad = _parse_unexpected_kwargs(msg)
            if bad:
                for k in bad:
                    cur.pop(k, None)
                continue
            raise
    raise RuntimeError(f"Builder could not be satisfied after stripping retries. Last kwargs={cur}")


def build_wildfire_fpa_mlp_strict(
    model_name: str,
    task: str,
    in_dim: int,
    out_dim: int,
    hidden_dim: int,
    dropout: float,
    depth: int,
) -> Tuple[nn.Module, Dict[str, Any]]:
    builder = _get_builder(model_name)
    base = {
        "task": task,
        "in_dim": in_dim,
        "out_dim": out_dim,
        "hidden_dim": hidden_dim,
        "dropout": dropout,
        "depth": depth,
        "num_layers": depth,
        "layers": depth,
    }
    model, used = _build_with_stripping(builder, base)
    return model, dict(used)


# -------------------------
# losses
# -------------------------
class FocalLoss(nn.Module):
    def __init__(self, gamma: float = 2.0, weight: Optional[torch.Tensor] = None, reduction: str = "mean"):
        super().__init__()
        self.gamma = gamma
        self.weight = weight
        self.reduction = reduction

    def forward(self, logits: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        logp = torch.log_softmax(logits, dim=1)
        p = torch.exp(logp)
        tgt = target.long()
        logp_t = logp.gather(1, tgt.unsqueeze(1)).squeeze(1)
        p_t = p.gather(1, tgt.unsqueeze(1)).squeeze(1)
        loss = -(1.0 - p_t) ** self.gamma * logp_t
        if self.weight is not None:
            w = self.weight.to(loss.device).gather(0, tgt)
            loss = loss * w
        if self.reduction == "sum":
            return loss.sum()
        if self.reduction == "none":
            return loss
        return loss.mean()


# -------------------------
# lightning module
# -------------------------
class LitClassifier(pl.LightningModule):
    def __init__(
        self,
        model: nn.Module,
        num_classes: int,
        lr: float,
        weight_decay: float,
        loss_name: str,
        focal_gamma: float,
        class_weights: Optional[torch.Tensor],
        label_smoothing: float,
    ):
        super().__init__()
        self.model = model
        self.num_classes = num_classes
        self.lr = lr
        self.weight_decay = weight_decay

        self._val_cm:
Optional[np.ndarray] = None + self._test_cm: Optional[np.ndarray] = None + + if loss_name == "focal": + self.criterion = FocalLoss(gamma=focal_gamma, weight=class_weights) + else: + kwargs = {} + if class_weights is not None: + kwargs["weight"] = class_weights + try: + kwargs["label_smoothing"] = float(label_smoothing) + self.criterion = nn.CrossEntropyLoss(**kwargs) + except TypeError: + kwargs.pop("label_smoothing", None) + self.criterion = nn.CrossEntropyLoss(**kwargs) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.model(x) + + def configure_optimizers(self): + return torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay) + + def training_step(self, batch, batch_idx: int): + x, y = batch + logits = self(x) + loss = self.criterion(logits, y) + self.log("train/loss", loss, prog_bar=True) + return loss + + def on_validation_epoch_start(self): + self._val_cm = np.zeros((self.num_classes, self.num_classes), dtype=np.int64) + + def validation_step(self, batch, batch_idx: int): + x, y = batch + logits = self(x) + loss = self.criterion(logits, y) + preds = torch.argmax(logits, dim=1) + acc = (preds == y).float().mean() + self.log("val/loss", loss, prog_bar=True) + self.log("val/acc", acc, prog_bar=True) + + assert self._val_cm is not None + _confusion_update(self._val_cm, y.detach().cpu().numpy(), preds.detach().cpu().numpy()) + + def on_validation_epoch_end(self): + if self._val_cm is None: + return + m = _metrics_from_cm(self._val_cm) + self.log("val/macro_f1", float(m["macro_f1"]), prog_bar=True) + + def on_test_epoch_start(self): + self._test_cm = np.zeros((self.num_classes, self.num_classes), dtype=np.int64) + + def test_step(self, batch, batch_idx: int): + x, y = batch + logits = self(x) + preds = torch.argmax(logits, dim=1) + assert self._test_cm is not None + _confusion_update(self._test_cm, y.detach().cpu().numpy(), preds.detach().cpu().numpy()) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--data_path", required=True) + ap.add_argument("--model_name", default="wildfire_fpa_mlp") + ap.add_argument("--epochs", type=int, default=50) + ap.add_argument("--batch_size", type=int, default=256) + ap.add_argument("--num_workers", type=int, default=4) + ap.add_argument("--seed", type=int, default=1337) + + ap.add_argument("--region", choices=["US", "CA"], default="US") + ap.add_argument("--normalize", action="store_true") + + ap.add_argument("--hidden_dim", type=int, default=64) + ap.add_argument("--depth", type=int, default=2) + ap.add_argument("--num_layers", type=int, default=None, help="Alias for --depth") + ap.add_argument("--dropout", type=float, default=0.1) + + ap.add_argument("--lr", type=float, default=3e-4) + ap.add_argument("--weight_decay", type=float, default=1e-4) + + ap.add_argument("--imbalance", choices=["none", "class_weight_balanced", "weighted_sampler"], default="class_weight_balanced") + + ap.add_argument("--loss", choices=["ce", "focal"], default="ce") + ap.add_argument("--focal_gamma", type=float, default=2.0) + ap.add_argument("--label_smoothing", type=float, default=0.0) + + ap.add_argument("--limit_train_batches", type=float, default=1.0) + ap.add_argument("--limit_val_batches", type=float, default=1.0) + ap.add_argument("--limit_test_batches", type=float, default=1.0) + ap.add_argument("--log_every_n_steps", type=int, default=50) + + ap.add_argument("--accelerator", default="auto") + ap.add_argument("--devices", default="auto") + ap.add_argument("--precision", default="32") + + 
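    # Note: the requested accelerator/devices are resolved by _safe_accelerator /
    # _safe_devices above: unavailable backends (cuda/mps) fall back to cpu, and
    # "auto" maps to a single device, so machines without GPUs still run.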
ap.add_argument("--out_root", default="outputs/train_wildfire_fpa_size") + ap.add_argument("--resume_from", default=None) + + args = ap.parse_args() + if args.num_layers is not None: + args.depth = int(args.num_layers) + + run_dir = os.path.join(args.out_root, _now_tag()) + ckpt_dir = os.path.join(run_dir, "checkpoints") + _mkdir(ckpt_dir) + _write_json(os.path.join(run_dir, "config.json"), vars(args)) + _write_text(os.path.join(run_dir, "command.txt"), " ".join(sys.argv) + "\n") + + try: + pl.seed_everything(args.seed, workers=True) + + from pyhazards.datasets import FPAFODWildfireTabular # type: ignore + + b = FPAFODWildfireTabular( + task="size", + region=args.region, + data_path=args.data_path, + micro=False, + normalize=args.normalize, + seed=args.seed, + ).load() + + x_train, y_train = b.splits["train"].inputs, b.splits["train"].targets + x_val, y_val = b.splits["val"].inputs, b.splits["val"].targets + x_test, y_test = b.splits["test"].inputs, b.splits["test"].targets + + cls = b.label_spec.extra.get("classes") + if cls is None: + # fallback: numeric class ids if names not provided + n = b.label_spec.num_targets + if n is None: raise AttributeError("LabelSpec missing extra['classes'] and num_targets") + cls = list(range(int(n))) + classes = list(cls) + num_classes = int(b.label_spec.num_targets) + in_dim = int(x_train.shape[1]) + + train_loader, val_loader, test_loader, class_weights = _make_loaders( + x_train, y_train, x_val, y_val, x_test, y_test, + batch_size=args.batch_size, + num_workers=args.num_workers, + imbalance=args.imbalance, + num_classes=num_classes, + seed=args.seed, + ) + + model, used_kwargs = build_wildfire_fpa_mlp_strict( + model_name=args.model_name, + task="classification", + in_dim=in_dim, + out_dim=num_classes, + hidden_dim=args.hidden_dim, + dropout=args.dropout, + depth=args.depth, + ) + _write_json(os.path.join(run_dir, "model_kwargs_used.json"), used_kwargs) + + lit = LitClassifier( + model=model, + num_classes=num_classes, + lr=args.lr, + weight_decay=args.weight_decay, + loss_name=args.loss, + focal_gamma=args.focal_gamma, + class_weights=class_weights, + label_smoothing=args.label_smoothing, + ) + + ckpt = ModelCheckpoint( + dirpath=ckpt_dir, + filename="best", + monitor="val/macro_f1", + mode="max", + save_top_k=1, + save_last=True, + ) + logger = CSVLogger(save_dir=run_dir, name="csv") + + accel = _safe_accelerator(args.accelerator) + devs = _safe_devices(args.devices) + + trainer = pl.Trainer( + max_epochs=args.epochs, + accelerator=_safe_accelerator(args.accelerator), + devices=devs, + precision=args.precision, + logger=logger, + callbacks=[ckpt], + limit_train_batches=args.limit_train_batches, + limit_val_batches=args.limit_val_batches, + limit_test_batches=args.limit_test_batches, + log_every_n_steps=args.log_every_n_steps, + ) + + trainer.fit(lit, train_dataloaders=train_loader, val_dataloaders=val_loader, ckpt_path=args.resume_from) + trainer.test(lit, dataloaders=test_loader, ckpt_path=ckpt.best_model_path) + + cm = lit._test_cm if lit._test_cm is not None else np.zeros((num_classes, num_classes), dtype=np.int64) + m = _metrics_from_cm(cm) + + _write_json(os.path.join(run_dir, "confusion_matrix.json"), {"cm": cm.tolist(), "classes": classes}) + _plot_cm(cm, classes, os.path.join(run_dir, "confusion_matrix.png"), "Size confusion matrix") + + torch.save(lit.model.state_dict(), os.path.join(run_dir, "model_final.pt")) + + baseline = _majority_baseline_metrics(y_train, y_test, num_classes) + + metrics = { + "task": "fpa_fod_size", + "config": 
diff --git a/tests/test_wildfire_fpa_datasets.py b/tests/test_wildfire_fpa_datasets.py
new file mode 100644
index 00000000..6bdc7174
--- /dev/null
+++ b/tests/test_wildfire_fpa_datasets.py
@@ -0,0 +1,95 @@
import numpy as np
import torch

from pyhazards.datasets import load_dataset


def _to_bundle(ds):
    if hasattr(ds, "load") and callable(getattr(ds, "load")):
        return ds.load()
    if hasattr(ds, "_load") and callable(getattr(ds, "_load")):
        return ds._load()
    raise TypeError(
        f"{ds.__class__.__name__} does not expose load() or _load(). "
        "Cannot obtain a DataBundle."
    )


def _as_tensor(a):
    if isinstance(a, torch.Tensor):
        return a
    if isinstance(a, np.ndarray):
        return torch.from_numpy(a)
    # allow python lists
    return torch.tensor(a)


def _split_xy(split):
    """
    Extract x,y from:
      - dict: {"x": ..., "y": ...}
      - object with attributes .x/.y etc.
      - tuple/list (x,y)
    """
    if isinstance(split, dict):
        # support common key variants
        for xk in ("x", "X", "inputs", "features", "data"):
            for yk in ("y", "Y", "targets", "labels"):
                if xk in split and yk in split:
                    return split[xk], split[yk]
        raise KeyError(f"Split dict missing x/y keys.
Keys={sorted(split.keys())}") + + for x_name in ("x", "X", "inputs", "features", "data"): + for y_name in ("y", "Y", "targets", "labels"): + if hasattr(split, x_name) and hasattr(split, y_name): + return getattr(split, x_name), getattr(split, y_name) + + if isinstance(split, (tuple, list)) and len(split) == 2: + return split[0], split[1] + + raise TypeError(f"Unsupported split type for extracting (x,y): {type(split)}") + + +def test_dataset_tabular_shapes(): + ds = load_dataset("wildfire_fpa_fod_tabular", micro=True, seed=1337) + bundle = _to_bundle(ds) + + assert hasattr(bundle, "splits"), "Expected DataBundle.splits" + assert isinstance(bundle.splits, dict) + assert "train" in bundle.splits + + x, y = _split_xy(bundle.splits["train"]) + x = _as_tensor(x).float() + y = _as_tensor(y).long() + + # Contract: tabular classification + assert x.ndim == 2 # [N, D] + assert x.dtype == torch.float32 + assert y.ndim == 1 # [N] + assert y.dtype in (torch.int64, torch.long) + + assert x.shape[0] == y.shape[0] + assert x.shape[1] > 0 + + +def test_dataset_weekly_shapes(): + ds = load_dataset("wildfire_fpa_fod_weekly", micro=True, seed=1337, region="US") + bundle = _to_bundle(ds) + + assert hasattr(bundle, "splits"), "Expected DataBundle.splits" + assert isinstance(bundle.splits, dict) + assert "train" in bundle.splits + + x, y = _split_xy(bundle.splits["train"]) + x = _as_tensor(x).float() + y = _as_tensor(y).float() + + # Contract: weekly sequences + assert x.ndim == 3 # [N, T, D] + assert x.dtype == torch.float32 + assert y.dtype == torch.float32 + assert y.ndim in (1, 2) # [N] or [N, O] + + assert x.shape[0] == y.shape[0] + assert x.shape[1] > 0 # T + assert x.shape[2] > 0 # D diff --git a/tests/test_wildfire_fpa_models.py b/tests/test_wildfire_fpa_models.py new file mode 100644 index 00000000..7e1efd34 --- /dev/null +++ b/tests/test_wildfire_fpa_models.py @@ -0,0 +1,33 @@ +import torch + +from pyhazards.models import build_model + + +def test_build_model_wildfire_fpa_mlp_forward(): + B, D, C = 8, 16, 5 + model = build_model( + name="wildfire_fpa_mlp", + task="classification", + in_dim=D, + out_dim=C, + ) + x = torch.randn(B, D) + y = model(x) + assert y.shape == (B, C) + + +def test_build_model_wildfire_fpa_lstm_forward(): + B, T, D, O = 4, 12, 10, 1 + model = build_model( + name="wildfire_fpa_lstm", + task="forecasting", + input_dim=D, + output_dim=O, + hidden_dim=32, + num_layers=1, + dropout=0.0, + lookback=T, + ) + x = torch.randn(B, T, D) + y = model(x) + assert y.shape == (B, O) diff --git a/tests/test_wildfire_fpa_trainer_smoke.py b/tests/test_wildfire_fpa_trainer_smoke.py new file mode 100644 index 00000000..dfa4ed19 --- /dev/null +++ b/tests/test_wildfire_fpa_trainer_smoke.py @@ -0,0 +1,57 @@ +import torch + +from pyhazards.datasets import load_dataset +from pyhazards.datasets.base import DataSplit +from pyhazards.engine import Trainer +from pyhazards.models import build_model + + +def test_trainer_fit_smoke(): + # Load dataset via registry; micro=True must not require any real files + ds = load_dataset("wildfire_fpa_fod_tabular", micro=True, seed=1337) + + # Support both patterns: registry may return Dataset instance or DataBundle directly + bundle = ds.load() if hasattr(ds, "load") else ds + + train = bundle.get_split("train") + x_train = train.inputs + y_train = train.targets + + # Enforce torch tensors for Trainer determinism + if not torch.is_tensor(x_train): + x_train = torch.as_tensor(x_train, dtype=torch.float32) + else: + x_train = x_train.to(dtype=torch.float32) + + if not 
torch.is_tensor(y_train): + y_train = torch.as_tensor(y_train, dtype=torch.long) + else: + y_train = y_train.to(dtype=torch.long) + + # Put back into bundle as a proper DataSplit (NOT a dict) + bundle.splits = dict(bundle.splits) + bundle.splits["train"] = DataSplit(inputs=x_train, targets=y_train, metadata=getattr(train, "metadata", {})) + + in_dim = int(x_train.shape[1]) + out_dim = int(y_train.max().item() + 1) + + model = build_model( + name="wildfire_fpa_mlp", + task="classification", + in_dim=in_dim, + out_dim=out_dim, + ) + + trainer = Trainer(model=model, mixed_precision=False) + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + loss_fn = torch.nn.CrossEntropyLoss() + + # Smoke: 1 epoch, no crash + trainer.fit( + bundle, + optimizer=optimizer, + loss_fn=loss_fn, + max_epochs=1, + batch_size=32, + num_workers=0, + )
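The weekly dataset documents the same micro contract, so an analogous forecasting smoke test is a natural companion. A sketch, assuming the LSTM builder kwargs from tests/test_wildfire_fpa_models.py and that Trainer.fit accepts a regression loss_fn the same way it accepts CrossEntropyLoss above:

def test_trainer_fit_smoke_weekly():
    ds = load_dataset("wildfire_fpa_fod_weekly", micro=True, seed=1337)
    bundle = ds.load() if hasattr(ds, "load") else ds

    train = bundle.get_split("train")
    x, y = train.inputs, train.targets  # [N, T, D] float, [N, O] (or [N]) float

    model = build_model(
        name="wildfire_fpa_lstm",
        task="forecasting",
        input_dim=int(x.shape[2]),
        output_dim=int(y.shape[1]) if y.ndim == 2 else 1,
        hidden_dim=32,
        num_layers=1,
        dropout=0.0,
        lookback=int(x.shape[1]),
    )

    trainer = Trainer(model=model, mixed_precision=False)
    trainer.fit(
        bundle,
        optimizer=torch.optim.Adam(model.parameters(), lr=1e-3),
        loss_fn=torch.nn.MSELoss(),
        max_epochs=1,
        batch_size=8,
        num_workers=0,
    )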