Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions spectf/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Author: Jake Lee, jake.h.lee@jpl.nasa.gov
"""

import os
from collections.abc import Callable
from typing import List

Expand All @@ -14,37 +15,34 @@
from torch.utils.data import Dataset

from spectf.toa import l1b_to_toa_arr
from spectf.utils import drop_bands
from spectf.utils import drop_bands, envi_header


class RasterDatasetTOA(Dataset):
"""A PyTorch dataset class for pixelwise access of top-of-atmosphere (TOA)
reflectance data derived from L1b rdn.

Attributes:
shape (tuple): The shape of the L1b rdn raster.
toa_arr (ndarray): The top-of-atmosphere reflectance data,
reshaped as a list of pixels.
banddef (np.array): The band wavelengths corresponding to the
`toa_arr` indices.
shape (tuple): Shape of the L1b rdn raster.
toa_arr (ndarray): TOA reflectance data reshaped as a list of pixels.
banddef (np.array): Band wavelengths corresponding to `toa_arr` indices.
metadata (dict): Metadata of the original raster image.
transform (callable, optional): Transformations or normalizations
for each pixel spectra.
transform (callable, optional): Transformations for each pixel spectra.

The class relies on the `l1b_to_toa_arr` function to process the input data
files and generate the TOA reflectance data.
Relies on the `l1b_to_toa_arr` function to process input data files and generate
TOA reflectance data.
"""

def __init__(
self,
rdnfp: str,
obsfp: str,
irrfp:str,
rm_bands:List[List[int]]=None,
irrfp: str,
rm_bands: List[List[int]]=None,
transform: Callable = None,
keep_bands: bool = False,
dtype: torch.dtype = torch.float,
device:torch.device = None,
device: torch.device = None,
):
""" Initialize the RasterDatasetTOA Dataset object.
Args:
Expand All @@ -57,7 +55,15 @@ def __init__(
"""
super().__init__()

self.toa_arr, self.banddef, self.metadata = l1b_to_toa_arr(rdnfp, obsfp, irrfp)
# Raster files
self.rdnhdr = envi_header(rdnfp)
self.obshdr = envi_header(obsfp)

assert os.path.exists(self.rdnhdr), f"Header file {self.rdnhdr} does not exist."
assert os.path.exists(self.obshdr), f"Header file {self.obshdr} does not exist."
assert os.path.exists(irrfp), f"Irradiance file {irrfp} does not exist."

self.toa_arr, self.banddef, self.metadata = l1b_to_toa_arr(self.rdnhdr, self.obshdr, irrfp)
self.shape = self.toa_arr.shape
self.toa_arr = self.toa_arr.reshape((self.shape[0] * self.shape[1],
self.shape[2]))
Expand Down
4 changes: 2 additions & 2 deletions spectf/toa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def l1b_to_toa_arr(rdnfp: str, obsfp: str, irrfp: str):
Converts Level 1b radiance data to top-of-atmosphere (TOA) reflectance.

Args:
rdnfp (str): File path to the radiance data (L1b product).
obsfp (str): File path to the observation data (L1b product).
rdnfp (str): File path to the radiance data (L1b product .hdr).
obsfp (str): File path to the observation data (L1b product .hdr).
irrfp (str): File path to the irradiance data (.npy).

Returns:
Expand Down
24 changes: 24 additions & 0 deletions spectf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,30 @@ def get_date(fid:str) -> str:
""" Get the date string from full FID """
return fid.split('_')[0].split('t')[1]

def envi_header(inputpath):
    """Return the ENVI header (.hdr) path associated with an ENVI file path.

    Adapted from
    https://github.com/emit-sds/emit-utils/blob/develop/emit_utils/file_checks.py

    Args:
        inputpath: Path to an ENVI binary file or to its header. May be a
            ``str`` or any ``os.PathLike`` object.

    Returns:
        str: The header file associated with the input reference:
            - a path already ending in '.hdr' is returned unchanged;
            - for '.img', '.dat' or '.raw' inputs, ``<stem>.hdr`` is returned
              if it exists on disk, otherwise ``<input>.hdr`` if that exists,
              otherwise ``<stem>.hdr`` (new file creation presumed);
            - for any other extension, '.hdr' is appended to the input.
    """
    import os

    # Normalise PathLike objects up front so the string concatenations below
    # behave the same for pathlib.Path as for plain str inputs.
    inputpath = os.fspath(inputpath)

    # Split once instead of re-running splitext for every comparison.
    stem, ext = op.splitext(inputpath)

    if ext == '.hdr':
        return inputpath

    if ext in ('.img', '.dat', '.raw'):
        # Headers could be at either filename.hdr or filename.img.hdr.
        # Check both and return the one that exists; if neither exists,
        # return filename.hdr (new file creation presumed).
        replaced = stem + '.hdr'
        appended = inputpath + '.hdr'
        if not op.isfile(replaced) and op.isfile(appended):
            return appended
        return replaced

    return inputpath + '.hdr'

def drop_bands(
spectra: np.ndarray,
banddef: np.ndarray,
Expand Down
83 changes: 49 additions & 34 deletions spectf_cloud/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# SpecTf: Transformers Enable Data-Driven Imaging Spectroscopy Cloud Detection

Jake H. Lee, Michael Kiper, David R. Thompson, Philip G. Brodrick. *In Review.*

Preprint: https://arxiv.org/abs/2501.04916
Jake H. Lee, Michael Kiper, David R. Thompson, Philip G. Brodrick
Proceedings of the National Academy of Sciences of the United States of America
122 (27) e2502903122, https://doi.org/10.1073/pnas.2502903122 (2025).

<img src="figures/fig1.png" width="50%">

Expand Down Expand Up @@ -86,21 +86,25 @@ Within each of these datasets, there are the following fields:
At any point, you can navigate the CLI with the `-h` or `--help` flags:
```
$ spectf-cloud -h

Usage: spectf-cloud [OPTIONS] COMMAND [ARGS]...

╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --version Display the current SpecTf version. │
│ --help -h Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ───────────────────────────────────────────────────────────────────╮
│ cloud-eval Evaluation commands for SpecTf and L2A Baseline. │
│ deploy-pt Produce a SpecTf transformer-generated cloud mask using │
│ PyTorch runtime. │
│ deploy-trt Produce a SpecTf transformer-generated cloud mask using │
│ the TensorRT engine. │
│ train Train the SpecTf Hyperspectral Transformer Model. │
│ train-comparison Training commands for the ResNet and XGBoost comparison │
│ models. │
│ tui Open Textual TUI. │
╰──────────────────────────────────────────────────────────────────────────────╯

Usage: spectf-cloud [OPTIONS] COMMAND [ARGS]...

╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────╮
│ --version Display the current SpecTf version. │
│ --help -h Show this message and exit. │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ────────────────────────────────────────────────────────────────────────────────────────────╮
│ cloud-eval Evaluation commands for SpecTf and L2A Baseline. │
│ deploy-pt Produce a SpecTf transformer-generated cloud mask using PyTorch runtime. │
│ deploy-trt Produce a SpecTf transformer-generated cloud mask using the TensorRT engine. │
│ train Train the SpecTf Hyperspectral Transformer Model. │
│ train-comparison Training commands for the ResNet and XGBoost comparison models. │
│ tui Open Textual TUI. │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯
```

### 🌈 Using `SpecTf Cloud` to cloud mask your EMIT Scene ☁️
Expand All @@ -121,23 +125,34 @@ $ which spectf-cloud

```
$ spectf-cloud deploy-pt -h

Usage: spectf-cloud deploy-pt [OPTIONS] OUTFP OBSFP RDNFP

Produce a SpecTf transformer-generated cloud mask.

╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --threshold FLOAT Threshold for cloud classification. [default: 0.52] │
│ --device INTEGER Device specification for PyTorch (-1 for CPU, 0+ for GPU, MPS if │
│ available). │
│ [default: -1] │
│ --arch-spec FILE Filepath to model architecture YAML specification. [default: spectf_cloud_config.yml] │
│ --irradiance FILE Filepath to irradiance numpy file. [default: irr.npy] │
│ --weights FILE Filepath to trained model weights. [default: weights/current.pt] │
│ --proba Output probability map instead of binary cloud mask. │
│ --keep-bands Keep all bands in the spectra (use for non-EMIT data). │
│ --help -h Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯

Usage: spectf-cloud deploy-pt [OPTIONS] OUTFP OBSFP RDNFP

Produce a SpecTf transformer-generated cloud mask using PyTorch runtime.
OUTFP is where the output file will be written (GeoTIFF .tif) RDNFP is the
filepath of the radiance data (ENVI .img) OBSFP is the filepath of the
observation data (ENVI .img)

╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --threshold FLOAT Threshold for cloud classification. │
│ [default: 0.51] │
│ --device INTEGER Device specification for PyTorch (-1 for CPU, 0+ │
│ for GPU, MPS if available). │
│ [default: -1] │
│ --arch-spec FILE Filepath to model architecture YAML │
│ specification. This file also needs to contain │
│ the bands to remove │
│ [default: SpecTf/spectf_cloud/spectf_cloud_confi… |
│ --irradiance FILE Filepath to irradiance numpy file. │
│ [default: SpecTf/spectf_cloud/irr.npy] │
│ --weights FILE Filepath to latest trained model weights. │
│ [default: SpecTf/spectf_cloud/weights/current.pt │
│ --proba Output probability map instead of binary cloud │
│ mask. │
│ --keep-bands Keep all bands in the spectra (use for non-EMIT │
│ data). │
│ --help -h Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```

Example:
Expand Down
57 changes: 57 additions & 0 deletions spectf_cloud/datasets/test_fids.csv
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,20 @@ emit20230524t080731
emit20230526t141654
emit20230605t045030
emit20231006t052426
emit20240101t021631
emit20240101t101617
emit20240201t051020
emit20240201t065205
emit20240201t112745
emit20240201t130346
emit20240201t191530
emit20240301t031712
emit20240301t032602
emit20240301t060903
emit20240301t060915
emit20240301t061049
emit20240301t061137
emit20240301t140747
emit20240301t153909
emit20240301t154317
emit20240301t154352
Expand Down Expand Up @@ -67,3 +77,50 @@ emit20240306t070724
emit20240306t131716
emit20240306t131950
emit20240307t123027
emit20240401t073643
emit20240401t074238
emit20240401t091746
emit20240401t105337
emit20240401t165720
emit20240401t201009
emit20240401t201219
emit20240501t060729
emit20240501t092015
emit20240501t153409
emit20240601t002507
emit20240601t002519
emit20240601t063805
emit20240601t070705
emit20240601t083926
emit20240601t114551
emit20240601t145029
emit20240601t210302
emit20240601t233834
emit20240601t233857
emit20240701t083148
emit20240701t083311
emit20240701t130602
emit20240701t144917
emit20240801t050523
emit20240801t050907
emit20240801t064156
emit20240801t064623
emit20240801t094804
emit20240801t111918
emit20240801t111941
emit20240801t130136
emit20240801t155159
emit20240801t173114
emit20240801t173511
emit20240801t173657
emit20240801t190552
emit20240801t190604
emit20240801t190913
emit20240801t204038
emit20240801t204347
emit20240801t221914
emit20240901t013613
emit20240901t073802
emit20240901t121449
emit20240901t153014
emit20241101t092401
Loading