diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a0c1a0c..199308f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,10 +11,11 @@ on: jobs: build_and_test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: + fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -23,12 +24,32 @@ jobs: python-version: ${{ matrix.python-version }} - name: install dependencies run: | - sudo add-apt-repository ppa:ubuntugis/ppa - sudo apt-get update && sudo apt-get install -y libgdal-dev - pip install cython numpy + sudo apt-get update && sudo apt-get install -y \ + libgdal-dev build-essential swig \ + python3-dev python3-pip \ + && cd /usr/local/bin \ + && ln -s $(readlink -f $(which python3)) python \ + && cd /bin \ + && ln -s $(readlink -f $(which python3)) python \ + && rm -rf /var/lib/apt/lists/* + sudo python -m pip install --upgrade pip + sudo python -m pip install pytest pytest-cov cython "numpy<2" "pandas<2" + sudo python -m pip install "setuptools~=57.5.0" wheel - name: build gdal - run: pip install gdal==$(gdal-config --version) + run: | + sudo python -m pip uninstall gdal --break-system-packages \ + && python -m pip install "numpy<2" --break-system-packages \ + && sudo python -m pip install \ + --no-build-isolation --no-cache-dir --force-reinstall \ + gdal==$(gdal-config --version) \ + --global-option=build_ext --global-option="-I/usr/include/gdal" \ + --break-system-packages - name: build - run: pip install $GITHUB_WORKSPACE + run: | + cd $GITHUB_WORKSPACE \ + && sudo python -m pip install pytest pytest-cov \ + && sudo python -m pip install -e .[dev] --break-system-packages - name: test - run: python $GITHUB_WORKSPACE/setup.py test + run: | + cd $GITHUB_WORKSPACE \ + && sudo python -m pytest diff --git a/.gitignore b/.gitignore index d668863e..31527f2a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,5 @@ wsmtk.cred.pkl wsmtk.key.pkl docenv/* + +.vscode/ diff --git a/modape/constants.py b/modape/constants.py index ed4fe49d..555af9bd 100644 --- a/modape/constants.py +++ b/modape/constants.py @@ -1,57 +1,109 @@ """Constants for modape""" + import re +from typing import Any + +from osgeo import gdal REGEX_PATTERNS = { - "product": re.compile(r"^M[O|Y]D\d{2}\w{1}\d{1}"), + "product": re.compile(r"^(?:VNP|M[O|Y|X]D)\d{2}\w{1}\d{1}"), "version": re.compile(r".+\.(\d{3})\..+"), "tile": re.compile(r"h\d+v\d+"), "date": re.compile(r".+A(\d{7}).+"), "processing_timestamp": re.compile(r".+(\d{13}).+"), - "AquaTerra": re.compile(r"^M[Y|O]D\d{2}.+"), + "AquaTerra": re.compile(r"^M[O|Y|X]D\d{2}.+"), "Aqua": re.compile(r"^MYD\d{2}.+"), "Terra": re.compile(r"^MYD\d{2}.+"), - "VIM": re.compile(r"^M[Y|O|X]D13.+"), + "VIM": re.compile(r"^(?:VNP|M[O|Y|X]D)13.+"), "LST": re.compile(r"^M[Y|O]D11.+"), - "VIMLST": re.compile(r"^M[Y|O]D[11|13].+"), + "VIMLST": re.compile(r"^(?:VNP|M[O|Y|X]D)[11|13].+"), +} + +VAM_PRODUCT_CODES = dict(zip(["VIM", "VEM", "LTD", "LTN"], ["NDVI", "EVI", "LST_Day", "LST_Night"])) + +PRODUCT_SRS_DICT: dict[str, dict[str, Any]] = { + "VNP13A2": { + "ProjectionWKT": """\ +PROJCRS["unnamed", + BASEGEOGCRS["Unknown datum based upon the custom spheroid", + DATUM["Not specified (based on custom spheroid)", + ELLIPSOID["Custom spheroid",6371007.181,0, + LENGTHUNIT["metre",1, + ID["EPSG",9001]]]], + PRIMEM["Greenwich",0, + ANGLEUNIT["degree",0.0174532925199433, + ID["EPSG",9122]]]], + CONVERSION["unnamed", + METHOD["Sinusoidal"], + PARAMETER["Longitude of natural origin",0, + ANGLEUNIT["degree",0.0174532925199433], + ID["EPSG",8802]], + PARAMETER["False easting",0, + LENGTHUNIT["Meter",1], + ID["EPSG",8806]], + PARAMETER["False northing",0, + LENGTHUNIT["Meter",1], + ID["EPSG",8807]]], + CS[Cartesian,2], + AXIS["easting",east, + ORDER[1], + LENGTHUNIT["Meter",1]], + AXIS["northing",north, + ORDER[2], + LENGTHUNIT["Meter",1]]]""", + "AxisMapping": "1,2", + "PixelSize": tuple([xy * 926.6254330558334 for xy in [1, -1]]), + "Origin": {"h": dict(index=18, offset=0), "v": dict(index=9, offset=0)}, + } } -VAM_PRODUCT_CODES = dict( - zip(["VIM", "VEM", "LTD", "LTN"], - ["NDVI", "EVI", "LST_Day", "LST_Night"]) -) +# Mapping for _ to physical encoding of the data: value range, nodata +PRODUCT_SDS_DICT: dict[str, dict[str, int | list[int]]] = { + "VNP13A2_NDVI": { + "Name": "//HDFEOS/GRIDS/VIIRS_Grid_16Day_VI_1km/Data_Fields/1_km_16_days_NDVI", + "DataType": gdal.GDT_Int16, + "ValueRange": (-10000, 10000), + "NoDataValue": (-15000, -13000), + "Size": (1200, 1200), + "BlockSize": (1200, 1), + } +} TEMPORAL_DICT = { - "MXD13":{ + "VNP13": { "temporalresolution": 8, "tshift": 8, + "mux": "VNP", + "min_date": "2012017", }, - "MOD13":{ + "MXD13": { + "temporalresolution": 8, + "tshift": 8, + "min_date": "2002185", + }, + "MOD13": { "temporalresolution": 16, "tshift": 8, + "mux": "MXD", }, - "MYD13":{ + "MYD13": { "temporalresolution": 16, "tshift": 8, + "mux": "MXD", }, - "MOD11":{ + "MOD11": { "temporalresolution": 8, "tshift": 4, }, - "MYD11":{ + "MYD11": { "temporalresolution": 8, "tshift": 4, }, } LST_NAME_LUD = { - "LTD": { - "MOD": "TDT", - "MYD": "TDA" - }, - "LTN": { - "MOD": "TNT", - "MYD": "TNA" - }, + "LTD": {"MOD": "TDT", "MYD": "TDA"}, + "LTN": {"MOD": "TNT", "MYD": "TNA"}, } TEMPINT_LABELS = { @@ -61,5 +113,5 @@ DATE_LABELS = { 5: dict(zip([3, 8, 13, 18, 23, 28], ["p1", "p2", "p3", "p4", "p5", "p6"])), - 10: dict(zip([5, 15, 25], ["d1", "d2", "d3"])) + 10: dict(zip([5, 15, 25], ["d1", "d2", "d3"])), } diff --git a/modape/modis/collect.py b/modape/modis/collect.py index d5220670..cd4d0d47 100644 --- a/modape/modis/collect.py +++ b/modape/modis/collect.py @@ -3,19 +3,22 @@ This file contains the class representing a raw MODIS HDF5 file. """ -#pylint: disable=E0401 -from datetime import datetime + import gc import logging -from pathlib import Path import re -from typing import List, Tuple import warnings +# pylint: disable=E0401 +from datetime import datetime +from pathlib import Path + import h5py import numpy as np + from modape.constants import ( LST_NAME_LUD, + PRODUCT_SDS_DICT, REGEX_PATTERNS, TEMPORAL_DICT, VAM_PRODUCT_CODES, @@ -26,6 +29,7 @@ log = logging.getLogger(__name__) + class ModisRawH5(HDF5Base): """Class representing HDF5 file containing raw MODIS data. @@ -34,11 +38,13 @@ class ModisRawH5(HDF5Base): subsequent step. """ - def __init__(self, - files: List[str], - targetdir: str, - vam_product_code: str = None, - interleave: bool = False) -> None: + def __init__( + self, + files: list[str], + targetdir: str, + vam_product_code: str = None, + interleave: bool = False, + ) -> None: """Initialize instance ModisRawH5 class. This creates an ModisRawH5 object. If the corresponding HDF5 file @@ -80,7 +86,9 @@ def __init__(self, for file in files: if not re.match(REGEX_PATTERNS["VIMLST"], file.split("/")[-1]): log.error("File %s not NDVI or LST", file) - raise ValueError("MODIS collect processing only implemented for M{O|Y}D {11,13} products!") + raise ValueError( + "MODIS collect processing only implemented for M{O|Y}D {11,13} products!" + ) self.rawdates = [re.findall(REGEX_PATTERNS["date"], x)[0] for x in files] self.files = [x for (y, x) in sorted(zip(self.rawdates, files))] @@ -89,9 +97,7 @@ def __init__(self, # extract product patterns products = [] for file_tmp in self.files: - products.append( - re.findall(REGEX_PATTERNS["product"], file_tmp.split("/")[-1])[0] - ) + products.append(re.findall(REGEX_PATTERNS["product"], file_tmp.split("/")[-1])[0]) # Make sure it's the same product assert len({x[3:] for x in products}) == 1, "Found different products in input files!" @@ -103,7 +109,9 @@ def __init__(self, warnings.warn(duplicate_warning_msg, Warning) # make sure number of dates is equal to number of files, so no duplicates! - processing_timestamps = [int(re.sub(REGEX_PATTERNS["processing_timestamp"], "\\1", x)) for x in self.files] + processing_timestamps = [ + int(re.sub(REGEX_PATTERNS["processing_timestamp"], "\\1", x)) for x in self.files + ] dups = [] dt_prev = None @@ -116,7 +124,9 @@ def __init__(self, to_pop = [] for dup in dups: # max TS for duplicate - max_ts = max([processing_timestamps[ix] for ix, x in enumerate(self.rawdates) if x == dup]) + max_ts = max( + [processing_timestamps[ix] for ix, x in enumerate(self.rawdates) if x == dup] + ) for ix, x in enumerate(self.rawdates): if x == dup and processing_timestamps[ix] != max_ts: @@ -147,19 +157,26 @@ def __init__(self, self.vam_product_code = vam_product_code satset = {x[:3] for x in products} + + try: + muxset: set[str] = {TEMPORAL_DICT[product[:5]]["mux"] for product in products} + except KeyError: + muxset: set[str] = set() - if interleave and not self.vam_product_code == "VIM": - log.debug("Interleaving only possible for M{O|Y}D13 (VIM) products! Proceeding without interleave.") + if interleave and len(muxset) != 1: + log.debug( + "Interleaving only possible for M{O|Y}D13 (VIM) products! Proceeding without interleave." + ) interleave = False if interleave: - self.satellite = "MXD" - self.product = f"MXD{products[0][3:]}" + self.satellite = list(muxset)[0] + self.product = f"{list(muxset)[0]}{products[0][3:]}" # for interleaving, exclude all dates before 1st aqua date ix = 0 - min_date = fromjulian("2002185") + min_date = fromjulian(TEMPORAL_DICT[self.product[:5]]["min_date"]) for x in self.rawdates: start = ix if fromjulian(x) >= min_date: @@ -194,7 +211,9 @@ def __init__(self, fn_code = self.vam_product_code test_vampc = REGEX_PATTERNS["VIM"] - assert test_vampc.match(self.product), f"Incomaptible VAM code {self.vam_product_code} for product {self.product}" + assert test_vampc.match( + self.product + ), f"Incomaptible VAM code {self.vam_product_code} for product {self.product}" refname = self.reference_file.name @@ -211,7 +230,8 @@ def __init__(self, def create(self, compression: str = "gzip", - chunks: Tuple[int] = None) -> None: + chunks: tuple[int] = None, + pre_allocate: bool = True) -> None: """Creates HDF5 file. If the corresponding HDF5 is not found in the target directory, @@ -232,8 +252,7 @@ def create(self, sds_indicator = VAM_PRODUCT_CODES[self.vam_product_code] ref_metadata = self._get_reference_metadata( - reference_file=str(self.reference_file), - sds_filter=sds_indicator + reference_file=str(self.reference_file), sds_filter=sds_indicator ) row_number = ref_metadata["RasterYSize"] @@ -241,7 +260,7 @@ def create(self, # check chunksize if chunks is None: - chunks = ((row_number*col_number)//25, 1) # default + chunks = ((row_number * col_number) // 25, 1) # default else: assert isinstance(chunks, tuple), "Need chunks as tuple of length = 2" assert len(chunks) == 2, "Need chunks as tuple of length = 2" @@ -259,20 +278,21 @@ def create(self, # create data array dset = h5f.create_dataset( name="data", - shape=(row_number*col_number, self.nfiles), + shape=(row_number*col_number, self.nfiles if pre_allocate else 0), dtype="int16", - maxshape=(row_number*col_number, None), + maxshape=(row_number * col_number, None), chunks=chunks, compression=compression, - fillvalue=ref_metadata["nodata"]) + fillvalue=ref_metadata["nodata"], + ) # create dates _ = h5f.create_dataset( name="dates", - shape=(self.nfiles,), + shape=(self.nfiles if pre_allocate else 0,), maxshape=(None,), dtype="S8", - compression=compression + compression=compression, ) # set attributes of data array @@ -280,27 +300,36 @@ def create(self, dset.attrs.update(ref_metadata) if self.vam_product_code == "VIM": + product = REGEX_PATTERNS["product"].findall(Path(self.filename).name)[0] valid_range = (-2000, 10000) + try: + valid_range = PRODUCT_SDS_DICT[f"{product}_NDVI"]["ValueRange"] + except KeyError: + pass elif self.vam_product_code in ["LTD", "LTN"]: valid_range = (7500, 65535) else: pass # teporal information - dset.attrs.update({ - "temporalresolution": self.temporalresolution, - "tshift": self.tshift, - "globalproduct": self.globalproduct, - "vamcode": self.filename.name.split(".")[-2], - "valid_range": valid_range, - }) + dset.attrs.update( + { + "temporalresolution": self.temporalresolution, + "tshift": self.tshift, + "globalproduct": self.globalproduct, + "vamcode": self.filename.name.split(".")[-2], + "valid_range": valid_range, + } + ) self.exists = True except Exception as _: log.error("Error creating %s", str(self.filename)) - raise HDF5CreationError(f"Error creating {str(self.filename)}! Check if file exists, or if compression / chunksize is OK.") + raise HDF5CreationError( + f"Error creating {str(self.filename)}! Check if file exists, or if compression / chunksize is OK." + ) - def update(self, force: bool = False) -> List: + def update(self, force: bool = False) -> list[str]: """Updates MODIS raw HDF5 file with raw data. The files specified in `__init__` get collected into the HDF5 file, @@ -336,8 +365,9 @@ def update(self, force: bool = False) -> List: dates_combined.sort() # assert all dates are after last update - assert dates_combined[:-len(self.rawdates)] == dates_stored, \ - "Files to be collected need to be sequential and AFTER dates of previous updates!" + assert ( + dates_combined[: -len(self.rawdates)] == dates_stored + ), "Files to be collected need to be sequential and AFTER dates of previous updates!" # New total temporal length dates_length = len(dates_combined) @@ -349,9 +379,10 @@ def update(self, force: bool = False) -> List: # get required metadata attrs = {key: dset.attrs[key] for key in ["nodata", "RasterXSize", "RasterYSize"]} - chunks = dset.chunks + chunks: tuple[int, int] = dset.chunks # start index for write + assert chunks is not None start_index = dates_combined.index(self.rawdates[0]) # Manual garbage collect to prevent out of memory @@ -361,39 +392,37 @@ def update(self, force: bool = False) -> List: arr = np.full((chunks[0], self.nfiles), attrs["nodata"], dtype="int16") sds_indicator = VAM_PRODUCT_CODES[self.vam_product_code] - ysize = chunks[0]//attrs["RasterXSize"] + ysize = chunks[0] // attrs["RasterXSize"] hdf_datasets = HDFHandler(files=self.files, sds=sds_indicator) - collected = [] + collected: set[str] = set() - block_gen = ((x, x//attrs["RasterXSize"]) for x in range(0, dataset_shape[0], chunks[0])) + block_gen = ((x, x // attrs["RasterXSize"]) for x in range(0, dataset_shape[0], chunks[0])) log.debug("Opening HDF files ... ") - with hdf_datasets.open_datasets(): + with hdf_datasets.open() as open_datasets: log.debug("Iterating chunks ... ") for yoff_ds, yoff_rst in block_gen: log.debug("Processing chunk (%s, %s)", yoff_ds, yoff_rst) - for ii, hdf in hdf_datasets.iter_handles(): + for ii, hdf, filename in open_datasets: try: - arr[:, ii] = hdf_datasets.read_chunk(hdf, yoff=int(yoff_rst), ysize=int(ysize)).flatten() - collected.append(hdf_datasets.files[ii]) + arr[:, ii] = HDFHandler.read_chunk( + hdf, yoff=int(yoff_rst), ysize=int(ysize) + ).flatten() + collected.add(filename) except AttributeError: if force: - log.warning("Error reading from %s, using nodata.", hdf_datasets.files[ii]) + log.warning("Error reading from %s, using nodata.", filename) arr[:, ii] = attrs["nodata"] continue - log.error("Error reading from %s", hdf_datasets.files[ii]) - raise IOError(f"Error reading {hdf_datasets.files[ii]}") + log.error("Error reading from %s", filename) + raise IOError(f"Error reading {filename}") # write to HDF5 write_check = self.write_chunk( - dataset="data", - arr_in=arr, - xchunk=10, - xoffset=start_index, - yoffset=yoff_ds + dataset="data", arr_in=arr, xchunk=10, xoffset=start_index, yoffset=yoff_ds ) if not write_check: @@ -406,7 +435,7 @@ def update(self, force: bool = False) -> List: dates = h5f.get("dates") dates[...] = np.array(dates_combined, dtype="S8") - return list(set(collected)) + return list(collected) @property def last_collected(self): diff --git a/modape/modis/download.py b/modape/modis/download.py index b5c45c3e..5962614b 100644 --- a/modape/modis/download.py +++ b/modape/modis/download.py @@ -7,22 +7,22 @@ # pylint: disable=E0401, E0611 +import hashlib +import logging +import re +import shutil from concurrent.futures import ThreadPoolExecutor from datetime import datetime -import logging from os.path import exists from pathlib import Path -import re -import shutil -import hashlib -from typing import List, Tuple, Union +from typing import Generator, List, Tuple, Union from xml.etree import ElementTree -from cmr import GranuleQuery import pandas as pd +from cmr import GranuleQuery from pycksum import cksum from requests.adapters import HTTPAdapter -from requests.exceptions import HTTPError, ConnectionError #pylint: disable=W0622 +from requests.exceptions import ConnectionError, HTTPError # pylint: disable=W0622 from urllib3.util import Retry from modape.exceptions import DownloadError @@ -30,6 +30,7 @@ log = logging.getLogger(__name__) + class ModisQuery(object): """Class for querying and downloading MODIS data. @@ -38,13 +39,15 @@ class ModisQuery(object): the `download` method, to fetch the resulting HDF files to local disk. """ - def __init__(self, - products: List[str], - aoi: List[Union[float, int]] = None, - begindate: datetime = None, - enddate: datetime = None, - tile_filter: List[str] = None, - version: str = "006") -> None: + def __init__( + self, + products: List[str], + aoi: List[Union[float, int]] = None, + begindate: datetime = None, + enddate: datetime = None, + tile_filter: List[str] = None, + version: str = "006", + ) -> None: """Initialize instance ModisQuery class. This creates an instance of `ModisQuery` with the basic query parameters. @@ -74,9 +77,7 @@ def __init__(self, # construct query self.api.parameters( - short_name=products, - version=version, - temporal=(begindate, enddate) + short_name=products, version=version, temporal=(begindate, enddate) ) if aoi is not None: @@ -115,7 +116,6 @@ def search(self, match_begin: bool = True) -> None: log.debug("Query complete, filtering results") for result in self._parse_response(results_all): - # skip tiles outside of filter if self.tile_filter and result["tile"]: if result["tile"] not in self.tile_filter: @@ -140,11 +140,14 @@ def search(self, match_begin: bool = True) -> None: # final results self.nresults = len(self.results) - log.debug("Search complete. Total results: %s, filtered: %s", len(results_all), self.nresults) - + log.debug( + "Search complete. Total results: %s, filtered: %s", + len(results_all), + self.nresults, + ) @staticmethod - def _parse_response(query: List[dict]) -> dict: + def _parse_response(query: List[dict]) -> Generator[dict, None, None]: """Generator for parsing API response. Args: @@ -158,7 +161,6 @@ def _parse_response(query: List[dict]) -> dict: tile_regxp = re.compile(r".+(h\d+v\d+).+") for entry in query: - entry_parsed = dict( filename=entry["producer_granule_id"], time_start=pd.Timestamp(entry["time_start"]).date(), @@ -180,14 +182,14 @@ def _parse_response(query: List[dict]) -> dict: def _parse_hdfxml(response): result = {} tree = ElementTree.fromstring(response.content) - for entry in tree.iter(tag='GranuleURMetaData'): + for entry in tree.iter(tag="GranuleURMetaData"): for datafile in entry.iter(tag="DataFiles"): for datafilecont in datafile.iter(tag="DataFileContainer"): for content in datafilecont: if content.tag in ("FileSize", "ChecksumType", "Checksum"): result.update({content.tag: content.text.strip()}) return result - + @staticmethod def _parse_cmrxml(response, hdf_filename): result = {} @@ -198,14 +200,15 @@ def _parse_cmrxml(response, hdf_filename): result.update({"Checksum": entry.find("Checksum/Value").text}) return result - - def _fetch(self, - session: SessionWithHeaderRedirection, - url: str, - destination: Path, - overwrite: bool, - check: bool, - ) -> Tuple[str, Union[None, Exception]]: + def _fetch( + self, + session: SessionWithHeaderRedirection, + url: str, + destination: Path, + overwrite: bool, + check: bool, + mirror: Path, + ) -> Tuple[str, Union[None, Exception]]: """Helper function to fetch HDF files Args: @@ -220,79 +223,125 @@ def _fetch(self, either (filename, None) for success and (URL, Exception) for error. """ + + def _check(_downloaded: Path, raise_on_error=True) -> bool: + try: + with session.get(url + ".xml", allow_redirects=True) as hdfxml: + if hdfxml.status_code == 404: + with session.get( + (url[:-3] if url.endswith(".h5") else url[:-4]) + + ".cmr.xml", + allow_redirects=True, + ) as cmrxml: + cmrxml.raise_for_status() + file_metadata = self._parse_cmrxml( + cmrxml, url.split("/")[-1] + ) + else: + hdfxml.raise_for_status() + file_metadata = self._parse_hdfxml(hdfxml) + + # check filesize + assert ( + str(_downloaded.stat().st_size).strip() == file_metadata["FileSize"] + ), f"Size: {_downloaded.stat().st_size} != {file_metadata['FileSize']}" + with open(_downloaded, "rb") as openfile: + if file_metadata["ChecksumType"] == "CKSUM": + checksum = str(cksum(openfile)) + elif file_metadata["ChecksumType"] == "MD5": + md5_hash = hashlib.md5() + chunk = openfile.read(65536) + while chunk: + md5_hash.update(chunk) + chunk = openfile.read(65536) + checksum = md5_hash.hexdigest().lower() + elif file_metadata["ChecksumType"] == "SHA256": + sha256_hash = hashlib.sha256() + chunk = openfile.read(65536) + while chunk: + sha256_hash.update(chunk) + chunk = openfile.read(65536) + checksum = sha256_hash.hexdigest().lower() + else: + raise ValueError( + f"Unknown Checksum Type: {file_metadata['ChecksumType']}" + ) + # check checksum + assert checksum == file_metadata["Checksum"], ( + f"Hash: {checksum} != {file_metadata['Checksum']}" + ) + except Exception: + if raise_on_error: + raise + return False + return True + filename = url.split("/")[-1] filename_full = destination.joinpath(filename) if not exists(filename_full) or overwrite: - - filename_temp = filename_full.with_suffix(".modapedl") + fp_download = filename_full if mirror is None else mirror.joinpath(filename) + fp_modapedl = fp_download.with_suffix(".modapedl") try: + if exists(fp_download): + if _check(fp_download, not overwrite): + # File is fine... + if mirror is None: + # ... leave as-is + pass + else: + # ... or re-do symlink from mirror into destination + if exists(filename_full): + filename_full.unlink() + filename_full.symlink_to(fp_download) + return (filename, None) + else: + # File is not valid, but since is True, no exception was raised; + # remove the file in preparation of re-download: + fp_download.unlink() with session.get(url, stream=True, allow_redirects=True) as response: response.raise_for_status() - with open(filename_temp, "wb") as openfile: - shutil.copyfileobj(response.raw, openfile, length=16*1024*1024) + with open(fp_modapedl, "wb") as openfile: + shutil.copyfileobj( + response.raw, openfile, length=16 * 1024 * 1024 + ) if check: + _check(fp_modapedl) - with session.get(url + ".xml", allow_redirects=True) as hdfxml: - if hdfxml.status_code == 404: - with session.get(url[:-4] + ".cmr.xml", allow_redirects=True) as cmrxml: - cmrxml.raise_for_status() - file_metadata = self._parse_cmrxml(cmrxml, url.split("/")[-1]) - else: - hdfxml.raise_for_status() - file_metadata = self._parse_hdfxml(hdfxml) - - # check filesize - assert str(filename_temp.stat().st_size).strip() == file_metadata["FileSize"], \ - f'Size: {filename_temp.stat().st_size} != {file_metadata["FileSize"]}' - with open(filename_temp, "rb") as openfile: - if file_metadata["ChecksumType"] == "CKSUM": - checksum = str(cksum(openfile)) - elif file_metadata["ChecksumType"] == "MD5": - md5_hash = hashlib.md5() - chunk = openfile.read(65536) - while chunk: - md5_hash.update(chunk) - chunk = openfile.read(65536) - checksum = md5_hash.hexdigest().lower() - elif file_metadata["ChecksumType"] == "SHA256": - sha256_hash = hashlib.sha256() - chunk = openfile.read(65536) - while chunk: - sha256_hash.update(chunk) - chunk = openfile.read(65536) - checksum = sha256_hash.hexdigest().lower() - else: - raise ValueError(f'Unknown Checksum Type: {file_metadata["ChecksumType"]}') - # check checksum - assert checksum == file_metadata["Checksum"], f'Hash: {checksum} != {file_metadata["Checksum"]}' - - shutil.move(filename_temp, filename_full) + shutil.move(fp_modapedl, fp_download) + if mirror is not None: + if exists(filename_full): + filename_full.unlink() + filename_full.symlink_to(fp_download) except (HTTPError, ConnectionError, AssertionError, FileNotFoundError) as e: try: - filename_temp.unlink() + fp_modapedl.unlink() except FileNotFoundError: pass return (filename, e) else: - log.info("%s exists in target. Please set overwrite to True.", filename_full) + log.info( + "%s exists in target. Please set overwrite to True.", filename_full + ) return (filename, None) - def download(self, - targetdir: Path, - username: str, - password: str, - overwrite: bool = False, - multithread: bool = False, - nthreads: int = 4, - max_retries: int = -1, - robust: bool = False, - ) -> List: + def download( + self, + targetdir: Path, + username: str, + password: str, + overwrite: bool = False, + multithread: bool = False, + nthreads: int = 4, + max_retries: int = -1, + robust: bool = False, + mirror: Path = None, + ) -> List: """Download MODIS HDF files. This method downloads the MODIS HDF files contained in the @@ -309,6 +358,7 @@ def download(self, nthreads (int): Number of threads. max_retries (int): Maximum number of retries for failed downloads (for no max, set -1). robust (bool): Perform robust downloading (checks file size and checksum). + mirror (Path): Download files to mirror instead; symlink into target directory Raises: DownloadError: If one or more errors were encountered during downloading. @@ -320,31 +370,55 @@ def download(self, assert targetdir.is_dir() assert targetdir.exists() + if mirror is not None: + assert mirror.is_dir() + assert mirror.exists() + retry_count = 0 to_download = self.results.copy() downloaded = [] while True: - with SessionWithHeaderRedirection(username, password) as session: - backoff = min(450, 2**retry_count) - retries = Retry(total=5, backoff_factor=backoff, status_forcelist=[502, 503, 504]) + retries = Retry( + total=5, backoff_factor=backoff, status_forcelist=[502, 503, 504] + ) session.mount( "https://", - HTTPAdapter(pool_connections=nthreads, pool_maxsize=nthreads*2, max_retries=retries) + HTTPAdapter( + pool_connections=nthreads, + pool_maxsize=nthreads * 2, + max_retries=retries, + ), ) if multithread: - log.debug("Multithreaded download using %s threads. Warming up connection pool.", nthreads) + log.debug( + "Multithreaded download using %s threads. Warming up connection pool.", + nthreads, + ) # warm up pool - _ = session.get(list(to_download.values())[0]["link"], stream=True, allow_redirects=True) + _ = session.get( + list(to_download.values())[0]["link"], + stream=True, + allow_redirects=True, + ) with ThreadPoolExecutor(nthreads) as executor: - - futures = [executor.submit(self._fetch, session, values["link"], targetdir, overwrite, robust) - for key, values in to_download.items()] + futures = [ + executor.submit( + self._fetch, + session, + values["link"], + targetdir, + overwrite, + robust, + mirror, + ) + for key, values in to_download.items() + ] downloaded_temp = [x.result() for x in futures] @@ -353,9 +427,15 @@ def download(self, downloaded_temp = [] for _, values in to_download.items(): - downloaded_temp.append( - self._fetch(session, values["link"], targetdir, overwrite, robust) + self._fetch( + session, + values["link"], + targetdir, + overwrite, + robust, + mirror, + ) ) # check if downloads are OK @@ -364,7 +444,7 @@ def download(self, try: del to_download[fid] except KeyError: - del to_download[fid[:-4]] + del to_download[fid[:-3] if fid.endswith(".h5") else fid[:-4]] downloaded.append(fid) if to_download: diff --git a/modape/modis/io.py b/modape/modis/io.py index f311eec8..d30d798c 100644 --- a/modape/modis/io.py +++ b/modape/modis/io.py @@ -1,21 +1,28 @@ """IO module for modape""" + # pylint: disable=E0401, C0103 -from contextlib import contextmanager import logging +import re +import uuid +from contextlib import contextmanager from pathlib import Path -from typing import List, Tuple +from typing import Any, Generator -from osgeo import gdal import h5py import numpy as np +from osgeo import gdal + +from modape.constants import PRODUCT_SDS_DICT, PRODUCT_SRS_DICT, REGEX_PATTERNS log = logging.getLogger(__name__) + class HDF5Base(object): """Parent class for interaction with HDF5 files This class serves as a parent class for ModisRawH5 and ModisSmoothH5, enabling uniform chunked read and write of datasets and attributes to and from HDF5 files.""" + def __init__(self, filename: str) -> None: """Initialize HDF5Base instance. @@ -29,11 +36,9 @@ def __init__(self, filename: str) -> None: self.filename = Path(filename) self.exists = self.filename.exists() - def read_chunked(self, - dataset: str, - xoffset: int = 0, - xchunk: int = None, - arr_out: np.ndarray = None) -> np.ndarray: + def read_chunked( + self, dataset: str, xoffset: int = 0, xchunk: int = None, arr_out: np.ndarray = None + ) -> Generator[np.ndarray, None, None]: """Read data from dataset in a chunked manner. The chunks are iterated in a row by column pattern, where @@ -74,7 +79,11 @@ def read_chunked(self, if len(ds_shape) == 1: arr_out = np.full((ychunk,), fill_value=ds.fillvalue, dtype=ds.dtype.name) else: - arr_out = np.full((ychunk, ds_shape[1]-xoffset), fill_value=ds.fillvalue, dtype=ds.dtype.name) + arr_out = np.full( + (ychunk, ds_shape[1] - xoffset), + fill_value=ds.fillvalue, + dtype=ds.dtype.name, + ) else: assert isinstance(arr_out, np.ndarray) @@ -94,24 +103,36 @@ def read_chunked(self, with h5py.File(self.filename, "r") as h5f_open: ds = h5f_open.get(dataset) - log.debug("Reading chunk %s - %s", yb, yb+ychunk) + log.debug("Reading chunk %s - %s", yb, yb + ychunk) if xsize is not None: for xb in range(0, xsize, xchunk): xb_data = xb + xoffset - log.debug("Reading arr_out[%s : %s, %s : %s] from dataset[:, %s : %s]", yb, yb+ychunk, xb, xb+xchunk, xb_data, xb_data+xchunk) - arr_out[:, xb:(xb+xchunk)] = ds[yb:(yb+ychunk), xb_data:(xb_data+xchunk)] + log.debug( + "Reading arr_out[%s : %s, %s : %s] from dataset[:, %s : %s]", + yb, + yb + ychunk, + xb, + xb + xchunk, + xb_data, + xb_data + xchunk, + ) + arr_out[:, xb : (xb + xchunk)] = ds[ + yb : (yb + ychunk), xb_data : (xb_data + xchunk) + ] else: - arr_out[...] = ds[yb:(yb+ychunk)] + arr_out[...] = ds[yb : (yb + ychunk)] yield arr_out - def write_chunk(self, - dataset: str, - arr_in: np.ndarray, - xoffset: int = 0, - xchunk: int = None, - yoffset: int = 0) -> bool: + def write_chunk( + self, + dataset: str, + arr_in: np.ndarray, + xoffset: int = 0, + xchunk: int = None, + yoffset: int = 0, + ) -> bool: """Write chunk back to HDF5 file. Writes complete chunk back to HDF5 file, iterating @@ -155,7 +176,7 @@ def write_chunk(self, if xchunk is None: xchunk = xsize except ValueError: - ysize, = arr_in.shape + (ysize,) = arr_in.shape xsize = None ysize = max(ysize, ychunk) @@ -167,17 +188,26 @@ def write_chunk(self, for xb in range(0, xsize, xchunk): xb_data = xb + xoffset - log.debug("Writing dataset[%s : %s, %s : %s] from arr_in[:, %s : %s]", yoffset, yoffset+ysize, xb_data, xb_data+xchunk, xb, xb+xchunk) - ds[yoffset:(yoffset+ysize), xb_data:(xb_data+xchunk)] = arr_in[:, xb:(xb+xchunk)] + log.debug( + "Writing dataset[%s : %s, %s : %s] from arr_in[:, %s : %s]", + yoffset, + yoffset + ysize, + xb_data, + xb_data + xchunk, + xb, + xb + xchunk, + ) + ds[yoffset : (yoffset + ysize), xb_data : (xb_data + xchunk)] = arr_in[ + :, xb : (xb + xchunk) + ] else: - log.debug("Writing dataset[%s : %s] from arr_in", yoffset, yoffset+ysize) - ds[yoffset:(yoffset+ysize)] = arr_in[...] + log.debug("Writing dataset[%s : %s] from arr_in", yoffset, yoffset + ysize) + ds[yoffset : (yoffset + ysize)] = arr_in[...] return True @staticmethod - def _get_reference_metadata(reference_file: str, - sds_filter: str = None)-> dict: + def _get_reference_metadata(reference_file: str, sds_filter: str = None) -> dict: """Helper function to get metadata from reference file. Extracts metadata from subdataset, eitjer filtered @@ -201,6 +231,23 @@ def _get_reference_metadata(reference_file: str, """ + if reference_file.endswith(".h5"): + with HDFHandler._open_h5_source(reference_file, sds_filter) as h5_source: + band: gdal.Band | None = h5_source.GetRasterBand(1) + try: + assert band is not None + c, a, b, f, d, e = h5_source.GetGeoTransform() + return dict( + RasterXSize=h5_source.RasterXSize, + RasterYSize=h5_source.RasterYSize, + geotransform=(c, a, b, f, d, e), + projection=h5_source.GetProjection(), + resolution=(a, e), + nodata=band.GetNoDataValue(), + ) + finally: + band = None + ds = gdal.Open(reference_file) sds_all = ds.GetSubDatasets() @@ -226,14 +273,55 @@ def _get_reference_metadata(reference_file: str, return metadata + class HDFHandler(object): """Class to handle reading from MODIS HDF files. This class enables reading specific subdatasets and attributes from the raw MODIS HDF files.""" - def __init__(self, - files: List[str], - sds: str) -> None: + + class HandleCollection: + def __init__(self, files, sds) -> None: + self.handles: list[gdal.Dataset | None] = [] + self.files: list[str] = [] + + for ds_handle, filename in self._gen_sds_handle(files, sds): + self.handles.append(ds_handle) + self.files.append(filename) + + def release(self): + for ii in range(len(self.handles)): + self.handles[ii] = None + self.handles = [] + + def __iter__(self) -> Generator[tuple[int, gdal.Dataset | None, str], None, None]: + """Iterates over all open dataset handles + coming from `open_datasets` and returns a Tuple with index and a + `gdal.Dataset` + filename for each. + + Returns: + Tuple with index, corresponding `gdal.Dataset` and filename + + """ + assert len(self.handles) == len(self.files) + for ix, handle in enumerate(self.handles): + yield (ix, handle, self.files[ix]) + + @staticmethod + def _gen_sds_handle( + x: list[str], sds: str + ) -> Generator[tuple[gdal.Dataset | None, str], None, None]: + for xx in x: + try: + ds = gdal.Open(xx) + ds_sds = [x[0] for x in ds.GetSubDatasets() if sds in x[0]][0] + yield gdal.Open(ds_sds), xx + ds = None + + except AttributeError: + yield None, xx + + def __init__(self, files: list[str], sds: str) -> None: """Initialize HDFHandler instance. Reads the datasets, extracts the subdatasets and keeps @@ -247,41 +335,23 @@ def __init__(self, self.files = files self.sds = sds - self.handles = [] @contextmanager - def open_datasets(self) -> None: + def open(self) -> Generator[HandleCollection, None, None]: """Opens the selected subdataset from all files within a context manager and stores them in a class variable. When the context manager closes, the refereces are removed, closing all datasets. """ - for ds_handle in self._gen_sds_handle(self.files, self.sds): - self.handles.append(ds_handle) - - yield - for ii in range(len(self.handles)): - self.handles[ii] = None - self.handles = [] - - def iter_handles(self) -> Tuple[int, "gdal.Dataset"]: - """Iterates over all open dataset handles - coming from `open_datasets` and returns a Tuple with index and a - `gdal.Dataset` for each. - - Returns: - Tuple with index and corresponding `gdal.Dataset` - - """ - ix = 0 - for handle in self.handles: - yield (ix, handle) - ix += 1 + try: + yield (handles := HDFHandler.HandleCollection(self.files, self.sds)) + finally: + handles.release() @staticmethod - def read_chunk(x: "gdal.Dataset", **kwargs: dict) -> np.ndarray: + def read_chunk(x: gdal.Dataset, **kwargs: dict[str, int | Any]) -> np.ndarray | None: """Reads a chunk of an opened subdataset. The size of the chunk being read is defined by the @@ -299,13 +369,116 @@ def read_chunk(x: "gdal.Dataset", **kwargs: dict) -> np.ndarray: return x.ReadAsArray(**kwargs) @staticmethod - def _gen_sds_handle(x: str, sds: str): - for xx in x: - try: - ds = gdal.Open(xx) - ds_sds = [x[0] for x in ds.GetSubDatasets() if sds in x[0]][0] - yield gdal.Open(ds_sds) - ds = None + @contextmanager + def _open_h5_source(path: str, sds_filter: str) -> Generator[gdal.Dataset, None, None]: + """Helper function to open .h5 sources for which GDAL has trouble + parsing metadata correctly. Also, products may have multiple NoData + values, i.e.: VNP13A2 defines -15000 as well as -13000 + """ - except AttributeError: - yield None + def write_memfile(memfile, buffer: bytes | str): + vsiFile = gdal.VSIFOpenL(memfile, "wb") + try: + if isinstance(buffer, str): + buffer = str.encode(buffer) + gdal.VSIFWriteL(buffer, 1, len(buffer), vsiFile) + finally: + gdal.VSIFCloseL(vsiFile) + + datatypes = { + gdal.GDT_Byte: ("Byte", 0, 255), + gdal.GDT_UInt16: ("UInt16", 0, 65535), + gdal.GDT_Int16: ("Int16", -32768, 32768), + gdal.GDT_UInt32: ("UInt32", 0, 4294967295), + gdal.GDT_Int32: ("Int32", -2147483648, 2147483647), + } + + product = REGEX_PATTERNS["product"].findall(Path(path).name)[0] + tile = REGEX_PATTERNS["tile"].findall(Path(path).name)[0] + sds = PRODUCT_SDS_DICT[f"{product}_{sds_filter}"] + srs = PRODUCT_SRS_DICT[product] + + # Example geotransformation for h16v07 + # tuple: + # GT = (-2223901.0393329998, 926.62543305499980, 0.0, 2223901.0393329998, 0.0, -926.62543305499980): + # explained: + # GT[0] = UL x-coordinate for UL pixel: -2223901.0393329998 + # GT[1] = W-E pixel resolution / pixel width: 926.62543305499980 + # GT[2] = row rotation: 0 + # GT[3] = UL y-coordinate for UL pixel: 2223901.0393329998 + # GT[4] = column rotation: 0 + # GT[5] = N-S pixel resolution / pixel height: -926.62543305499980 + # (negative value for a north-up image). + # computed: + # GT[0] = (16 - 18) * 1200 * 926.6254330558334 = -2223901.03933 + # GT[3] = (7 - 9) * 1200 * -926.6254330558334 = 2223901.03933 + h, v = tuple([int(hv) for hv in re.split(r"[^\d]+", tile) if len(hv) > 0]) + srs_origin = srs["Origin"] + gdal_gt = ",".join( + str(factor) + for factor in [ + ((h - srs_origin["h"]["index"]) * sds["Size"][0] * srs["PixelSize"][0]) + + srs_origin["h"]["offset"], + srs["PixelSize"][0], + 0.0, + ((v - srs_origin["v"]["index"]) * sds["Size"][1] * srs["PixelSize"][1]) + + srs_origin["v"]["offset"], + 0.0, + srs["PixelSize"][1], + ] + ) + gdal_dt = datatypes[sds["DataType"]] + assert all( + (val < min(sds["ValueRange"]) or val > max(sds["ValueRange"])) + for val in sds["NoDataValue"] + ), f"Invalid Data / NoData configuration for product: {product}_{sds_filter}" + + # Setup a LUT to reclass multiple NoData values into a single (lowest): + lut = [] + for entry in sorted( + set([gdal_dt[1], gdal_dt[2], sds["ValueRange"][0], sds["ValueRange"][1]]) + ): + if entry == min(sds["ValueRange"]): + if len(lut) > 0: + lut.append(f"{entry-1}:{min(sds['NoDataValue'])}") + lut.append(f"{entry}:{entry}") + elif entry == max(sds["ValueRange"]): + lut.append(f"{entry}:{entry}") + if entry < gdal_dt[2]: + lut.append(f"{entry+1}:{min(sds['NoDataValue'])}") + elif entry == gdal_dt[1] or ( + entry == gdal_dt[2] and entry > (max(sds["ValueRange"]) + 1) + ): + lut.append(f"{entry}:{min(sds['NoDataValue'])}") + + vrt = f"""\ + + {srs['ProjectionWKT']} + {gdal_gt} + + {min(sds['NoDataValue'])} + + HDF5:{path}:{sds['Name']} + 1 + + + + {lut} + + + + """ + fn_vsi = f"/vsimem/{str(uuid.uuid4())}.vrt" + write_memfile(fn_vsi, vrt) + ds: gdal.Dataset | None = gdal.Open(fn_vsi) + try: + assert ds is not None + yield ds + finally: + ds = None + gdal.Unlink(fn_vsi) diff --git a/modape/modis/smooth.py b/modape/modis/smooth.py index c3634d24..9b0f73ec 100644 --- a/modape/modis/smooth.py +++ b/modape/modis/smooth.py @@ -3,22 +3,34 @@ This file contains the class representing a smoothed MODIS HDF5 file. """ + +import logging + # pylint: disable=import-error from array import array +from collections import namedtuple from datetime import datetime, timedelta -import logging from pathlib import Path import h5py +import numpy as np +from osgeo import gdal, osr + from modape.constants import TEMPINT_LABELS from modape.exceptions import HDF5CreationError, HDF5WriteError from modape.modis.io import HDF5Base from modape.utils import DateHelper, fromjulian -from modape.whittaker import lag1corr, ws2d, ws2dp, ws2doptv, ws2doptvp # pylint: disable=no-name-in-module -import numpy as np +from modape.whittaker import ( # pylint: disable=no-name-in-module + lag1corr, + ws2d, + ws2doptv, + ws2doptvp, + ws2dp, +) log = logging.getLogger(__name__) + class ModisSmoothH5(HDF5Base): """Class representing HDF5 file containing smoothed MODIS data. @@ -28,11 +40,9 @@ class ModisSmoothH5(HDF5Base): to the smooth HDF5 file. """ - def __init__(self, - rawfile: str, - targetdir: str, - startdate: str = None, - tempint: int = None) -> None: + def __init__( + self, rawfile: str, targetdir: str, startdate: str = None, tempint: int = None + ) -> None: """Initialize instance of `ModisSmoothH5` class. To create an instance of `ModisSmoothH5`, a full path to @@ -77,28 +87,32 @@ def __init__(self, # Filename for smoothed HDF5 rawfile_trunk = self.rawfile.name.split(".") - smoothfile_trunk = ".".join( - rawfile_trunk[:-2] + \ - ["tx"+txflag] + \ - rawfile_trunk[-2:-1] - ) + smoothfile_trunk = ".".join(rawfile_trunk[:-2] + ["tx" + txflag] + rawfile_trunk[-2:-1]) filename = f"{targetdir}/{smoothfile_trunk}.h5" super().__init__(filename=filename) - def create(self): + def write_sgrid(self, sgrid: str): + if self.exists: + raise NotImplementedError("Updating S-grid not implemented") + return self.create(sgrid) + + def create(self, sgrid: str = None): """Creates HDF5 file. If the corresponding HDF5 is not found in the target directory, it's created. + Args: + sgrid: import sgrid dataset from provided file + Raises: HDF5CreationError: If creation of HDF5 file fails. """ # Try reading info from raw HDF5 - #pylint: disable=R1721 + # pylint: disable=R1721 with h5py.File(self.rawfile, "r") as h5f_raw: dset = h5f_raw.get("data") rawshape = dset.shape @@ -106,53 +120,106 @@ def create(self): datatype = dset.dtype compression = dset.compression raw_dates_all = [x.decode() for x in h5f_raw.get("dates")[...]] - raw_attrs = {key:value for key, value in dset.attrs.items()} + raw_attrs = {key: value for key, value in dset.attrs.items()} if self.temporalresolution is None: tempres = raw_attrs["temporalresolution"] else: tempres = self.temporalresolution - dates = DateHelper(rawdates=raw_dates_all, - rtres=int(raw_attrs["temporalresolution"]), - stres=int(tempres), - start=self.startdate) + if len("".join(raw_dates_all)) > 0: + dates = DateHelper( + rawdates=raw_dates_all, + rtres=int(raw_attrs["temporalresolution"]), + stres=int(tempres), + start=self.startdate, + ) + else: + + class _EmptyDateHelper: + target = [] + + dates = _EmptyDateHelper() dates_length = len(dates.target) - nrows = raw_attrs["RasterYSize"] - ncols = raw_attrs["RasterXSize"] + nrows = int(raw_attrs["RasterYSize"]) + ncols = int(raw_attrs["RasterXSize"]) try: with h5py.File(self.filename, "x", libver="latest") as h5f: - dset = h5f.create_dataset("data", - shape=(rawshape[0], dates_length), - dtype=datatype, maxshape=(rawshape[0], None), - chunks=rawchunks, - compression=compression, - fillvalue=raw_attrs["nodata"]) - - h5f.create_dataset("sgrid", - shape=(nrows*ncols,), - dtype="float32", - maxshape=(nrows*ncols,), - chunks=(rawchunks[0],), - compression=compression) - - h5f.create_dataset("dates", - shape=(dates_length,), - maxshape=(None,), - dtype="S8", - compression=compression, - data=np.array(dates.target, dtype="S8")) - - h5f.create_dataset("rawdates", - shape=(len(raw_dates_all),), - maxshape=(None,), - dtype="S8", - compression=compression, - data=np.array(raw_dates_all, dtype="S8")) + dset = h5f.create_dataset( + "data", + shape=(rawshape[0], dates_length), + dtype=datatype, + maxshape=(rawshape[0], None), + chunks=rawchunks, + compression=compression, + fillvalue=raw_attrs["nodata"], + ) + + if sgrid is None: + h5f.create_dataset( + "sgrid", + shape=(nrows * ncols,), + dtype="float32", + maxshape=(nrows * ncols,), + chunks=(rawchunks[0],), + compression=compression, + ) + else: + ds: gdal.Dataset = gdal.Open(sgrid) + try: + # Check projection: + assert ( + osr.SpatialReference(raw_attrs["projection"]).IsSame(ds.GetSpatialRef()) + == 1 + ) + h5gt = tuple(raw_attrs["geotransform"]) + sgt = ds.GetGeoTransform() + # Check pixel size: + assert np.isclose(h5gt[1], sgt[1]) + assert np.isclose(h5gt[5], sgt[5]) + xoff = round((h5gt[0] - sgt[0]) / h5gt[1]) + yoff = round((h5gt[3] - sgt[3]) / h5gt[5]) + # Check slicing window + assert xoff >= 0 + assert yoff >= 0 + assert (xoff + ncols) <= ds.RasterXSize + assert (yoff + nrows) <= ds.RasterYSize + band: gdal.Band = ds.GetRasterBand(1) + data: np.ndarray = band.ReadAsArray( + xoff=xoff, yoff=yoff, win_xsize=ncols, win_ysize=nrows + ).flatten() + assert data.shape == (nrows * ncols,) + h5f.create_dataset( + "sgrid", + data=data, + maxshape=(nrows * ncols,), + chunks=(rawchunks[0],), + compression=compression, + ) + finally: + ds = None + + h5f.create_dataset( + "dates", + shape=(dates_length,), + maxshape=(None,), + dtype="S8", + compression=compression, + data=np.array(dates.target, dtype="S8"), + ) + + h5f.create_dataset( + "rawdates", + shape=(len(raw_dates_all),), + maxshape=(None,), + dtype="S8", + compression=compression, + data=np.array(raw_dates_all, dtype="S8"), + ) raw_attrs["temporalresolution"] = tempres dset.attrs.update(raw_attrs) @@ -162,14 +229,15 @@ def create(self): log.error("Error creating %s", str(self.filename)) raise HDF5CreationError(f"Error creating {str(self.filename)}!") - def smooth(self, - svalue: float = None, - p: float = None, - soptimize: bool = None, - srange: np.ndarray = None, - nsmooth: int = 0, - nupdate: int = 0, - ) -> None: + def smooth( + self, + svalue: float = None, + p: float = None, + soptimize: bool = None, + srange: np.ndarray = None, + nsmooth: int = 0, + nupdate: int = 0, + ) -> None: """Applies Whittaker smoother to the data. This method reads raw data from the raw HDF5 and applies the Whittaker @@ -217,17 +285,21 @@ def smooth(self, if not isinstance(srange, np.ndarray): raise ValueError("srange needs to be supplied as numpy array") - log.info("Runnig smoother on %s", str(self.filename)) - processing_starttime = datetime.now().strftime("%Y-%m-%d %H:%M:%S") log.debug("Reading metadata from HDF5s") with h5py.File(self.rawfile, "r") as h5f_open: + raw_dates_all = [x.decode() for x in h5f_open.get("dates")] + if len("".join(raw_dates_all)) == 0: + log.info("File %s is empty; nothing to smooth", str(self.filename)) + return + else: + log.info("Running smoother on %s", str(self.filename)) raw_ds = h5f_open.get("data") raw_shape = raw_ds.shape raw_chunks = raw_ds.chunks raw_attrs = dict(raw_ds.attrs.items()) - raw_dates_all = [x.decode() for x in h5f_open.get("dates")[...]] + raw_dates_nsmooth = raw_dates_all[-nsmooth:] with h5py.File(self.filename, "r+") as h5f_open: @@ -238,14 +310,18 @@ def smooth(self, temporalresolution = smt_attrs["temporalresolution"] tshift = int(smt_attrs["tshift"]) - dates = DateHelper(rawdates=raw_dates_all, - rtres=int(raw_attrs["temporalresolution"]), - stres=int(temporalresolution), - start=self.startdate) + dates = DateHelper( + rawdates=raw_dates_all, + rtres=int(raw_attrs["temporalresolution"]), + stres=int(temporalresolution), + start=self.startdate, + ) # Resize if date list is bigger than shape of smoothed data if dates.target_length > smt_shape[1]: - log.debug("Resizing dataset! Current %s, required %s", smt_shape[1], dates.target_length) + log.debug( + "Resizing dataset! Current %s, required %s", smt_shape[1], dates.target_length + ) smt_dates = h5f_open.get("dates") smt_dates.resize((dates.target_length,)) smt_ds.resize((smt_shape[0], dates.target_length)) @@ -288,7 +364,7 @@ def smooth(self, xoffset=read_offset, xchunk=10, arr_out=arr_raw, - ) + ) if soptimize or svalue is None: sgrid_generator = self.read_chunked(dataset="sgrid") @@ -305,10 +381,10 @@ def smooth(self, log.debug("Chunk %s", chunk_counter) # create weights - wts = (arr_raw_chunk != nodata) + wts = arr_raw_chunk != nodata wts = wts.astype("uint8") - ndix = np.sum(wts, 1) >= (arr_raw.shape[1] * 0.2) # 20%+ data + ndix = np.sum(wts, 1) >= (arr_raw.shape[1] * 0.2) # 20%+ data map_index = np.where(ndix)[0] if sgrid_generator: @@ -331,27 +407,25 @@ def smooth(self, sr = srange if p is not None: - arr_raw[ix, :], arr_sgrid[ix] = ws2doptvp(y=arr_raw[ix, :], - w=w, - llas=array("d", sr), - p=p) + arr_raw[ix, :], arr_sgrid[ix] = ws2doptvp( + y=arr_raw[ix, :], w=w, llas=array("d", sr), p=p + ) else: - arr_raw[ix, :], arr_sgrid[ix] = ws2doptv(y=arr_raw[ix, :], - w=w, - llas=array("d", sr)) + arr_raw[ix, :], arr_sgrid[ix] = ws2doptv( + y=arr_raw[ix, :], w=w, llas=array("d", sr) + ) else: if svalue is None: s = 10 ** arr_sgrid[ix] else: - s = 10 ** svalue + s = 10**svalue if p is None: arr_raw[ix, :] = ws2d(y=arr_raw[ix, :], lmda=s, w=w) else: arr_raw[ix, :] = ws2dp(y=arr_raw[ix, :], lmda=s, w=w, p=p) - if self.tinterpolate: arr_smt[ix, :] = self._apply_tinterpolate( @@ -368,7 +442,7 @@ def smooth(self, arr_in=arr_smt, xoffset=write_offset, xchunk=10, - yoffset=chunk_counter*raw_chunks[0], + yoffset=chunk_counter * raw_chunks[0], ) if not write_check: @@ -383,7 +457,7 @@ def smooth(self, write_check = self.write_chunk( dataset="sgrid", arr_in=arr_sgrid, - yoffset=chunk_counter*raw_chunks[0], + yoffset=chunk_counter * raw_chunks[0], ) if not write_check: @@ -423,9 +497,7 @@ def smooth(self, except KeyError: pass - processing_info.update({ - "processingtimestamp": processing_starttime - }) + processing_info.update({"processingtimestamp": processing_starttime}) smt_ds.attrs.update(processing_info) @@ -436,36 +508,37 @@ def smooth(self, dates_ds[...] = np.array(raw_dates_all, dtype="S8") - @property - def last_collected(self): + def last_collected(self) -> str: """Last collected date in raw file""" - with h5py.File(self.rawfile, "r") as h5f_raw: - dates = h5f_raw.get("dates") - last_date = dates[-1].decode() + try: + with h5py.File(self.rawfile, "r") as h5f_raw: + dates = h5f_raw.get("dates") + last_date = dates[-1].decode() - return last_date + return last_date + except Exception: + return "" @property def last_smoothed(self): """Last smoothed date in file""" assert self.exists, "File doesn't exist!" - with h5py.File(self.filename, "r") as h5_open: - dates = h5_open.get("rawdates") - last_date = dates[-1].decode() + try: + with h5py.File(self.filename, "r") as h5_open: + dates = h5_open.get("rawdates") + last_date = dates[-1].decode() - return last_date + return last_date + except Exception: + return "" - #pylint: disable=C0103 + # pylint: disable=C0103 @staticmethod def _apply_tinterpolate(z1, nodata, vector_daily, dix): z2 = vector_daily.copy() z2[z2 != nodata] = z1 - z2[...] = ws2d( - y=z2, - lmda=0.0001, - w=np.array((z2 != nodata) * 1, dtype="double") - ) + z2[...] = ws2d(y=z2, lmda=0.0001, w=np.array((z2 != nodata) * 1, dtype="double")) return z2[dix] diff --git a/modape/modis/window.py b/modape/modis/window.py index fde4a9da..6284206e 100644 --- a/modape/modis/window.py +++ b/modape/modis/window.py @@ -5,24 +5,28 @@ Author: Valentin Pesendorfer, April 2019 """ -# pylint: disable=import-error,R0903,W0706 -from contextlib import contextmanager + import datetime import logging + +# pylint: disable=import-error,R0903,W0706 +from contextlib import contextmanager from os.path import exists from pathlib import Path -from typing import Union, List +from typing import Generator, List, Union from uuid import uuid4 -from osgeo import gdal import h5py +import numpy as np +from osgeo import gdal + from modape.constants import DATE_LABELS from modape.exceptions import HDF5MosaicError from modape.utils import fromjulian -import numpy as np log = logging.getLogger(__name__) + class ModisMosaic(object): """Class for subsetting or mosaicing HDF5 files. @@ -31,8 +35,7 @@ class ModisMosaic(object): The generated data will exported as GeoTIFF using the Python GDAL bindings. """ - def __init__(self, - input_files: List[str]) -> None: + def __init__(self, input_files: List[str]) -> None: """Initialize instance of `ModisMosaic` class. If multiple HDF5 files are specified as `input_files`, a mosaic of the files @@ -53,35 +56,42 @@ def __init__(self, for file in input_files: with h5py.File(file, "r") as h5f_open: dates = h5f_open.get("dates")[...] + if len("".join(x.decode() for x in dates)) == 0: + continue if reference is None: reference = dates + continue try: np.testing.assert_array_equal(dates, reference) except AssertionError: - raise HDF5MosaicError("Teporal axis of HDF5 input files incompatible for Mosaic") + raise HDF5MosaicError( + "Teporal axis of HDF5 input files incompatible for Mosaic" + ) self.files = input_files + dates = reference else: self.files = [input_files] with h5py.File(input_files, "r") as h5f_open: dates = h5f_open.get("dates")[...] self.dates = [fromjulian(x.decode()) for x in dates] - def generate_mosaics(self, - dataset, - targetdir, - target_srs: str = None, - aoi: List[float] = None, - overwrite: bool = False, - force_doy: bool = False, - prefix: str = None, - start: datetime.date = None, - stop: datetime.date = None, - clip_valid: bool = False, - round_int: int = None, - last_smoothed: str = None, - **kwargs, - ) -> List: + def generate_mosaics( + self, + dataset, + targetdir, + target_srs: str = None, + aoi: List[float] = None, + overwrite: bool = False, + force_doy: bool = False, + prefix: str = None, + start: datetime.date = None, + stop: datetime.date = None, + clip_valid: bool = False, + round_int: int = None, + last_smoothed: str = None, + **kwargs, + ) -> List: """Generate GeoTIFF mosaics/subsets. This method is creating a GeoTiff mosaic/subsets from the HDF5 files @@ -220,84 +230,115 @@ def generate_mosaics(self, log.info("Processing %s", filename) rasters = [] - for file in self.files: - rasters.append( - self._get_raster( - file, - dataset, - clip_valid, - round_int=round_int, - ix=ii, - last_smoothed=last_smoothed + try: + for file in self.files: + rasters.append( + self._get_raster( + file, + dataset, + clip_valid, + round_int=round_int, + ix=ii, + last_smoothed=last_smoothed, + ) ) - ) - translate_options = { - "outputType": attrs["dtype"], - "noData": attrs["nodata"], - "outputSRS": target_srs, - "projWin": aoi - } - - translate_options.update(kwargs) - - if aoi is not None and all(output_res): - translate_options.update({ - "outputBounds": aoi, - "width": abs(int(round((aoi[2] - aoi[0]) / output_res[0]))), - "height": abs(int(round((aoi[3] - aoi[1]) / output_res[1]))) - }) - - with self._mosaic(rasters, - target_srs=target_srs, - resample=resample, - dtype=dtype, - nodata=nodata, - resolution=output_res, - gdal_multithread=gdal_multithread, - ) as warped_mosaic: - - log.debug("Writing to disk") - - write_check = self._translate( - src=warped_mosaic, - dst=filename, - **translate_options - ) + translate_options = {"outputType": attrs["dtype"], "noData": attrs["nodata"]} + if target_srs is not None: + translate_options["outputSRS"] = target_srs + if aoi is not None: + translate_options["projWin"] = aoi + + translate_options.update(kwargs) + + if aoi is not None and all(output_res): + translate_options.update( + { + "outputBounds": aoi, + "width": abs(int(round((aoi[2] - aoi[0]) / output_res[0]))), + "height": abs(int(round((aoi[3] - aoi[1]) / output_res[1]))), + } + ) - try: - assert write_check, f"Error writing {filename}" - mosaics.append(filename) - except: - raise - finally: - _ = [gdal.Unlink(x) for x in rasters] + with self._mosaic( + rasters, + target_srs=target_srs, + resample=resample, + dtype=dtype, + nodata=nodata, + resolution=output_res, + gdal_multithread=gdal_multithread, + ) as warped_mosaic: + + log.debug("Writing to disk") + + write_check = self._translate( + src=warped_mosaic, dst=filename, **translate_options + ) + + try: + assert write_check, f"Error writing {filename}" + mosaics.append(filename) + except: + raise + + finally: + for x in rasters: + gdal.Unlink(x) return mosaics @staticmethod - def _get_raster(file: Union[Path, str], - dataset: str, - clip_valid: bool, - round_int: int, - ix: int = None, - last_smoothed: str = None) -> str: + def _get_raster( + file: Union[Path, str], + dataset: str, + clip_valid: bool, + round_int: int, + ix: int = None, + last_smoothed: str = None, + ) -> str: + + @contextmanager + def create_raster( + fn: str, attrs, dtype, nodata_attr=None, fill_nodata=False + ) -> Generator[gdal.Band, None, None]: + driver: gdal.Driver = gdal.GetDriverByName("GTiff") + + raster: gdal.Dataset = driver.Create( + fn, + int(attrs["RasterXSize"]), + int(attrs["RasterYSize"]), + 1, + int(dtype), + ) + raster.SetGeoTransform(attrs["geotransform"]) + raster.SetProjection(attrs["projection"]) + + raster_band: gdal.Band = raster.GetRasterBand(1) + + if nodata_attr is not None: + raster_band.SetNoDataValue(int(attrs[nodata_attr])) + if fill_nodata: + raster_band.Fill(int(attrs[nodata_attr])) + + try: + yield raster_band + finally: + raster_band.FlushCache() + raster.FlushCache() + raster_band, raster, driver = (None, None, None) if dataset not in ["data", "sgrid"]: - raise NotImplementedError("_get_raster only implemented for datasetds 'data' and 'sgrid'") + raise NotImplementedError( + "_get_raster only implemented for datasetds 'data' and 'sgrid'" + ) with h5py.File(file, "r") as h5f_open: - if last_smoothed is not None: - dates = h5f_open.get("rawdates") - last_date = dates[-1].decode() - assert last_smoothed == last_date, \ - f"Last smoothed date in {file} is {last_date} not {last_smoothed}" - + fn = f"/vsimem/{uuid4()}.tif" ds = h5f_open.get(dataset) assert ds, "Dataset doesn't exist!" dataset_shape = ds.shape - if len(dataset_shape) < 2: sgrid = True attrs = dict(h5f_open.get("data").attrs) @@ -308,100 +349,92 @@ def _get_raster(file: Union[Path, str], sgrid = False attrs = dict(ds.attrs) - chunks = ds.chunks - dtype = ds.dtype.num - - driver = gdal.GetDriverByName("GTiff") - fn = f"/vsimem/{uuid4()}.tif" - - raster = driver.Create( - fn, - int(attrs["RasterXSize"]), - int(attrs["RasterYSize"]), - 1, - int(dtype), - ) - raster.SetGeoTransform(attrs["geotransform"]) - raster.SetProjection(attrs["projection"]) - - raster_band = raster.GetRasterBand(1) - - if not sgrid: - raster_band.SetNoDataValue(int(attrs["nodata"])) + dates = [x.decode() for x in h5f_open.get("rawdates")] + if len("".join(dates)) > 0: + if last_smoothed is not None: + last_date = dates[-1] + assert ( + last_smoothed == last_date + ), f"Last smoothed date in {file} is {last_date} not {last_smoothed}" + else: + with create_raster( + fn, attrs, ds.dtype.num, "nodata" if not sgrid else None, True + ) as raster_band: + pass + return fn - block_gen = ((x, x//attrs["RasterXSize"]) for x in range(0, dataset_shape[0], chunks[0])) + chunks = ds.chunks + with create_raster( + fn, attrs, ds.dtype.num, "nodata" if not sgrid else None + ) as raster_band: - for yblock_ds, yblock in block_gen: + block_gen = ( + (x, x // attrs["RasterXSize"]) for x in range(0, dataset_shape[0], chunks[0]) + ) - if sgrid: - arr = ds[yblock_ds:(yblock_ds+chunks[0])] - else: - arr = ds[yblock_ds:(yblock_ds+chunks[0]), ix] + for yblock_ds, yblock in block_gen: - if clip_valid: - vmin, vmax = attrs["valid_range"] - arr = np.clip(arr, vmin, vmax, out=arr, where=arr != attrs["nodata"]) + if sgrid: + arr = ds[yblock_ds : (yblock_ds + chunks[0])] + else: + arr = ds[yblock_ds : (yblock_ds + chunks[0]), ix] - if round_int is not None: - arr = np.round(arr, round_int) + if clip_valid: + vmin, vmax = attrs["valid_range"] + arr = np.clip(arr, vmin, vmax, out=arr, where=arr != attrs["nodata"]) - raster_band.WriteArray( - arr.reshape(-1, int(attrs["RasterXSize"])), - xoff=0, - yoff=int(yblock) - ) + if round_int is not None: + arr = np.round(arr, round_int) - raster_band.FlushCache() - raster.FlushCache() - raster_band, raster, driver = (None, None, None) + raster_band.WriteArray( + arr.reshape(-1, int(attrs["RasterXSize"])), xoff=0, yoff=int(yblock) + ) return fn @staticmethod @contextmanager - def _mosaic(input_rasters, - target_srs, - resample, - resolution, - dtype, - nodata, - gdal_multithread): + def _mosaic(input_rasters, target_srs, resample, resolution, dtype, nodata, gdal_multithread): vrt_tempname = f"/vsimem/{uuid4()}.vrt" - vrt = gdal.BuildVRT(vrt_tempname, input_rasters) - assert vrt - vrt.FlushCache() - vrt = None - log.debug("Created VRT") - - wrp_tempname = f"/vsimem/{uuid4()}.tif" - wopt = gdal.WarpOptions( - dstSRS=target_srs, - outputType=dtype, - resampleAlg=resample, - xRes=resolution[0], - yRes=resolution[1], - srcNodata=nodata, - dstNodata=nodata, - multithread=gdal_multithread, - ) - - wrp = gdal.Warp(wrp_tempname, vrt_tempname, options=wopt) - assert wrp - - log.debug("Created WRP") - - yield wrp - - log.debug("Performing Cleanup") + try: + vrt = gdal.BuildVRT(vrt_tempname, input_rasters) + assert vrt + vrt.FlushCache() + log.debug("Created VRT") - wrp = None - vrt = None + if target_srs is None: + yield vrt + else: + wrp_tempname = f"/vsimem/{uuid4()}.tif" + try: + wopt = gdal.WarpOptions( + dstSRS=target_srs, + outputType=dtype, + resampleAlg=resample, + xRes=resolution[0], + yRes=resolution[1], + srcNodata=nodata, + dstNodata=nodata, + multithread=gdal_multithread, + ) - rc1 = gdal.Unlink(vrt_tempname) - rc2 = gdal.Unlink(wrp_tempname) - if rc1 != 0 or rc2 != 0: - log.warning("Received return codes [%s, %s] while removing MemRasters", rc1, rc2) + wrp = gdal.Warp(wrp_tempname, vrt_tempname, options=wopt) + assert wrp + log.debug("Created WRP") + yield wrp + finally: + log.debug("Performing Cleanup") + + wrp = None + if (rc := gdal.Unlink(wrp_tempname)) != 0: + log.warning( + "Received return codes %s while removing in-memory warped raster", rc + ) + finally: + vrt = None + if (rc := gdal.Unlink(vrt_tempname)) != 0: + log.warning("Received return codes %s while removing in-memory warped raster", rc) @staticmethod def _translate(src, dst, **kwargs): @@ -414,7 +447,19 @@ def _translate(src, dst, **kwargs): return True @staticmethod - def _get_metadata(reference, dataset): + def _get_metadata(reference: Union[Path, str], dataset: str): + """Get metadata + + Args: + reference (str): Path to .H5 file + dataset (type): Dataset to mosaic (data or sgrid). + """ + + if dataset not in ["data", "sgrid"]: + raise NotImplementedError( + "_get_raster only implemented for datasetds 'data' and 'sgrid'" + ) + with h5py.File(reference, "r") as h5f_open: ds = h5f_open.get(dataset) assert ds diff --git a/modape/scripts/modis_collect.py b/modape/scripts/modis_collect.py index bdcf8b8a..b05a6d40 100644 --- a/modape/scripts/modis_collect.py +++ b/modape/scripts/modis_collect.py @@ -1,41 +1,71 @@ #!/usr/bin/env python # pylint: disable=broad-except,E0401 """modis_collect.py: Collect raw MODIS data into HDF5 file.""" -from concurrent.futures import ProcessPoolExecutor, wait -from datetime import datetime +import hashlib import logging import multiprocessing as mp -from pathlib import Path import re import sys +from concurrent.futures import ProcessPoolExecutor, wait +from datetime import datetime +from itertools import chain +from pathlib import Path import click + from modape.constants import REGEX_PATTERNS from modape.modis import ModisRawH5 + @click.command() @click.argument("src_dir", type=click.Path(dir_okay=True, resolve_path=True)) -@click.option("-d", "--targetdir", type=click.Path(dir_okay=True, writable=True, resolve_path=True), - help="Destination for raw HDF5 files") -@click.option('-x', '--compression', type=click.STRING, default='gzip', help='Compression for HDF5 files') -@click.option('--vam-code', type=click.STRING, help='VAM code for dataset to process') -@click.option('--interleave', is_flag=True, help='Interleave MOD13 & MYD13 products to MXD (only works for VIM!)') -@click.option('--parallel-tiles', type=click.INT, default=1, help='Number of tiles processed in parallel (default = 1)') -@click.option('--cleanup', is_flag=True, help='Remove collected HDF files') -@click.option('--force', is_flag=True, help='Force collect process not failing on corrupt inputs') -@click.option('--last-collected', type=click.DateTime(formats=['%Y%j']), help='Last collected date in julian format (YYYYDDD - %Y%j)') +@click.option( + "-d", + "--targetdir", + type=click.Path(dir_okay=True, writable=True, resolve_path=True), + help="Destination for raw HDF5 files", +) +@click.option( + "-x", "--compression", type=click.STRING, default="gzip", help="Compression for HDF5 files" +) +@click.option("--vam-code", type=click.STRING, help="VAM code for dataset to process") +@click.option( + "--interleave", + is_flag=True, + help="Interleave MOD13 & MYD13 products to MXD (only works for VIM!)", +) +@click.option( + "--parallel-tiles", + type=click.INT, + default=1, + help="Number of tiles processed in parallel (default = 1)", +) +@click.option("--cleanup", is_flag=True, help="Remove collected HDF files") +@click.option("--force", is_flag=True, help="Force collect process not failing on corrupt inputs") +@click.option( + "--last-collected", + type=click.DateTime(formats=["%Y%j"]), + help="Last collected date in julian format (YYYYDDD - %Y%j)", +) @click.option("--tiles-required", type=click.STRING, help="Required tiles - supplied as csv list") -def cli(src_dir: str, - targetdir: str, - compression: str, - vam_code: str, - interleave: bool, - parallel_tiles: int, - cleanup: bool, - force: bool, - last_collected: datetime, - tiles_required: str, - ) -> None: +@click.option( + "--report-collected", + is_flag=True, + help="Report collected files to stdout", +) +def cli( + src_dir: str, + targetdir: str, + compression: str, + vam_code: str, + interleave: bool, + parallel_tiles: int, + cleanup: bool, + force: bool, + last_collected: datetime, + tiles_required: str, + report_collected: bool, +) -> None: """Collect raw MODIS hdf files into a raw MODIS HDF5 file. All MODIS HDF files within srcdir will be collected into a raw MODIS HDF5 file, corresponding to product type and tile (if not global). @@ -79,10 +109,10 @@ def cli(src_dir: str, # parse tiles required and check if OK if tiles_required is not None: - tile_regxp = re.compile(r'^h\d{2}v\d{2}$') + tile_regxp = re.compile(r"^h\d{2}v\d{2}$") _tiles = [] - for tile_req in tiles_required.split(','): + for tile_req in tiles_required.split(","): assert re.match(tile_regxp, tile_req.lower()) _tiles.append(tile_req.lower()) @@ -90,7 +120,7 @@ def cli(src_dir: str, click.echo("Starting MODIS COLLECT!") - hdf_files = list(input_dir.glob('*hdf')) + hdf_files = list(chain(input_dir.glob("*.hdf"), input_dir.glob("*.h5"))) if not hdf_files: raise ValueError(f"NO HDF files found in src_dir {src_dir}!") @@ -116,7 +146,7 @@ def cli(src_dir: str, tile = REGEX_PATTERNS["tile"].findall(file.name) if not tile: - tile = [''] + tile = [""] tiles.append(*tile) if tiles_required is not None: @@ -124,12 +154,16 @@ def cli(src_dir: str, for product in set(products): log.debug("Product: %s", product) - datetiles = [".".join(x.name.split(".")[1:3]) for x in hdf_files if re.match("^" + product, x.name)] + datetiles = [ + ".".join(x.name.split(".")[1:3]) + for x in hdf_files + if re.match("^" + product, x.name) + ] product_tiles = {x.split(".")[1] for x in datetiles} timestamps = [] - if product_tiles != tiles_required: - raise ValueError("Tiles for product %s don't match tiles-required!" % product) + if not product_tiles.issuperset(tiles_required): + raise ValueError("Tiles for product %s don't include tiles-required!" % product) for reqtile in tiles_required: timestamps.append({x.split(".")[0] for x in datetiles if reqtile in x}) @@ -140,7 +174,12 @@ def cli(src_dir: str, log.debug("Timestamps OK for %s", product) groups = [".*".join(x) for x in zip(products, tiles, versions)] - groups = list({re.sub('(M.{1})(D.+)', 'M.'+'\\2', x) if REGEX_PATTERNS["VIM"].match(x) else x for x in groups}) # Join MOD13/MYD13 + groups = list( + { + re.sub(r"(M.{1})(D.+)", "M." + "\\2", x) if REGEX_PATTERNS["VIM"].match(x) else x + for x in groups + } + ) # Join MOD13/MYD13 groups.sort() log.debug("Parsed groups: %s", groups) @@ -149,19 +188,25 @@ def cli(src_dir: str, collected = [] for group in groups: - group_pattern = re.compile(group + '.*hdf') + group_pattern = re.compile(group + ".*hdf") group_files = [str(x) for x in hdf_files if group_pattern.match(x.name)] + if len(group_files) == 0: + # Fallback on processing .h5 files: + group_pattern = re.compile(group + ".*h5") + group_files = [str(x) for x in hdf_files if group_pattern.match(x.name)] + assert len(group_files) > 0 group_files.sort() - _raw_h5 = ModisRawH5(files=group_files, - targetdir=targetdir, - vam_product_code=vam_code, - interleave=interleave) + _raw_h5 = ModisRawH5( + files=group_files, targetdir=targetdir, vam_product_code=vam_code, interleave=interleave + ) if last_collected is not None: if not _raw_h5.exists: - raise ValueError("Output H5 %s does not exist! Can't check last-collected!" % _raw_h5.filename) + raise ValueError( + "Output H5 %s does not exist! Can't check last-collected!" % _raw_h5.filename + ) last_collected_infile = _raw_h5.last_collected @@ -169,17 +214,27 @@ def cli(src_dir: str, raise ValueError(f"No last_collected recorded in {_raw_h5.filename}") if not last_collected == last_collected_infile: - raise ValueError(f"Last collected date in file is {last_collected_infile} not {last_collected}") + raise ValueError( + f"Last collected date in file is {last_collected_infile} not {last_collected}" + ) - to_process.update({ - group: { - "raw_h5": _raw_h5, - "compression": compression, - "force": force, + to_process.update( + { + group: { + "raw_h5": _raw_h5, + "compression": compression, + "force": force, + "create_only": ( + False + if tiles_required is None + else group.split(".")[2].strip("*") not in tiles_required + ), + } } - }) + ) log.debug("Start processing!") + ts_processing_start = datetime.now().isoformat() if parallel_tiles > 1: log.debug("Processing %s parallel tiles!", parallel_tiles) @@ -196,9 +251,7 @@ def cli(src_dir: str, log.debug("Submitting %s", group) - futures.append( - executor.submit(_worker, **parameters) - ) + futures.append(executor.submit(_worker, **parameters)) _ = wait(futures) @@ -216,9 +269,7 @@ def cli(src_dir: str, log.debug("Processing %s", group) - collected.extend( - _worker(**parameters) - ) + collected.extend(_worker(**parameters)) if cleanup and collected: tracefile = targetdir.joinpath(".collected") @@ -226,28 +277,44 @@ def cli(src_dir: str, log.debug("Cleaning up collected files") for to_remove in collected: to_remove_obj = Path(to_remove) - tf_open.write(to_remove_obj.name + "\n") + sha256_hash = hashlib.sha256() + with open(to_remove_obj, "rb") as f: + chunk = f.read(65536) + while chunk: + sha256_hash.update(chunk) + chunk = f.read(65536) + tf_open.write( + f"{to_remove_obj.name};{sha256_hash.hexdigest().lower()};{ts_processing_start}\n" + ) log.debug("Removing %s", to_remove) to_remove_obj.unlink() + if report_collected and collected: + for to_report in collected: + click.echo(f"Collected: {to_report}") + click.echo("MODIS COLLECT completed!") -def _worker(raw_h5, compression, force): + +def _worker(raw_h5: ModisRawH5, compression: str, force: bool, create_only: bool): if not raw_h5.exists: - raw_h5.create(compression=compression) + raw_h5.create(compression=compression, pre_allocate= not create_only) - collected = raw_h5.update(force=force) + if create_only: + return raw_h5.files + else: + return raw_h5.update(force=force) - return collected def cli_wrap(): """Wrapper for cli""" if len(sys.argv) == 1: - cli.main(['--help']) + cli.main(["--help"]) else: - cli() #pylint: disable=E1120 + cli() # pylint: disable=E1120 + -if __name__ == '__main__': +if __name__ == "__main__": cli_wrap() diff --git a/modape/scripts/modis_download.py b/modape/scripts/modis_download.py index 3933872a..daf5d6f4 100644 --- a/modape/scripts/modis_download.py +++ b/modape/scripts/modis_download.py @@ -9,48 +9,76 @@ from typing import List import click + from modape.exceptions import TargetNotEmptyError from modape.modis import ModisQuery + @click.command() @click.argument("products", nargs=-1, type=click.STRING) -@click.option("--roi", type=click.STRING, help="Region of interest. Either LON,LAT or xmin,ymin,xmax,ymax") -@click.option("-b", "--begin-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date for query") -@click.option("-e", "--end-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="End date for query") -@click.option("-d", "--targetdir", type=click.Path(dir_okay=True, writable=True, resolve_path=True), - help="Destination directory for downloaded files") -@click.option("--target-empty", is_flag=True, help="Fail if there are hdf files in the target directory") +@click.option( + "--roi", type=click.STRING, help="Region of interest. Either LON,LAT or xmin,ymin,xmax,ymax" +) +@click.option( + "-b", "--begin-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date for query" +) +@click.option( + "-e", "--end-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="End date for query" +) +@click.option( + "-d", + "--targetdir", + type=click.Path(dir_okay=True, writable=True, resolve_path=True), + help="Destination directory for downloaded files", +) +@click.option( + "--target-empty", is_flag=True, help="Fail if there are hdf files in the target directory" +) @click.option("--tile-filter", type=click.STRING, help="Filter tiles - supplied as csv list") @click.option("--username", type=click.STRING, help="Earthdata username") @click.option("--password", type=click.STRING, help="Earthdata password") -@click.option("--match-begin", is_flag=True, help="Don't allow files with timestamps outside of provided date(s)") +@click.option( + "--match-begin", + is_flag=True, + help="Don't allow files with timestamps outside of provided date(s)", +) @click.option("--print-results", is_flag=True, help="Print results to console") @click.option("--download", is_flag=True, help="Download data") @click.option("--overwrite", is_flag=True, help="Overwrite existing files") @click.option("--robust", is_flag=True, help="Perform robust download") -@click.option("--max-retries", type=click.INT, help="Max number of retries for downloading", default=-1) +@click.option( + "--max-retries", type=click.INT, help="Max number of retries for downloading", default=-1 +) @click.option("--multithread", is_flag=True, help="Use multiple threads for downloading") @click.option("--nthreads", type=click.INT, help="Number of threads to use", default=4) @click.option("-c", "--collection", type=click.STRING, default="006", help="MODIS collection") -def cli(products: List[str], - begin_date: datetime.datetime, - end_date: datetime.datetime, - targetdir: str, - roi: str, - target_empty: bool, - tile_filter: str, - username: str, - password: str, - match_begin: bool, - print_results: bool, - download: bool, - overwrite: bool, - robust: bool, - max_retries: int, - multithread: bool, - nthreads: int, - collection: str, - ) -> List: +@click.option( + "--mirror", + type=click.Path(dir_okay=True, writable=True, resolve_path=True), + default=None, + help="Mirror downloads here; create symlink in target dir", +) +def cli( + products: List[str], + begin_date: datetime.datetime, + end_date: datetime.datetime, + targetdir: str, + roi: str, + target_empty: bool, + tile_filter: str, + username: str, + password: str, + match_begin: bool, + print_results: bool, + download: bool, + overwrite: bool, + robust: bool, + max_retries: int, + multithread: bool, + nthreads: int, + collection: str, + mirror: str, +) -> List: """Query and download MODIS products. This function allows for querying and downloading MODIS products in bulk. @@ -101,10 +129,17 @@ def cli(products: List[str], assert targetdir.exists() assert targetdir.is_dir() + if mirror is not None: + mirror = pathlib.Path(mirror) + assert mirror.is_dir() + assert mirror.exists() + if target_empty: for _ in targetdir.glob("M*hdf"): try: - raise TargetNotEmptyError("Found HDF files in target directory with flag --target-empty set!") + raise TargetNotEmptyError( + "Found HDF files in target directory with flag --target-empty set!" + ) except StopIteration: pass @@ -120,7 +155,7 @@ def cli(products: List[str], if roi is not None: try: - coords = list(map(float, roi.split(','))) + coords = list(map(float, roi.split(","))) except ValueError: click.echo("Error parsing ROI coordinates. Expected numeric!") @@ -130,16 +165,16 @@ def cli(products: List[str], # parse tile filter and check if OK if tile_filter is not None: - tile_regxp = re.compile(r'^h\d{2}v\d{2}$') + tile_regxp = re.compile(r"^h\d{2}v\d{2}$") tiles = [] - for tile_sel in tile_filter.split(','): + for tile_sel in tile_filter.split(","): assert re.match(tile_regxp, tile_sel.lower()) tiles.append(tile_sel.lower()) tile_filter = tiles - click.echo('Querying NASA CMR ...') + click.echo("Querying NASA CMR ...") query = ModisQuery( products=products_parsed, @@ -153,10 +188,12 @@ def cli(products: List[str], query.search(match_begin=match_begin) if query.nresults == 0: - click.echo("No results found! Please check query or make sure CMR is available / reachable.") + click.echo( + "No results found! Please check query or make sure CMR is available / reachable." + ) return [] - click.echo(f'Done! Found {query.nresults} results!') + click.echo(f"Done! Found {query.nresults} results!") if print_results: click.echo("\n") @@ -169,7 +206,7 @@ def cli(products: List[str], if download: - click.echo('Downloading!') + click.echo("Downloading!") if query.nresults > 0: @@ -182,18 +219,21 @@ def cli(products: List[str], nthreads=nthreads, robust=robust, max_retries=max_retries, + mirror=mirror, ) - click.echo('modis_download.py COMPLETED! Bye! \n') + click.echo("modis_download.py COMPLETED! Bye! \n") return downloaded + def cli_wrap(): """Wrapper for cli""" if len(sys.argv) == 1: - cli.main(['--help']) + cli.main(["--help"]) else: - cli() #pylint: disable=E1120 + cli() # pylint: disable=E1120 + -if __name__ == '__main__': +if __name__ == "__main__": cli_wrap() diff --git a/modape/scripts/modis_smooth.py b/modape/scripts/modis_smooth.py index 876dba3a..a7fa025c 100644 --- a/modape/scripts/modis_smooth.py +++ b/modape/scripts/modis_smooth.py @@ -2,49 +2,90 @@ # pylint: disable=E0401 """modis_smooth.py: Smooth raw MODIS HDF5 file.""" -from concurrent.futures import ProcessPoolExecutor, wait import logging import multiprocessing as mp -from pathlib import Path import sys +from concurrent.futures import ProcessPoolExecutor, wait +from pathlib import Path from typing import Tuple import click +import h5py import numpy as np + from modape.exceptions import SgridNotInitializedError from modape.modis import ModisSmoothH5 log = logging.getLogger(__name__) + @click.command(name="Smooth, gapfill and interpolate processed raw MODIS HDF5 files") @click.argument("src", type=click.Path(dir_okay=True, resolve_path=True)) -@click.option("-d", "--targetdir", - type=click.Path(dir_okay=True, resolve_path=True), - help='Target directory for smoothed output' - ) +@click.option( + "-d", + "--targetdir", + type=click.Path(dir_okay=True, resolve_path=True), + help="Target directory for smoothed output", +) @click.option("-s", "--svalue", type=click.FLOAT, help="S value for smoothing (in log10)") -@click.option("-S", "--srange", type=click.FLOAT, nargs=3, help="S value range for V-curve (float log10(s) values as smin smax sstep - default -1 1 0)") +@click.option( + "-S", + "--srange", + type=click.FLOAT, + nargs=3, + help="S value range for V-curve (float log10(s) values as smin smax sstep - default -1 1 0)", +) @click.option("-p", "--pvalue", type=click.FLOAT, help="P value for asymmetric smoothing") -@click.option("-t", "--tempint", type=click.INT, help="Value for temporal interpolation (integer required - default is native temporal resolution i.e. no interpolation)") -@click.option("--tempint-start", type=click.DateTime(formats=["%Y-%m-%d", "%Y%j"]), help="Startdate for temporal interpolation") -@click.option("-n", "--nsmooth", type=click.INT, default=0, help="Number of raw timesteps used for smoothing") -@click.option("-u", "--nupdate", type=click.INT, default=0, help="Number of smoothed timesteps to be updated in HDF5 file") +@click.option( + "-t", + "--tempint", + type=click.INT, + help="Value for temporal interpolation (integer required - default is native temporal resolution i.e. no interpolation)", +) +@click.option( + "--tempint-start", + type=click.DateTime(formats=["%Y-%m-%d", "%Y%j"]), + help="Startdate for temporal interpolation", +) +@click.option( + "-n", "--nsmooth", type=click.INT, default=0, help="Number of raw timesteps used for smoothing" +) +@click.option( + "-u", + "--nupdate", + type=click.INT, + default=0, + help="Number of smoothed timesteps to be updated in HDF5 file", +) @click.option("--soptimize", is_flag=True, help="Use V-curve for s value optimization") -@click.option("--parallel-tiles", type=click.INT, help="Number of tiles processed in parallel", default=1) -@click.option('--last-collected', type=click.DateTime(formats=['%Y%j']), help='Last collected date in julian format (YYYYDDD - %Y%j)') -def cli(src: str, - targetdir: str, - svalue: float, - srange: Tuple[float], - pvalue: float, - tempint: int, - tempint_start: str, - nsmooth: int, - nupdate: int, - soptimize: bool, - parallel_tiles: int, - last_collected: str, - ) -> None: +@click.option( + "--sgrid", + type=click.Path(dir_okay=False, resolve_path=True), + help="File to import previously exported S-grid from", +) +@click.option( + "--parallel-tiles", type=click.INT, help="Number of tiles processed in parallel", default=1 +) +@click.option( + "--last-collected", + type=click.DateTime(formats=["%Y%j"]), + help="Last collected date in julian format (YYYYDDD - %Y%j)", +) +def cli( + src: str, + targetdir: str, + svalue: float, + srange: Tuple[float], + pvalue: float, + tempint: int, + tempint_start: str, + nsmooth: int, + nupdate: int, + soptimize: bool, + sgrid: str, + parallel_tiles: int, + last_collected: str, +) -> None: """Smooth, gapfill and interpolate processed raw MODIS HDF5 files. The smoothing function takes a previously created raw MODIS HDF file (as created by modis_collect) as input. @@ -75,6 +116,7 @@ def cli(src: str, nsmooth (int): Number of raw timesteps used for smoothing". nupdate (int): Number of smoothed timesteps to be updated in HDF5 file. soptimize (bool): Flag for V-Curve optimization of S. + sgrid (str): File to import previously exported S-grid from. parallel_tiles (int): Number of paralell HDF5s being processed. last_collected (str): Last collected rawdate on which smoothing is performed on. @@ -99,7 +141,7 @@ def cli(src: str, if (nsmooth != 0) and (nupdate != 0): if nsmooth < nupdate: - raise ValueError('nsmooth must be bigger or equal (>=) to nupdate!') + raise ValueError("nsmooth must be bigger or equal (>=) to nupdate!") if targetdir is None: if input_raw.is_dir(): @@ -123,15 +165,43 @@ def cli(src: str, if last_collected is not None: last_collected = last_collected.strftime("%Y%j") + # Import S-grid? + if sgrid is not None: + click.echo("Starting MODIS SMOOTH S-grid IMPORT!") + # Other arguments need to be not set: + assert ( + svalue is None + and (srange is None or len(srange) == 0) + and pvalue is None + and nsmooth == 0 + and nupdate == 0 + and not soptimize + and last_collected is None + ) + for rawfile in files: + log.debug("Processing %s", rawfile) + + smt_h5 = ModisSmoothH5( + rawfile=str(rawfile), + targetdir=str(targetdir), + tempint=tempint, + startdate=tempint_start, + ) + smt_h5.write_sgrid(sgrid) + + click.echo("MODIS SMOOTH S-grid import COMPLETED!") + return + click.echo("Starting MODIS SMOOTH!") - if len(srange) != 0: + if srange is not None and len(srange) != 0: assert len(srange) == 3, "Expected 3 values for s-range" - srange = np.arange(srange[0], - srange[1] + srange[2], - srange[2], - ).round(2) + srange = np.arange( + srange[0], + srange[1] + srange[2], + srange[2], + ).round(2) else: srange = None @@ -167,7 +237,7 @@ def cli(src: str, tempint, tempint_start, last_collected, - **smoothing_parameters + **smoothing_parameters, ) ) @@ -184,58 +254,59 @@ def cli(src: str, log.debug("Processing %s", rawfile) result = _worker( - rawfile, - targetdir, - tempint, - tempint_start, - last_collected, - **smoothing_parameters + rawfile, targetdir, tempint, tempint_start, last_collected, **smoothing_parameters ) assert result click.echo("MODIS SMOOTH completed!") -def _worker(rawfile: str, - targetdir: str, - tempint: int, - tempint_start: str, - last_collected: str, - **kwargs: dict): +def _worker( + rawfile: str, + targetdir: str, + tempint: int, + tempint_start: str, + last_collected: str, + **kwargs: dict, +): smt_h5 = ModisSmoothH5( - rawfile=str(rawfile), - targetdir=str(targetdir), - tempint=tempint, - startdate=tempint_start + rawfile=str(rawfile), targetdir=str(targetdir), tempint=tempint, startdate=tempint_start ) - if not smt_h5.exists: if not kwargs["soptimize"] and not kwargs["svalue"]: msg = "Smoothing requires Sgrid which has not been initialized. Please run --soptimize first!" raise SgridNotInitializedError(msg) smt_h5.create() + with h5py.File(str(rawfile), "r") as h5f_open: + if len("".join(x.decode() for x in h5f_open.get("dates"))) == 0: + log.info("File %s is empty; nothing to smooth", str(rawfile)) + return True + if last_collected is not None: last_collected_in_rawfile = smt_h5.last_collected if not last_collected_in_rawfile: raise ValueError(f"No last_collected recorded in {smt_h5.rawfile}") - assert last_collected == last_collected_in_rawfile, \ - f"Last collected date in {smt_h5.rawfile} is {last_collected_in_rawfile} not {last_collected}" + assert ( + last_collected == last_collected_in_rawfile + ), f"Last collected date in {smt_h5.rawfile} is {last_collected_in_rawfile} not {last_collected}" smt_h5.smooth(**kwargs) return True + def cli_wrap(): """Wrapper for cli""" if len(sys.argv) == 1: - cli.main(['--help']) + cli.main(["--help"]) else: - cli() #pylint: disable=E1120 + cli() # pylint: disable=E1120 + -if __name__ == '__main__': +if __name__ == "__main__": cli_wrap() diff --git a/modape/scripts/modis_util.py b/modape/scripts/modis_util.py new file mode 100644 index 00000000..55692104 --- /dev/null +++ b/modape/scripts/modis_util.py @@ -0,0 +1,105 @@ +import sys +from itertools import chain +from pathlib import Path +from typing import Generator + +import click +import h5py + + +@click.group() +def cli(): + pass + + +@cli.command("range") +@click.argument("pathglob", type=click.STRING) +@click.argument("min", type=click.INT) +@click.argument("max", type=click.INT) +def edit_range(pathglob: str, min: int, max: int): + assert min <= max + + path = Path("./") + glob = pathglob + try: + split_at = pathglob.rindex("/") + path = Path(pathglob[:split_at]) + glob = pathglob[split_at + 1 :] + except ValueError: + pass + + for filepath in path.glob(glob): + click.echo(f"Updating: {filepath.as_posix()}") + with h5py.File(filepath, "r+", libver="latest") as h5f: + ds_data = h5f.get("data") + ds_data.attrs.update(dict(valid_range=(min, max))) + + +@cli.command("dates") +@click.argument("pathglob", type=click.STRING) +@click.argument("start", type=click.INT) +@click.argument("end", type=click.INT) +def check_dates(pathglob: str, start: int, end: int): + + class Oct46Generator: + def __init__(self, year: int, within: tuple[int, int]): + self.__year = year + self.__within = within + + def __iter__(self) -> Generator[int, None, None]: + for doy in range(1, 367, 8): + step = (self.__year * 1000) + doy + if step < self.__within[0] or step > self.__within[1]: + continue + yield step + + @staticmethod + def for_start_end(start, end): + """ + Parameters: + - start: integer YYYYDOY + - end: integer YYYYDOY + """ + return chain( + *[ + Oct46Generator(year, (start, end)) + for year in range(start // 1000, (end // 1000) + 1) + ] + ) + + path = Path("./") + glob = pathglob + try: + split_at = pathglob.rindex("/") + path = Path(pathglob[:split_at]) + glob = pathglob[split_at + 1 :] + except ValueError: + pass + + checked = 0 + steps = 0 + for filepath in path.glob(glob): + with h5py.File(filepath, "r", libver="latest") as h5f: + dates = h5f.get("dates") + date_values = set([int(x.decode()) for x in dates[...]]) + steps = 0 + for year_doy in Oct46Generator.for_start_end(start, end): + if year_doy not in date_values: + click.echo(f"Missing date {year_doy} in file {filepath.name}!") + steps += 1 + checked += 1 + + click.echo(f"Done. Checked: {checked} files for {steps} dates (each).") + + +def cli_wrap(): + """Wrapper for cli""" + + if len(sys.argv) == 1: + cli.main(["--help"]) + else: + cli() # pylint: disable=E1120 + + +if __name__ == "__main__": + cli_wrap() diff --git a/modape/scripts/modis_window.py b/modape/scripts/modis_window.py index aeb44638..a85ff97c 100644 --- a/modape/scripts/modis_window.py +++ b/modape/scripts/modis_window.py @@ -1,56 +1,79 @@ #!/usr/bin/env python # pylint: disable=broad-except,C0103,E0401 -"""modis_window.py: Create mosaics from smooth MODIS HDF5 files and - save them as GeoTIFFs. -""" +"""modis_window.py: Create mosaics from smooth MODIS HDF5 files and save them as GeoTIFFs.""" import datetime import logging -from pathlib import Path import re import sys +from pathlib import Path from typing import Dict, List, Tuple, Union import click + from modape.constants import REGEX_PATTERNS from modape.modis import ModisMosaic log = logging.getLogger(__name__) + @click.command() @click.argument("src") -@click.option("-d", "--targetdir", type=click.Path(dir_okay=True, resolve_path=True, writable=True), help="Target directory for Tiffs") -@click.option("-b", "--begin-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Begin date for Tiffs") -@click.option("-e", "--end-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="End date for Tiffs") +@click.option( + "-d", + "--targetdir", + type=click.Path(dir_okay=True, resolve_path=True, writable=True), + help="Target directory for Tiffs", +) +@click.option( + "-b", "--begin-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Begin date for Tiffs" +) +@click.option( + "-e", "--end-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="End date for Tiffs" +) @click.option("--roi", type=click.STRING, help="AOI for clipping (as xmin,ymin,xmax,ymax)") -@click.option("--region", type=click.STRING, help="Region prefix for Tiffs (default is reg)", default="reg") +@click.option( + "--region", type=click.STRING, help="Region prefix for Tiffs (default is reg)", default="reg" +) @click.option("--sgrid", is_flag=True, help="Extract sgrid instead of data") @click.option("--force-doy", is_flag=True, help="Force DOY filenaming") @click.option("--filter-product", type=click.STRING, help="Filter by product") @click.option("--filter-vampc", help="Filter by VAM parameter code") @click.option("--target-srs", help="Target spatial reference for warping", default="EPSG:4326") -@click.option('--co', multiple=True, help="GDAL creationOptions", default=["COMPRESS=LZW", "PREDICTOR=2"]) +@click.option( + "--co", multiple=True, help="GDAL creationOptions", default=["COMPRESS=LZW", "PREDICTOR=2"] +) @click.option("--clip-valid", is_flag=True, help="clip values to valid range for product") -@click.option("--round-int", type=click.INT, help="Round to integer places (either decimals or exponent of 10)") +@click.option( + "--round-int", + type=click.INT, + help="Round to integer places (either decimals or exponent of 10)", +) @click.option("--gdal-kwarg", type=click.STRING, multiple=True, help="Addition kwargs for GDAL") @click.option("--overwrite", is_flag=True, help="Overwrite existsing Tiffs") -@click.option('--last-smoothed', type=click.DateTime(formats=['%Y%j']), help='Last smoothed date in julian format (YYYYDDD - %Y%j)') -def cli(src: str, - targetdir: str, - begin_date: datetime.date, - end_date: datetime.date, - roi: Union[str, List[float]], - region: str, - sgrid: bool, - force_doy: bool, - filter_product: str, - filter_vampc: str, - target_srs: str, - co: Tuple[str], - clip_valid: bool, - round_int: int, - gdal_kwarg: Union[Tuple[str], Dict[str, Union[str, int, float]]], - overwrite: bool, - last_smoothed: str) -> List: +@click.option( + "--last-smoothed", + type=click.DateTime(formats=["%Y%j"]), + help="Last smoothed date in julian format (YYYYDDD - %Y%j)", +) +def cli( + src: str, + targetdir: str, + begin_date: datetime.date, + end_date: datetime.date, + roi: Union[str, List[float]], + region: str, + sgrid: bool, + force_doy: bool, + filter_product: str, + filter_vampc: str, + target_srs: str, + co: Tuple[str], + clip_valid: bool, + round_int: int, + gdal_kwarg: Union[Tuple[str], Dict[str, Union[str, int, float]]], + overwrite: bool, + last_smoothed: str, +) -> List: """Creates GeoTiff Mosaics from HDF5 files. The input can be either raw or smoothed HDF5 files. With the latter, @@ -87,7 +110,13 @@ def cli(src: str, clip_valid (bool): Clip data to valid range. round_int (int): Round integer. gdal_kwarg (Tuple[str]): translateOptions to the internal call to gdal::translate(); - the Tuple of strings (item formatting: "key=value") is parsed into a dict. + the Tuple of strings (item formatting: "key=value") is parsed into a dict; + for the metadataOptions key, an ampersand (&) separated syntax is supported + on the command line, for example: + --gdal-kwarg metadataOptions=CONSOLIDATION_STAGE=2&FINAL=TRUE + which would add 2 metadata tags: + CONSOLIDATION_STAGE = 2 + FINAL = TRUE Alternatively, passing a dict instead of a Tuple[str] is also supported. overwrite (bool): Overwrite existing Tiffs. last_smoothed (str): Rawdate (MODIS time step) that is checked to be the last in series at time of smoothing. @@ -122,19 +151,20 @@ def cli(src: str, groups = [REGEX_PATTERNS["tile"].sub("*", x.name) for x in files] group_check = {".".join(x.split(".")[:-2]) for x in groups} if len(group_check) > 1: - raise ValueError("Multiple product groups in input. Please filter or use separate directories!") + raise ValueError( + "Multiple product groups in input. Please filter or use separate directories!" + ) groups = list(set(groups)) if roi is not None: if not isinstance(roi, list): - roi = [float(x) for x in roi.split(',')] + roi = [float(x) for x in roi.split(",")] if len(roi) != 4: raise ValueError("ROI for clip needs to be bounding box in format xmin,ymin,xmax,ymax") roi[1], roi[3] = roi[3], roi[1] - if targetdir is None: if src_input.is_dir(): targetdir = src_input @@ -172,15 +202,19 @@ def cli(src: str, gdal_kwargs = {} if gdal_kwarg: if not isinstance(gdal_kwarg, dict): - gdal_kwargs.update( - {key:value for x in gdal_kwarg for key, value in [x.split("=")]} - ) + for pair in gdal_kwarg: + if str(pair).lower().startswith("metadataoptions"): + gdal_kwargs["metadataOptions"] = [ + metaPair.strip() for metaPair in pair[len("metadataOptions=") :].split("&") + ] + else: + gdal_kwargs.update({key: value for key, value in [pair.split("=")]}) else: gdal_kwargs = gdal_kwarg - + if last_smoothed is not None: last_smoothed = last_smoothed.strftime("%Y%j") - + click.echo("\nSTARTING modis_window.py!") mosaics = [] @@ -208,19 +242,21 @@ def cli(src: str, last_smoothed=last_smoothed, creationOptions=list(co), **gdal_kwargs, - ) + ) ) click.echo("\nCOMPLETED modis_window.py!") return mosaics + def cli_wrap(): """Wrapper for cli""" if len(sys.argv) == 1: - cli.main(['--help']) + cli.main(["--help"]) else: - cli() #pylint: disable=E1120 + cli() # pylint: disable=E1120 + -if __name__ == '__main__': +if __name__ == "__main__": cli_wrap() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..bfa4b0f2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,73 @@ +[build-system] +requires = [ + "setuptools>=61.0", + "wheel", + "numpy>=1.16.1", + "cython>=0.29.0; platform_python_implementation != 'PyPy'" +] +build-backend = "setuptools.build_meta" + +[project] +name = "modape" +description = "MODIS Assimilation and Processing Engine" +dynamic = ["version"] +authors = [ + {name = "Valentin Pesendorfer", email = "valentin.pesendorfer@wfp.org"} +] +readme = "README.md" # Assuming you have a README.md file +license = {text = "MIT"} # Update with your actual license +requires-python = ">=3.6, <4" +dependencies = [ + "click<8", + "cython", + "numpy>=1.16.1", + "gdal>=2", + "h5py>=2.9", + "python-cmr>=0.4", + "requests>=2", + "pandas>=0.24", + "pycksum>=0.4.7", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] + +[project.urls] +Homepage = "http://wfp-vam.github.io/modape" +Repository = "https://github.com/WFP-VAM/modape" +Documentation = "http://wfp-vam.github.io/modape" + +[project.scripts] +modis_download = "modape.scripts.modis_download:cli_wrap" +modis_collect = "modape.scripts.modis_collect:cli_wrap" +modis_smooth = "modape.scripts.modis_smooth:cli_wrap" +modis_window = "modape.scripts.modis_window:cli_wrap" +csv_smooth = "modape.scripts.csv_smooth:cli_wrap" +modis_info = "modape.scripts.modis_info:cli_wrap" +modape_version = "_version.version_info:main" + +[project.optional-dependencies] +test = ["pytest", "pytest-runner", "click", "cython"] +dev = ["pytest", "pytest-runner", "click", "cython"] + +[tool.pytest.ini_options] +python_files = ["tests/test_*.py"] +filterwarnings = [ + "ignore::UserWarning" +] + +[tool.setuptools.packages.find] +# Equivalent to find_packages() + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.dynamic] +version = {attr = "_version.__version__"} + +# Optional: If you want to use setuptools-scm instead of _version.py +# [tool.setuptools_scm] +# write_to = "modape/_version.py" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 18fa5e2b..00000000 --- a/setup.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[aliases] -test=pytest - -[tool:pytest] -python_files = tests/test_*.py -filterwarnings = - ignore::UserWarning diff --git a/setup.py b/setup.py index 17f8b4ac..dd5ddb8a 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,10 @@ #!/usr/bin/env python # pylint: disable=invalid-name,E0401 -"""setup.py for MODAPE""" - -from setuptools import setup, Extension, find_packages +"""Minimal setup.py for MODAPE - handles only Cython extensions""" +from setuptools import setup, Extension import numpy -import _version + USE_CYTHON = "auto" if USE_CYTHON: @@ -25,58 +24,23 @@ if USE_CYTHON: ext_modules += [ Extension("modape.whittaker", - ["modape/_whittaker.pyx"], extra_compile_args=["-O3", "-ffast-math"])] + ["modape/_whittaker.pyx"], + extra_compile_args=["-O3", "-ffast-math"], + include_dirs=[numpy.get_include()])] cmdclass.update({"build_ext": build_ext}) else: ext_modules += [ Extension("modape.whittaker", - ["modape/_whittaker.c"], extra_compile_args=["-O3", "-ffast-math"])] - -# set py 3: + ["modape/_whittaker.c"], + extra_compile_args=["-O3", "-ffast-math"], + include_dirs=[numpy.get_include()])] +# Set Python 3 for Cython for ext in ext_modules: ext.cython_directives = {"language_level": "3"} - +# Only handle extensions - all other config in pyproject.toml setup( - name="modape", - description="MODIS Assimilation and Processing Engine", - version=_version.__version__, - author="Valentin Pesendorfer", - author_email="valentin.pesendorfer@wfp.org", - url="http://wfp-vam.github.io/modape", - long_description="""HDF5 based processing chain optimized for MODIS data combined with a State-of-the art whittaker smoother, implemented as fast C-extension through Cython and including a V-curve optimization of the smoothing parameter.\n\nFor more information, please visit: http://github.com/WFP-VAM/modape""", - include_dirs=[numpy.get_include()], - entry_points={ - "console_scripts":[ - "modis_download=modape.scripts.modis_download:cli_wrap", - "modis_collect=modape.scripts.modis_collect:cli_wrap", - "modis_smooth=modape.scripts.modis_smooth:cli_wrap", - "modis_window=modape.scripts.modis_window:cli_wrap", - "csv_smooth=modape.scripts.csv_smooth:cli_wrap", - "modis_info=modape.scripts.modis_info:cli_wrap", - "modape_version=_version.version_info:main", - ] - }, - packages=find_packages(), cmdclass=cmdclass, ext_modules=ext_modules, - include_package_data=True, - classifiers=[ - "Development Status :: 4 - Beta", - "Programming Language :: Python :: 3", - ], - setup_requires=["pytest-runner"], - tests_require=["pytest", "click"], - install_requires=[ - "click<8", - "numpy>=1.16.1", - "gdal>=2", - "h5py>=2.9", - "python-cmr>=0.4", - "requests>=2", - "pandas>=0.24", - "pycksum>=0.4.3", - ], - python_requires=">=3, <4", -) +) \ No newline at end of file diff --git a/tests/data/fwd/MOD13A2.A2025129.h18v09.061.2025148083640.hdf b/tests/data/fwd/MOD13A2.A2025129.h18v09.061.2025148083640.hdf new file mode 100644 index 00000000..ee682519 Binary files /dev/null and b/tests/data/fwd/MOD13A2.A2025129.h18v09.061.2025148083640.hdf differ diff --git a/tests/data/fwd/MOD13A2.A2025145.h18v09.061.2025218223451.hdf b/tests/data/fwd/MOD13A2.A2025145.h18v09.061.2025218223451.hdf new file mode 100644 index 00000000..07f02f48 Binary files /dev/null and b/tests/data/fwd/MOD13A2.A2025145.h18v09.061.2025218223451.hdf differ diff --git a/tests/data/fwd/MYD13A2.A2025121.h18v09.061.2025140114955.hdf b/tests/data/fwd/MYD13A2.A2025121.h18v09.061.2025140114955.hdf new file mode 100644 index 00000000..eaf1f8af Binary files /dev/null and b/tests/data/fwd/MYD13A2.A2025121.h18v09.061.2025140114955.hdf differ diff --git a/tests/data/fwd/MYD13A2.A2025137.h18v09.061.2025154021101.hdf b/tests/data/fwd/MYD13A2.A2025137.h18v09.061.2025154021101.hdf new file mode 100644 index 00000000..c68f9e55 Binary files /dev/null and b/tests/data/fwd/MYD13A2.A2025137.h18v09.061.2025154021101.hdf differ diff --git a/tests/data/init/MOD13A2.A2025001.h18v08.061.2025022080337.hdf b/tests/data/init/MOD13A2.A2025001.h18v08.061.2025022080337.hdf new file mode 100644 index 00000000..c512b2d6 Binary files /dev/null and b/tests/data/init/MOD13A2.A2025001.h18v08.061.2025022080337.hdf differ diff --git a/tests/data/init/MOD13A2.A2025001.h18v09.061.2025022080611.hdf b/tests/data/init/MOD13A2.A2025001.h18v09.061.2025022080611.hdf new file mode 100644 index 00000000..5154fbb1 Binary files /dev/null and b/tests/data/init/MOD13A2.A2025001.h18v09.061.2025022080611.hdf differ diff --git a/tests/data/init/MOD13A2.A2025017.h18v09.061.2025035161835.hdf b/tests/data/init/MOD13A2.A2025017.h18v09.061.2025035161835.hdf new file mode 100644 index 00000000..b19737bb Binary files /dev/null and b/tests/data/init/MOD13A2.A2025017.h18v09.061.2025035161835.hdf differ diff --git a/tests/data/init/MOD13A2.A2025033.h18v09.061.2025050175427.hdf b/tests/data/init/MOD13A2.A2025033.h18v09.061.2025050175427.hdf new file mode 100644 index 00000000..142dc3c3 Binary files /dev/null and b/tests/data/init/MOD13A2.A2025033.h18v09.061.2025050175427.hdf differ diff --git a/tests/data/init/MOD13A2.A2025049.h18v09.061.2025070114438.hdf b/tests/data/init/MOD13A2.A2025049.h18v09.061.2025070114438.hdf new file mode 100644 index 00000000..0bb35f7a Binary files /dev/null and b/tests/data/init/MOD13A2.A2025049.h18v09.061.2025070114438.hdf differ diff --git a/tests/data/init/MOD13A2.A2025065.h18v09.061.2025090103936.hdf b/tests/data/init/MOD13A2.A2025065.h18v09.061.2025090103936.hdf new file mode 100644 index 00000000..8f66057c Binary files /dev/null and b/tests/data/init/MOD13A2.A2025065.h18v09.061.2025090103936.hdf differ diff --git a/tests/data/init/MOD13A2.A2025081.h18v09.061.2025098171501.hdf b/tests/data/init/MOD13A2.A2025081.h18v09.061.2025098171501.hdf new file mode 100644 index 00000000..fdab6dfb Binary files /dev/null and b/tests/data/init/MOD13A2.A2025081.h18v09.061.2025098171501.hdf differ diff --git a/tests/data/init/MOD13A2.A2025097.h18v09.061.2025114002335.hdf b/tests/data/init/MOD13A2.A2025097.h18v09.061.2025114002335.hdf new file mode 100644 index 00000000..9504e9ad Binary files /dev/null and b/tests/data/init/MOD13A2.A2025097.h18v09.061.2025114002335.hdf differ diff --git a/tests/data/init/MOD13A2.A2025113.h18v09.061.2025148085150.hdf b/tests/data/init/MOD13A2.A2025113.h18v09.061.2025148085150.hdf new file mode 100644 index 00000000..f3dc3810 Binary files /dev/null and b/tests/data/init/MOD13A2.A2025113.h18v09.061.2025148085150.hdf differ diff --git a/tests/data/init/MYD13A2.A2025009.h18v09.061.2025030154035.hdf b/tests/data/init/MYD13A2.A2025009.h18v09.061.2025030154035.hdf new file mode 100644 index 00000000..d774c538 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025009.h18v09.061.2025030154035.hdf differ diff --git a/tests/data/init/MYD13A2.A2025025.h18v09.061.2025044013608.hdf b/tests/data/init/MYD13A2.A2025025.h18v09.061.2025044013608.hdf new file mode 100644 index 00000000..5191a030 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025025.h18v09.061.2025044013608.hdf differ diff --git a/tests/data/init/MYD13A2.A2025041.h18v09.061.2025058200101.hdf b/tests/data/init/MYD13A2.A2025041.h18v09.061.2025058200101.hdf new file mode 100644 index 00000000..f03ab4d8 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025041.h18v09.061.2025058200101.hdf differ diff --git a/tests/data/init/MYD13A2.A2025057.h18v09.061.2025073235717.hdf b/tests/data/init/MYD13A2.A2025057.h18v09.061.2025073235717.hdf new file mode 100644 index 00000000..095c5bb6 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025057.h18v09.061.2025073235717.hdf differ diff --git a/tests/data/init/MYD13A2.A2025073.h18v09.061.2025092101021.hdf b/tests/data/init/MYD13A2.A2025073.h18v09.061.2025092101021.hdf new file mode 100644 index 00000000..b88c59a8 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025073.h18v09.061.2025092101021.hdf differ diff --git a/tests/data/init/MYD13A2.A2025089.h18v09.061.2025106002004.hdf b/tests/data/init/MYD13A2.A2025089.h18v09.061.2025106002004.hdf new file mode 100644 index 00000000..9c6ca8b1 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025089.h18v09.061.2025106002004.hdf differ diff --git a/tests/data/init/MYD13A2.A2025105.h18v09.061.2025125115238.hdf b/tests/data/init/MYD13A2.A2025105.h18v09.061.2025125115238.hdf new file mode 100644 index 00000000..38666a33 Binary files /dev/null and b/tests/data/init/MYD13A2.A2025105.h18v09.061.2025125115238.hdf differ diff --git a/tests/data/readme.MD b/tests/data/readme.MD new file mode 100644 index 00000000..56478ce8 --- /dev/null +++ b/tests/data/readme.MD @@ -0,0 +1,18 @@ +This folder contains test data for 13A2 VI product used in the following CLI integration test: + tests/test_cli.py::TestConsoleScripts::test_collect_smooth_export_import_sgrid + +# init & fwd +Data was hand-picked and downloaded for both Terra and Aqua, respectively: + - https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD13A2 + - https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MYD13A2 + - https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/VNP13A2/ + +For more information on the product: + - https://www.earthdata.nasa.gov/data/catalog/lpcloud-mod13a2-061 + - https://www.earthdata.nasa.gov/data/catalog/lpcloud-myd13a2-061 + - https://lpdaac.usgs.gov/documents/621/MOD13_User_Guide_V61.pdf + - https://www.earthdata.nasa.gov/centers/lp-daac + +# viirs +Data was hand-picked and downloaded from: + - https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5200/VNP13A2/ diff --git a/tests/data/viirs/VNP13A2.A2025001.h18v09.002.2025017085341.h5 b/tests/data/viirs/VNP13A2.A2025001.h18v09.002.2025017085341.h5 new file mode 100644 index 00000000..9a557882 Binary files /dev/null and b/tests/data/viirs/VNP13A2.A2025001.h18v09.002.2025017085341.h5 differ diff --git a/tests/data/viirs/VNP13A2.A2025009.h18v09.002.2025031192612.h5 b/tests/data/viirs/VNP13A2.A2025009.h18v09.002.2025031192612.h5 new file mode 100644 index 00000000..737f6ced Binary files /dev/null and b/tests/data/viirs/VNP13A2.A2025009.h18v09.002.2025031192612.h5 differ diff --git a/tests/test_cli.py b/tests/test_cli.py index 883fcf0a..2fa94c05 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,24 +1,37 @@ """test_cli.py: Test command line scripts.""" -#pylint: disable=E0401 + +# pylint: disable=E0401 from array import array -from datetime import date +from datetime import date, datetime from os.path import exists from pathlib import Path +import hashlib import shutil import unittest +import tempfile from unittest.mock import patch, Mock, MagicMock from click.testing import CliRunner +from modape.constants import REGEX_PATTERNS from modape.modis.collect import ModisRawH5 +from modape.modis.smooth import ModisSmoothH5 from modape.scripts.modis_download import cli as modis_download_cli from modape.scripts.modis_collect import cli as modis_collect_cli from modape.scripts.modis_smooth import cli as modis_smooth_cli from modape.scripts.modis_window import cli as modis_window_cli from modape.scripts.csv_smooth import cli as csv_smooth_cli + +from osgeo import gdal + import numpy as np +import h5py import pandas as pd -from test_modis import create_h5temp +try: + from test_modis import create_h5temp +except ImportError: + from .test_modis import create_h5temp + class TestConsoleScripts(unittest.TestCase): """Test class for console scripts.""" @@ -29,32 +42,36 @@ def setUpClass(cls): cls.testpath = Path("/tmp") - cls.lst_files = ["MYD11A2.A2002193.h18v06.006.2015146152945.hdf", - "MOD11A2.A2002209.h18v06.006.2015145152020.hdf", - "MYD11A2.A2002201.h18v06.006.2015146153241.hdf", - "MYD11A2.A2002185.h18v06.006.2015146152642.hdf", - "MOD11A2.A2002177.h18v06.006.2015144183717.hdf", - "MYD11A2.A2002209.h18v06.006.2015152152813.hdf", - "MOD11A2.A2002185.h18v06.006.2015145002847.hdf", - "MOD11A2.A2002193.h18v06.006.2015145055806.hdf", - "MOD11A2.A2002201.h18v06.006.2015145105749.hdf"] - - cls.vim_files = ["MYD13A2.A2002201.h18v06.006.2015149071105.hdf", - "MYD13A2.A2002185.h18v06.006.2015149071113.hdf", - "MOD13A2.A2002177.h18v06.006.2015149001129.hdf", - "MOD13A2.A2002209.h18v06.006.2015149180726.hdf", - "MOD13A2.A2002193.h18v06.006.2015149022847.hdf"] + cls.lst_files = [ + "MYD11A2.A2002193.h18v06.006.2015146152945.hdf", + "MOD11A2.A2002209.h18v06.006.2015145152020.hdf", + "MYD11A2.A2002201.h18v06.006.2015146153241.hdf", + "MYD11A2.A2002185.h18v06.006.2015146152642.hdf", + "MOD11A2.A2002177.h18v06.006.2015144183717.hdf", + "MYD11A2.A2002209.h18v06.006.2015152152813.hdf", + "MOD11A2.A2002185.h18v06.006.2015145002847.hdf", + "MOD11A2.A2002193.h18v06.006.2015145055806.hdf", + "MOD11A2.A2002201.h18v06.006.2015145105749.hdf", + ] + + cls.vim_files = [ + "MYD13A2.A2002201.h18v06.006.2015149071105.hdf", + "MYD13A2.A2002185.h18v06.006.2015149071113.hdf", + "MOD13A2.A2002177.h18v06.006.2015149001129.hdf", + "MOD13A2.A2002209.h18v06.006.2015149180726.hdf", + "MOD13A2.A2002193.h18v06.006.2015149022847.hdf", + ] @classmethod def tearDownClass(cls): try: shutil.rmtree("__pycache__") - except: #pylint: disable=W0702 + except Exception as _: pass try: shutil.rmtree("/tmp/data") - except: #pylint: disable=W0702 + except Exception as _: pass def setUp(self): @@ -68,8 +85,9 @@ def test_modis_download(self): test_query.nresults = 4 test_query.download = Mock(return_value="Mocking download!") - with patch("modape.scripts.modis_download.ModisQuery", return_value=test_query) as mocked_query: - + with patch( + "modape.scripts.modis_download.ModisQuery", return_value=test_query + ) as mocked_query: result = self.runner.invoke(modis_download_cli, ["MOD13A2"]) mocked_query.assert_called() @@ -91,35 +109,50 @@ def test_modis_download(self): mocked_query.assert_not_called() assert result.exit_code == 1 - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--username", "test", "--password", "test", "--download"]) + result = self.runner.invoke( + modis_download_cli, + ["MOD13A2", "--username", "test", "--password", "test", "--download"], + ) mocked_query.assert_called() mocked_query.reset_mock() - fake_hdf = self.testpath.joinpath("MOD13A2.A2002193.h18v06.006.2019256103823.hdf") + fake_hdf = self.testpath.joinpath( + "MOD13A2.A2002193.h18v06.006.2019256103823.hdf" + ) fake_hdf.touch() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--targetdir", str(self.testpath)]) + result = self.runner.invoke( + modis_download_cli, ["MOD13A2", "--targetdir", str(self.testpath)] + ) mocked_query.assert_called() assert result.exit_code == 0 mocked_query.reset_mock() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--targetdir", str(self.testpath), "--target-empty"]) + result = self.runner.invoke( + modis_download_cli, + ["MOD13A2", "--targetdir", str(self.testpath), "--target-empty"], + ) mocked_query.assert_not_called() assert result.exit_code == 1 fake_hdf.unlink() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--targetdir", str(self.testpath), "--target-empty"]) + result = self.runner.invoke( + modis_download_cli, + ["MOD13A2", "--targetdir", str(self.testpath), "--target-empty"], + ) mocked_query.assert_called() assert result.exit_code == 0 mocked_query.reset_mock() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--roi", "10,10"]) + result = self.runner.invoke( + modis_download_cli, ["MOD13A2", "--roi", "10,10"] + ) mocked_query.assert_called() args = mocked_query.call_args_list[0][1] self.assertEqual(args["aoi"], (10.0, 10.0)) @@ -127,7 +160,9 @@ def test_modis_download(self): mocked_query.reset_mock() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--roi", "10,10,20,20"]) + result = self.runner.invoke( + modis_download_cli, ["MOD13A2", "--roi", "10,10,20,20"] + ) mocked_query.assert_called() args = mocked_query.call_args_list[0][1] self.assertEqual(args["aoi"], (10.0, 10.0, 20.0, 20.0)) @@ -135,13 +170,17 @@ def test_modis_download(self): mocked_query.reset_mock() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--roi", "10,20,20"]) + result = self.runner.invoke( + modis_download_cli, ["MOD13A2", "--roi", "10,20,20"] + ) mocked_query.assert_not_called() assert result.exit_code == 1 mocked_query.reset_mock() - result = self.runner.invoke(modis_download_cli, ["MOD13A2", "--roi", "10,20,20,30,50"]) + result = self.runner.invoke( + modis_download_cli, ["MOD13A2", "--roi", "10,20,20,30,50"] + ) mocked_query.assert_not_called() assert result.exit_code == 1 @@ -178,9 +217,9 @@ def test_modis_collect(self): for call in mocked_worker.call_args_list: args, kwargs = call self.assertEqual(len(args), 0) - self.assertEqual(len(kwargs), 3) + self.assertEqual(len(kwargs), 4) self.assertEqual(type(kwargs["raw_h5"]), ModisRawH5) - self.assertEqual(kwargs["compression"], 'gzip') + self.assertEqual(kwargs["compression"], "gzip") self.assertEqual(kwargs["force"], False) self.assertEqual(result.exit_code, 0) @@ -194,7 +233,9 @@ def test_modis_collect(self): with patch("modape.scripts.modis_collect._worker") as mocked_worker: product_files = [str(x) for x in data_dir.glob("*hdf")] mocked_worker.return_value = product_files - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--cleanup"]) + result = self.runner.invoke( + modis_collect_cli, ["/tmp/data", "--interleave", "--cleanup"] + ) mocked_worker.assert_called_once() product_files.sort() self.assertEqual(result.exit_code, 0) @@ -202,7 +243,7 @@ def test_modis_collect(self): tracefile = data_dir.joinpath(".collected") self.assertTrue(not any([Path(x).exists() for x in product_files])) with open(str(tracefile), "r") as thefile: - collected = [x.strip() for x in thefile.readlines()] + collected = [x.strip().split(";")[0] for x in thefile.readlines()] self.assertTrue(all([x in self.vim_files for x in collected])) for file in self.vim_files: @@ -211,21 +252,34 @@ def test_modis_collect(self): file_path = data_dir.joinpath(file.replace("h18v06", "h19v06")) file_path.touch() - with patch("modape.scripts.modis_collect._worker") as mocked_worker: product_files = [str(x) for x in data_dir.glob("*hdf")] product_files.sort() mocked_worker.return_value = product_files # wrong last-collected input - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--cleanup", "--last-collected", "2002-01-01"]) + result = self.runner.invoke( + modis_collect_cli, + [ + "/tmp/data", + "--interleave", + "--cleanup", + "--last-collected", + "2002-01-01", + ], + ) mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 2) # test last-collected checks with patch("modape.scripts.modis_collect.ModisRawH5") as mocked_rawfile: - mocked_rawfile.return_value = MagicMock(exists=True, last_collected="2020001") - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--last-collected", "2020365"]) + mocked_rawfile.return_value = MagicMock( + exists=True, last_collected="2020001" + ) + result = self.runner.invoke( + modis_collect_cli, + ["/tmp/data", "--interleave", "--last-collected", "2020365"], + ) mocked_rawfile.assert_called_once() mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 1) @@ -233,14 +287,19 @@ def test_modis_collect(self): mocked_rawfile.reset_mock() - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--last-collected", "2020001"]) + result = self.runner.invoke( + modis_collect_cli, + ["/tmp/data", "--interleave", "--last-collected", "2020001"], + ) self.assertEqual(mocked_rawfile.call_count, 2) self.assertEqual(mocked_worker.call_count, 2) self.assertEqual(result.exit_code, 0) mocked_worker.reset_mock() - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave"]) + result = self.runner.invoke( + modis_collect_cli, ["/tmp/data", "--interleave"] + ) self.assertEqual(result.exit_code, 0) self.assertEqual(mocked_worker.call_count, 2) @@ -248,13 +307,24 @@ def test_modis_collect(self): mocked_worker.reset_mock() # check if tile-required works - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--tiles-required", "h18v06,h19v06"]) + result = self.runner.invoke( + modis_collect_cli, + ["/tmp/data", "--interleave", "--tiles-required", "h18v06,h19v06"], + ) self.assertEqual(result.exit_code, 0) self.assertEqual(mocked_worker.call_count, 2) mocked_worker.reset_mock() - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--tiles-required", "h18v06,h19v06,h20v06"]) + result = self.runner.invoke( + modis_collect_cli, + [ + "/tmp/data", + "--interleave", + "--tiles-required", + "h18v06,h19v06,h20v06", + ], + ) self.assertEqual(mocked_worker.call_count, 0) self.assertEqual(result.exit_code, 1) self.assertEqual(result.exc_info[0], ValueError) @@ -262,7 +332,10 @@ def test_modis_collect(self): mocked_worker.reset_mock() Path(product_files[-1]).unlink() - result = self.runner.invoke(modis_collect_cli, ["/tmp/data", "--interleave", "--tiles-required", "h18v06,h19v06"]) + result = self.runner.invoke( + modis_collect_cli, + ["/tmp/data", "--interleave", "--tiles-required", "h18v06,h19v06"], + ) mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 1) self.assertEqual(result.exc_info[0], ValueError) @@ -278,42 +351,40 @@ def test_modis_smooth(self): raw_h5 = create_h5temp(10, 10, 8, 8) - with patch("modape.scripts.modis_smooth._worker", return_value=True) as mocked_worker: - - result = self.runner.invoke(modis_smooth_cli, - [str(raw_h5), - "--targetdir", - str(raw_h5)]) + with patch( + "modape.scripts.modis_smooth._worker", return_value=True + ) as mocked_worker: + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--targetdir", str(raw_h5)] + ) mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 1) - result = self.runner.invoke(modis_smooth_cli, - [str(raw_h5), - "--nsmooth", 1, - "--nupdate", 10]) + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--nsmooth", 1, "--nupdate", 10] + ) mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 1) - result = self.runner.invoke(modis_smooth_cli, - [str(raw_h5), - "--srange", 0, 1]) - + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--srange", 0, 1] + ) mocked_worker.assert_not_called() self.assertEqual(result.exit_code, 2) - result = self.runner.invoke(modis_smooth_cli, - [str(raw_h5), - "--srange", 0, 1, 0.2]) - + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--srange", 0, 1, 0.2] + ) mocked_worker.assert_called() self.assertEqual(result.exit_code, 0) - - with patch("modape.scripts.modis_smooth._worker", return_value=False) as mocked_worker: + with patch( + "modape.scripts.modis_smooth._worker", return_value=False + ) as mocked_worker: result = self.runner.invoke(modis_smooth_cli, [str(raw_h5)]) mocked_worker.assert_called() self.assertEqual(result.exit_code, 1) @@ -325,22 +396,36 @@ def test_modis_smooth(self): mocked_worker.assert_called() self.assertEqual(result.exit_code, 0) - with patch("modape.scripts.modis_smooth._worker", return_value=True) as mocked_worker: - + with patch( + "modape.scripts.modis_smooth._worker", return_value=True + ) as mocked_worker: result = self.runner.invoke( modis_smooth_cli, - [str(raw_h5), - "--targetdir", "/tmp", - "--svalue", 1.0, - "--srange", 0, 1, 0.2, - "--pvalue", 0.90, - "--tempint", 10, - "--tempint-start", "2001001", - "--nsmooth", 10, - "--nupdate", 10, - "--soptimize", - "--last-collected", "2002001", - ]) + [ + str(raw_h5), + "--targetdir", + "/tmp", + "--svalue", + 1.0, + "--srange", + 0, + 1, + 0.2, + "--pvalue", + 0.90, + "--tempint", + 10, + "--tempint-start", + "2001001", + "--nsmooth", + 10, + "--nupdate", + 10, + "--soptimize", + "--last-collected", + "2002001", + ], + ) mocked_worker.assert_called_once() margs, mkwargs = mocked_worker.call_args @@ -352,7 +437,9 @@ def test_modis_smooth(self): self.assertEqual(margs[4], "2002001") self.assertEqual(mkwargs["svalue"], 1.0) - np.testing.assert_array_equal(mkwargs["srange"], np.arange(0, 1.2, 0.2).round(2)) + np.testing.assert_array_equal( + mkwargs["srange"], np.arange(0, 1.2, 0.2).round(2) + ) self.assertEqual(mkwargs["p"], 0.90) self.assertEqual(mkwargs["nsmooth"], 10) self.assertEqual(mkwargs["nupdate"], 10) @@ -367,14 +454,22 @@ def test_modis_smooth(self): self.assertEqual(result.exit_code, 1) mocked_smoothfile.reset_mock() - mocked_smoothfile.return_value = MagicMock(exists=True, last_collected="2020001") - result = self.runner.invoke(modis_smooth_cli, [str(raw_h5), "--last-collected", "2020002"]) + mocked_smoothfile.return_value = MagicMock( + exists=True, last_collected="2020001" + ) + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--last-collected", "2020002"] + ) mocked_smoothfile.assert_called_once() self.assertEqual(result.exit_code, 1) mocked_smoothfile.reset_mock() - mocked_smoothfile.return_value = MagicMock(exists=True, last_collected="2020001") - result = self.runner.invoke(modis_smooth_cli, [str(raw_h5), "--last-collected", "2020001"]) + mocked_smoothfile.return_value = MagicMock( + exists=True, last_collected="2020001" + ) + result = self.runner.invoke( + modis_smooth_cli, [str(raw_h5), "--last-collected", "2020001"] + ) mocked_smoothfile.assert_called_once() self.assertEqual(result.exit_code, 0) @@ -421,17 +516,27 @@ def test_modis_window(self, mocked_mosaic): result = self.runner.invoke( modis_window_cli, - ["/tmp/data", - "-b", "2020-01-01", - "-e", "2020-05-01", - "--roi", "0,0,10,10", - "--sgrid", - "--co", "COMPRESS=DEFLATE", - "--co", "PREDICTOR=1", - "--co", "TILED=YES", - "--target-srs", "EPSG:3857", - "--round-int", 2, - "--clip-valid"] + [ + "/tmp/data", + "-b", + "2020-01-01", + "-e", + "2020-05-01", + "--roi", + "0,0,10,10", + "--sgrid", + "--co", + "COMPRESS=DEFLATE", + "--co", + "PREDICTOR=1", + "--co", + "TILED=YES", + "--target-srs", + "EPSG:3857", + "--round-int", + 2, + "--clip-valid", + ], ) self.assertEqual(result.exit_code, 0) mocked_mosaic.assert_called_once() @@ -441,7 +546,9 @@ def test_modis_window(self, mocked_mosaic): self.assertEqual(mkwargs["start"], date(2020, 1, 1)) self.assertEqual(mkwargs["stop"], date(2020, 5, 1)) self.assertEqual(mkwargs["aoi"], [0, 10, 10, 0]) - self.assertEqual(mkwargs["creationOptions"], ["COMPRESS=DEFLATE", "PREDICTOR=1", "TILED=YES"]) + self.assertEqual( + mkwargs["creationOptions"], ["COMPRESS=DEFLATE", "PREDICTOR=1", "TILED=YES"] + ) self.assertEqual(mkwargs["clip_valid"], False) self.assertEqual(mkwargs["round_int"], -2) @@ -449,12 +556,19 @@ def test_modis_window(self, mocked_mosaic): result = self.runner.invoke( modis_window_cli, - ["/tmp/data", - "--gdal-kwarg", "xRes=10", - "--gdal-kwarg", "yRes=10", - "--gdal-kwarg", "outputType=1", - "--gdal-kwarg", "resampleAlg=bilinear", - "--gdal-kwarg", "noData=0"] + [ + "/tmp/data", + "--gdal-kwarg", + "xRes=10", + "--gdal-kwarg", + "yRes=10", + "--gdal-kwarg", + "outputType=1", + "--gdal-kwarg", + "resampleAlg=bilinear", + "--gdal-kwarg", + "noData=0", + ], ) self.assertEqual(result.exit_code, 0) mocked_mosaic.assert_called_once() @@ -467,7 +581,9 @@ def test_modis_window(self, mocked_mosaic): mocked_mosaic.reset_mock() - with patch("modape.scripts.modis_window.ModisMosaic.__init__", return_value=None) as mock_init: + with patch( + "modape.scripts.modis_window.ModisMosaic.__init__", return_value=None + ) as mock_init: src = self.testpath.joinpath("data") src.joinpath("MXD13A2.h21v10.006.VEM.h5").touch() result = self.runner.invoke(modis_window_cli, ["/tmp/data"]) @@ -480,7 +596,9 @@ def test_modis_window(self, mocked_mosaic): mocked_mosaic.reset_mock() src.joinpath("MYD11A2.h21v10.006.TDA.h5").touch() - with patch("modape.scripts.modis_window.ModisMosaic.__init__", return_value=None) as mock_init: + with patch( + "modape.scripts.modis_window.ModisMosaic.__init__", return_value=None + ) as mock_init: src = self.testpath.joinpath("data") src.joinpath("MXD13A2.h21v10.006.VEM.h5").touch() result = self.runner.invoke(modis_window_cli, ["/tmp/data"]) @@ -488,7 +606,254 @@ def test_modis_window(self, mocked_mosaic): mock_init.assert_not_called() mocked_mosaic.assert_not_called() - @patch("modape.scripts.csv_smooth.ws2doptvp", return_value=(np.random.rand(100), 10)) + def test_collect_smooth_export_import_sgrid(self): + """ + This is more an integration test, really... + + The following is tested: + + 1. (a) `modis_collect` will find 16 HDF-files: 15x for h18v09, 1x for h18v08 + (b) Parameter `tiles_required` reads only "h18v09", thus h18v08 will not qualify: + => for this tile the H5-file is merely initialized: no data written + 2. `modis_smooth` will intialize the smoothing grid (sgrid) and produce 12-timestep + smoothed H5-file + 3. `modis_window` is invoked to export the sgrid + 4. `modis_collect`: ingest for forward processing + 5. Do forward processing and export dekads + 6. remove SMOOTH .h5 archives + 7. Initialize new SMOOTH .h5 archives by importing from sgrid + 8. Do forward processing and export dekads in same fashion as (5) + + + """ + + tile_dates_len: dict[str, dict[str, int]] = { + "h18v08": dict(VIM=0, SMOOTH=0), + "h18v09": dict(VIM=15, SMOOTH=12), + } + sgrid_hashes: dict[str, str] = { + "h18v08": "f3f13825d8e604a5154fafba39542fbe99b75de1", + "h18v09": "cbdb80c7951203dd911e8996fef9e720cc046a96", + } + dekad_hashes: dict[str, str] = { + "202505d2": "3cb5c6e47c091b18c5ed922fc0114df9b7e40ec6", + "202505d3": "4396d68dd01234c7fc9a4a85fe96282f5dc64881", + } + + def smooth_and_export(): + modis_smooth_cli.callback( + src=Path(temp_dir).joinpath("VIM").as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + svalue=None, + srange=None, + pvalue=None, + tempint=10, + tempint_start=None, + nsmooth=16, + nupdate=1, + soptimize=True, + sgrid=None, + parallel_tiles=1, + last_collected=None, + ) + + modis_window_cli.callback( + src=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH", "EXPORT").as_posix(), + begin_date=datetime(2025, 5, 15), + end_date=datetime(2025, 5, 25), + roi=None, + region="TEST", + sgrid=False, + force_doy=False, + filter_product=None, + filter_vampc=None, + target_srs="EPSG:4326", + co=[ + "COMPRESS=LZW", + "PREDICTOR=2", + "TILED=YES", + "BLOCKXSIZE=256", + "BLOCKYSIZE=256", + ], + clip_valid=True, + round_int=2, + gdal_kwarg={ + "xRes": 0.01, + "yRes": 0.01, + }, + overwrite=True, + last_smoothed=None, + ) + + for dekad in ["202505d2", "202505d3"]: + ds: gdal.Dataset = gdal.Open( + Path(temp_dir) + .joinpath("VIM", "SMOOTH", "EXPORT", f"TESTvim{dekad}.tif") + .as_posix() + ) + try: + self.assertEqual( + hashlib.sha1(ds.ReadAsArray().tobytes()).hexdigest(), + dekad_hashes[dekad], + ) + finally: + ds = None + Path(temp_dir).joinpath( + "VIM", "SMOOTH", "EXPORT", f"TESTvim{dekad}.tif" + ).unlink() + + with tempfile.TemporaryDirectory() as temp_dir: + src = Path(__file__).resolve().parent.joinpath("data", "init") + files = [file.as_posix() for file in src.glob("*.hdf")] + + # 1. `modis_collect` will find 16 HDF-files: 15x for h18v09, 1x for h18v08: + modis_collect_cli.callback( + src_dir=src.as_posix(), + targetdir=temp_dir, + compression="gzip", + vam_code="VIM", + interleave=True, + parallel_tiles=1, + cleanup=False, + force=True, + last_collected=None, + tiles_required="h18v09", + report_collected=False, + ) + + for tile, dates_len in tile_dates_len.items(): + h5 = ModisRawH5( + [ + src.joinpath(file).as_posix() + for file in files + if file.find(f".{tile}.") > -1 + ], + temp_dir, + "VIM", + True, + ) + with h5py.File(h5.filename, "r+", libver="latest") as h5f: + dates = h5f.get("dates") + self.assertEqual(len(dates), dates_len["VIM"]) + + # 2. `modis_smooth` will intialize the smoothing grid (sgrid) + modis_smooth_cli.callback( + src=Path(temp_dir).joinpath("VIM").as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + svalue=None, + srange=None, + pvalue=None, + tempint=10, + tempint_start=None, + nsmooth=0, + nupdate=0, + soptimize=True, + sgrid=None, + parallel_tiles=1, + last_collected=None, + ) + + for rawfile in Path(temp_dir).joinpath("VIM").glob("*.h5"): + tile = REGEX_PATTERNS["tile"].findall(rawfile.name)[0] + smt_h5 = ModisSmoothH5( + rawfile=rawfile.as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + tempint=10, + ) + with h5py.File(smt_h5.filename, "r+", libver="latest") as h5f: + dates = h5f.get("dates") + self.assertEqual( + hashlib.sha1(h5f.get("sgrid")[:].data.tobytes()).hexdigest(), + sgrid_hashes[tile], + ) + self.assertEqual(len(dates), tile_dates_len[tile]["SMOOTH"]) + + # 3. `modis_window` is invoked to export the sgrid + modis_window_cli.callback( + src=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH", "EXPORT").as_posix(), + begin_date=None, + end_date=None, + roi=None, + region="TEST", + sgrid=True, + force_doy=False, + filter_product=None, + filter_vampc=None, + target_srs="none", + co=( + "COMPRESS=LZW", + "PREDICTOR=3", + "TILED=YES", + "BLOCKXSIZE=256", + "BLOCKYSIZE=256", + ), + clip_valid=False, + round_int=None, + gdal_kwarg=None, + overwrite=False, + last_smoothed=None, + ) + + # 4. `modis_collect`: ingest for forward processing + src = Path(__file__).resolve().parent.joinpath("data", "fwd") + files = [file.as_posix() for file in src.glob("*.hdf")] + + modis_collect_cli.callback( + src_dir=src.as_posix(), + targetdir=temp_dir, + compression="gzip", + vam_code="VIM", + interleave=True, + parallel_tiles=1, + cleanup=False, + force=True, + last_collected=None, + tiles_required="h18v09", + report_collected=False, + ) + + # 5. Do forward processing and export dekads + smooth_and_export() + + # 6. remove SMOOTH .h5 archives + for _archive in Path(temp_dir).joinpath("VIM", "SMOOTH").glob("*.h5"): + _archive.unlink() + + # 7. Initialize new SMOOTH .h5 archives by importing from sgrid + modis_smooth_cli.callback( + src=Path(temp_dir).joinpath("VIM").as_posix(), + targetdir=Path(temp_dir).joinpath("VIM", "SMOOTH").as_posix(), + svalue=None, + srange=None, + pvalue=None, + tempint=10, + tempint_start=None, + nsmooth=0, + nupdate=0, + soptimize=False, + sgrid=Path(temp_dir) + .joinpath("VIM", "SMOOTH", "EXPORT", "TESTvim_sgrid.tif") + .as_posix(), + parallel_tiles=1, + last_collected=None, + ) + + for _archive in Path(temp_dir).joinpath("VIM", "SMOOTH").glob("*.h5"): + tile = REGEX_PATTERNS["tile"].findall(_archive.name)[0] + with h5py.File(_archive, "r+", libver="latest") as h5f: + self.assertEqual( + hashlib.sha1(h5f.get("sgrid")[:].data.tobytes()).hexdigest(), + sgrid_hashes[tile], + ) + + # 8. Do forward processing and export dekads in same fashion as (5) + smooth_and_export() + + @patch( + "modape.scripts.csv_smooth.ws2doptvp", return_value=(np.random.rand(100), 10) + ) @patch("modape.scripts.csv_smooth.ws2doptv", return_value=(np.random.rand(100), 10)) @patch("modape.scripts.csv_smooth.ws2d", return_value=np.random.rand(100)) def test_csv_smooth(self, mock_ws2d, mock_ws2doptv, mock_ws2doptvp): @@ -500,7 +865,9 @@ def test_csv_smooth(self, mock_ws2d, mock_ws2doptv, mock_ws2doptvp): self.assertEqual(result.exit_code, 2) testfile = "/tmp/test.csv" - _ = pd.DataFrame({"TS1": np.random.rand(100), "TS2": np.random.rand(100)}).to_csv(testfile) + _ = pd.DataFrame( + {"TS1": np.random.rand(100), "TS2": np.random.rand(100)} + ).to_csv(testfile) result = self.runner.invoke(csv_smooth_cli, [testfile]) self.assertEqual(result.exit_code, 1) @@ -523,13 +890,17 @@ def test_csv_smooth(self, mock_ws2d, mock_ws2doptv, mock_ws2doptvp): mock_ws2doptv.return_value = (np.random.rand(100), 10) custom_srange = array("d", np.arange(-2, 2.2, 0.2).round(2)) - result = self.runner.invoke(csv_smooth_cli, [testfile, "--soptimize", "--srange", "-2.0", "2.0", "0.2"]) + result = self.runner.invoke( + csv_smooth_cli, [testfile, "--soptimize", "--srange", "-2.0", "2.0", "0.2"] + ) mock_ws2doptv.assert_called() margs, _ = mock_ws2doptv.call_args self.assertEqual(result.exit_code, 0) np.testing.assert_array_equal(margs[2], custom_srange) - result = self.runner.invoke(csv_smooth_cli, [testfile, "--soptimize", "-p", "0.90"]) + result = self.runner.invoke( + csv_smooth_cli, [testfile, "--soptimize", "-p", "0.90"] + ) mock_ws2doptvp.assert_called() self.assertEqual(result.exit_code, 0) self.assertTrue(exists("/tmp/test_filtoptvp.csv")) diff --git a/tests/test_modis.py b/tests/test_modis.py index d1d12f09..fb772141 100644 --- a/tests/test_modis.py +++ b/tests/test_modis.py @@ -1,5 +1,7 @@ """test_modis.py: Test MODIS classes and functions.""" + # pylint: disable=E0401,E0611,W0702,W0613,C0103 +import os from datetime import datetime from pathlib import Path, PosixPath import pickle @@ -10,7 +12,7 @@ from uuid import uuid4 import numpy as np -import h5py #pylint: disable=import-error +import h5py # pylint: disable=import-error from osgeo import gdal from modape.exceptions import DownloadError, HDF5WriteError @@ -18,32 +20,35 @@ from modape.utils import SessionWithHeaderRedirection class MockResponse: - '''Mock response for testing''' + """Mock response for testing""" + def __init__(self, content, status_code): - '''Create instance, setting content and status_code''' + """Create instance, setting content and status_code""" self._content = content self.status_code = status_code @property def content(self): - '''Return content''' + """Return content""" return self._content def raise_for_status(self): - '''don't raise for status''' - pass #pylint: disable=W0107 + """don't raise for status""" + pass # pylint: disable=W0107 + class MockedPath(PosixPath): - '''Mocked version of PosixPath''' + """Mocked version of PosixPath""" # file size for testing _filesize = 7526571 + def __init__(self, filename): super().__init__() @property def filesize(self): - '''Get file size''' + """Get file size""" return self._filesize def is_dir(self): @@ -55,6 +60,7 @@ def exists(self): def stat(self): return SimpleNamespace(st_size=self.filesize, st_mode=33188) + def create_gdal(x, y): """Create in-memory gdal dataset for testing. @@ -65,11 +71,13 @@ def create_gdal(x, y): ds = driver.Create("/vsimem/{}.tif".format(str(uuid4())), x, y, 1, 3) return ds -def create_h5temp(rows: int, - cols: int, - tr: int, - ts: int, - ) -> Path: + +def create_h5temp( + rows: int, + cols: int, + tr: int, + ts: int, +) -> Path: """Create temporary HDF5 rawfile. Args: @@ -85,16 +93,16 @@ def create_h5temp(rows: int, fn = Path("/tmp/data/MXD13A2.h21v10.006.VIM.h5") - with h5py.File(fn, "a", driver="core", backing_store=True) as h5f: - - dset = h5f.create_dataset("data", - shape=(rows*cols, 4), - dtype="int16", - maxshape=(rows*cols, None), - chunks=((rows*cols)//25, 10), - compression="gzip", - fillvalue=-3000) + dset = h5f.create_dataset( + "data", + shape=(rows * cols, 4), + dtype="int16", + maxshape=(rows * cols, None), + chunks=((rows * cols) // 24, 10), + compression="gzip", + fillvalue=-3000, + ) dset.attrs.update( dict( @@ -103,23 +111,26 @@ def create_h5temp(rows: int, tshift=ts, RasterXSize=rows, RasterYSize=cols, - geotransform=(0, 0, 0, 0, 0), + geotransform=(0, 10, 0, 0, 0, 10), projection="EPSG:4326", resolution=(1000, -1000), globalproduct=False, vamcode="VIM", - ) + ) ) - h5f.create_dataset("dates", - shape=(4,), - data=np.array(["2002185", "2002193", "2002201", "2002209"], dtype="S8"), - maxshape=(None,), - dtype="S8", - compression="gzip") + h5f.create_dataset( + "dates", + shape=(4,), + data=np.array(["2002185", "2002193", "2002201", "2002209"], dtype="S8"), + maxshape=(None,), + dtype="S8", + compression="gzip", + ) return fn + def create_h5temp_global() -> Path: """Create temporary global HDF5 rawfile. @@ -134,14 +145,15 @@ def create_h5temp_global() -> Path: cols = 7200 with h5py.File(fn, "a", driver="core", backing_store=True) as h5f: - - dset = h5f.create_dataset("data", - shape=(rows*cols, 4), - dtype="int16", - maxshape=(rows*cols, None), - chunks=((rows*cols)//25, 10), - compression="gzip", - fillvalue=-3000) + dset = h5f.create_dataset( + "data", + shape=(rows * cols, 4), + dtype="int16", + maxshape=(rows * cols, None), + chunks=((rows * cols) // 25, 10), + compression="gzip", + fillvalue=-3000, + ) dset.attrs.update( dict( @@ -150,23 +162,26 @@ def create_h5temp_global() -> Path: tshift=8, RasterXSize=rows, RasterYSize=cols, - geotransform=(0, 0, 0, 0, 0), + geotransform=(0, 10, 0, 0, 0, 10), projection="EPSG:4326", resolution=(0.05, 0.05), globalproduct=True, vamcode="VIM", - ) + ) ) - h5f.create_dataset("dates", - shape=(4,), - data=np.array(["2002185", "2002193", "2002201", "2002209"], dtype="S8"), - maxshape=(None,), - dtype="S8", - compression="gzip") + h5f.create_dataset( + "dates", + shape=(4,), + data=np.array(["2002185", "2002193", "2002201", "2002209"], dtype="S8"), + maxshape=(None,), + dtype="S8", + compression="gzip", + ) return fn + class TestModisQuery(unittest.TestCase): """Test class for ModisQuery tests.""" @@ -194,7 +209,7 @@ def setUpClass(cls): def tearDownClass(cls): try: shutil.rmtree("__pycache__") - except: + except Exception as _: pass def test_query(self): @@ -202,14 +217,17 @@ def test_query(self): self.assertEqual(self.query.api.params["short_name"], ["MOD13A2", "MYD13A2"]) self.assertEqual(self.query.api.params["bounding_box"], "10.0,10.0,20.0,20.0") - self.assertEqual(self.query.api.params["temporal"], ["2020-01-01T00:00:00Z,2020-07-24T00:00:00Z"]) + self.assertEqual( + self.query.api.params["temporal"], + ["2020-01-01T00:00:00Z,2020-07-24T00:00:00Z"], + ) def test_response_parse(self): """Test parsing of response""" - with patch("modape.modis.download.GranuleQuery.get_all", - return_value=self.api_response): - + with patch( + "modape.modis.download.GranuleQuery.get_all", return_value=self.api_response + ): self.query.search(match_begin=False) self.assertTrue(self.query.results) @@ -223,16 +241,34 @@ def test_response_parse(self): self.query.tile_filter = tiles_select self.query.search(match_begin=True) - self.assertEqual(len({values["tile"] for key, values in self.query.results.items()}), 2) - self.assertTrue(all([values["time_start"] >= self.query.begin.date() for key, values in self.query.results.items()])) - self.assertTrue(all([values["time_end"] <= self.query.end.date() for key, values in self.query.results.items()])) + self.assertEqual( + len({values["tile"] for key, values in self.query.results.items()}), 2 + ) + self.assertTrue( + all( + [ + values["time_start"] >= self.query.begin.date() + for key, values in self.query.results.items() + ] + ) + ) + self.assertTrue( + all( + [ + values["time_end"] <= self.query.end.date() + for key, values in self.query.results.items() + ] + ) + ) @patch("modape.modis.download.cksum", return_value=1534015008) @patch("modape.modis.download.shutil") @patch("modape.modis.download.open") @patch("modape.modis.download.ThreadPoolExecutor") @patch("modape.modis.download.GranuleQuery.get_all") - def test_download(self, mock_response, mock_submit, mock_open, mock_shutil, mock_cksum): + def test_download( + self, mock_response, mock_submit, mock_open, mock_shutil, mock_cksum + ): """Test download""" mock_response.return_value = self.api_response @@ -249,7 +285,9 @@ def test_download(self, mock_response, mock_submit, mock_open, mock_shutil, mock future_result = (fid, None) mock_submit.return_value.__enter__.return_value.submit.return_value.result.return_value = future_result - with patch("modape.modis.download.ModisQuery._fetch", return_value=future_result) as mocked_fetch: + with patch( + "modape.modis.download.ModisQuery._fetch", return_value=future_result + ) as mocked_fetch: self.query.download( targetdir=(self.testpath), username="test", @@ -276,15 +314,16 @@ def test_download(self, mock_response, mock_submit, mock_open, mock_shutil, mock username="test", password="test", multithread=True, - max_retries=5 + max_retries=5, ) # 1 + 5 retriess self.assertEqual(mock_submit.call_count, 6) # check robust download - with patch("modape.modis.download.SessionWithHeaderRedirection") as session_mock: - + with patch( + "modape.modis.download.SessionWithHeaderRedirection" + ) as session_mock: xml_mock = MockResponse(self.hdfxml, 200) session_mock.return_value.__enter__.return_value.get.return_value.__enter__.side_effect = [ MagicMock(), @@ -304,6 +343,7 @@ def test_download(self, mock_response, mock_submit, mock_open, mock_shutil, mock self.assertEqual(dl, ["MOD13A2.A2020001.h18v07.006.2020018001022.hdf"]) + class TestModisCollect(unittest.TestCase): """Test class for ModisQuery tests.""" @@ -311,25 +351,33 @@ class TestModisCollect(unittest.TestCase): def setUpClass(cls): """Set up testing class""" + cls.vim_files = [ + "MYD13A2.A2002201.h18v06.006.2015149071105.hdf", + "MYD13A2.A2002185.h18v06.006.2015149071113.hdf", + "MOD13A2.A2002177.h18v06.006.2015149001129.hdf", + "MOD13A2.A2002209.h18v06.006.2015149180726.hdf", + "MOD13A2.A2002193.h18v06.006.2015149022847.hdf", + ] - cls.vim_files = ["MYD13A2.A2002201.h18v06.006.2015149071105.hdf", - "MYD13A2.A2002185.h18v06.006.2015149071113.hdf", - "MOD13A2.A2002177.h18v06.006.2015149001129.hdf", - "MOD13A2.A2002209.h18v06.006.2015149180726.hdf", - "MOD13A2.A2002193.h18v06.006.2015149022847.hdf"] + cls.viirs_files = [ + "VNP13A2.A2025001.h18v09.002.2025017085341.h5", + "VNP13A2.A2025009.h18v09.002.2025031192612.h5", + ] cls.vim_files_terra = [x for x in cls.vim_files if "MOD13A2" in x] cls.vim_files_aqua = [x for x in cls.vim_files if "MYD13A2" in x] - cls.lst_files = ["MYD11A2.A2002193.h18v06.006.2015146152945.hdf", - "MOD11A2.A2002209.h18v06.006.2015145152020.hdf", - "MYD11A2.A2002201.h18v06.006.2015146153241.hdf", - "MYD11A2.A2002185.h18v06.006.2015146152642.hdf", - "MOD11A2.A2002177.h18v06.006.2015144183717.hdf", - "MYD11A2.A2002209.h18v06.006.2015152152813.hdf", - "MOD11A2.A2002185.h18v06.006.2015145002847.hdf", - "MOD11A2.A2002193.h18v06.006.2015145055806.hdf", - "MOD11A2.A2002201.h18v06.006.2015145105749.hdf"] + cls.lst_files = [ + "MYD11A2.A2002193.h18v06.006.2015146152945.hdf", + "MOD11A2.A2002209.h18v06.006.2015145152020.hdf", + "MYD11A2.A2002201.h18v06.006.2015146153241.hdf", + "MYD11A2.A2002185.h18v06.006.2015146152642.hdf", + "MOD11A2.A2002177.h18v06.006.2015144183717.hdf", + "MYD11A2.A2002209.h18v06.006.2015152152813.hdf", + "MOD11A2.A2002185.h18v06.006.2015145002847.hdf", + "MOD11A2.A2002193.h18v06.006.2015145055806.hdf", + "MOD11A2.A2002201.h18v06.006.2015145105749.hdf", + ] cls.lst_files_terra = [x for x in cls.lst_files if "MOD11A2" in x] cls.lst_files_aqua = [x for x in cls.lst_files if "MYD11A2" in x] @@ -349,7 +397,7 @@ def setUpClass(cls): def tearDownClass(cls): try: shutil.rmtree("__pycache__") - except: + except Exception as _: pass for file in Path("/tmp").glob("*h5"): @@ -358,27 +406,27 @@ def tearDownClass(cls): def tearDown(self): try: shutil.rmtree("/tmp/VIM") - except: + except Exception as _: pass try: shutil.rmtree("/tmp/TDA") - except: + except Exception as _: pass try: shutil.rmtree("/tmp/TNA") - except: + except Exception as _: pass try: shutil.rmtree("/tmp/TDT") - except: + except Exception as _: pass try: shutil.rmtree("/tmp/TNT") - except: + except Exception as _: pass def test_raw_instance(self): @@ -391,16 +439,22 @@ def test_raw_instance(self): self.assertEqual(raw_h5.tshift, 8) self.assertEqual(str(raw_h5.filename), "/tmp/VIM/MYD13A2.h18v06.006.VIM.h5") - raw_h5 = ModisRawH5(files=self.vim_files_aqua, targetdir="/tmp", vam_product_code="VEM") + raw_h5 = ModisRawH5( + files=self.vim_files_aqua, targetdir="/tmp", vam_product_code="VEM" + ) self.assertEqual(raw_h5.vam_product_code, "VEM") self.assertEqual(raw_h5.product, "MYD13A2") self.assertEqual(str(raw_h5.filename), "/tmp/VEM/MYD13A2.h18v06.006.VEM.h5") with self.assertRaises(AssertionError): - raw_h5 = ModisRawH5(files=self.vim_files_aqua + self.vim_files_terra, targetdir="/tmp") + raw_h5 = ModisRawH5( + files=self.vim_files_aqua + self.vim_files_terra, targetdir="/tmp" + ) with self.assertRaises(AssertionError): - raw_h5 = ModisRawH5(files=self.vim_files_aqua, targetdir="/tmp", vam_product_code="LTD") + raw_h5 = ModisRawH5( + files=self.vim_files_aqua, targetdir="/tmp", vam_product_code="LTD" + ) raw_h5 = ModisRawH5(files=self.vim_files, targetdir="/tmp", interleave=True) self.assertEqual(raw_h5.vam_product_code, "VIM") @@ -418,7 +472,9 @@ def test_raw_instance(self): self.assertEqual(raw_h5.tshift, 4) self.assertEqual(str(raw_h5.filename), "/tmp/TDA/MYD11A2.h18v06.006.TDA.h5") - raw_h5 = ModisRawH5(files=self.lst_files_aqua, targetdir="/tmp", vam_product_code="LTN") + raw_h5 = ModisRawH5( + files=self.lst_files_aqua, targetdir="/tmp", vam_product_code="LTN" + ) self.assertEqual(raw_h5.vam_product_code, "LTN") self.assertEqual(str(raw_h5.filename), "/tmp/TNA/MYD11A2.h18v06.006.TNA.h5") @@ -428,12 +484,16 @@ def test_raw_instance(self): self.assertEqual(str(raw_h5.filename), "/tmp/TDT/MOD11A2.h18v06.006.TDT.h5") with self.assertRaises(AssertionError): - raw_h5 = ModisRawH5(files=self.lst_files_terra, targetdir="/tmp", vam_product_code="VIM") + raw_h5 = ModisRawH5( + files=self.lst_files_terra, targetdir="/tmp", vam_product_code="VIM" + ) - raw_h5 = ModisRawH5(files=self.lst_files_terra + self.lst_duplicate, targetdir="/tmp") + raw_h5 = ModisRawH5( + files=self.lst_files_terra + self.lst_duplicate, targetdir="/tmp" + ) self.assertEqual(raw_h5.files, sorted(self.lst_files_terra)) - def test_create(self): + def test_create_modis_vim(self): """Test creation of file""" h5f = ModisRawH5( files=self.vim_files, @@ -441,6 +501,7 @@ def test_create(self): interleave=True, ) + self.assertEqual(h5f.filename.name, 'MXD13A2.h18v06.006.VIM.h5') self.assertFalse(h5f.exists) with patch.object(ModisRawH5, "_get_reference_metadata") as mocked_md: @@ -460,18 +521,34 @@ def test_create(self): dates = hdf5_file.get("dates") self.assertTrue(dates) - self.assertTrue(dates.shape, (len(h5f.rawdates,))) + self.assertTrue( + dates.shape, + ( + len( + h5f.rawdates, + ) + ), + ) h5f.filename.unlink() - @patch("modape.modis.collect.HDFHandler.open_datasets") + def test_create_viirs_vim(self): + """Test creation of file""" + h5f = ModisRawH5( + files=self.viirs_files, + targetdir="/tmp", + interleave=True, + ) + + self.assertEqual(h5f.filename.name, 'VNP13A2.h18v09.002.VIM.h5') + @patch("modape.modis.collect.HDFHandler.read_chunk") - def test_update(self, mocked_chunk, mocked_handles): + def test_update(self, mocked_chunk): """Test updating dataset""" ones = np.ones((48, 1200), dtype="int16") ones_view = ones.view() - ones_view.shape = (48*1200,) + ones_view.shape = (48 * 1200,) h5f = ModisRawH5( files=self.vim_files, @@ -484,10 +561,7 @@ def test_update(self, mocked_chunk, mocked_handles): h5f.create() mocked_chunk.return_value = ones - - with patch("modape.modis.collect.HDFHandler.iter_handles") as mocked_iter: - mocked_iter.return_value = ((ii, None) for ii, x in enumerate(h5f.files)) - h5f.update() + h5f.update() for test_arr in h5f.read_chunked("data", xchunk=10): for dim in range(test_arr.shape[1]): @@ -512,9 +586,7 @@ def test_update(self, mocked_chunk, mocked_handles): mocked_md.return_value = self.referece_metadata h5f.create() - with patch("modape.modis.collect.HDFHandler.iter_handles") as mocked_iter: - mocked_iter.return_value = ((ii, None) for ii, x in enumerate(h5f.files)) - h5f.update() + h5f.update() dates_init = h5f.rawdates @@ -528,13 +600,11 @@ def test_update(self, mocked_chunk, mocked_handles): twos = ones.copy() twos[...] = 2 twos_view = twos.view() - twos_view.shape = (48*1200,) + twos_view.shape = (48 * 1200,) mocked_chunk.return_value = twos - with patch("modape.modis.collect.HDFHandler.iter_handles") as mocked_iter: - mocked_iter.return_value = ((ii, None) for ii, x in enumerate(h5f.files)) - h5f.update() + h5f.update() for test_arr in h5f.read_chunked("data", xchunk=10): ii = 0 @@ -549,7 +619,6 @@ def test_update(self, mocked_chunk, mocked_handles): dates = [x.decode() for x in hdf5_file.get("dates")] self.assertEqual(dates, dates_init + h5f.rawdates) - h5f.filename.unlink() del h5f @@ -562,9 +631,7 @@ def test_update(self, mocked_chunk, mocked_handles): mocked_md.return_value = self.referece_metadata h5f.create() - with patch("modape.modis.collect.HDFHandler.iter_handles") as mocked_iter: - mocked_iter.return_value = ((ii, None) for ii, x in enumerate(h5f.files)) - h5f.update() + h5f.update() del h5f @@ -573,11 +640,9 @@ def test_update(self, mocked_chunk, mocked_handles): targetdir="/tmp", ) - with patch("modape.modis.collect.HDFHandler.iter_handles") as mocked_iter: - mocked_iter.return_value = ((ii, None) for ii, x in enumerate(h5f.files)) + with self.assertRaises(AssertionError): + h5f.update() - with self.assertRaises(AssertionError): - h5f.update() class TestModisSmooth(unittest.TestCase): """Test class for ModisSmooth tests""" @@ -586,15 +651,18 @@ class TestModisSmooth(unittest.TestCase): def setUpClass(cls): cls.testpath = Path("/tmp/data") cls.testpath.mkdir(exist_ok=True) + for root, dirs, files in os.walk(cls.testpath): + for f in files: + os.unlink(os.path.join(root, f)) cls.testfile = create_h5temp(12, 12, 8, 8) - cls.y_chunksize = 12*12//25 + cls.y_chunksize = 12 * 12 // 25 @classmethod def tearDownClass(cls): cls.testfile.unlink() try: shutil.rmtree(str(cls.testpath)) - except: + except Exception as _: pass def test_smooth_instance(self): @@ -621,7 +689,6 @@ def test_smooth_instance(self): self.assertEqual(smtH5.temporalresolution, 10) self.assertEqual(str(smtH5.filename), "/tmp/MXD13A2.h21v10.006.txd.VIM.h5") - smtH5 = ModisSmoothH5( rawfile=self.testfile, targetdir="/tmp", @@ -753,7 +820,10 @@ def test_smooth(self, mock_hdf5base_read, mocked_write, mocked_read): _, mkwargs = mocked_whit.call_args np.testing.assert_array_equal(mkwargs["llas"], np.arange(0, 3.2, 0.2).round(2)) - with patch("modape.modis.smooth.ws2doptv") as mocked_whit1, patch("modape.modis.smooth.ws2doptvp") as mocked_whit2: + with ( + patch("modape.modis.smooth.ws2doptv") as mocked_whit1, + patch("modape.modis.smooth.ws2doptvp") as mocked_whit2, + ): mocked_read.return_value = iter([ones[:, 0]]) mocked_whit2.return_value = (ts_test, 10) with patch("modape.modis.smooth.lag1corr", return_value=0.8): @@ -768,6 +838,7 @@ def test_smooth(self, mock_hdf5base_read, mocked_write, mocked_read): smtH5.filename.unlink() + class TestModisMosaic(unittest.TestCase): """Test class for ModisMosaic""" @@ -782,37 +853,32 @@ def setUpClass(cls): smt_file.create() cls.testfile_smt = smt_file.filename - @classmethod def tearDownClass(cls): cls.testfile.unlink() try: shutil.rmtree(str(cls.testpath)) - except: + except Exception as _: pass def test_instance(self): """Test instance creation""" - mosaic = ModisMosaic([self.testfile]*5) + mosaic = ModisMosaic([self.testfile] * 5) with h5py.File(self.testfile, "r") as h5f_open: dts = [x.decode() for x in h5f_open.get("dates")[...]] - self.assertEqual( - [x.strftime("%Y%j") for x in mosaic.dates], - dts - ) + self.assertEqual([x.strftime("%Y%j") for x in mosaic.dates], dts) self.assertTrue(len(mosaic.files), 5) mosaic = ModisMosaic(self.testfile) - self.assertEqual( - [x.strftime("%Y%j") for x in mosaic.dates], - dts - ) + self.assertEqual([x.strftime("%Y%j") for x in mosaic.dates], dts) self.assertTrue(len(mosaic.files), 1) - @patch("modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif") + @patch( + "modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif" + ) @patch("modape.modis.window.ModisMosaic._mosaic") @patch("modape.modis.window.ModisMosaic._translate") def test_generate_mosaics_basic(self, mock_translate, mock_mosaic, mock_raster): @@ -821,7 +887,7 @@ def test_generate_mosaics_basic(self, mock_translate, mock_mosaic, mock_raster): mock_mosaic.return_value.__enter__.return_value = "/inmem/warped.tif" - with self.assertRaises(ValueError): + with self.assertRaises(NotImplementedError): mosaic.generate_mosaics("not_a_dataset", "/tmp/data", "EPSG:4326") mosaic.generate_mosaics("data", "/tmp/data", "EPSG:4326") @@ -833,7 +899,7 @@ def test_generate_mosaics_basic(self, mock_translate, mock_mosaic, mock_raster): self.assertEqual(margs[1], "data") self.assertEqual(margs[2], False) self.assertEqual(mkwargs["round_int"], None) - self.assertEqual(mkwargs["ix"], len(mosaic.dates)-1) + self.assertEqual(mkwargs["ix"], len(mosaic.dates) - 1) mock_mosaic.assert_called() margs, mkwargs = mock_mosaic.call_args @@ -842,7 +908,9 @@ def test_generate_mosaics_basic(self, mock_translate, mock_mosaic, mock_raster): self.assertEqual(mkwargs["target_srs"], "EPSG:4326") self.assertEqual(mkwargs["dtype"], 3) self.assertEqual(mkwargs["nodata"], -3000) - np.testing.assert_almost_equal(mkwargs["resolution"], [1000/112000, (1000/112000)*-1]) + np.testing.assert_almost_equal( + mkwargs["resolution"], [1000 / 112000, (1000 / 112000) * -1] + ) mock_translate.assert_called() _, mkwargs = mock_translate.call_args @@ -853,8 +921,9 @@ def test_generate_mosaics_basic(self, mock_translate, mock_mosaic, mock_raster): self.assertEqual(mkwargs["noData"], -3000) self.assertEqual(mkwargs["outputType"], 3) - - @patch("modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif") + @patch( + "modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif" + ) @patch("modape.modis.window.ModisMosaic._mosaic") @patch("modape.modis.window.ModisMosaic._translate") def test_generate_mosaics_sgrid(self, mock_translate, mock_mosaic, mock_raster): @@ -881,7 +950,9 @@ def test_generate_mosaics_sgrid(self, mock_translate, mock_mosaic, mock_raster): self.assertEqual(mkwargs["target_srs"], "EPSG:4326") self.assertEqual(mkwargs["dtype"], 6) self.assertEqual(mkwargs["nodata"], 0) - np.testing.assert_almost_equal(mkwargs["resolution"], [1000/112000, (1000/112000)*-1]) + np.testing.assert_almost_equal( + mkwargs["resolution"], [1000 / 112000, (1000 / 112000) * -1] + ) mock_translate.assert_called() _, mkwargs = mock_translate.call_args @@ -892,10 +963,14 @@ def test_generate_mosaics_sgrid(self, mock_translate, mock_mosaic, mock_raster): self.assertEqual(mkwargs["noData"], 0) self.assertEqual(mkwargs["outputType"], 6) - @patch("modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif") + @patch( + "modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif" + ) @patch("modape.modis.window.ModisMosaic._mosaic") @patch("modape.modis.window.ModisMosaic._translate") - def test_generate_mosaics_kwarg_propagation(self, mock_translate, mock_mosaic, mock_raster): + def test_generate_mosaics_kwarg_propagation( + self, mock_translate, mock_mosaic, mock_raster + ): """Test mosaic creation""" mosaic = ModisMosaic(self.testfile) @@ -904,23 +979,24 @@ def test_generate_mosaics_kwarg_propagation(self, mock_translate, mock_mosaic, m cos = ["COMPRESS=LZW", "PREDICTOR=2"] aoi = [0, 0, 10, 10] - mosaic.generate_mosaics("data", - "/tmp/data", - "EPSG:3857", - aoi=aoi, - overwrite=True, - force_doy=True, - prefix="test", - clip_valid=True, - round_int=-2, - xRes=10, - yRes=10, - noData=-1, - outputType=0, - creationOptions=cos, - resampleAlg="bilinear", - multithread=True, - ) + mosaic.generate_mosaics( + "data", + "/tmp/data", + "EPSG:3857", + aoi=aoi, + overwrite=True, + force_doy=True, + prefix="test", + clip_valid=True, + round_int=-2, + xRes=10, + yRes=10, + noData=-1, + outputType=0, + creationOptions=cos, + resampleAlg="bilinear", + multithread=True, + ) mock_raster.assert_called() margs, mkwargs = mock_raster.call_args @@ -950,7 +1026,9 @@ def test_generate_mosaics_kwarg_propagation(self, mock_translate, mock_mosaic, m self.assertEqual(mkwargs["projWin"], aoi) self.assertEqual(mkwargs["outputBounds"], aoi) - @patch("modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif") + @patch( + "modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif" + ) @patch("modape.modis.window.ModisMosaic._mosaic") @patch("modape.modis.window.ModisMosaic._translate") def test_generate_mosaics_global(self, mock_translate, mock_mosaic, mock_raster): @@ -958,7 +1036,7 @@ def test_generate_mosaics_global(self, mock_translate, mock_mosaic, mock_raster) mosaic = ModisMosaic(self.testfile_global) mock_mosaic.return_value.__enter__.return_value = "/vsimem/warped.tif" - mosaic.generate_mosaics("data", "/tmp/data", None) + mosaic.generate_mosaics("data", "/tmp/data") mock_raster.assert_called() self.assertEqual(mock_raster.call_count, len(mosaic.dates)) @@ -969,7 +1047,7 @@ def test_generate_mosaics_global(self, mock_translate, mock_mosaic, mock_raster) self.assertEqual(mock_translate.call_count, len(mosaic.dates)) self.assertEqual(mkwargs["src"], "/vsimem/warped.tif") self.assertEqual(mkwargs["dst"], "/tmp/data/vim2002j209.tif") - self.assertEqual(mkwargs["outputSRS"], None) + self.assertTrue("outputSRS" not in mkwargs) mock_translate.reset_mock() mock_mosaic.reset_mock() @@ -989,7 +1067,9 @@ def test_generate_mosaics_global(self, mock_translate, mock_mosaic, mock_raster) self.assertEqual(mkwargs["dst"], "/tmp/data/vim2002j209.tif") self.assertEqual(mkwargs["outputSRS"], "EPSG:3857") - @patch("modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif") + @patch( + "modape.modis.window.ModisMosaic._get_raster", return_value="/vsimem/inmem.tif" + ) @patch("modape.modis.window.ModisMosaic._mosaic") @patch("modape.modis.window.ModisMosaic._translate") def test_generate_mosaics_dates(self, mock_translate, mock_mosaic, mock_raster): @@ -998,11 +1078,13 @@ def test_generate_mosaics_dates(self, mock_translate, mock_mosaic, mock_raster): mock_mosaic.return_value.__enter__.return_value = "/vsimem/warped.tif" - mosaic.generate_mosaics("data", - "/tmp/data", - "EPSG:4326", - start=datetime(2002, 7, 10).date(), - stop=datetime(2002, 7, 21).date()) + mosaic.generate_mosaics( + "data", + "/tmp/data", + "EPSG:4326", + start=datetime(2002, 7, 10).date(), + stop=datetime(2002, 7, 21).date(), + ) n = len(mosaic.dates[1:3])