From cc33830c5e22afd965ac44ee24d91d385d850d69 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 6 Oct 2025 08:40:05 +0200 Subject: [PATCH 1/3] writes metadata to stdout for diagnostics --- modape/modis/download.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/modape/modis/download.py b/modape/modis/download.py index 5962614..5f4a825 100644 --- a/modape/modis/download.py +++ b/modape/modis/download.py @@ -193,11 +193,15 @@ def _parse_hdfxml(response): @staticmethod def _parse_cmrxml(response, hdf_filename): result = {} - tree = ElementTree.fromstring(response.content) - entry = tree.find(f"DataGranule/AdditionalFile[Name = '{hdf_filename}']") - result.update({"FileSize": entry.find("SizeInBytes").text}) - result.update({"ChecksumType": entry.find("Checksum/Algorithm").text}) - result.update({"Checksum": entry.find("Checksum/Value").text}) + try: + tree = ElementTree.fromstring(response.content) + entry = tree.find(f"DataGranule/AdditionalFile[Name = '{hdf_filename}']") + result.update({"FileSize": entry.find("SizeInBytes").text}) + result.update({"ChecksumType": entry.find("Checksum/Algorithm").text}) + result.update({"Checksum": entry.find("Checksum/Value").text}) + except Exception: + log.info(response.content) + raise return result def _fetch( From b7f20cb504ee9e9744298089bcc03ddd004e5491 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 6 Oct 2025 09:27:10 +0200 Subject: [PATCH 2/3] tries to locate the problem --- modape/modis/download.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/modape/modis/download.py b/modape/modis/download.py index 5f4a825..a54befd 100644 --- a/modape/modis/download.py +++ b/modape/modis/download.py @@ -193,15 +193,11 @@ def _parse_hdfxml(response): @staticmethod def _parse_cmrxml(response, hdf_filename): result = {} - try: - tree = ElementTree.fromstring(response.content) - entry = tree.find(f"DataGranule/AdditionalFile[Name = '{hdf_filename}']") - result.update({"FileSize": entry.find("SizeInBytes").text}) - result.update({"ChecksumType": entry.find("Checksum/Algorithm").text}) - result.update({"Checksum": entry.find("Checksum/Value").text}) - except Exception: - log.info(response.content) - raise + tree = ElementTree.fromstring(response.content) + entry = tree.find(f"DataGranule/AdditionalFile[Name = '{hdf_filename}']") + result.update({"FileSize": entry.find("SizeInBytes").text}) + result.update({"ChecksumType": entry.find("Checksum/Algorithm").text}) + result.update({"Checksum": entry.find("Checksum/Value").text}) return result def _fetch( @@ -238,12 +234,24 @@ def _check(_downloaded: Path, raise_on_error=True) -> bool: allow_redirects=True, ) as cmrxml: cmrxml.raise_for_status() - file_metadata = self._parse_cmrxml( - cmrxml, url.split("/")[-1] - ) + try: + file_metadata = self._parse_cmrxml( + cmrxml, url.split("/")[-1] + ) + except Exception: + log.info( + f"CMR METADATA URL: {url.split('/')[-1]} -- {cmrxml.content}" + ) + raise else: hdfxml.raise_for_status() - file_metadata = self._parse_hdfxml(hdfxml) + try: + file_metadata = self._parse_hdfxml(hdfxml) + except Exception: + log.info( + f"HDF METADATA URL: {url.split('/')[-1]} -- {cmrxml.content}" + ) + raise # check filesize assert ( From d10345d0c810e2bd365d6d37193bb745988302d6 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 6 Oct 2025 10:27:54 +0200 Subject: [PATCH 3/3] accepts file if metadata cannot be parsed --- modape/modis/download.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/modape/modis/download.py b/modape/modis/download.py index a54befd..443da8a 100644 --- a/modape/modis/download.py +++ b/modape/modis/download.py @@ -238,25 +238,20 @@ def _check(_downloaded: Path, raise_on_error=True) -> bool: file_metadata = self._parse_cmrxml( cmrxml, url.split("/")[-1] ) - except Exception: + except AttributeError: log.info( - f"CMR METADATA URL: {url.split('/')[-1]} -- {cmrxml.content}" + f"WARNING: Metadata cannot be parsed for {url.split('/')[-1]}!" ) - raise + return True else: hdfxml.raise_for_status() - try: - file_metadata = self._parse_hdfxml(hdfxml) - except Exception: - log.info( - f"HDF METADATA URL: {url.split('/')[-1]} -- {cmrxml.content}" - ) - raise + file_metadata = self._parse_hdfxml(hdfxml) - # check filesize + # check filesize: assert ( str(_downloaded.stat().st_size).strip() == file_metadata["FileSize"] ), f"Size: {_downloaded.stat().st_size} != {file_metadata['FileSize']}" + # check hash (checksum): with open(_downloaded, "rb") as openfile: if file_metadata["ChecksumType"] == "CKSUM": checksum = str(cksum(openfile)) @@ -278,7 +273,6 @@ def _check(_downloaded: Path, raise_on_error=True) -> bool: raise ValueError( f"Unknown Checksum Type: {file_metadata['ChecksumType']}" ) - # check checksum assert checksum == file_metadata["Checksum"], ( f"Hash: {checksum} != {file_metadata['Checksum']}" )