From 5a0400262adb4477e297a9660c9ad67b092af5c1 Mon Sep 17 00:00:00 2001 From: Marco Alba Date: Wed, 12 May 2021 17:25:38 +0200 Subject: [PATCH] Added datemodified property --- thredds_crawler/crawl.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/thredds_crawler/crawl.py b/thredds_crawler/crawl.py index 61b123b..ec4e8e9 100644 --- a/thredds_crawler/crawl.py +++ b/thredds_crawler/crawl.py @@ -255,7 +255,8 @@ def __init__(self, dataset_url, auth=None): self.name = None self.catalog_url = None self.data_size = None - + self.date_modified = None + # Get an etree object r = requests.get(dataset_url, auth=auth, verify=False) try: @@ -269,6 +270,13 @@ def __init__(self, dataset_url, auth=None): self.name = dataset.get("name") metadata = dataset.find("{%s}metadata" % INV_NS) self.catalog_url = dataset_url.split("?")[0] + + #Date modified + date_modified = dataset.find("{%s}date" % INV_NS) + if date_modified is not None: + date_type = date_modified.get('type') + if date_type=="modified": + self.date_modified = date_modified.text # Data Size - http://www.unidata.ucar.edu/software/thredds/current/tds/catalog/InvCatalogSpec.html#dataSize data_size = dataset.find("{%s}dataSize" % INV_NS) @@ -323,6 +331,24 @@ def __init__(self, dataset_url, auth=None): except BaseException as e: logger.exception('Could not process {}. 
@property
def datemodified(self):
    """Return the dataset's last-modified date, or ``None`` if unknown.

    The value taken from the catalog's ``<date type="modified">`` element
    (stored on ``self.date_modified`` by ``__init__``) is preferred.  When
    the catalog did not supply one, the ``date_modified`` global attribute
    is read from the dataset's OPeNDAP endpoint instead; that fallback
    needs the optional ``netCDF4`` package and performs a network request
    on every access.

    Returns:
        The modification date exactly as published (it is not parsed), or
        ``None`` when there is no OPeNDAP service, ``netCDF4`` is not
        installed, the endpoint cannot be read, or the attribute is absent.
    """
    if self.date_modified is not None:
        return self.date_modified

    # Pick the first service advertised as OPeNDAP.  Entries without a
    # "service" key are skipped rather than raising.
    services = getattr(self, "services", None) or ()
    dap_endpoint = next(
        (
            s.get("url")
            for s in services
            if (s.get("service") or "").lower() == "opendap"
        ),
        None,
    )
    if dap_endpoint is None:
        return None

    # netCDF4 is an optional dependency, so it is imported lazily.
    try:
        import netCDF4
    except ImportError:
        logger.error(
            "The python-netcdf4 library is required for reading the "
            "modification date of this dataset."
        )
        return None

    # Reading the global attributes is best-effort: any failure while
    # talking to the remote endpoint yields None instead of propagating.
    try:
        nc = netCDF4.Dataset(dap_endpoint)
        try:
            if "date_modified" in nc.ncattrs():
                return nc.getncattr("date_modified")
            return None
        finally:
            # Always release the remote handle, even on error.
            nc.close()
    except Exception as e:
        logger.warning(
            "Could not read date_modified from %s: %s", dap_endpoint, e
        )
        return None