From 9ed6ff6200f9d2dd789b16f746dbba402ba8cd70 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:24:15 -0400 Subject: [PATCH 01/18] add tgav component to factory --- cassandra/compfactory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cassandra/compfactory.py b/cassandra/compfactory.py index 3600703..2a84c98 100644 --- a/cassandra/compfactory.py +++ b/cassandra/compfactory.py @@ -11,9 +11,10 @@ 'GcamComponent': comp.GcamComponent, 'FldgenComponent': comp.FldgenComponent, 'HectorStubComponent': comp.HectorStubComponent, + 'TgavStubComponent': comp.TgavStubComponent, 'TethysComponent': comp.TethysComponent, 'XanthosComponent': comp.XanthosComponent, - 'DummyComponent': comp.DummyComponent, + 'DummyComponent': comp.DummyComponent } From 4a63ab67ff7020af5b5931fd7b66bda73b2317f2 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:26:05 -0400 Subject: [PATCH 02/18] setup tgav component --- cassandra/components.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/cassandra/components.py b/cassandra/components.py index bdbf240..42f4cae 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1052,8 +1052,7 @@ def _read_scen_data(self, scen): from pickle import load data = pkg_resources.resource_filename('cassandra', 'data') - infile = open(join(data, f'hector-outputstream-{scen}.dat'), - 'rb') + infile = open(join(data, f'hector-outputstream-{scen}.dat'),'rb') df = load(infile) infile.close() @@ -1161,3 +1160,40 @@ def run_component(self): def report_test_results(self): """Report the component's results to the unit testing code.""" return self.results[self.name] + + +class TgavStubComponent(ComponentBase): + """Feed in external time series of temperature data to provide to fldgen to + produce new realizations. + + + """ + + # RDS file variable names + RDS_TGAV_NAME = 'tgav' + RDS_INFILES_NAME = 'infiles' + + # capability name + TGAV_CAPABILITY_NAME = 'Tgav' + + # output field order for the tgav data frame + TGAV_FIELD_ORDER = ['year', 'scenario', 'variable', 'value', 'units'] + + # component expected configuration fields + RDS_FILE_FIELD = 'rds_file' + CLIMATE_VAR_NAME_FIELD = 'climate_var_name' + SCENARIO_FIELD = 'scenario' + UNITS_FIELD = 'units' + + def __init__(self, cap_tbl): + super(TgavStubComponent, self).__init__(cap_tbl) + self.addcapability(self.TGAV_CAPABILITY_NAME) + + def run_component(self): + """Run the TgavStubComponent component + + Load the requested scenarios and make each variable available to the + rest of the system. + + """ + import pandas as pd From 0d793a20fd9fb8dceee9cacbcdf4cc222a9fd08f Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:27:10 -0400 Subject: [PATCH 03/18] validate parameters method --- cassandra/components.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index 42f4cae..38ddec3 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1197,3 +1197,17 @@ def run_component(self): """ import pandas as pd + + def validate_params(self): + """Ensure params are present for this component.""" + + # list of expected params + param_list = [self.RDS_FILE_FIELD, self.CLIMATE_VAR_NAME_FIELD, self.SCENARIO_FIELD, self.UNITS_FIELD] + + for i in param_list: + + if i not in self.params: + msg = f"{self.__class__} Required parameter '{i}' not in config file." + logging.error(msg) + raise KeyError(msg) + From 09d6d5fde96fa79616b6318671b68e0976e33a53 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:27:55 -0400 Subject: [PATCH 04/18] validate year method --- cassandra/components.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/cassandra/components.py b/cassandra/components.py index 38ddec3..752a182 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1210,4 +1210,31 @@ def validate_params(self): msg = f"{self.__class__} Required parameter '{i}' not in config file." logging.error(msg) raise KeyError(msg) - + + def validate_year(self, yr, min_yr=0, max_yr=10000): + """Ensure years are within a reasonable range and are integers. + + :param yr: Target year + :type yr: int + + :param min_yr: Minimum year allowable + :type min_yr: int + + :param max_yr: Maximum year allowable + :type max_yr: int + + :return: [0] validated start year as a four digit integer + [1] validated end year as a four digit integer + + """ + + # validate that the year can be converted to an integer + valid_yr = self.validate_int(yr) + + if (valid_yr < min_yr) or (valid_yr > max_yr): + msg = f"{self.__class__} Year '{valid_yr}' is outside of the reasonable bounds [{min_yr} - {max_yr}]." + logging.error(msg) + raise ValueError(msg) + + else: + return valid_yr From 1774b6a17b4f89d114992e262a2e44a0849dee1c Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:28:24 -0400 Subject: [PATCH 05/18] validate integer method --- cassandra/components.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index 752a182..1baa546 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1238,3 +1238,24 @@ def validate_year(self, yr, min_yr=0, max_yr=10000): else: return valid_yr + + def validate_int(self, value): + """Ensure the value can be converted to an integer. + + :param value: Target value + :type value: int, float, str + + :return: Validated int of value + + """ + + if type(value) == int: + return value + + try: + return int(value) + + except TypeError: + msg = f"{self.__class__} Value '{value}' not able to be converted to an integer as expected." + logging.error(msg) + raise TypeError(msg) From 0a7c21f0d4a3b3337f1fcb501ed8a87f6ebeb512 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:29:11 -0400 Subject: [PATCH 06/18] method to read rds into dict --- cassandra/components.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index 1baa546..aeebcbc 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1259,3 +1259,22 @@ def validate_int(self, value): msg = f"{self.__class__} Value '{value}' not able to be converted to an integer as expected." logging.error(msg) raise TypeError(msg) + + def rds_to_dict(self): + """Read in and convert an RDS file to a Python dictionary. + + :return: A Python dictionary of {variable name: value arrays} + + """ + import rpy2.robjects as robjects + from rpy2.robjects import pandas2ri + pandas2ri.activate() + + # get a wrapper around the readRDS R function + read_rds = robjects.r['readRDS'] + + # create ListVector object + lvect = read_rds(self.params[self.RDS_FILE_FIELD]) + + # convert the ListVector object to a Python dictionary + return dict(zip(lvect.names, map(list, list(lvect)))) From 2d96a893a46a45b27aab1b2ca71ce6b8823e9d54 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:30:16 -0400 Subject: [PATCH 07/18] method to get a list of climate files supporting emulator --- cassandra/components.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index aeebcbc..a9a4cb5 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1278,3 +1278,25 @@ def rds_to_dict(self): # convert the ListVector object to a Python dictionary return dict(zip(lvect.names, map(list, list(lvect)))) + + def get_files(self, rds_dict): + """Get a list of target files that match the climate variable specified in the configuration. + + :param rds_dict: A Python dictionary of {variable name: value arrays} derived from the emulator + RDS file. + :type rds_dict: dict + + :return: A list of file names from the `infiles` variable from the RDS file that match + the user defined climate variable name. + + """ + + target_files = [i for i in rds_dict[self.RDS_INFILES_NAME] if os.path.basename(i).split('_')[0] == self.params[self.CLIMATE_VAR_NAME_FIELD]] + + if len(target_files) == 0: + msg = f"{self.__class__} There are no datasets matching `climate_var_name` == {self.params[self.CLIMATE_VAR_NAME_FIELD]}" + logging.error(msg) + raise ValueError(msg) + + else: + return target_files From bf93d3afbe41c5fd46452c0493f88c8a6468278b Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:30:46 -0400 Subject: [PATCH 08/18] generate year list method --- cassandra/components.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index a9a4cb5..4491b96 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1300,3 +1300,28 @@ def get_files(self, rds_dict): else: return target_files + + def generate_year_list(self, target_file): + """Generate a list of years that encompass the data range per realization from a parsed a file name. + + :param target_file: A file name from the `infiles` variable from the RDS file that match + the user defined climate variable name. + :type target_file: str + + :return: A list of integer years that encompass the data range per realization. + + """ + + # get a year list for each file from the file name + yrs = os.path.basename(target_file).split('_')[-1].split('.')[0].split('-') + start_yr = self.validate_year(yrs[0][:4]) + through_yr = self.validate_year(yrs[1][:4]) + + # ensure start year is not greater than through year + if start_yr > through_yr: + msg = f"{self.__class__} Start year '{start_yr}' > through year '{through_yr}' for emulator file '{target_file}'" + logging.error(msg) + raise ValueError(msg) + + # create a list of years found in each scenario + return list(range(start_yr, through_yr + 1, 1)) From 752347ee6b348bb84133e2204713e7bd434ed424 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:31:35 -0400 Subject: [PATCH 09/18] build data list method --- cassandra/components.py | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/cassandra/components.py b/cassandra/components.py index 4491b96..72105ab 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1325,3 +1325,62 @@ def generate_year_list(self, target_file): # create a list of years found in each scenario return list(range(start_yr, through_yr + 1, 1)) + + def build_data_list(self, rds_dict, target_files, year_list, target_scenario): + """Generate a list of values for a given variable name and target scenario. + + :param rds_dict: A Python dictionary of {variable name: value arrays} derived from the emulator + RDS file. + :type rds_dict: dict + + :param target_files: A list of file names from the `infiles` variable from the RDS file that match + the user defined climate variable name. + :type target_files: list + + :param year_list: A list of integer years that encompass the data range per realization. + :type year_list: list + + :param target_scenario: The scenario name (e.g., rcp26) + :type target_scenario: str + + :return: A list of values for a given variable name and target scenario. + + """ + + # get the number of years + len_yr_list = len(year_list) + + d_scenario_tgav = {} + for idx, i in enumerate(target_files): + + # split path by delim + f_split = os.path.basename(i).split('_') + + # scenario name from file name + scn_name = f_split[3] + + # get first files ending index value for slicing out data per year + if idx == 0: + start_idx = 0 + end_idx = len_yr_list + else: + start_idx = end_idx + end_idx += len_yr_list + + # add tgav data to scenario dict + if scn_name not in d_scenario_tgav: + d_scenario_tgav[scn_name] = rds_dict[self.RDS_TGAV_NAME][start_idx:end_idx] + + else: + msg = f"{self.__class__} Multiple scenarios in target files for {scn_name}." + logging.error(msg) + raise KeyError(msg) + + # ensure the target scenario is in the dictionary + if target_scenario not in d_scenario_tgav: + msg = f"{self.__class__} Scenario '{target_scenario}' is not in the RDS supporting climate file options: '{d_scenario_tgav.keys()}'" + logging.error(msg) + raise KeyError(msg) + + else: + return d_scenario_tgav[target_scenario] From 74f449dc81aee4291c3c2fbbd6ebc29ce995abc0 Mon Sep 17 00:00:00 2001 From: crvernon Date: Thu, 11 Jun 2020 17:43:56 -0400 Subject: [PATCH 10/18] update run component method and docs --- cassandra/components.py | 54 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/cassandra/components.py b/cassandra/components.py index 72105ab..60e08b8 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1163,9 +1163,30 @@ def report_test_results(self): class TgavStubComponent(ComponentBase): - """Feed in external time series of temperature data to provide to fldgen to - produce new realizations. + """Feed in an RDS file of from ESM runs used to train the fldgen emulator. + This component is used instead of the HectorStubComponent to provide `tgav` to fldgen. + + The component provides one capability: + * Tgav : global mean temperature + + The `Tgav` capability returns a data frame with data from the target scenario. + + The parameters accepted by this component are: + + :param rds_file: Full path with file name and extension to the input RDS emulator file + containing `tgav` outputs for each scenario + :type rds_file: str + + :param climate_var_name: Parent climate model variable name (e.g., tasAdjust) found in the file + name for the supporting climate data used by the emulator + :type climate_var_name: str + + :param scenario: Scenario name (e.g., rcp26) + :type scenario: str + + :param units_field: Unit name from the `tgav` data + :type units_field: str """ @@ -1198,6 +1219,35 @@ def run_component(self): """ import pandas as pd + # ensure required params are present + self.validate_params() + + # convert the ListVector object from reading a RDS file to a Python dictionary + rds_dict = self.rds_to_dict() + + # get a list of target files that match the climate variable specified in the configuration + target_files = self.get_files(rds_dict) + + # generate a list of years in a realization + year_list = self.generate_year_list(target_files[0]) + + # generate a dictionary of {scenario: data_array} for a tgav for a valid scenario + tgav_list = self.build_data_list(rds_dict, target_files, year_list, self.params[self.SCENARIO_FIELD]) + + # build a pandas data frame to hold tgav output + tgav_df = pd.DataFrame({'year': year_list, + 'value': tgav_list}) + + # additional expected fields + tgav_df['scenario'] = self.params[self.SCENARIO_FIELD] + tgav_df['variable'] = self.TGAV_CAPABILITY_NAME + tgav_df['units'] = self.params[self.UNITS_FIELD] + + # add to cassandra result queue + self.addresults(self.TGAV_CAPABILITY_NAME, tgav_df[self.TGAV_FIELD_ORDER]) + + return 0 + def validate_params(self): """Ensure params are present for this component.""" From a2cdf5c342355a43f9b21a24ef5e32014ba09dd3 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 11:27:21 -0400 Subject: [PATCH 11/18] streamline tgav component and add metadata validation --- cassandra/components.py | 226 ++++++++++++++++++++++++---------------- 1 file changed, 139 insertions(+), 87 deletions(-) diff --git a/cassandra/components.py b/cassandra/components.py index 60e08b8..a7c6055 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1167,13 +1167,19 @@ class TgavStubComponent(ComponentBase): This component is used instead of the HectorStubComponent to provide `tgav` to fldgen. - The component provides one capability: - * Tgav : global mean temperature + The component provides two capabilities: + :param Tgav: A data frame containing global mean temperature with fields + ['year', 'scenario', 'variable', 'value', 'units']; where value is 'Tgav' + :type Tgav: Pandas DataFrame - The `Tgav` capability returns a data frame with data from the target scenario. + :param tgav_metadata: A dictionary of metadata for the `Tgav` capability including: + [rds_file, scenario, climate_var_name, source_climate_data, units, + count, mean, median, min, max, std, na_count, null_count, all_finite] + designations + :type tgav_metadata: dict - The parameters accepted by this component are: + The parameters accepted by this component are: :param rds_file: Full path with file name and extension to the input RDS emulator file containing `tgav` outputs for each scenario :type rds_file: str @@ -1188,6 +1194,12 @@ class TgavStubComponent(ComponentBase): :param units_field: Unit name from the `tgav` data :type units_field: str + :param start_year: Start year of the climate data + :type start_year: int + + :param through_year: Year climate data goes through + :type through_year: int + """ # RDS file variable names @@ -1205,18 +1217,18 @@ class TgavStubComponent(ComponentBase): CLIMATE_VAR_NAME_FIELD = 'climate_var_name' SCENARIO_FIELD = 'scenario' UNITS_FIELD = 'units' + START_YEAR_FIELD = 'start_year' + THROUGH_YEAR_FIELD = 'through_year' def __init__(self, cap_tbl): super(TgavStubComponent, self).__init__(cap_tbl) + + # add capabilities to Cassandra self.addcapability(self.TGAV_CAPABILITY_NAME) + self.addcapability('tgav_metadata') def run_component(self): - """Run the TgavStubComponent component - - Load the requested scenarios and make each variable available to the - rest of the system. - - """ + """Run the TgavStubComponent component""" import pandas as pd # ensure required params are present @@ -1225,19 +1237,22 @@ def run_component(self): # convert the ListVector object from reading a RDS file to a Python dictionary rds_dict = self.rds_to_dict() - # get a list of target files that match the climate variable specified in the configuration - target_files = self.get_files(rds_dict) + # get a dictionary of the target file name and the file index for the specified configuration + target_file_dict = self.build_file_dict(rds_dict) # generate a list of years in a realization - year_list = self.generate_year_list(target_files[0]) + year_list = self.build_year_list() - # generate a dictionary of {scenario: data_array} for a tgav for a valid scenario - tgav_list = self.build_data_list(rds_dict, target_files, year_list, self.params[self.SCENARIO_FIELD]) + # generate a list of values for a given variable name and target scenario + tgav_list = self.build_data_list(rds_dict, target_file_dict, year_list) # build a pandas data frame to hold tgav output tgav_df = pd.DataFrame({'year': year_list, 'value': tgav_list}) + # report data summary + meta_dict = self.tgav_metadata(tgav_df, target_file_dict) + # additional expected fields tgav_df['scenario'] = self.params[self.SCENARIO_FIELD] tgav_df['variable'] = self.TGAV_CAPABILITY_NAME @@ -1245,21 +1260,46 @@ def run_component(self): # add to cassandra result queue self.addresults(self.TGAV_CAPABILITY_NAME, tgav_df[self.TGAV_FIELD_ORDER]) + self.addresults('tgav_metadata', meta_dict) return 0 + @staticmethod + def log_raise_exception(exception, message, log_msg=True, raise_msg=True): + """Log an error and raise an exception. + + :param exception: Exception class + :type exception: class object + + :param message: Message to log and raise + :type message: str + + :param log_msg: Optional. Log as error if True; default True + :type log_msg: bool + + :param raise_msg: Optional. Raise exception if True; default True + :type raise_msg: bool + + """ + + if log_msg: + logging.error(message) + + if raise_msg: + raise exception(message) + def validate_params(self): """Ensure params are present for this component.""" # list of expected params - param_list = [self.RDS_FILE_FIELD, self.CLIMATE_VAR_NAME_FIELD, self.SCENARIO_FIELD, self.UNITS_FIELD] + param_list = [self.RDS_FILE_FIELD, self.CLIMATE_VAR_NAME_FIELD, self.SCENARIO_FIELD, self.UNITS_FIELD, + self.START_YEAR_FIELD, self.THROUGH_YEAR_FIELD] for i in param_list: if i not in self.params: msg = f"{self.__class__} Required parameter '{i}' not in config file." - logging.error(msg) - raise KeyError(msg) + self.log_raise_exception(KeyError, msg) def validate_year(self, yr, min_yr=0, max_yr=10000): """Ensure years are within a reasonable range and are integers. @@ -1283,8 +1323,7 @@ def validate_year(self, yr, min_yr=0, max_yr=10000): if (valid_yr < min_yr) or (valid_yr > max_yr): msg = f"{self.__class__} Year '{valid_yr}' is outside of the reasonable bounds [{min_yr} - {max_yr}]." - logging.error(msg) - raise ValueError(msg) + self.log_raise_exception(ValueError, msg) else: return valid_yr @@ -1299,7 +1338,7 @@ def validate_int(self, value): """ - if type(value) == int: + if type(value) is int: return value try: @@ -1307,8 +1346,7 @@ def validate_int(self, value): except TypeError: msg = f"{self.__class__} Value '{value}' not able to be converted to an integer as expected." - logging.error(msg) - raise TypeError(msg) + self.log_raise_exception(TypeError, msg) def rds_to_dict(self): """Read in and convert an RDS file to a Python dictionary. @@ -1316,6 +1354,7 @@ def rds_to_dict(self): :return: A Python dictionary of {variable name: value arrays} """ + import rpy2.robjects as robjects from rpy2.robjects import pandas2ri pandas2ri.activate() @@ -1329,108 +1368,121 @@ def rds_to_dict(self): # convert the ListVector object to a Python dictionary return dict(zip(lvect.names, map(list, list(lvect)))) - def get_files(self, rds_dict): + def build_file_dict(self, rds_dict): """Get a list of target files that match the climate variable specified in the configuration. :param rds_dict: A Python dictionary of {variable name: value arrays} derived from the emulator RDS file. :type rds_dict: dict - :return: A list of file names from the `infiles` variable from the RDS file that match - the user defined climate variable name. + :return: A dictonary of file names and their corresponding index from the + `infiles` variable from the RDS file that match the user defined + configuration parameters. + Format: {'files': [''], 'file_index': []} """ - target_files = [i for i in rds_dict[self.RDS_INFILES_NAME] if os.path.basename(i).split('_')[0] == self.params[self.CLIMATE_VAR_NAME_FIELD]] + target_file_dict = {} + for index, i in enumerate(rds_dict[self.RDS_INFILES_NAME]): - if len(target_files) == 0: - msg = f"{self.__class__} There are no datasets matching `climate_var_name` == {self.params[self.CLIMATE_VAR_NAME_FIELD]}" - logging.error(msg) - raise ValueError(msg) + # get file name from path + base = os.path.basename(i) - else: - return target_files + # get only files matching search criteria from params + if (self.params[self.SCENARIO_FIELD] in base) \ + and (self.params[self.CLIMATE_VAR_NAME_FIELD] in base) \ + and (str(self.params[self.START_YEAR_FIELD]) in base) \ + and (str(self.params[self.THROUGH_YEAR_FIELD]) in base): - def generate_year_list(self, target_file): - """Generate a list of years that encompass the data range per realization from a parsed a file name. + # add file name to dictionary + target_file_dict.setdefault('files', []).append(i) - :param target_file: A file name from the `infiles` variable from the RDS file that match - the user defined climate variable name. - :type target_file: str + # add file index to dictionary + target_file_dict.setdefault('file_index', []).append(index) - :return: A list of integer years that encompass the data range per realization. + # the number of target files found matching the search criteria + n_files = len(target_file_dict['files']) - """ + if n_files == 0: + msg = f"{self.__class__} There are no data sets matching the input parameters in file list: {rds_dict[self.RDS_INFILES_NAME]}. One matching file required." + self.log_raise_exception(ValueError, msg) + + elif n_files > 1: + msg = f"{self.__class__} There are {n_files} data sets matching the input parameters in file list: {rds_dict[self.RDS_INFILES_NAME]}. One matching file required." + self.log_raise_exception(ValueError, msg) + + else: + return target_file_dict - # get a year list for each file from the file name - yrs = os.path.basename(target_file).split('_')[-1].split('.')[0].split('-') - start_yr = self.validate_year(yrs[0][:4]) - through_yr = self.validate_year(yrs[1][:4]) + def build_year_list(self): + """Construct a list of years that the data provides.""" - # ensure start year is not greater than through year - if start_yr > through_yr: - msg = f"{self.__class__} Start year '{start_yr}' > through year '{through_yr}' for emulator file '{target_file}'" - logging.error(msg) - raise ValueError(msg) + # validate years + start_yr = self.validate_year(self.params[self.START_YEAR_FIELD]) + through_yr = self.validate_year(self.params[self.THROUGH_YEAR_FIELD]) - # create a list of years found in each scenario return list(range(start_yr, through_yr + 1, 1)) - def build_data_list(self, rds_dict, target_files, year_list, target_scenario): + def build_data_list(self, rds_dict, target_file_dict, year_list): """Generate a list of values for a given variable name and target scenario. :param rds_dict: A Python dictionary of {variable name: value arrays} derived from the emulator RDS file. :type rds_dict: dict - :param target_files: A list of file names from the `infiles` variable from the RDS file that match - the user defined climate variable name. - :type target_files: list + :param target_file_dict: A dictonary of file names and their corresponding index from the + `infiles` variable from the RDS file that match the user defined + configuration parameters. + Format: {'files': [''], 'file_index': []} + :type target_file_dict: dict :param year_list: A list of integer years that encompass the data range per realization. :type year_list: list - :param target_scenario: The scenario name (e.g., rcp26) - :type target_scenario: str - - :return: A list of values for a given variable name and target scenario. + :return: A list of values for a target parameterization """ - # get the number of years - len_yr_list = len(year_list) + # number of years for a realization + n_years = len(year_list) - d_scenario_tgav = {} - for idx, i in enumerate(target_files): + # index of file corresponding to the position of the window containing the data for the target params + file_index = target_file_dict['file_index'][0] - # split path by delim - f_split = os.path.basename(i).split('_') + # start and end index for the data window to extract + start_index = n_years * file_index + end_index = n_years * (file_index + 1) - # scenario name from file name - scn_name = f_split[3] + return rds_dict[self.RDS_TGAV_NAME][start_index:end_index] - # get first files ending index value for slicing out data per year - if idx == 0: - start_idx = 0 - end_idx = len_yr_list - else: - start_idx = end_idx - end_idx += len_yr_list + def tgav_metadata(self, df, target_file_dict): + """Create a dictionary holding a data summary about the 'Tgav' dataset and parameter assumptions. - # add tgav data to scenario dict - if scn_name not in d_scenario_tgav: - d_scenario_tgav[scn_name] = rds_dict[self.RDS_TGAV_NAME][start_idx:end_idx] + :param df: - else: - msg = f"{self.__class__} Multiple scenarios in target files for {scn_name}." - logging.error(msg) - raise KeyError(msg) + :return: - # ensure the target scenario is in the dictionary - if target_scenario not in d_scenario_tgav: - msg = f"{self.__class__} Scenario '{target_scenario}' is not in the RDS supporting climate file options: '{d_scenario_tgav.keys()}'" - logging.error(msg) - raise KeyError(msg) + """ - else: - return d_scenario_tgav[target_scenario] + from numpy import isfinite + + meta_dict = {'rds_file': self.params[self.RDS_FILE_FIELD], + 'scenario': self.params[self.SCENARIO_FIELD], + 'climate_var_name': self.params[self.CLIMATE_VAR_NAME_FIELD], + 'source_climate_data': target_file_dict['files'][0], + 'units': self.params[self.UNITS_FIELD], + 'count': df['value'].count(), + 'mean': df['value'].mean(), + 'median': df['value'].median(), + 'min': df['value'].min(), + 'max': df['value'].max(), + 'std': df['value'].std(), + 'na_count': df['value'].isna().sum(), + 'null_count': df['value'].isnull().sum(), + 'all_finite': isfinite(df['value']).all()} + + for k in meta_dict.keys(): + logging.info(f"{self.__class__} 'Tgav' data summary: {k}=={meta_dict[k]}") + print(f"{self.__class__} '{self.TGAV_CAPABILITY_NAME}' data summary: {k}=={meta_dict[k]}") + + return meta_dict From c8cbe246e674463e3a70629e89e1c15faf375b18 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 14:07:26 -0400 Subject: [PATCH 12/18] test suite for tgav stub --- cassandra/test/test_tgav_stub.py | 190 +++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 cassandra/test/test_tgav_stub.py diff --git a/cassandra/test/test_tgav_stub.py b/cassandra/test/test_tgav_stub.py new file mode 100644 index 0000000..07d9644 --- /dev/null +++ b/cassandra/test/test_tgav_stub.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +"""Test for the Tgav stub component.""" + +import os +import unittest +import pkg_resources + +from cassandra.components import TgavStubComponent + +# if necessary, set the path to your R_HOME environment variable +# os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Resources' + + +class TestTgavStubComponent(unittest.TestCase): + + def setUp(self): + """Set up a HectorStub component for testing.""" + + capability_table = {} + + # configuration + self.rds_file = pkg_resources.resource_filename('cassandra', 'test/data/fldgen-IPSL-CM5A-LR_test.rds') + self.climate_var_name = 'tasAdjust' + self.scenario = 'rcp26' + self.units = 'Kelvin' + self.start_year = 1861 + self.through_year = 2099 + + # expected output + self.meta_dict = {'rds_file': self.rds_file, + 'scenario': self.scenario, + 'climate_var_name': self.climate_var_name, + 'source_climate_data': './training-data/tasAdjust_annual_IPSL-CM5A-LR_rcp26_18610101-20991231.nc', + 'units': self.units, + 'count': 239, + 'mean' : 286.8046116940164, + 'median': 286.30762280534697, + 'min': 284.78008340211915, + 'max': 288.6382439866686, + 'std': 1.182328423261446, + 'na_count': 0, + 'null_count': 0, + 'all_finite': True} + + # instantiate class + self.stub = TgavStubComponent(capability_table) + + # build parameterization + self.stub.addparam('rds_file', self.rds_file) + self.stub.addparam('climate_var_name', self.climate_var_name) + self.stub.addparam('scenario', self.scenario) + self.stub.addparam('units', self.units) + self.stub.addparam('start_year', self.start_year) + self.stub.addparam('through_year', self.through_year) + self.stub.finalize_parsing() + + # read in RDS file to dictionary + self.rds_dict = self.stub.rds_to_dict() + + # generate target file dictionary + self.target_file_dict = self.stub.build_file_dict(self.rds_dict) + + # generate year list + self.year_list = self.stub.build_year_list() + + # generate tgav list + self.tgav_list = self.stub.build_data_list(self.rds_dict, self.target_file_dict, self.year_list) + + # generate tgav dataframe + self.tgav_df = self.stub.build_dataframe(self.year_list, self.tgav_list) + + def test_log_raise_exception(self): + """Ensure correct exception is raised.""" + + with self.assertRaises(ValueError): + self.stub.log_raise_exception(ValueError, 'value_error', log_msg=False) + + def test_validate_int(self): + """Expect correct exception and type return.""" + + with self.assertRaises(ValueError): + self.stub.validate_int('fail') + + val = self.stub.validate_int('1984') + self.assertTrue(type(val), int) + self.assertEqual(val, 1984) + + def test_validate_year(self): + """Expect correct exception.""" + + # check min bounds error + with self.assertRaises(ValueError): + self.stub.validate_year(-1) + + # check max bounds error + with self.assertRaises(ValueError): + self.stub.validate_year(999999) + + def test_validate_file_exist(self): + """Expect correct exception.""" + + with self.assertRaises(FileNotFoundError): + self.stub.validate_file_exist('/not/a/file.txt') + + fcheck = self.stub.validate_file_exist(self.rds_file) + self.assertEqual(fcheck, self.rds_file) + + def test_rds_to_dict(self): + """Check output dict for data.""" + + # check for keys + self.assertTrue('tgav' in self.rds_dict) + self.assertTrue('infiles' in self.rds_dict) + + # check for data + self.assertEqual(len(self.rds_dict['tgav']), 956) + self.assertEqual(len(self.rds_dict['infiles']), 8) + + def test_build_file_dict(self): + """Ensure correct exception and content.""" + + # check for missing infiles key + with self.assertRaises(KeyError): + self.stub.build_file_dict({}) + + # check for no matching data + with self.assertRaises(ValueError): + self.stub.build_file_dict({'infiles': []}) + + # check for too many matching files + with self.assertRaises(ValueError): + self.stub.build_file_dict({'infiles': ['a', 'a']}) + + # valid outputs + self.assertEqual(self.target_file_dict['files'][0], self.meta_dict['source_climate_data']) + self.assertEqual(self.target_file_dict['file_index'][0], 0) + + def test_build_year_list(self): + """Ensure year list returns correct number of years.""" + + # check type + self.assertTrue(type(self.year_list), list) + + # check first and last year + self.assertEqual(self.year_list[0], self.start_year) + self.assertEqual(self.year_list[-1], self.through_year) + + # check the number of years + self.assertEqual(self.through_year - self.start_year + 1, len(self.year_list)) + + def test_build_data_list(self): + """Check expected data outcome.""" + + self.assertTrue(type(self.tgav_list), list) + + # check number of values for a single scenario and variable + self.assertEqual(len(self.tgav_list), 239) + + def test_build_dataframe(self): + """Confirm fields and shape.""" + + # check data frame shape + self.assertEqual(self.tgav_df.shape, (239, 5)) + + # check column names + self.assertEqual(list(self.tgav_df.columns), ['year', 'scenario', 'variable', 'value', 'units']) + + def test_tgav_metadata(self): + """Test expected output.""" + + meta_dict = self.stub.tgav_metadata(self.tgav_df, self.target_file_dict) + + # check like keys + self.assertEqual(meta_dict.keys(), self.meta_dict.keys()) + + # check value equality + for k in meta_dict.keys(): + self.assertEqual(meta_dict[k], self.meta_dict[k]) + + def test_run_component(self): + """Test expected output.""" + + rval = self.stub.run_component() + + # test run success + self.assertEqual(rval, 0) + + +if __name__ == '__main__': + unittest.main() From 0eaabf459d9f8588bda5d8ba93733324114b2c2b Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 14:07:47 -0400 Subject: [PATCH 13/18] reduced data for tgav tests --- cassandra/test/data/fldgen-IPSL-CM5A-LR_test.rds | Bin 0 -> 3934 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cassandra/test/data/fldgen-IPSL-CM5A-LR_test.rds diff --git a/cassandra/test/data/fldgen-IPSL-CM5A-LR_test.rds b/cassandra/test/data/fldgen-IPSL-CM5A-LR_test.rds new file mode 100644 index 0000000000000000000000000000000000000000..9bcead5453e351a85434ec7c2eb9fab228c137a6 GIT binary patch literal 3934 zcmV-k525fMiwFP!000001MQi4G}qg^$G_$&BC~{wD9Jn&u??9^5ea2XWT@}wDIu)fl5h+j==HY;Ig4@#z1Awp+oz1s z-^b?Y$jG5RI_ueQUkJT%5zlMWI70tqZ}K*49ie{+^epO4f*hx`7mX15$0#RPbq>h6 zoK4=sgx=uloOR(Aq1OrKbT2y%xs_vpu@CWLr}up9LOwHwFY!YD!~I9gLf}uyvtQf^ zJG1lA9}AJEq2tZuC$#I;eyP4d=uJN7rJ_<{_v}0zE(-f2UdEF}g#I#I@4*ux)XQ@; z)EE7|zhmj3FN1N!4HdjvI#b`E+-+Avel$?(DQzrD9r)!jClOac@f7$AyU(uHqpUt7aI`?=Y0`W5Ip2^O{ zXWnt8*Q35>2Z7^@(JtE6-;I9iPWfeS?Zi0MLQ~$|#CS}D20U9Z{(R%}iu2(=?z`bk z1^k$HMXnZ*+qHu&=D@G0nk6k5_L7F2l{51gk^eL`4f7yh7&Ih{cx&dWxtO<)74lcq zwV|)DjH5fj&h+XraD={-cXQ1>^gG^P*YtzXKl96&-$%aYuyZzCXW$n(&opu^^4siD zU`oNhlp&rRMCh$X=E>LF2>t7#FoUFH@V|S0*!y?*R~-^RaUOX`iuP_WBlIt|oAU(O z3B4s}v(sBXwEJ2lq~d$k<(?~M3&VUw{vjVdjJT47BUNhff9BfkZHs;!yRPC^N$B5tIKFY=dp22qXcF#5+;UZqC~J&sgTdT0 z3D9$&Y_eB}bPA+B*N5CV^6oYp>dKx!P&$A*z29)%e2F|0{5)P5M;lwifn?ZQxV=mj zJJ7GDLsvNR)c-g*XW281TeWkM9oBDs+xla6`N${bSANS6c{Z6>md!&QQr4t{1#tz^ zN=XjLQ=5Izc`wGvt@_Ft{w-#4ZW#wrU+YaP{<~-|Vu@(L`f6=s|E_}d^Hr_RAS?y4 zq;X&IYDnRX$tV8E$9Jtr;}QH;nHPM({%bAOII&&;cBK}-xld4cYfrz4$IN$MoRS-e zc~W^%XO4B+Su5$>WCwj|;QkTh?b2g=ynvI?e+uT@IEVZlm%J{u`66D8T{~eLq4yZ1 zhRwYWKW>_v&H=e!3c6+PNdtOTD{lcOyS=d+ovKBHou`UK5rTh9~ zKMaO!vJJyJ8#utKeFpUn_`T0{tRi%#VTGk8&X-@0`!A33!q5K-V<-CSAKAB)4fDp;%&?&nmAX^ogpNVC@U= z=dWX{ts)c;C2P4-lu(=}P7l9RL4SIuyNf(wCwEq-{e*mcU?So*q-oSO>1@c2B7^tsc z)Jmc5oJ;=A{;)?W9G)_Rz47X6*9hblF^;!i4?ml`gL3OIAD>N~Lr%ai&wx|12kk;l zdErIybJ?fyQwMgo*4ou8(0;aKdkKS3ygEh>X-|+ZFn!136xgS`Lb3lT?h?QAoSQL^ zN-rf44WF_LebrRe!qyOnJmwM-!?dFv z51h-Jr4HyKueQ&j4AjlO@xG!o`r!-9BEO%$Q zJImc!?*8-K9oU%=k%;r5|MI5ba?IBNlgrmP7V&Rx?bVki^r78$Wxc3#aBZRT+C$*& zx&cRiteY{hToQ+V$B!TVY-fn}2CK*1;OnsV8VQ~l^t1VQ-%gBcOyHEl+5*TWFO)Ro z!8>CbGu;^a#hL(CRrv20(lXcz9&hQX?YM*X{U6Pyo50DC{!`UBFKMriKfD7Tk54m4 z>y{(ld#M^1_(3CzC51a7BT9GO#W_x!iZR=wfpb0G^IWZW=NS`fZdUfXLHP62pBjaNg97HH zvI2GSM^)GTtcKoZYV5s;P{LnaS6eQG&Wt`Z!((BKx=Z_;5#K+2sLc@aRD7!6K0*n8 z(0?5V4hywC)K16!U~a&Jln!uEIR3tz(jcKk0yuBP;GQ9&s`w#^4Kg(2_dUN8O001_ zO90#y_tTZS1ultrXGwG7J|Gsd;L^Y&#MdjwC5917dcx&}$tuL{y8KlT{fWqK8PZTB zl<0P`CFRH~nz#Dx3-C~M+lWI7>KE5o7IOsL7c-3>XlO$Fym|*7^dqV{mOGc1P*RJ! zE-XQPQm?Yse0M?KUp3`AmXHx5585# zj;kT=MUoHmlW&xxy*;ntFy?o@!N{F0^9dz;y7*g~+AMcxxjW0Px++_}GC$8*= z)TI8f1@HQ-SG34s9{PK;?p4qDF=VPtu}+xmFXy#j{WCqCdR#D`5&eaaxN#nh*`EsE zz7agst$QDO3A%{{S;GpBzUm-670{3Edc@hlJVZL_dCu4&QA7Nu5AU6Co@&LJLB=rm9uH&D#knZq=!`? zzQ?o=%dPp?7gGtVYdq`0%i>Z8z8lD6TCLUMIP>1EDKWyi%_h-Pv=`^?WMXA&OanMA zQxU@D0S;^ZY&c^FF2^KUoM%P9dy-9*Zo$7*tHc_7WaH(Czqu9MEnswd;GM{IRbs{Y zqu^-UyG6&rSAoY-dMvS!DGSwH!7W~c+SrY zy=Pa8z(c;1RdZI|11CK{%{|F*a!+XX*fRife<4)d)|8nte5$cpNvhs2HLMXZC#amPQVV@uPZ5ZQ_70I6R z#P?iKEum70cd~SBXi*gSD-*Nbqy9eZ3SZbfrXf#qOul_csKr6-*2>_yqEhCmD2z|x zx8o<49wC&<LcW{4i5VMnRo8{12tIp zzpmK?$SZ=E1-nlq;JXYNsW3$F{vEb5j(?pF{b`>3RqV^bhJuMP?8h`m?{xTqdsn5B((X?^&@ z!%^U*PTMiNSu>9~&&mt!x^;@)C-L3?8o-kYmWt7cSBpuOkY{0@A#H8rb)guqwxs?U8l zaF4Nw&TkwnA(UA|JJU^pP_|3tf7OGt7KWgoDiHWs?`*0HhA@N?=Iy~u<2uhWC!(FAbcrK`~G zXBv&h&Pt=vSZM4to|#_=`}hYz@c-AZfY~1Y+eD*H;YScZ+<&@Ru22o3JpF_HgSV^r zc!qeYhIlfJeRk|-gt&VK2k-U_bhldXV5?%fR@+#`*4~}+-ld`AuCAw}u7*D<8fq(7 zs%vPfuL$=3Kj+iZ{?F#qQTxy4)BDT%EdFP{UDW?)`u=vhzn;Cno$jwE?{BC3-{$VW sFIA3U&!BCLKNlwJpN;Gx+dV`7{5O|>u&;mMzg?#O1)YpK!0sRb0Qqwjp8x;= literal 0 HcmV?d00001 From 0bccfbe3b922ef7a91e7dbe4a469e319f9640bc7 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 14:08:19 -0400 Subject: [PATCH 14/18] add data frame bulder method --- cassandra/components.py | 97 +++++++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 18 deletions(-) diff --git a/cassandra/components.py b/cassandra/components.py index a7c6055..37d03d4 100644 --- a/cassandra/components.py +++ b/cassandra/components.py @@ -1229,7 +1229,6 @@ def __init__(self, cap_tbl): def run_component(self): """Run the TgavStubComponent component""" - import pandas as pd # ensure required params are present self.validate_params() @@ -1247,17 +1246,11 @@ def run_component(self): tgav_list = self.build_data_list(rds_dict, target_file_dict, year_list) # build a pandas data frame to hold tgav output - tgav_df = pd.DataFrame({'year': year_list, - 'value': tgav_list}) + tgav_df = self.build_dataframe(year_list, tgav_list) # report data summary meta_dict = self.tgav_metadata(tgav_df, target_file_dict) - # additional expected fields - tgav_df['scenario'] = self.params[self.SCENARIO_FIELD] - tgav_df['variable'] = self.TGAV_CAPABILITY_NAME - tgav_df['units'] = self.params[self.UNITS_FIELD] - # add to cassandra result queue self.addresults(self.TGAV_CAPABILITY_NAME, tgav_df[self.TGAV_FIELD_ORDER]) self.addresults('tgav_metadata', meta_dict) @@ -1344,9 +1337,26 @@ def validate_int(self, value): try: return int(value) - except TypeError: + except ValueError: msg = f"{self.__class__} Value '{value}' not able to be converted to an integer as expected." - self.log_raise_exception(TypeError, msg) + self.log_raise_exception(ValueError, msg) + + def validate_file_exist(self, file_path): + """Ensure file exists. + + :param file_path: Full path with file name an extension to in input file. + :type file_path: str + + :return: Validated file path + + """ + + if os.path.isfile(file_path): + return file_path + + else: + msg = f"Input file '{file_path}' does not exist." + self.log_raise_exception(FileNotFoundError, msg) def rds_to_dict(self): """Read in and convert an RDS file to a Python dictionary. @@ -1363,7 +1373,8 @@ def rds_to_dict(self): read_rds = robjects.r['readRDS'] # create ListVector object - lvect = read_rds(self.params[self.RDS_FILE_FIELD]) + rds_file = self.validate_file_exist(self.params[self.RDS_FILE_FIELD]) + lvect = read_rds(rds_file) # convert the ListVector object to a Python dictionary return dict(zip(lvect.names, map(list, list(lvect)))) @@ -1382,8 +1393,21 @@ def build_file_dict(self, rds_dict): """ + try: + infile_list = rds_dict[self.RDS_INFILES_NAME] + except KeyError: + msg = f"Field '{self.RDS_INFILES_NAME}' is not in the RDS dictionary." + self.log_raise_exception(KeyError, msg) + target_file_dict = {} - for index, i in enumerate(rds_dict[self.RDS_INFILES_NAME]): + + # add file name to dictionary + files = target_file_dict.setdefault('files', []) + + # add file index to dictionary + file_index = target_file_dict.setdefault('file_index', []) + + for index, i in enumerate(infile_list): # get file name from path base = os.path.basename(i) @@ -1395,20 +1419,20 @@ def build_file_dict(self, rds_dict): and (str(self.params[self.THROUGH_YEAR_FIELD]) in base): # add file name to dictionary - target_file_dict.setdefault('files', []).append(i) + files.append(i) # add file index to dictionary - target_file_dict.setdefault('file_index', []).append(index) + file_index.append(index) # the number of target files found matching the search criteria n_files = len(target_file_dict['files']) if n_files == 0: - msg = f"{self.__class__} There are no data sets matching the input parameters in file list: {rds_dict[self.RDS_INFILES_NAME]}. One matching file required." + msg = f"{self.__class__} There are no data sets matching the input parameters in file list: {infile_list}. One matching file required." self.log_raise_exception(ValueError, msg) elif n_files > 1: - msg = f"{self.__class__} There are {n_files} data sets matching the input parameters in file list: {rds_dict[self.RDS_INFILES_NAME]}. One matching file required." + msg = f"{self.__class__} There are {n_files} data sets matching the input parameters in file list: {infile_list}. One matching file required." self.log_raise_exception(ValueError, msg) else: @@ -1455,12 +1479,49 @@ def build_data_list(self, rds_dict, target_file_dict, year_list): return rds_dict[self.RDS_TGAV_NAME][start_index:end_index] + def build_dataframe(self, year_list, tgav_list): + """Build output data frame for Tgav. + + :param year_list: A list of integer years that encompass the data range per realization. + :type year_list: list + + :param tgav_list: A list of values for a given variable name and target scenario + :type tgav_list: list + + :param target_file_dict: A dictonary of file names and their corresponding index from the + `infiles` variable from the RDS file that match the user defined + configuration parameters. + Format: {'files': [''], 'file_index': []} + :type target_file_dict: dict + + :return: A data frame holding Tgav outputs and required ancillary data + + """ + import pandas as pd + + # build a pandas data frame to hold tgav output + df = pd.DataFrame({'year': year_list, 'value': tgav_list}) + + # additional expected fields + df['scenario'] = self.params[self.SCENARIO_FIELD] + df['variable'] = self.TGAV_CAPABILITY_NAME + df['units'] = self.params[self.UNITS_FIELD] + + return df[self.TGAV_FIELD_ORDER] + def tgav_metadata(self, df, target_file_dict): """Create a dictionary holding a data summary about the 'Tgav' dataset and parameter assumptions. - :param df: + :param df: A data frame holding Tgav outputs and required ancillary data + :type df: data frame + + :param target_file_dict: A dictonary of file names and their corresponding index from the + `infiles` variable from the RDS file that match the user defined + configuration parameters. + Format: {'files': [''], 'file_index': []} + :type target_file_dict: dict - :return: + :return: A dictionary of metadata for the Tgav data """ From e72c04332b1ad696285f3bdd3b89d3ccc5643ae5 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 15:32:36 -0400 Subject: [PATCH 15/18] update reqs and install r for ci --- .travis.yml | 1 + requirements.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 3eb28ff..df60737 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ matrix: dist: xenial cache: pip install: + - echo 'source("https://bioconductor.org/biocLite.R"); biocLite("S4Vectors"); biocLite("GenomicRanges")' > install.R - pip install git+https://github.com/JGCRI/gcam_reader - pip install git+https://github.com/JGCRI/tethys - pip install git+https://github.com/JGCRI/xanthos diff --git a/requirements.txt b/requirements.txt index f757a9e..35fae0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ configobj>=5.0.6 pandas>=0.20 +numpy>=1.16 +rpy2>=2.9 From 84290411d3dd1ab58614efd2a1a6b707dbdd1bc6 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 15:46:11 -0400 Subject: [PATCH 16/18] working on ci --- .travis.yml | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index df60737..fea2f81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,35 @@ language: python + matrix: include: - - python: 3.6 - - python: 3.7 + - os: linux dist: xenial + python: 3.6 + - os: linux + dist: xenial + python: 3.7 + +before_install: + - | + sudo apt-get install -y lsb-release + sudo apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 + sudo add-apt-repository \ + --yes \ + "deb https://cloud.r-project.org/bin/linux/ubuntu/ $(lsb_release -c -s)-cran36/" + cat /etc/apt/sources.list + sudo apt-get update -qq + sudo apt-get install -y r-base + - sudo Rscript ./install_r_packages.r dplyr ggplot2 tidyr + cache: pip + install: - - echo 'source("https://bioconductor.org/biocLite.R"); biocLite("S4Vectors"); biocLite("GenomicRanges")' > install.R - pip install git+https://github.com/JGCRI/gcam_reader - pip install git+https://github.com/JGCRI/tethys - pip install git+https://github.com/JGCRI/xanthos - pip install . + script: - python -m unittest From 94a125905edeea8c7125d38590f033d907d157e0 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 15:49:05 -0400 Subject: [PATCH 17/18] working on ci --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fea2f81..b20f95c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,6 @@ before_install: cat /etc/apt/sources.list sudo apt-get update -qq sudo apt-get install -y r-base - - sudo Rscript ./install_r_packages.r dplyr ggplot2 tidyr cache: pip From 630eaa2a411842e16dc87cd9182afca9448fc4b7 Mon Sep 17 00:00:00 2001 From: crvernon Date: Fri, 12 Jun 2020 16:03:30 -0400 Subject: [PATCH 18/18] working on ci --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b20f95c..514c906 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,7 @@ before_install: cache: pip install: + - pip install -Iv rpy2==3.3.3 - pip install git+https://github.com/JGCRI/gcam_reader - pip install git+https://github.com/JGCRI/tethys - pip install git+https://github.com/JGCRI/xanthos