diff --git a/data_cleaning/clean_wpdx_sample_data.py b/data_cleaning/clean_wpdx_sample_data.py
index 414d06f..9e7ca4e 100644
--- a/data_cleaning/clean_wpdx_sample_data.py
+++ b/data_cleaning/clean_wpdx_sample_data.py
@@ -26,4 +26,22 @@ def clean_col_country_name(input_data):
 
 
+def clean_col_count(input_data):
+    """
+    Clean values in column: "count"
+
+    Returns the value converted to an int, or None when it cannot be
+    parsed as an integer (for example 'hello', '' or None).
+
+    Trello card: https://trello.com/c/HHzNs0hS/1-column-countryname
+    NOTE(review): the link above names the "countryname" card; confirm
+    the card for the "count" column.
+    """
+    try:
+        return int(input_data)
+    except (TypeError, ValueError):
+        # TypeError: int() rejects None and other non-numeric objects.
+        return None
+
+
 if __name__ == '__main__':
-    clean_columns('wpdx_sample_data.csv', 'cleaned_wpdx_sample_data.csv')
\ No newline at end of file
+    clean_columns('wpdx_sample_data.csv', 'cleaned_wpdx_sample_data.csv')
diff --git a/data_cleaning/test_clean_wpdx_sample_data.py b/data_cleaning/test_clean_wpdx_sample_data.py
index efc00e8..adc4ab5 100644
--- a/data_cleaning/test_clean_wpdx_sample_data.py
+++ b/data_cleaning/test_clean_wpdx_sample_data.py
@@ -7,3 +7,12 @@ def test_clean_col_country_name():
     Test the cleaning for column: "country_name"
     """
     assert clean_wpdx_sample_data.clean_col_country_name('NA') == 'NA'
+
+
+def test_clean_col_count():
+    """
+    Test the cleaning for column: "count"
+    """
+    assert clean_wpdx_sample_data.clean_col_count('hello') is None
+    assert clean_wpdx_sample_data.clean_col_count(None) is None
+    assert clean_wpdx_sample_data.clean_col_count('42') == 42