-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
94 lines (76 loc) · 3.74 KB
/
data.py
File metadata and controls
94 lines (76 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/python
# load data from csv and munge it some
# 4/16/18
# updated 9/3/18
import csv
import logging
import numpy as np
class Data:
    '''
    load a two-column csv dataset and derive values from it.

    on construction the csv at data_path is read (first line treated as a
    header and skipped), the x-axis is re-indexed to 0..n-1, a polynomial is
    fitted to the points, the area between each pair of neighbouring points is
    computed, and each area is expressed as a fraction of the total area.

    attributes set by __init__:
        logger        -- the 'data' logger
        data_path     -- path of the csv file that was loaded
        original_data -- list of (x, y) float tuples exactly as read
        data          -- numpy array of (index, y) rows
        x, y          -- the two columns of data
        f             -- numpy.poly1d fitted to (x, y)
        integrals     -- list of unit-width trapezoid areas between points
        percents      -- each integral divided by the sum of all integrals
    '''

    paths = {
        'sea': '/home/pi/gitbucket/scroll/data/sea_level_rise.csv',
        'hot': '/home/pi/gitbucket/scroll/data/avg_hottest_day.csv',
        'precip': '/home/pi/gitbucket/scroll/data/precip_lowest_3_years_inches.csv'
    }

    def __init__(self, data_path=paths['sea']):
        '''load the csv at data_path and compute all derived attributes'''
        self.logger = self._init_logger()
        self.data_path = data_path
        self.original_data = self._load_data()
        self.data = self._parse_data()
        self.x, self.y = self.data[:, 0], self.data[:, 1]
        self.f = self._estimate_function()
        self.integrals = self._compute_discrete_integrals()
        self.percents = self.calculate_percentages(datapoints=self.integrals)
        # normalized_data is intentionally not set here; translate() computes
        # it on first use unless the caller has assigned it beforehand.

    def _init_logger(self):
        '''return the logger named 'data' after announcing it'''
        logger = logging.getLogger('data')
        logger.info('data logger instantiated')
        return logger

    def _load_data(self):
        '''
        return list of tuples with entries formatted as (x, y).

        the first line of the file is skipped as a header and completely
        blank lines are ignored. raises ValueError if a cell is not numeric
        and IndexError if a non-blank row has fewer than two columns.
        '''
        self.logger.info('loading data from %s', self.data_path)
        # newline='' lets the csv module do its own newline handling
        with open(self.data_path, 'r', newline='') as sheet:
            reader = csv.reader(sheet)
            next(reader, None)  # skip the header
            # csv.reader yields [] for a blank line; skip those instead of
            # failing on row[0]
            return [(float(row[0]), float(row[1])) for row in reader if row]

    def _parse_data(self):
        '''reset x-axis to between 0 and len(data) - 1 and represent data as numpy array'''
        self.logger.info('resetting x-axis to 0 - (len(data) - 1) and converting to numpy array')
        new_data = [(i, point[1]) for i, point in enumerate(self.original_data)]
        # reshape keeps the array two-column even when there are no rows, so
        # the column slicing in __init__ still works
        return np.array(new_data, dtype=float).reshape(-1, 2)

    def _estimate_function(self, degree=3):
        '''
        estimate the function described by the dataset. z is a list of coefficients
        for the polynomial. these are returned as a numpy.poly1d object which
        represents the function in code.
        '''
        self.logger.info('estimating function from the datapoints')
        z = np.polyfit(self.x, self.y, degree)
        return np.poly1d(z)

    def _compute_discrete_integrals(self):
        '''
        compute the integral between each unit of x and return them in a list.
        the sum of the list is equal to the total area under the curve.

        each entry is the trapezoid area between two neighbouring y values
        with a width of 1, i.e. (y[i] + y[i + 1]) / 2. fewer than two points
        gives an empty list.
        '''
        self.logger.info('computing dicrete integrals between the datapoints')
        y = np.asarray(self.y, dtype=float)
        # same value as a two-point trapezoid rule with dx=1, computed for
        # every neighbouring pair at once
        return list((y[:-1] + y[1:]) / 2.0)

    def calculate_percentages(self, datapoints):
        '''
        return list with each datapoint divided by the sum of all datapoints.

        the results are fractions of the total (they are not multiplied by
        100). a total of zero is not handled specially.
        '''
        self.logger.info('calculating the percentages of discrete integrals to total integral')
        total = sum(datapoints)  # computed once instead of once per element
        return [value / total for value in datapoints]

    def normalize_data(self, datapoints):
        '''
        return list of datapoints normalized between 0.0 - 1.0.

        empty input gives an empty list. when every datapoint is equal there
        is no range to scale by, so every entry is returned as 0.0.
        '''
        self.logger.info('normalizing the datapoints')
        # materialize first so emptiness can be tested safely for numpy
        # arrays as well as plain sequences
        values = list(datapoints)
        if not values:
            return []
        data_min = min(values)
        data_range = max(values) - data_min
        if data_range == 0:
            return [0.0 for _ in values]
        return [(n - data_min) / data_range for n in values]

    def translate(self, new_min=-1.0, new_max=1.0):
        '''
        translate normalized datapoints to range specified by new_min and new_max.
        defaults to a range of -1.0 to 1.0

        uses self.normalized_data when it has been assigned; otherwise it is
        computed from self.y and stored on the instance.
        '''
        self.logger.info('translating normalized datapoints to new range of %s - %s', new_min, new_max)
        normalized = getattr(self, 'normalized_data', None)
        if normalized is None:
            # NOTE(review): nothing in this file ever assigned
            # normalized_data; normalizing the y column is assumed to be the
            # intent -- confirm against callers.
            normalized = self.normalized_data = self.normalize_data(self.y)
        span = new_max - new_min
        return [new_min + (n * span) for n in normalized]

    def print_data(self):
        '''print every (x, y) pair of the data as it was read from the csv'''
        for x_value, y_value in self.original_data:
            print('x: {} y: {}'.format(x_value, y_value))