-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
119 lines (99 loc) · 4.23 KB
/
utils.py
File metadata and controls
119 lines (99 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import random
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
def load_train(data_path):
    '''
    Read the training CSV file and report how many rows it contains.

    :param data_path: path of the training data file (comma-separated, UTF-8,
                      first row used as the header)
    :return: ndarray holding every data row of the file
    '''
    frame = pd.read_csv(data_path, sep=',', header=0, encoding='utf-8')
    n_samples = frame.shape[0]
    print('Number of training samples: {}'.format(n_samples))
    return np.array(frame)
def load_validate(data_path):
    '''
    Read the validation CSV file and report how many rows it contains.

    :param data_path: path of the validation data file (comma-separated, UTF-8,
                      first row used as the header)
    :return: ndarray holding every data row of the file
    '''
    frame = pd.read_csv(data_path, sep=',', header=0, encoding='utf-8')
    n_samples = frame.shape[0]
    print('Number of validating samples: {}'.format(n_samples))
    return np.array(frame)
def load_test(data_path):
    '''
    Read the test CSV file.

    :param data_path: path of the test data file (comma-separated, UTF-8,
                      first row used as the header)
    :return: ndarray holding every data row of the file
    '''
    frame = pd.read_csv(data_path, sep=',', header=0, encoding='utf-8')
    return np.array(frame)
class MyData(Dataset):
    '''
    Dataset over feature rows with an optional "inside the interval" label.

    What an item looks like depends on ``phase``:
    'Training'   -> (anchor, positive, negative) augmented, normalized features
    'Evaluation' -> (normalized feature, label)
    'Screening'  -> normalized feature only (no property column is read)
    '''

    def __init__(self, data, mean, std, input_dim, interval, noise_std, phase):
        '''
        :param data: 2-D ndarray; the first ``input_dim`` columns are the features and,
                     unless phase is 'Screening', column ``input_dim`` is the property
        :param mean: per-feature mean used for z-score normalization
        :param std: per-feature standard deviation used for z-score normalization
        :param input_dim: number of input features (components)
        :param interval: the interval (low, high), both ends inclusive, of glass
                         transition temperatures to be screened
        :param noise_std: the standard deviation of the noise for data augmentation
        :param phase: 'Training', 'Evaluation' or 'Screening'
        '''
        self.input_dim = input_dim
        self.data = torch.tensor(data[:, :self.input_dim], dtype=torch.float32)
        self.noise_std = noise_std
        self.phase = phase
        self.mean = torch.tensor(mean, dtype=torch.float32)
        self.std = torch.tensor(std, dtype=torch.float32)
        if self.phase != 'Screening':
            self.GT = torch.tensor(data[:, self.input_dim], dtype=torch.float32)
            # One vectorized comparison instead of a Python loop over 0-d tensors.
            in_range = (self.GT >= interval[0]) & (self.GT <= interval[1])
            self.label = in_range.float()
            # Kept as plain lists of ints so the attributes keep their original type.
            self.valid_id = torch.nonzero(in_range, as_tuple=False).flatten().tolist()
            self.invalid_id = torch.nonzero(~in_range, as_tuple=False).flatten().tolist()

    def __getitem__(self, idx):
        '''
        :param idx: index of the sample
        :return: see the class description; the shape of the item depends on the phase
        :raises ValueError: if the phase is not recognized, or if a Training triplet
                            cannot be drawn because one of the two classes is empty
        '''
        if self.phase == 'Training':
            feature = self.data[idx]
            # The positive shares the anchor's label, the negative has the other one.
            if self.label[idx] == 1.:
                same_ids, other_ids = self.valid_id, self.invalid_id
            else:
                same_ids, other_ids = self.invalid_id, self.valid_id
            pos = self.data[self._draw_index(same_ids)]
            neg = self.data[self._draw_index(other_ids)]
            feature = self.perbulation(1, self.noise_std, feature)
            pos = self.perbulation(1, self.noise_std, pos)
            neg = self.perbulation(1, self.noise_std, neg)
            return self.normalize(feature), self.normalize(pos), self.normalize(neg)
        if self.phase == 'Evaluation':
            return self.normalize(self.data[idx]), self.label[idx]
        if self.phase == 'Screening':
            return self.normalize(self.data[idx])
        # Fail loudly instead of silently handing None to the caller.
        raise ValueError(
            "Unknown phase {!r}; expected 'Training', 'Evaluation' or 'Screening'".format(self.phase))

    def __len__(self):
        '''
        :return: number of samples (rows of the feature tensor)
        '''
        return len(self.data)

    @staticmethod
    def _draw_index(candidates):
        '''
        :param candidates: list of sample indices to draw from
        :return: one index picked uniformly at random
        :raises ValueError: if the list is empty
        '''
        if not candidates:
            raise ValueError(
                'Cannot sample a training triplet: no sample lies on one side of the interval')
        return np.random.choice(candidates)

    def perbulation(self, miu, sigma, feature):
        '''
        Multiplicative noise augmentation (the method name is kept as spelled for callers).

        :param miu: mean value of the perturbation distribution
        :param sigma: standard deviation of the perturbation distribution
        :param feature: input feature of dimension ``input_dim``
        :return: augmentation feature, i.e. the feature scaled element-wise by the noise
        '''
        perb = torch.normal(miu, sigma, [self.input_dim])
        augmentation = feature * perb
        return augmentation

    def normalize(self, feature):
        '''
        :param feature: input feature of dimension ``input_dim``
        :return: z-scored normalized feature
        '''
        normalized_feature = (feature - self.mean) / self.std
        return normalized_feature