-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathDataModifier.py
More file actions
136 lines (123 loc) · 4.83 KB
/
DataModifier.py
File metadata and controls
136 lines (123 loc) · 4.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import math
import sys
import numpy as np
from sklearn.datasets import make_circles
class DataModifier:
    """Perturb 2-D point sets and adjacency lists.

    Point sets are mutable lists whose items expose ``item[0]`` (x) and
    ``item[1]`` (y); new points are appended as ``(x, y)`` tuples.

    Graphs are lists of ``[vertex_id, neighbour_list]`` entries. An
    undirected edge ``u - v`` is stored twice: ``v`` appears in the neighbour
    list of ``u`` and ``u`` in the neighbour list of ``v``.

    All methods modify their argument in place and also return it.
    """

    def __init__(self):
        """Create a modifier; the class keeps no state."""
        pass

    @staticmethod
    def _bounding_box(V):
        """Return ``(min_x, max_x, min_y, max_y)`` of the non-empty point list *V*."""
        xs = [v[0] for v in V]
        ys = [v[1] for v in V]
        return min(xs), max(xs), min(ys), max(ys)

    @staticmethod
    def _noise_count(V, noise):
        """Return how many points to add: ``ceil(len(V) * noise)``, never negative."""
        return max(0, int(np.ceil(len(V) * noise)))

    def add_gaussian_noise(self, V, noise=0.1):
        """Append random points inside the bounding box of *V*.

        Despite the method name, coordinates are drawn from a *uniform*
        distribution over the axis-aligned bounding box of the existing
        points.

        Args:
            V: list of points; extended in place.
            noise: fraction of ``len(V)`` to add (rounded up).

        Returns:
            ``(V, len(V))`` after the points were appended.
        """
        size = self._noise_count(V, noise)
        if size == 0:
            # Nothing to add (also covers an empty V, which has no bounding box).
            return V, len(V)

        # Use the real extrema: seeding the maxima with -1.0 would give a
        # wrong box for data that lies entirely below -1.
        min_x, max_x, min_y, max_y = self._bounding_box(V)
        for _ in range(size):
            x = np.random.uniform(min_x, max_x)
            y = np.random.uniform(min_y, max_y)
            V.append((x, y))
        return V, len(V)

    def add_clustered_noise(self, V, type='', noise=0.1):
        """Append structured noise points (a circle or a line) to *V*.

        Args:
            V: list of points; extended in place.
            type: shape of the noise; one of ``'circle'``,
                ``'horizontal_line'`` or ``'vertical_line'``.
            noise: fraction of ``len(V)`` to add (rounded up).

        Returns:
            ``(V, len(V))`` after the points were appended.

        Raises:
            NotImplementedError: if *type* is empty or not a supported shape.
        """
        # ``type`` shadows the builtin but is part of the public signature,
        # so it is only aliased locally.
        kind = type
        if kind == '':
            raise NotImplementedError('No parameter given')
        if kind not in ('circle', 'horizontal_line', 'vertical_line'):
            raise NotImplementedError('%s not implemented' % kind)

        size = self._noise_count(V, noise)
        if size == 0:
            # Avoids dividing by zero below and an undefined bounding box.
            return V, len(V)

        min_x, max_x, min_y, max_y = self._bounding_box(V)
        x_center = (max_x + min_x) / 2
        y_center = (max_y + min_y) / 2

        if kind == 'circle':
            # Circle centred in the bounding box, 70 % of its shorter side wide.
            radius = min(max_x - min_x, max_y - min_y) * 0.7 / 2
            for i in range(size):
                angle = 2 * math.pi / size * i
                V.append((math.cos(angle) * radius + x_center,
                          math.sin(angle) * radius + y_center))
        elif kind == 'horizontal_line':
            # Evenly spaced points starting at min_x, at mid height.
            interval = (max_x - min_x) / size
            for i in range(size):
                V.append((min_x + i * interval, y_center))
        else:  # 'vertical_line'
            # Evenly spaced points starting at min_y, at mid width.
            interval = (max_y - min_y) / size
            for i in range(size):
                V.append((x_center, min_y + i * interval))
        return V, len(V)

    def delete_data_area(self, V, dataset=''):
        """Placeholder for removing a region of a dataset; not implemented.

        No dataset is supported yet, including the names the original code
        listed (``'two_moons'``, ``'two_circles'``, ``'aniso'``, ``'varied'``,
        ``'blobs'``).

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError('%s not implemented' % dataset)

    def delete_random_vertices(self, adj, percentage=0.01):
        """Remove a random share of the vertices from the graph *adj*.

        ``ceil(len(adj) * percentage)`` entries are chosen uniformly at
        random (without replacement) among the entries present in *adj*.
        Each chosen vertex is removed together with every reference to it in
        the remaining neighbour lists.

        Args:
            adj: list of ``[vertex_id, neighbour_list]``; modified in place.
            percentage: fraction of vertices to delete.

        Returns:
            The same *adj* list.
        """
        # np.ceil returns a float; convert and clamp to a valid count.
        to_delete = int(np.ceil(len(adj) * percentage))
        to_delete = max(0, min(to_delete, len(adj)))
        if to_delete == 0:
            return adj

        positions = np.random.permutation(len(adj))[:to_delete]
        doomed = {adj[int(p)][0] for p in positions}

        # Slice assignment keeps the caller's list objects alive.
        adj[:] = [entry for entry in adj if entry[0] not in doomed]
        for entry in adj:
            entry[1][:] = [n for n in entry[1] if n not in doomed]
        return adj

    def delete_random_edges(self, adj, percentage=0.01):
        """Remove a random share of the undirected edges from the graph *adj*.

        The edge count is ``sum(len(neighbour_list)) / 2`` and
        ``ceil(edge_count * percentage)`` edges are deleted, capped at the
        number of removable edges. An edge is removable when it is stored in
        both directions and is not a self-loop. Edges are chosen uniformly at
        random without replacement.

        Args:
            adj: list of ``[vertex_id, neighbour_list]``; neighbour lists are
                modified in place.
            percentage: fraction of edges to delete.

        Returns:
            The same *adj* list.
        """
        neighbours_of = {entry[0]: entry[1] for entry in adj}

        # Collect every removable edge once, in a deterministic order.
        edges = []
        seen = set()
        for u, neighbours in neighbours_of.items():
            for v in neighbours:
                if v == u or (u, v) in seen or (v, u) in seen:
                    continue
                if u in neighbours_of.get(v, ()):
                    seen.add((u, v))
                    edges.append((u, v))

        edge_count = sum(len(entry[1]) for entry in adj) / 2  # each edge is stored twice
        to_delete = int(np.ceil(edge_count * percentage))
        to_delete = max(0, min(to_delete, len(edges)))

        for p in np.random.permutation(len(edges))[:to_delete]:
            u, v = edges[int(p)]
            neighbours_of[u].remove(v)
            neighbours_of[v].remove(u)
        return adj