import collections
import csv
import datetime
import math
import os
import statistics

import matplotlib.pyplot as plt  # plt is used below for the trace plots
import numpy as np               # np is used throughout for array handling
import pandas as pd
from skimage import io

import puncta_tracker as tracker
# the star imports below supply the image/blob helpers used in this module
# (e.g. dot_gaussian_fit, measure_stack_profile, mean_square_displacement)
from images import *
from blob_detector import *


def write_dot_to_file(dot_id, data_array):
# data_array must be a 3D array; dot_id must be an integer
# dot is then saved as a text file to the Dots folder; to change file path, edit directly below
# the read and write functions are basically from
# https://stackoverflow.com/questions/3685265/how-to-write-a-multidimensional-array-to-a-text-file/18145279
    now = datetime.date.today()
    file_name = 'Dots/' + now.isoformat() + '/' + str(dot_id) + '.txt'
    os.makedirs(os.path.dirname(file_name), exist_ok=True)  # make sure the dated Dots subfolder exists
    with open(file_name, 'w') as outfile:
# Any line starting with "#" will be ignored by numpy.loadtxt
outfile.write('# Array shape: {0}\n'.format(data_array.shape))
for data_slice in data_array:
            # The format string writes the values in left-justified columns
            # 7 characters wide, with 2 decimal places.
np.savetxt(outfile, data_slice, fmt='%-7.2f')
# Writing out a break to indicate different slices...
outfile.write('# New slice\n')


def read_dot_file(path, dot_id, data_shape):
    # data_shape is required to parse the data stored using "write_dot_to_file"
    # path is the string specifying the folder where the data are stored;
    # include the final '/' in the path
    # dot_id needs to be an integer
# data_shape is a three tuple in (depths, rows, columns)
# returns data as a ndarray of the specified shape
file_name = path + str(dot_id) + '.txt'
data = np.loadtxt(file_name).reshape(data_shape)
return data
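

# A minimal round-trip sketch for the two helpers above. The array shape and
# dot ID are made up for illustration; write_dot_to_file always saves under
# 'Dots/<today's ISO date>/', so the matching read path is derived the same way.
def _example_dot_round_trip():
    dot_id = 1  # hypothetical ID
    original = np.arange(24, dtype=np.float64).reshape(2, 3, 4)
    write_dot_to_file(dot_id, original)
    path = 'Dots/' + datetime.date.today().isoformat() + '/'
    restored = read_dot_file(path, dot_id, (2, 3, 4))
    assert np.allclose(original, restored)  # values survive at 2-decimal precision

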
def trace_csv_writer(file_path_name, trace_vector):
    # trace_vector is a 1D array of the measured trace
    # for now this is used for exporting trace data to MATLAB for trace analysis
    # file_path_name is a string specifying the output location as well as the file name,
    # for example: "folder/file.csv"
    with open(file_path_name, 'a', newline='') as file:  # newline='' keeps csv from writing blank rows on Windows
        writer = csv.writer(file, delimiter=',')
        for value in trace_vector:
            writer.writerow([value])
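

# Usage sketch for trace_csv_writer, with a hypothetical file name. Note that
# the file is opened in append mode, so repeated calls keep adding rows.
def _example_trace_csv():
    os.makedirs('Output', exist_ok=True)  # the target folder must exist
    trace_csv_writer('Output/example_trace.csv', [0.0, 0.5, 1.0, 0.5])

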
def compile_trace_data(dots_database: pd.DataFrame, dot_trace_mapping: dict, frame_time_interval=1):
    # This function takes the dot_database (generated by find_puncta) and the dot_trace_mapping (generated by
    # simple_tracker) and produces a DataFrame with the following columns: trace_ID, ave_xcoor, ave_ycoor, msd,
    # first_frame, last_frame, dwell_by_frame, dwell_time.
    # The DataFrame is compiled in the same way that the dot_database is constructed.
traces = {'trace_ID': [],
'ave_xcoor': [],
'ave_ycoor': [],
'msd': [],
'first_frame': [],
'last_frame': [],
'dwell_by_frame': [],
'dwell_time': []}
# find out the range of trace_ids, as reflected by the numbering of the trace_mapping_dict
number_of_traces = max(dot_trace_mapping.values())
for current_trace_id in range(1, 1 + number_of_traces):
        # collect all the dot_ids associated with the current trace_ID
        dots_list = [dot_id for dot_id, trace_id in dot_trace_mapping.items()
                     if trace_id == current_trace_id]
# after collecting relevant dot_ids, use dot_database to slice out the information of the associated dots
# and transform the data needed for the trace
filtered_dots_database = dots_database[dots_database['dot_ID'].isin(dots_list)]
ave_xcoor = statistics.mean(filtered_dots_database['xcoor'])
ave_ycoor = statistics.mean(filtered_dots_database['ycoor'])
first_frame = min(filtered_dots_database['frame'])
last_frame = max(filtered_dots_database['frame'])
dwell_by_frame = last_frame - first_frame + 1
dwell_time = dwell_by_frame * frame_time_interval
        # 5/20/19: added a new statistic, the mean square displacement (MSD)
if dwell_by_frame > 1:
msd = mean_square_displacement(filtered_dots_database['xcoor'], filtered_dots_database['ycoor'])
else:
msd = 0
# add the calculated data to the traces database
traces['trace_ID'].append(current_trace_id)
traces['ave_xcoor'].append(ave_xcoor)
traces['ave_ycoor'].append(ave_ycoor)
traces['msd'].append(msd)
traces['first_frame'].append(first_frame)
traces['last_frame'].append(last_frame)
traces['dwell_by_frame'].append(dwell_by_frame)
traces['dwell_time'].append(dwell_time)
return pd.DataFrame.from_dict(traces)
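

# A minimal sketch of compiling traces from a hand-made dot table and mapping.
# Real inputs come from find_puncta and the tracker; only the columns that
# compile_trace_data reads (dot_ID, xcoor, ycoor, frame) are supplied here.
# Note that the msd column relies on mean_square_displacement from the star
# imports at the top of this module.
def _example_trace_compilation():
    dots = pd.DataFrame({'dot_ID': [1, 2, 3],
                         'xcoor': [10.0, 10.4, 80.0],
                         'ycoor': [20.0, 20.2, 90.0],
                         'frame': [1, 2, 2]})
    mapping = {1: 1, 2: 1, 3: 2}  # dots 1 and 2 form trace 1; dot 3 is trace 2
    traces = compile_trace_data(dots, mapping, frame_time_interval=0.5)
    print(traces)  # trace 1 dwells 2 frames (1.0 s); trace 2 dwells 1 frame (0.5 s)

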
def filter_dot_database_by_gaussian_fit(dot_database: pd.DataFrame, image_stack, gaussian_fit_diameter,
max_centroid_deviation, min_height_threshold, max_elliptic_aspect_ratio):
    """Deprecated. The function first generates a list of dot_ids to keep, and uses this list to reduce
    the dot_database. The reduced database is then returned."""
dots_to_keep = []
for dot in dot_database.itertuples():
quality = dot_gaussian_quality_filter(dot.xcoor, dot.ycoor, image_stack[dot.frame - 1],
gaussian_fit_diameter,
max_centroid_deviation,
min_height_threshold,
max_elliptic_aspect_ratio)
if dot.dot_ID % 100 == 0:
print('Gaussian-fitting dot #' + str(dot.dot_ID))
if quality == 1:
dots_to_keep.append(dot.dot_ID)
filtered_dots = dot_database[dot_database['dot_ID'].isin(dots_to_keep)]
    filtered_dots = filtered_dots.reset_index(drop=True)  # reset_index returns a new frame, so assign the result
    return filtered_dots


def export_trace_signals(trace_database: pd.DataFrame, image_stack: np.ndarray):
    # Plots are saved to a subfolder under "Output/"
    # The measuring disc size is not exposed as a parameter; by default it has a diameter of 9
    # signals are calculated as the mean intensity of the pixels within the measuring disc
file_path = "Output/plots/"
measuring_disc_diameter = 9 # this is the default value, change if necessary
stack_depth = image_stack.shape[0]
for trace in trace_database.itertuples():
signals = [0]*stack_depth # initialize signals
for index in range(trace.first_frame - 1, trace.last_frame):
signals[index] = 1 # mark the signals picked up by puncta tracker
        # Below are the steps to measure an entire stack at a specific (x, y)
        # first make an ROI mask to zero out everything outside of the mask
        # mask = create_circular_mask(source_h, source_w, measuring_disc_diameter)
        # then measure the mean using 'measure_stack_profile' from the images module
mean = measure_stack_profile(int(round(trace.ave_xcoor)),
int(round(trace.ave_ycoor)),
image_stack,
measuring_disc_diameter)
if not os.path.exists(file_path):
os.makedirs(file_path)
        plt.clf()
        plt.suptitle(str(trace.trace_ID))  # figure-level title, so the subplot titles below don't overwrite it
plt.subplot(211)
plt.plot(np.arange(1, stack_depth + 1), mean, 'k-')
plt.title('Input')
plt.subplot(212)
plt.step(np.arange(1, stack_depth + 1), signals, color='red', lw=2)
plt.ylim(-0.2, 1.2)
plt.title('Signals')
plt.tight_layout()
plt.savefig(file_path + str(trace.trace_ID), dpi=300)


def fill_obvious_gaps_in_traces(trace_database: pd.DataFrame, max_spatial_difference, max_gaps_allowed):
    """For data with dim signals but a stable stage and low puncta density, it is possible to link gaps just by
    looking at the localization of the traces. This function contains some of the most confusing things I've ever
    done, so be careful; back then I didn't know about Pandas' drop function."""
# initialize a variable to store trace IDs for ones that need to be dropped;
# also, initialize a dictionary to store temporarily the combined traces
traces_to_drop = []
edited_traces = {'trace_ID': [],
'ave_xcoor': [],
'ave_ycoor': [],
'msd': [],
'first_frame': [],
'last_frame': [],
'dwell_by_frame': [],
'dwell_time': []}
    # I have to assign a temporary trace ID to each combined trace, because otherwise I couldn't merge the
    # combined trace database with the original one; the datatype of each column has to stay the same
current_trace_id = trace_database['trace_ID'].max() + 1
for trace in trace_database.itertuples():
# skip traces that are already processed using the if statement below
if trace.trace_ID in traces_to_drop:
continue
overlapped = trace_database[(trace_database['ave_xcoor'] - trace.ave_xcoor)**2 +
(trace_database['ave_ycoor'] - trace.ave_ycoor)**2 <=
max_spatial_difference**2]
        # decide if the trace needs to be combined: if overlapped contains exactly one row, the trace
        # has no duplicates at other time points
if len(overlapped) == 1:
continue
        # Cap the maximum gap-filling allowed. Below is my second attempt; it is
        # hardly elegant but this is a crucial function
        start_frames = np.asarray(overlapped['first_frame'])
        end_frames = np.asarray(overlapped['last_frame'])
        gaps = start_frames[1:] - end_frames[:-1]
        overlapped = overlapped.reset_index(drop=True)  # reset_index returns a new frame, so assign the result
        slice_index = 1
        for gap in gaps:
            if gap > max_gaps_allowed:
                # keep only the traces before the first oversized gap
                overlapped = overlapped[:slice_index]
                break
            else:
                slice_index += 1
# Check again after the previous operation.
        # This function is the worst I've ever written. I'd probably get an F if I turned this in as homework
if len(overlapped) == 1:
continue
first_frame = overlapped['first_frame'].min()
last_frame = overlapped['last_frame'].max()
dwell_by_frame = last_frame - first_frame + 1
dwell_time = (last_frame - first_frame + 1) * (trace.dwell_time/trace.dwell_by_frame)
# this mean is not accurate because it is not based on trace interpolation, but the difference is negligible
weighted_xcoor = overlapped['ave_xcoor'] * overlapped['dwell_by_frame']
weighted_ycoor = overlapped['ave_ycoor'] * overlapped['dwell_by_frame']
ave_xcoor = weighted_xcoor.sum() / dwell_by_frame
ave_ycoor = weighted_ycoor.sum() / dwell_by_frame
# I don't know the correct way to merge msd without the raw data, so my best attempt here is to do a weighted
# average for msd
msd_weighted = overlapped['msd'] * overlapped['dwell_by_frame']
msd = msd_weighted.sum() / dwell_by_frame
traces_to_drop += overlapped['trace_ID'].tolist()
edited_traces['trace_ID'].append(current_trace_id)
edited_traces['ave_xcoor'].append(ave_xcoor)
edited_traces['ave_ycoor'].append(ave_ycoor)
edited_traces['msd'].append(msd)
edited_traces['first_frame'].append(first_frame)
edited_traces['last_frame'].append(last_frame)
edited_traces['dwell_by_frame'].append(dwell_by_frame)
edited_traces['dwell_time'].append(dwell_time)
current_trace_id += 1
    edited_traces = pd.DataFrame.from_dict(edited_traces)
    if edited_traces.empty:
        return trace_database
    else:
        # ~ is equivalent to "not": the statement below filters out the traces in the 'traces_to_drop' list
        trimmed_trace_database = trace_database[~trace_database['trace_ID'].isin(traces_to_drop)]
        return pd.concat([trimmed_trace_database, edited_traces], ignore_index=True)
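

# A small sanity-check sketch for fill_obvious_gaps_in_traces, using a made-up
# two-trace database: the same spot detected on frames 1-10 and again on
# frames 13-20 should collapse into one trace when a 3-frame gap is allowed.
def _example_gap_filling():
    broken = pd.DataFrame({'trace_ID': [1, 2],
                           'ave_xcoor': [50.0, 50.2],
                           'ave_ycoor': [60.0, 60.1],
                           'msd': [0.5, 0.4],
                           'first_frame': [1, 13],
                           'last_frame': [10, 20],
                           'dwell_by_frame': [10, 8],
                           'dwell_time': [10.0, 8.0]})
    merged = fill_obvious_gaps_in_traces(broken, max_spatial_difference=2, max_gaps_allowed=5)
    print(merged)  # expect a single combined trace spanning frames 1-20

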
def remove_traces_with_long_gaps(trace_database: pd.DataFrame, max_spatial_difference, gap_threshold):
    # initialize a list to store the trace IDs that need to be dropped
traces_to_drop = []
for trace in trace_database.itertuples():
# skip traces that are already processed using the if statement below
if trace.trace_ID in traces_to_drop:
continue
overlapped = trace_database[(trace_database['ave_xcoor'] - trace.ave_xcoor) ** 2 +
(trace_database['ave_ycoor'] - trace.ave_ycoor) ** 2 <=
max_spatial_difference ** 2]
        # decide if this trace was broken up: if overlapped contains exactly one row, the trace has no
        # duplicates at other time points besides itself
if len(overlapped) == 1:
continue
# Otherwise, update the overlapped list to get rid of traces that have been processed in previous iterations
overlapped = overlapped[overlapped['trace_ID'] >= trace.trace_ID]
        # if overlapped contains more than one trace, this was probably a single trace broken up by poor
        # signal quality, possibly at more than one break point.
        # The first task is to calculate the lengths of the gaps between these overlapped traces:
        # find all the start frames
start_frames = np.asarray(overlapped['first_frame'])
# find all the end frames
end_frames = np.asarray(overlapped['last_frame'])
        # subtract the end-frame number of the nth trace from the start-frame number of the (n+1)th trace.
        # This creates a list of gaps
gaps = start_frames[1:] - end_frames[:-1]
        # now, examine each gap against the max-gap parameter set by the function argument. Use reset_index on
        # overlapped to allow easy positional retrieval of the specific traces involved
        overlapped = overlapped.reset_index(drop=True)  # reset_index returns a new frame, so assign the result
trace_to_drop_index = 0
        for gap in gaps:
            if gap < gap_threshold:
                # a gap smaller than the threshold means the two traces flanking it are considered a single
                # trace broken in two; both of them therefore go on the traces_to_drop list
                traces_to_drop.append(overlapped.iloc[trace_to_drop_index]['trace_ID'])
                traces_to_drop.append(overlapped.iloc[trace_to_drop_index + 1]['trace_ID'])
            # a gap at or above the threshold means the two flanking traces really are separate, so
            # neither is dropped on account of this gap
            trace_to_drop_index += 1
    # finally, remove duplicates in the traces_to_drop list by converting it to a dictionary and back to a list
traces_to_drop = list(dict.fromkeys(traces_to_drop))
filtered = trace_database[~trace_database['trace_ID'].isin(traces_to_drop)]
return filtered


def add_gaussian_fit_params_to_dot_database(dot_database: pd.DataFrame, image_stack, gaussian_fit_diameter):
    """The function takes a dot_database and returns it with the per-dot Gaussian-fit parameters appended."""
    # reset the index so that the column-wise concat at the end aligns row for row
    dot_database = dot_database.reset_index(drop=True)
gaussian_params = {'height': [],
'gaussian_x': [],
'gaussian_y': [],
'squared_deviation_from_centroid': [],
'gaussian_width_x': [],
'gaussian_width_y': [],
'elliptic_aspect_ratio': []}
counter = 0
for dot in dot_database.itertuples():
if counter % 1000 == 0:
current_time = datetime.datetime.now()
print('Gaussian-fitted {} dots at {}'.format(counter, current_time.strftime("%H:%M:%S")))
height, x, y, width_x, width_y = dot_gaussian_fit(dot.xcoor, dot.ycoor,
image_stack[dot.frame - 1], gaussian_fit_diameter)
centroid_x = centroid_y = gaussian_fit_diameter / 2
squared_deviation_from_centroid = (x - centroid_x) ** 2 + (y - centroid_y) ** 2
elliptic_aspect_ratio = max(width_x, width_y) / min(width_x, width_y)
gaussian_params['height'].append(height)
gaussian_params['gaussian_x'].append(x)
gaussian_params['gaussian_y'].append(y)
gaussian_params['squared_deviation_from_centroid'].append(squared_deviation_from_centroid)
gaussian_params['gaussian_width_x'].append(width_x)
gaussian_params['gaussian_width_y'].append(width_y)
gaussian_params['elliptic_aspect_ratio'].append(elliptic_aspect_ratio)
counter += 1
gaussian_params = pd.DataFrame.from_dict(gaussian_params)
return pd.concat([dot_database, gaussian_params], axis=1)


def exclude_puncta_using_negative_control(experimental: pd.DataFrame,
                                          puncta_from_negative_control: pd.DataFrame,
                                          min_distance: int):
    # min_distance specifies a radius around each punctum in the negative-control (NC) data; a punctum from the
    # experimental data will be dropped if it lies within min_distance of a punctum in the NC
    # the experimental data frame is a database of traces; the negative-control frame is a database of dots
    filtered_data = experimental  # I prefer not to work on the original data; each filter below returns a new frame
for negative_control_dot in puncta_from_negative_control.itertuples():
filtered_data = filtered_data[(filtered_data['ave_xcoor'] - negative_control_dot.xcoor)**2 +
(filtered_data['ave_ycoor'] - negative_control_dot.ycoor)**2 >=
min_distance**2]
return filtered_data
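

# A minimal sketch of the NC exclusion above, with made-up coordinates: the
# experimental trace at (10, 10) sits within 5 px of an NC punctum and is
# dropped, while the trace at (100, 100) survives.
def _example_nc_exclusion():
    experimental = pd.DataFrame({'trace_ID': [1, 2],
                                 'ave_xcoor': [10.0, 100.0],
                                 'ave_ycoor': [10.0, 100.0]})
    negative_control = pd.DataFrame({'xcoor': [12.0], 'ycoor': [11.0]})
    kept = exclude_puncta_using_negative_control(experimental, negative_control, min_distance=5)
    print(kept)  # expect only trace 2

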
def gather_dots(data_image_file_name, blob_threshold):
print('currently finding dots in sample...' + data_image_file_name)
data_image = io.imread(data_image_file_name)
data_image = np.asarray(data_image, dtype=np.float64)
dots = tracker.find_puncta(data_image,
target_radius=5,
blob_min_radius=3,
blob_threshold=blob_threshold)
return dots


def filter_dots(dot_dataframe,
mean_threshold=None,
max_blob_r=None,
max_intensity=None,
max_gaussian_deviation=None,
max_elliptic_ratio=None,
min_gaussian_height=None,
max_gaussian_height=None,
max_mean_height_percentage_difference=None,
filter_dots_on_first_and_last_frames=False):
# pass the original data to the local variable filtered_dots
filtered_dots = dot_dataframe
last_frame = int(dot_dataframe['frame'].max())
if not filter_dots_on_first_and_last_frames:
filtered_dots = filtered_dots[filtered_dots['frame'] > 1]
filtered_dots = filtered_dots[filtered_dots['frame'] < last_frame]
if max_blob_r is not None:
filtered_dots = filtered_dots[filtered_dots['blob_r'] < max_blob_r]
if mean_threshold is not None:
filtered_dots = filtered_dots[filtered_dots['mean_intensity'] > mean_threshold]
if max_intensity is not None:
filtered_dots = filtered_dots[filtered_dots['mean_intensity'] < max_intensity]
if 'height' in dot_dataframe.keys() and min_gaussian_height is not None:
filtered_dots = filtered_dots[filtered_dots['height'] > min_gaussian_height]
if 'height' in dot_dataframe.keys() and max_gaussian_height is not None:
filtered_dots = filtered_dots[filtered_dots['height'] < max_gaussian_height]
if 'elliptic_aspect_ratio' in dot_dataframe.keys() and max_elliptic_ratio is not None:
filtered_dots = filtered_dots[filtered_dots['elliptic_aspect_ratio'] < max_elliptic_ratio]
if 'squared_deviation_from_centroid' in dot_dataframe.keys() and max_gaussian_deviation is not None:
filtered_dots = filtered_dots[filtered_dots['squared_deviation_from_centroid'] < max_gaussian_deviation ** 2]
if max_mean_height_percentage_difference is not None:
filtered_dots = filtered_dots[filtered_dots['mean_intensity'] / filtered_dots['height'] >
(1-max_mean_height_percentage_difference)]
filtered_dots = filtered_dots[filtered_dots['mean_intensity'] / filtered_dots['height'] <
(1+max_mean_height_percentage_difference)]
    if not filter_dots_on_first_and_last_frames:
        # dots on the first and last frames were exempted from the filters above, so add them back unfiltered
        first_frame_dots = dot_dataframe[dot_dataframe['frame'] == 1]
        last_frame_dots = dot_dataframe[dot_dataframe['frame'] == last_frame]
        combined_dots = pd.concat([first_frame_dots, filtered_dots, last_frame_dots], join='inner')
return combined_dots
else:
return filtered_dots
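

# A usage sketch for filter_dots with a made-up dot table. Only the columns the
# chosen filters touch are needed; the Gaussian-fit columns are optional and
# are checked for before use.
def _example_dot_filtering():
    dots = pd.DataFrame({'dot_ID': [1, 2, 3, 4],
                         'frame': [1, 2, 2, 3],
                         'blob_r': [3.0, 3.5, 8.0, 3.2],
                         'mean_intensity': [120.0, 150.0, 90.0, 40.0]})
    kept = filter_dots(dots, mean_threshold=50, max_blob_r=5)
    # dot 3 is dropped for its blob radius; dots 1 and 4 pass through unfiltered
    # as first/last-frame dots even though dot 4 would fail the mean threshold
    print(kept)

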
def filter_trace_by_start_end_and_xy(trace_data: pd.DataFrame, min_x, max_x, min_y, max_y):
last_frame = trace_data['last_frame'].max()
filtered_traces = trace_data[trace_data['first_frame'] > 1]
filtered_traces = filtered_traces[filtered_traces['last_frame'] < last_frame]
filtered_traces = filtered_traces[filtered_traces['ave_xcoor'] > min_x]
filtered_traces = filtered_traces[filtered_traces['ave_xcoor'] < max_x]
filtered_traces = filtered_traces[filtered_traces['ave_ycoor'] > min_y]
filtered_traces = filtered_traces[filtered_traces['ave_ycoor'] < max_y]
return filtered_traces


def count_instances(input_dataframe: pd.DataFrame, column_name: str):
    # count how many rows hold each distinct value of the given column
    cnt = collections.Counter()
for element in input_dataframe[column_name]:
cnt[element] += 1
elements = []
counts = []
for key, value in cnt.items():
elements.append(key)
counts.append(value)
output = {column_name: elements,
'occurrences': counts}
output = pd.DataFrame.from_dict(output)
output = output.sort_values(by=column_name)
return output.reset_index(drop=True)
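

# Usage sketch for count_instances with a made-up column. The same result can
# also be had with pandas' built-in value_counts, sorted by value:
# input_dataframe[column_name].value_counts().sort_index()
def _example_counting():
    df = pd.DataFrame({'trace_ID': [1, 1, 2, 3, 3, 3]})
    print(count_instances(df, 'trace_ID'))  # occurrences: 2, 1, 3

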
def filter_traces_sample_space(trace_data: pd.DataFrame, dwell_time_limit=None):
    # trace_data should be filtered already
    # recover the frame interval from the first trace (dwell_time = dwell_by_frame * interval)
    interval = trace_data['dwell_time'].iloc[0] / trace_data['dwell_by_frame'].iloc[0]
last_frame = max(trace_data['last_frame'])
max_dwell_time = last_frame * interval
if dwell_time_limit is None:
dwell_time_limit = math.ceil(max_dwell_time/2)
first_frame_limit = math.ceil((max_dwell_time - dwell_time_limit)/interval)
filtered_traces = trace_data[trace_data['first_frame'] <= first_frame_limit]
filtered_traces = filtered_traces[filtered_traces['dwell_time'] < dwell_time_limit]
return filtered_traces
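

# A worked example of the sample-space cut above, with made-up numbers: for a
# 100-frame movie at a 1 s interval, max_dwell_time is 100 s and the default
# dwell_time_limit is ceil(100 / 2) = 50 s. first_frame_limit is then
# ceil((100 - 50) / 1) = 50, so only traces starting on or before frame 50 and
# dwelling under 50 s are kept: every kept trace had the same full 50 s
# observation window available to it.

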
def remove_concurrent_and_overlapping_traces(trace_database: pd.DataFrame,
spatial_difference_threshold):
# initialize a variable to store trace IDs for ones that need to be dropped;
traces_to_drop = []
for trace in trace_database.itertuples():
# skip traces that are already processed using the if statement below
if trace.trace_ID in traces_to_drop:
continue
# first, find if there are concurrent traces that appear before the trace in question in its vicinity
overlapped = trace_database[trace_database['first_frame'] < trace.first_frame]
overlapped = overlapped[overlapped['last_frame'] >= trace.first_frame]
# these overlapped traces won't include our trace in question
# then, among these candidates, search for spatial proximity
overlapped = overlapped[(overlapped['ave_xcoor'] - trace.ave_xcoor) ** 2 +
(overlapped['ave_ycoor'] - trace.ave_ycoor) ** 2 <=
spatial_difference_threshold ** 2]
        # Test 1: if overlapped is empty, no earlier trace was still present in the vicinity when the
        # trace in question appeared
if len(overlapped) == 0:
            # Test 2: check whether other traces start on the very same frame as the trace in question
            # while also sitting in close proximity
overlapped = trace_database[trace_database['first_frame'] == trace.first_frame]
overlapped = overlapped[(overlapped['ave_xcoor'] - trace.ave_xcoor) ** 2 +
(overlapped['ave_ycoor'] - trace.ave_ycoor) ** 2 <=
spatial_difference_threshold ** 2]
# if the overlapped database returns one, it means that there are no other traces in close proximity to the
# trace in question, in this case, move on to the next execution of the for loop
if len(overlapped) == 1:
continue
# otherwise, add these traces to the list for failing Test 2.
else:
trace_IDs_list = list(overlapped['trace_ID'])
traces_to_drop += trace_IDs_list
        # otherwise, the trace in question failed Test 1 and is added to the drop list
        else:
            traces_to_drop.append(trace.trace_ID)
    # finally, remove duplicates in the traces_to_drop list by converting it to a dictionary and back to a list
traces_to_drop = list(dict.fromkeys(traces_to_drop))
filtered = trace_database[~trace_database['trace_ID'].isin(traces_to_drop)]
return filtered
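

# A minimal sketch of the concurrency filter above with made-up traces: trace 2
# starts while trace 1 (same spot) is still present, so trace 2 fails Test 1
# and is dropped; the spatially isolated trace 3 survives.
def _example_concurrency_filter():
    traces = pd.DataFrame({'trace_ID': [1, 2, 3],
                           'ave_xcoor': [10.0, 10.5, 200.0],
                           'ave_ycoor': [10.0, 10.2, 200.0],
                           'first_frame': [1, 5, 2],
                           'last_frame': [8, 12, 9]})
    print(remove_concurrent_and_overlapping_traces(traces, spatial_difference_threshold=3))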