-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbg_filter.py
More file actions
139 lines (112 loc) · 4.96 KB
/
bg_filter.py
File metadata and controls
139 lines (112 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import h5py
import pandas as pd
import yaml
import numpy as np
###############################################################################################
# HDF5 and YAML files loading and saving
###############################################################################################
def load_locs(path):
    """Read a localization table and its metadata.

    Args:
        path: Path of the HDF5 file; it must contain a dataset named "locs".

    Returns:
        A ``(locs, info)`` tuple: ``locs`` is the "locs" dataset as a NumPy
        record array, ``info`` is whatever ``load_info(path)`` returns for
        the same path.
    """
    with h5py.File(path, "r") as hdf:
        # ``[...]`` reads the whole dataset into memory, so the array stays
        # usable after the file is closed.
        raw = hdf["locs"][...]
    table = np.rec.array(raw, dtype=raw.dtype)
    return table, load_info(path)
def load_info(path):
    """Load metadata information from the corresponding YAML file.

    The metadata file name is derived from ``path`` by replacing everything
    after the last "." with "yaml" (``sample.hdf5`` -> ``sample.yaml``).

    Args:
        path: Path of the data file whose metadata should be loaded.

    Returns:
        A list with one entry per YAML document found in the metadata file.

    Raises:
        FileNotFoundError: If the metadata file does not exist. The original
            exception is re-raised unchanged, so ``errno`` and ``filename``
            remain available to callers.
    """
    filename = path.rsplit(".", 1)[0] + ".yaml"
    try:
        with open(filename, "r") as info_file:
            # NOTE(security): UnsafeLoader can construct arbitrary Python
            # objects, so only load metadata files from trusted sources.
            # NOTE(review): presumably needed for Python-specific tags in the
            # metadata -- confirm before switching to a safer loader.
            info = list(yaml.load_all(info_file, Loader=yaml.UnsafeLoader))
    except FileNotFoundError:
        print(f"Could not find metadata file: {filename}")
        # Bare raise keeps the original exception (errno, filename, traceback)
        # instead of wrapping it in a new, attribute-less FileNotFoundError.
        raise
    return info
def save_locs(path, locs, info):
    """Write localizations to HDF5 and their metadata to a sibling YAML file.

    Args:
        path: Destination HDF5 file; an existing file is overwritten.
        locs: Array stored as the "locs" dataset.
        info: Metadata handed to ``save_info``; it is written next to the
            HDF5 file, with everything after the last "." in ``path``
            replaced by "yaml".
    """
    with h5py.File(path, "w") as hdf:
        hdf.create_dataset("locs", data=locs)
    yaml_path = f"{path.rsplit('.', 1)[0]}.yaml"
    save_info(yaml_path, info)
def save_info(path, info):
    """Write metadata documents to a YAML file.

    Args:
        path: Destination file; an existing file is overwritten.
        info: Iterable of metadata objects. Each element is emitted as its
            own YAML document, in block (non-flow) style.
    """
    with open(path, "w") as yaml_file:
        yaml.dump_all(info, yaml_file, default_flow_style=False)
###############################################################################################
# Filtering functions
###############################################################################################
def process_filtering(input_folder, filter_params, input_extension, output_extension):
    """Filter every matching HDF5 file below ``input_folder``.

    The folder is walked recursively. A file is selected when its name ends
    with ``f"{input_extension}.hdf5"`` and does not contain
    ``output_extension`` anywhere in its name. Each result is written next to
    its input, with ``output_extension`` inserted before the trailing
    ".hdf5". A failure on one file is reported and does not stop the batch.

    Args:
        input_folder: Root directory to search.
        filter_params: Mapping of column name to ``(min, max)`` bounds, passed
            to ``slxyE_filter`` and recorded in the output metadata.
        input_extension: Name suffix (before ".hdf5") that selects inputs.
        output_extension: Suffix inserted before ".hdf5" in the output name.
            Note that an empty string matches every name, so no file would
            be selected.

    Returns:
        None. Progress and errors are printed to stdout.
    """
    input_suffix = f"{input_extension}.hdf5"
    hdf5_files = [
        (root, name)
        for root, _, names in os.walk(input_folder)
        for name in names
        # The substring test presumably keeps results of an earlier run from
        # being filtered again -- NOTE(review): confirm this is the intent.
        if name.endswith(input_suffix) and output_extension not in name
    ]
    print(f"Found {len(hdf5_files)} files to process")

    for root, filename in hdf5_files:
        input_file_path = os.path.join(root, filename)
        # Replace only the trailing extension; str.replace() would also
        # rewrite any earlier ".hdf5" occurring inside the file name.
        stem = filename[: -len(".hdf5")]
        output_file_path = os.path.join(root, f"{stem}{output_extension}.hdf5")
        print(f"\nProcessing file: {filename}")
        try:
            locs, info = load_locs(input_file_path)
            # Filter the localizations
            filtered_df = slxyE_filter(locs, filter_params)
            filtered_locs = np.rec.array(filtered_df.to_records(index=False))
            # Record how the output was produced alongside the input metadata
            info.append({
                'Generated by': 'Picasso_batch_filtering',
                'Filter parameters': filter_params,
            })
            save_locs(output_file_path, filtered_locs, info)
            print(f"Successfully processed {filename}")
        except Exception as e:
            # Batch boundary: report the failure and move on to the next file.
            print(f"Error processing {filename}: {e}")
    print("\nAll files filtered")
def slxyE_filter(locs, filter_params):
    """Keep only the rows whose values lie inside the requested ranges.

    Args:
        locs: Tabular data accepted by ``pd.DataFrame`` (e.g. a record array).
        filter_params: Mapping of column name to a ``(min, max)`` pair. Both
            bounds are inclusive. Entries whose value is ``None`` or whose
            key is not a column of the table are ignored.

    Returns:
        A DataFrame with the rows that satisfy every applicable range.
    """
    table = pd.DataFrame(locs)
    for column, bounds in filter_params.items():
        if bounds is None or column not in table.columns:
            continue
        # Series.between is inclusive on both ends by default.
        in_range = table[column].between(bounds[0], bounds[1])
        table = table[in_range]
    return table
def find_input_files(input_folder, mode, file_endings):
    """Collect the HDF5 files selected by the configured mode and endings.

    Args:
        input_folder: Directory to search.
        mode: "folder" lists only ``input_folder`` itself; "subfolders" walks
            it recursively. Any other value selects nothing.
        file_endings: Iterable of name suffixes; a file is selected when its
            name ends with ``f"{ending}.hdf5"`` for at least one of them.

    Returns:
        List of matching paths, each file listed once.
    """
    # str.endswith accepts a tuple, so one call covers every ending and a
    # file matching several endings is still added only once.
    suffixes = tuple(f"{ending}.hdf5" for ending in file_endings)
    if mode == "folder":
        return [
            os.path.join(input_folder, name)
            for name in os.listdir(input_folder)
            if name.endswith(suffixes)
        ]
    if mode == "subfolders":
        return [
            os.path.join(root, name)
            for root, _, names in os.walk(input_folder)
            for name in names
            if name.endswith(suffixes)
        ]
    return []


def filter_file(input_file, output_dir, filter_params, output_extension):
    """Filter one localization file and save the result into ``output_dir``.

    The output keeps the input's base name with ``output_extension`` inserted
    before the file extension. Failures are reported on stdout instead of
    being raised, so a batch can continue with the next file.

    Args:
        input_file: Path of the HDF5 file to filter.
        output_dir: Directory that receives the filtered file.
        filter_params: Mapping of column name to ``(min, max)`` bounds.
        output_extension: Suffix inserted before the extension of the output.

    Returns:
        True when the file was filtered and saved, False otherwise.
    """
    filename = os.path.basename(input_file)
    stem, ext = os.path.splitext(filename)
    output_path = os.path.join(output_dir, f"{stem}{output_extension}{ext}")
    print(f"\nProcessing file: {filename}")
    try:
        locs, info = load_locs(input_file)
        filtered_df = slxyE_filter(locs, filter_params)
        filtered_locs = np.rec.array(filtered_df.to_records(index=False))
        # Record how the output was produced alongside the input metadata
        info.append({
            'Generated by': 'Picasso_batch_filtering',
            'Filter parameters': filter_params,
        })
        save_locs(output_path, filtered_locs, info)
    except Exception as e:
        # Batch boundary: report the failure and let the caller continue.
        print(f"Error processing {filename}: {e}")
        return False
    print(f"Successfully processed {filename}")
    return True


def main(config_path='config.yaml'):
    """Run batch filtering as described by the YAML configuration file.

    Reads the ``bg_signal_filtering`` section (filter ranges plus the
    ``output_extension`` setting) and the ``paths`` section
    (``input_folder``, ``mode``, ``file_endings``), then filters every
    selected file and writes each result next to its input.
    """
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    filter_params = config['bg_signal_filtering'].copy()
    # The extension settings share the section with the filter ranges; remove
    # them so only ranges remain. File selection is driven by
    # paths.file_endings, so input_extension is not needed here.
    filter_params.pop('input_extension', None)
    output_extension = filter_params.pop('output_extension')

    paths = config['paths']
    input_files = find_input_files(
        paths['input_folder'], paths['mode'], paths['file_endings']
    )
    print(f"Found {len(input_files)} files to process")

    # process_filtering() takes a folder, not a single file, and does not
    # accept input_file/output_dir keywords, so files are handled one by one.
    for input_file in input_files:
        filter_file(
            input_file=input_file,
            output_dir=os.path.dirname(input_file),
            filter_params=filter_params,
            output_extension=output_extension,
        )


if __name__ == "__main__":
    main()