-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathduplicateMZFinder.py
More file actions
85 lines (65 loc) · 2.36 KB
/
duplicateMZFinder.py
File metadata and controls
85 lines (65 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
'''
Little tool that takes in list of m/zs and then outputs
each mz that was present as well as how many times it showed up
overall in all of the inputted lists of m/zs. Rounds to 4 decimal places
as is.
'''
import csv
import pandas as pd
# short helper to pass into map. Takes number and rounds to 3 decimal places
def roundto4(x):
return round(x, 4)
# Takes list of values and returns the unique m/z values and their counts
def finder(in_list):
# Rounds each value in each list of mzs (1 list per file input)
for i in range(len(in_list)):
in_list[i] = map(roundto4, in_list[i])
# Removes duplicates
no_dups = []
for i in in_list:
for j in i:
if j not in no_dups:
no_dups.append(j)
counts = [0] * len(no_dups)
# For each unique m/z, get a count of how many times it appears
for x in no_dups:
for i in in_list:
if x in i:
counts[no_dups.index(x)] += 1
return [no_dups, counts]
# Takes a list containing two lists of equal lengths and writes them to
# a csv (need to have .csv in filename
def writeToCSV(mzs_and_counts, output_filename):
with open(output_filename, 'wb') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['mz', 'count'])
for x in range(0, len(mzs_and_counts[1])):
writer.writerow([mzs_and_counts[0][x], mzs_and_counts[1][x]])
# Reads in mz values from a file and returns list of mzs
# FILE MUST HAVE 'mz' AS COLUMN HEADER
def readFromCSV(filename):
df = pd.read_csv(filename)
return list(df['mz']) # Make sure file has a column named mz
def processDuplicates(files_list, outputname):
mzs = []
for f in files_list:
mzs.append(readFromCSV(f))
writeToCSV(finder(mzs), outputname)
# # Example use
# nfiles = [
# 'neg-ACM_sept16_T1R2_GL2_method1.csv',
# 'neg-ACM_sept16_T1R2_GL7_method1.csv',
# 'neg-ACM_sept16_T1R3_GL7_method1.csv',
# 'neg-ACM_sept16_T1R3_GL21_method1.csv'
# ]
#
# pfiles = [
# 'pos-ACM_sept16_T1R2_GL2_method1.csv',
# 'pos-ACM_sept16_T1R2_GL7_method1.csv',
# 'pos-ACM_sept16_T1R3_GL7_method1.csv',
# 'pos-ACM_sept16_T1R3_GL21_method1.csv'
# ]
#
# processDuplicates(nfiles, 'neg-ACM-dups.csv')
# processDuplicates(pfiles, 'pos-ACM-dups.csv')
print 'boom'