-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinchiTest.py
More file actions
107 lines (95 loc) · 3.82 KB
/
Copy pathinchiTest.py
File metadata and controls
107 lines (95 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
import os
import re
import subprocess
import sys
"""
This script turns a file of NP-ID and SMILES string combinations into a file that contains the NP-ID, SMILES and InChIKey.
WARNING: this requires molconvert, a tool included in JChem: https://chemaxon.com/products/instant-jchem/download.
CFM_pipeline step 2:
Second step in the pipeline. Takes each of the split files and adds the InChIKeys to it so they can later be added to the mgf files.
The next script to run is CFMrunner.py.
Made by: Rutger Ozinga
Last edit: 10/10/2018
"""
import os;
import re;
import sys;
"""
Creates three lists from the given file.
Each line of the file should contain an ID, a neutral SMILES string and, optionally, an original SMILES string, separated by commas.
"""
def createLists(path):
    """Read an ID/SMILES file and return its columns as three parallel lists.

    Every non-blank line is stripped and split on commas. The first field is
    the ID and the second the SMILES string. When a third field is present it
    is used as the alternative SMILES; otherwise the second field is reused,
    so both SMILES lists hold the same value at that position.

    Args:
        path: Path of the comma-separated input file.

    Returns:
        A tuple ``(idList, smileList, altSmileList)`` of equally long lists.

    Raises:
        IndexError: If a non-blank line has no second (SMILES) field.
    """
    idList = []
    smileList = []
    altSmileList = []
    # The context manager closes the handle even when a malformed line raises.
    with open(path) as inFile:
        for line in inFile:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at the end of the file)
                # hold no record; indexing into them would raise IndexError.
                continue
            splittedLine = line.split(",")
            smile = splittedLine[1]
            altSmile = splittedLine[2] if len(splittedLine) > 2 else smile
            idList.append(splittedLine[0])
            smileList.append(smile)
            altSmileList.append(altSmile)
    return idList, smileList, altSmileList
"""
Creates a new file from a list of SMILES strings, one string per line.
smileList = list of SMILES strings
newPath = path where the new file is written
"""
def newFile(smileList, newPath):
    """Write every entry of smileList to newPath, one entry per line.

    An existing file at newPath is overwritten.

    Args:
        smileList: Iterable of strings; a newline is appended to each entry.
        newPath: Path of the file to create.
    """
    # "with" guarantees the handle is closed even if a write fails; the handle
    # also no longer reuses the function's own name.
    with open(newPath, "w") as outFile:
        outFile.writelines(line + "\n" for line in smileList)
def createInchiKeys(molConvertPath, tempPath, smilePath):
    """Run molconvert on every line of smilePath and collect the output lines.

    For each line of the SMILES file, molconvert is invoked with the
    arguments ``-2:e inchikey <line> -o <tempPath>``. Every line molconvert
    wrote to tempPath is appended to the result (line endings are kept) and
    tempPath is deleted again. When tempPath is missing or empty after the
    call, the placeholder ``"None\\n"`` is appended instead.

    Args:
        molConvertPath: Prefix prepended to ``"molconvert"`` to form the
            executable path (must end with a path separator when it names a
            directory).
        tempPath: Scratch file molconvert writes its output to.
        smilePath: File with one SMILES string per line.

    Returns:
        List of output lines, in input order.
    """
    executable = molConvertPath + "molconvert"
    inchiKeyList = []
    with open(smilePath, "r") as smileFile:
        for smile in smileFile:
            # An argument list (no shell) hands the SMILES to molconvert
            # verbatim, so quotes or shell metacharacters in the input can
            # neither break nor inject into the command. The line is passed
            # unchanged, including its line ending, exactly as the previous
            # single-quoted shell command did.
            try:
                subprocess.run(
                    [executable, "-2:e", "inchikey", smile, "-o", tempPath],
                    check=False,
                )
            except OSError:
                # molconvert could not be started; no output file is produced,
                # so the branch below records the "None" placeholder.
                pass
            if os.path.exists(tempPath) and os.path.getsize(tempPath) > 0:
                with open(tempPath, "r") as keyFile:
                    inchiKeyList.extend(keyFile)
                # Remove the scratch file so stale output can never be
                # mistaken for the result of a later, failed conversion.
                os.remove(tempPath)
            else:
                inchiKeyList.append("None\n")
    return inchiKeyList
"""
Takes the ID list (contains all the IDs), SMILES list, alternative SMILES list, inchiKeyList, alternative inchiKeyList and the path to
the new file, and writes the data into the file.
"""
def makeInchiSmileFile(idList, smileList, altSmileList, inchiKeyList, altInchiKeyList, finalOutput):
    """Write one space-separated record per ID to finalOutput.

    When the SMILES and the alternative SMILES of an entry differ, the record
    is ``ID SMILES ALT_SMILES KEY ALT_KEY``; otherwise it is ``ID SMILES KEY``.
    The lengths of all five lists are printed first.

    Args:
        idList: IDs; determines the number of records written.
        smileList: SMILES strings, parallel to idList.
        altSmileList: Alternative SMILES strings, parallel to idList.
        inchiKeyList: Keys for smileList; surrounding whitespace is stripped.
        altInchiKeyList: Keys for altSmileList; surrounding whitespace is
            stripped.
        finalOutput: Path of the file to write (overwritten if present).

    Raises:
        IndexError: If one of the other lists is shorter than idList.
    """
    print(len(idList), len(smileList), len(altSmileList), len(inchiKeyList), len(altInchiKeyList))
    with open(finalOutput, "w") as finalOut:
        for i, npId in enumerate(idList):
            fields = [npId, smileList[i]]
            # Keys are stripped because they still carry the line endings of
            # the molconvert output they were read from.
            if smileList[i] != altSmileList[i]:
                fields.append(altSmileList[i])
                fields.append(inchiKeyList[i].strip())
                fields.append(altInchiKeyList[i].strip())
            else:
                fields.append(inchiKeyList[i].strip())
            # Terminate every record explicitly instead of relying on the last
            # key happening to end with a newline; otherwise records merge.
            finalOut.write(" ".join(fields) + "\n")
def main(molConvertPath, filePath, fileName):
    """Build ``<name>_dataFile.txt`` for one ID/SMILES input file.

    Reads ``filePath + fileName``, writes the SMILES and alternative SMILES
    to temporary files, converts both with createInchiKeys and writes the
    combined records with makeInchiSmileFile. ``<name>`` is the part of
    fileName before its first dot.

    Args:
        molConvertPath: Prefix of the molconvert executable, passed on to
            createInchiKeys.
        filePath: Prefix for the input, temporary and output files. Paths are
            built by plain concatenation, so a directory needs its trailing
            separator.
        fileName: Name of the input file, appended to filePath.
    """
    path = filePath + fileName
    print('working in ' + path)
    # Temporary files that hold the SMILES strings and the InChIKeys.
    newPath = filePath + "tempSmiles.txt"
    newAltPath = filePath + "tempAltSmiles.txt"
    tempPath = filePath + "tempInchiKeys.txt"
    tempAltPath = filePath + "tempAltInchiKeys.txt"
    # Output data file: ID, SMILES and InChIKey, plus the alternative SMILES
    # and its InChIKey when the two SMILES differ.
    finalOutput = filePath + fileName.split(".")[0] + "_dataFile.txt"
    idList, smileList, altSmileList = createLists(path)
    try:
        newFile(smileList, newPath)
        newFile(altSmileList, newAltPath)
        inchiKeyList = createInchiKeys(molConvertPath, tempPath, newPath)
        altInchiKeyList = createInchiKeys(molConvertPath, tempAltPath, newAltPath)
        makeInchiSmileFile(idList, smileList, altSmileList, inchiKeyList, altInchiKeyList, finalOutput)
    finally:
        # Remove both temporary SMILES files, even when a step above failed.
        # os.remove avoids a shell, so unusual characters in the path are safe.
        for tempFile in (newPath, newAltPath):
            if os.path.exists(tempFile):
                os.remove(tempFile)
if __name__ == '__main__':
    # Expected arguments: <molConvertPath> <filePath> <fileName>.
    # Exit with a usage message instead of an IndexError when any is missing;
    # extra arguments are still ignored, as before.
    if len(sys.argv) < 4:
        sys.exit("usage: {} <molConvertPath> <filePath> <fileName>".format(sys.argv[0]))
    main(sys.argv[1], sys.argv[2], sys.argv[3])