edeutsch · nithishbn · Oct 18, 2018 · Nov 3, 2018
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 \.idea/
 
 Python/__pycache__/
+
+\.vscode/
diff --git a/Python/Response.py b/Python/Response.py
@@ -10,5 +10,14 @@ def code(self):
     def code(self, code):
         self._code = code
 
+    @property
+    def message(self):
+        return self._message
+    @message.setter
+    def message(self, message):
+        self._message = message
+
     def __init__(self):
         self._code = "NEW"
+        self._message = None
+
diff --git a/Python/Spectrum.py b/Python/Spectrum.py
@@ -1,14 +1,15 @@
 
 import requests
-import Response 
+
+from Response import Response
 
 
 class Spectrum(object):
 
     # directly takes an USI
     def __init__(self, usi):
         self.usi = usi.usi
-        self.r = Response.Response()
+        self.r = Response()
         self.results = None
         self.name = None
         self.precursorMZ = None
@@ -20,13 +21,13 @@ def fetch(self, source: str) -> Response:
         res = requests.get(
             "https://db.systemsbiology.net/dev2/sbeams/cgi/{source}/Spectrum?usi={usi}".format(source=source,
                                                                                                usi=self.usi))
-
         if res.status_code == 200:
             data = res.json()
             # spectrum json tag
             if(data["nErrors"]>0):
-                print(data["message"])
+
                 self.r.code = "ERROR"
+                self.r.message = data["message"]
             else:
                 self.results = data["results"][0]["Spectrum"]
 
@@ -36,9 +37,12 @@ def fetch(self, source: str) -> Response:
                 self.numPeaks = self.results["NumPeaks"]
                 self.peakList = self.results["PeakList"]
                 self.r.code = "OK"
+                # "message" is optional here: a missing key must not turn a parsed spectrum into a KeyError
+                self.r.message = data.get("message")
         else:
             self.r.code = "ERROR"
-
+            # self.r is reused across fetches, so always overwrite the message on HTTP failure
+            self.r.message = "HTTP request failed with status code {}".format(res.status_code)
         return self.r
 
     # prints out attributes

diff --git a/Python/UniversalSpectrumIdentifier.py b/Python/UniversalSpectrumIdentifier.py
@@ -1,5 +1,9 @@
 import re
-import Response
+from Python.Response import Response
+from pyteomics.mass.mass import Unimod
+# Unimod
+
+
 class UniversalSpectrumIdentifier(object):
 
     # usi object takes usiStr an automatically parses it and stores attributes
@@ -10,6 +14,7 @@ def __init__(self, usi):
 
         self.valid = False
         self.usi = usi
+        self.usiMzspec = None
         self.datasetIdentifier = None
         self.datasetSubfolder = None
         self.msRunName = None
@@ -20,10 +25,6 @@ def __init__(self, usi):
         self.charge = None
         self.provenanceIdentifier = None
         self.error = 0
-
-        # parse out usi and store response
-
-
 
     # Attributes:
     #   usi
@@ -38,17 +39,17 @@ def __init__(self, usi):
 
     # parses USI string
     def parse(self, verbose):
-        r = Response.Response()
+        r = Response()
         print()
-        verboseprint = print if verbose else lambda *a, **k: None
-        verboseprint("\nINFO: Parsing USI string '" + self.usi + "'")
+        verbosePrint = print if verbose else lambda *a, **k: None
+        verbosePrint("\nINFO: Parsing USI string '" + self.usi + "'")
         elementOffset = 0
-        offset = 0
+
         if self.usi.startswith("mzspec:"):
             self.usiMzspec = self.usi[len("mzspec:"):]
         else:
             self.error += 1
-            verboseprint("ERROR: USI does not begin with prefix 'mszpec:'")
+            verbosePrint("ERROR: USI does not begin with prefix 'mzspec:'")
             r.code = "ERROR"
             return r
 
@@ -60,7 +61,7 @@ def parse(self, verbose):
 
         # checks if usi has at least 4 colon-separated fields
         if nElements < 4:
-            verboseprint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec")
+            verbosePrint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec")
             self.error += 1
             r.code = "ERROR"
             return r
@@ -69,32 +70,32 @@ def parse(self, verbose):
         # datasetIdentifier field
         self.datasetIdentifier = elements[offset]
         if self.datasetIdentifier is None:
-            verboseprint("Dataset identifier is empty. Not permitted.")
+            verbosePrint("Dataset identifier is empty. Not permitted.")
             self.error += 1
 
         # this is the way it has been implemented now, but it can easily be changed to regex for other types of datasets
         elif self.datasetIdentifier.startswith("PXD"):
             self.datasetIdentifier = elements[offset]
-            verboseprint("Dataset identifier is PXD compliant. Allowed.")
+            verbosePrint("Dataset identifier is PXD compliant. Allowed.")
         else:
-            verboseprint("Dataset identifier unknown. Not permitted.")
+            verbosePrint("Dataset identifier unknown. Not permitted.")
             self.error += 1
         elementOffset += 1
         offset = elementOffset
         nextField = elements[offset]
         offsetShift = 0
         # empty datasetsubfolder
         if nextField == '':
-            verboseprint("old style. empty is ok. Empty datasetsubfolder probably.")
+            verbosePrint("old style. empty is ok. Empty datasetsubfolder probably.")
             offsetShift = 1
 
         offset = elementOffset + offsetShift
         self.msRunName = elements[offset]
 
         if self.msRunName:
-            verboseprint("MS run equals " + self.msRunName)
+            verbosePrint("MS run equals " + self.msRunName)
         else:
-            verboseprint("MS Run identifier empty. Not permitted.")
+            verbosePrint("MS Run identifier empty. Not permitted.")
             self.error += 1
 
         elementOffset += 1
@@ -105,7 +106,7 @@ def parse(self, verbose):
         if self.indexFlag:
             # is it scan or mgfi
             if self.indexFlag == "scan" or self.indexFlag == "mgfi":
-                verboseprint("indexFlag is OK.")
+                verbosePrint("indexFlag is OK.")
             # is there potentially some weird colon escaping in the msRun name?
             else:
                 potentialOffsetShift = offsetShift
@@ -129,21 +130,21 @@ def parse(self, verbose):
 
                 # colon escape fixed and msRun field updated
                 if repaired:
-                    verboseprint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this")
-                    verboseprint("msRun name revised to '{}'".format(self.msRunName))
+                    verbosePrint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this")
+                    verbosePrint("msRun name revised to '{}'".format(self.msRunName))
 
                 # no 'scan' or 'mgfi' fields found later. assume broken index flag
                 else:
                     self.error += 1
-                    verboseprint("Index type invalid. Must be 'scan' or 'mgfi'")
+                    verbosePrint("Index type invalid. Must be 'scan' or 'mgfi'")
                     self.indexFlag = "ERROR"
                     r.code = "ERROR"
                     return r
 
         # no index flag
         else:
             self.error += 1
-            verboseprint("Index flag empty! Not permitted.")
+            verbosePrint("Index flag empty! Not permitted.")
             self.indexFlag = "ERROR"
             r.code = "ERROR"
             return r
@@ -153,9 +154,9 @@ def parse(self, verbose):
         # index for index flag if flag is valid. useless if index flag is invalid
         self.index = elements[offset]
         if self.index:
-            verboseprint("Index is " + self.index)
+            verbosePrint("Index is " + self.index)
         else:
-            verboseprint("Index field empty. Not permitted.")
+            verbosePrint("Index field empty. Not permitted.")
             self.error += 1
 
         elementOffset += 1
@@ -164,25 +165,40 @@ def parse(self, verbose):
         # if statement check to see if the USI even has an interpretation field
         if offset < nElements:
             self.interpretation = elements[offset]
+            verbosePrint(self.interpretation)
             self.peptidoform = ''
             self.charge = ''
             if self.interpretation and self.interpretation != '':
-                find = re.match("^\s*(.+)\/(\d+)\s*$", self.interpretation)
+                find = re.match(r"^\s*(.+)/(\d+)\s*$", self.interpretation)
                 # match
                 if find:
                     # subfields of interpretation
                     self.peptidoform = find.group(1)
                     self.charge = find.group(2)
-                    verboseprint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge))
+                    verbosePrint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge))
+                    # look up each bracketed modification, e.g. [Phospho]; a peptidoform may have none
+                    modNames = re.findall(r"\[([A-Z][^\]]+)\]", self.peptidoform)
+                    unimod = Unimod(source="http://www.unimod.org/xml/unimod.xml") if modNames else None
+                    for modName in modNames:
+                        res = unimod.by_title(modName)
+                        # NOTE(review): by_title appears to return a list unless exactly one record matches - confirm
+                        if isinstance(res, dict):
+                            verbosePrint("Valid peptidoform {}".format(modName))
+                            verbosePrint("     id= {}".format(res["record_id"]))
+                            verbosePrint("     mono= {}".format(res["mono_mass"]))
+                        else:
+                            verbosePrint("ERROR: Modification " + modName + " is not found!")
                 else:
-                    verboseprint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation))
+                    verbosePrint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation))
             else:
-                verboseprint("Interpretation field not provided. OK.")
+                verbosePrint("Interpretation field not provided. OK.")
+        # step past the interpretation so provenance is read from the next field, not the same one
+        offset += 1
 
         # provenance identifier
         if offset < nElements:
             self.provenanceIdentifier = elements[offset]
-            print("Provenance Identifier = ".format(self.provenanceIdentifier))
+            verbosePrint("Provenance Identifier = {}".format(self.provenanceIdentifier))
         # returns count of errors found in usi. useful for checking if the entire identifier is valid.
 
         if self.error > 0:
@@ -191,17 +207,16 @@ def parse(self, verbose):
             r.code = "OK"
         # no errors
         if r.code == "OK":
-            print()
             print("Found index '" + self.index
-                + "' from USI " + self.usi + "\n")
+                  + "' from USI " + self.usi + "\n")
             # self.show()
             self.valid = True
         # errors found in usi
         else:
             print("Number of errors: " + str(self.error))
             self.valid = False
             print("ERROR: Invalid USI " + self.usi)
-        print()
+        return r
 
     # prints out USI attributes
     def show(self):
@@ -213,6 +228,7 @@ def show(self):
         print("Index: " + str(self.index))
         print("Peptido form: " + str(self.peptidoform))
         print("Charge: " + str(self.charge))
+        print("Provenance Identifier: " + str(self.provenanceIdentifier))
 
 
 # If this class is run from the command line, perform a short little test to see if it is working correctly
@@ -249,7 +265,6 @@ def main():
     # check to see if parsing is correct
     print(testUSIsValid)
 
-
 # if __name__ == "__main__": main()
 # inp = input("usi: ")
 # usi = UniversalSpectrumIdentifier(inp)

diff --git a/Python/main.py b/Python/main.py
@@ -1,18 +1,21 @@
-from Spectrum import Spectrum
-from UniversalSpectrumIdentifier import UniversalSpectrumIdentifier
+from Python.Spectrum import Spectrum
+from Python.UniversalSpectrumIdentifier import UniversalSpectrumIdentifier
 
 # USI created
-usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2")
-# usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2")
+# usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2")
+
+
+usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2")
 # usi = UniversalSpectrumIdentifier("mzspec:PXD005712::20152002_RG_150218_Saita_Ctrl_3XXXXX:scan:5748:AVAAVAATGPASAPGPGGGR/2")
-usi.parse(verbose=False)
+usi.parse(verbose=True)
 # if the USI is okay then create a spectrum class to fetch from the online database
 if usi.valid:
     # spectrum class just takes in a USI
     spectrum = Spectrum(usi)
     # fetches the USI from the PeptideAtlas database or whatever database is specified
     resp = spectrum.fetch('PeptideAtlas')
     print(resp.code)
+    print(resp.message)
     if resp.code == 'OK':
         spectrum.show()
 

diff --git a/Python/proteome_xml_to_json.py b/Python/proteome_xml_to_json.py
@@ -0,0 +1,14 @@
+# Fetches a ProteomeXchange dataset record as XML and pretty-prints it as JSON.
+
+from xmltodict import parse
+import requests
+from json import dumps, loads
+
+datasetId = input("ID: ")
+response = requests.get(
+    "http://proteomecentral.proteomexchange.org/cgi/GetDataset",
+    # passed as params so requests URL-encodes whatever the user typed
+    params={"ID": datasetId, "outputMode": "XML"})
+if response.status_code == 200:
+    print(dumps(parse(response.text), indent=4, sort_keys=True))
+print("Done")
diff --git a/Python/unimodParse.py b/Python/unimodParse.py
@@ -0,0 +1,4 @@
+from pyteomics.mass import Unimod
+
+modTest = Unimod(source="http://www.unimod.org/xml/unimod.xml")
+print(modTest.by_title("Phospho"))