diff --git a/.gitignore b/.gitignore index 4bcbd92..f32dd70 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ \.idea/ Python/__pycache__/ + +\.vscode/ diff --git a/Python/Response.py b/Python/Response.py index 6f58204..82aeb72 100644 --- a/Python/Response.py +++ b/Python/Response.py @@ -10,5 +10,14 @@ def code(self): def code(self, code): self._code = code + @property + def message(self): + return self._message + @message.setter + def message(self, message): + self._message = message + def __init__(self): self._code = "NEW" + self._message = None + diff --git a/Python/Spectrum.py b/Python/Spectrum.py index 25871d8..7b7b41e 100644 --- a/Python/Spectrum.py +++ b/Python/Spectrum.py @@ -1,6 +1,7 @@ import requests -import Response + +from Response import Response class Spectrum(object): @@ -8,7 +9,7 @@ class Spectrum(object): # directly takes an USI def __init__(self, usi): self.usi = usi.usi - self.r = Response.Response() + self.r = Response() self.results = None self.name = None self.precursorMZ = None @@ -20,13 +21,13 @@ def fetch(self, source: str) -> Response: res = requests.get( "https://db.systemsbiology.net/dev2/sbeams/cgi/{source}/Spectrum?usi={usi}".format(source=source, usi=self.usi)) - if res.status_code == 200: data = res.json() # spectrum json tag if(data["nErrors"]>0): - print(data["message"]) + self.r.code = "ERROR" + self.r.message = data["message"] else: self.results = data["results"][0]["Spectrum"] @@ -36,9 +37,12 @@ def fetch(self, source: str) -> Response: self.numPeaks = self.results["NumPeaks"] self.peakList = self.results["PeakList"] self.r.code = "OK" + self.r.message = data["message"] + else: self.r.code = "ERROR" - + # self.r.message = "" + return self.r # prints out attributes diff --git a/Python/UniversalSpectrumIdentifier.py b/Python/UniversalSpectrumIdentifier.py index f0dc10e..b1ef823 100644 --- a/Python/UniversalSpectrumIdentifier.py +++ b/Python/UniversalSpectrumIdentifier.py @@ -1,5 +1,9 @@ import re -import Response +from Python.Response import Response +from pyteomics.mass.mass import Unimod +# Unimod + + class UniversalSpectrumIdentifier(object): # usi object takes usiStr an automatically parses it and stores attributes @@ -10,6 +14,7 @@ def __init__(self, usi): self.valid = False self.usi = usi + self.usiMzspec = None self.datasetIdentifier = None self.datasetSubfolder = None self.msRunName = None @@ -20,10 +25,6 @@ def __init__(self, usi): self.charge = None self.provenanceIdentifier = None self.error = 0 - - # parse out usi and store response - - # Attributes: # usi @@ -38,17 +39,17 @@ def __init__(self, usi): # parses USI string def parse(self, verbose): - r = Response.Response() + r = Response() print() - verboseprint = print if verbose else lambda *a, **k: None - verboseprint("\nINFO: Parsing USI string '" + self.usi + "'") + verbosePrint = print if verbose else lambda *a, **k: None + verbosePrint("\nINFO: Parsing USI string '" + self.usi + "'") elementOffset = 0 - offset = 0 + if self.usi.startswith("mzspec:"): self.usiMzspec = self.usi[len("mzspec:"):] else: self.error += 1 - verboseprint("ERROR: USI does not begin with prefix 'mszpec:'") + verbosePrint("ERROR: USI does not begin with prefix 'mszpec:'") r.code = "ERROR" return r @@ -60,7 +61,7 @@ def parse(self, verbose): # checks if usi has at least 4 colon-separated fields if nElements < 4: - verboseprint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec") + verbosePrint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec") self.error += 1 r.code = "ERROR" return r @@ -69,15 +70,15 @@ def parse(self, verbose): # datasetIdentifier field self.datasetIdentifier = elements[offset] if self.datasetIdentifier is None: - verboseprint("Dataset identifier is empty. Not permitted.") + verbosePrint("Dataset identifier is empty. Not permitted.") self.error += 1 # this is the way it has been implemented now, but it can easily be changed to regex for other types of datasets elif self.datasetIdentifier.startswith("PXD"): self.datasetIdentifier = elements[offset] - verboseprint("Dataset identifier is PXD compliant. Allowed.") + verbosePrint("Dataset identifier is PXD compliant. Allowed.") else: - verboseprint("Dataset identifier unknown. Not permitted.") + verbosePrint("Dataset identifier unknown. Not permitted.") self.error += 1 elementOffset += 1 offset = elementOffset @@ -85,16 +86,16 @@ def parse(self, verbose): offsetShift = 0 # empty datasetsubfolder if nextField == '': - verboseprint("old style. empty is ok. Empty datasetsubfolder probably.") + verbosePrint("old style. empty is ok. Empty datasetsubfolder probably.") offsetShift = 1 offset = elementOffset + offsetShift self.msRunName = elements[offset] if self.msRunName: - verboseprint("MS run equals " + self.msRunName) + verbosePrint("MS run equals " + self.msRunName) else: - verboseprint("MS Run identifier empty. Not permitted.") + verbosePrint("MS Run identifier empty. Not permitted.") self.error += 1 elementOffset += 1 @@ -105,7 +106,7 @@ def parse(self, verbose): if self.indexFlag: # is it scan or mgfi if self.indexFlag == "scan" or self.indexFlag == "mgfi": - verboseprint("indexFlag is OK.") + verbosePrint("indexFlag is OK.") # is there potentially some weird colon escaping in the msRun name? else: potentialOffsetShift = offsetShift @@ -129,13 +130,13 @@ def parse(self, verbose): # colon escape fixed and msRun field updated if repaired: - verboseprint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this") - verboseprint("msRun name revised to '{}'".format(self.msRunName)) + verbosePrint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this") + verbosePrint("msRun name revised to '{}'".format(self.msRunName)) # no 'scan' or 'mgfi' fields found later. assume broken index flag else: self.error += 1 - verboseprint("Index type invalid. Must be 'scan' or 'mgfi'") + verbosePrint("Index type invalid. Must be 'scan' or 'mgfi'") self.indexFlag = "ERROR" r.code = "ERROR" return r @@ -143,7 +144,7 @@ def parse(self, verbose): # no index flag else: self.error += 1 - verboseprint("Index flag empty! Not permitted.") + verbosePrint("Index flag empty! Not permitted.") self.indexFlag = "ERROR" r.code = "ERROR" return r @@ -153,9 +154,9 @@ def parse(self, verbose): # index for index flag if flag is valid. useless if index flag is invalid self.index = elements[offset] if self.index: - verboseprint("Index is " + self.index) + verbosePrint("Index is " + self.index) else: - verboseprint("Index field empty. Not permitted.") + verbosePrint("Index field empty. Not permitted.") self.error += 1 elementOffset += 1 @@ -164,25 +165,40 @@ def parse(self, verbose): # if statement check to see if the USI even has an interpretation field if offset < nElements: self.interpretation = elements[offset] + print(self.interpretation) self.peptidoform = '' self.charge = '' if self.interpretation and self.interpretation != '': - find = re.match("^\s*(.+)\/(\d+)\s*$", self.interpretation) + find = re.match("^\s*(.+)(\d+)\s*$", self.interpretation) # match if find: # subfields of interpretation self.peptidoform = find.group(1) + # print(self.peptidoform) self.charge = find.group(2) - verboseprint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge)) + verbosePrint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge)) + # peptido = Unimod(source="http://www.unimod.org/xml/unimod.xml") + find2 = str(re.findall('\[([A-Z].+)\]', str(self.peptidoform))[0]) + peptide = Unimod(source="http://www.unimod.org/xml/unimod.xml") + res = peptide.by_title(str(find2)) + if res is not None: + verbosePrint("Valid peptidoform {}".format(find2)) + verbosePrint(" id= {}".format(res["record_id"])) + verbosePrint(" mono= {}".format(res["mono_mass"])) + else: + verbosePrint("ERROR: Modification " + find2 + " is not found!") + + + else: - verboseprint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation)) + verbosePrint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation)) else: - verboseprint("Interpretation field not provided. OK.") + verbosePrint("Interpretation field not provided. OK.") # provenance identifier if offset < nElements: self.provenanceIdentifier = elements[offset] - print("Provenance Identifier = ".format(self.provenanceIdentifier)) + verbosePrint("Provenance Identifier = {}".format(self.provenanceIdentifier)) # returns count of errors found in usi. useful for checking if the entire identifier is valid. if self.error > 0: @@ -191,9 +207,9 @@ def parse(self, verbose): r.code = "OK" # no errors if r.code == "OK": - print() + print("Found index '" + self.index - + "' from USI " + self.usi + "\n") + + "' from USI " + self.usi + "\n") # self.show() self.valid = True # errors found in usi @@ -201,7 +217,6 @@ def parse(self, verbose): print("Number of errors: " + str(self.error)) self.valid = False print("ERROR: Invalid USI " + self.usi) - print() # prints out USI attributes def show(self): @@ -213,6 +228,7 @@ def show(self): print("Index: " + str(self.index)) print("Peptido form: " + str(self.peptidoform)) print("Charge: " + str(self.charge)) + print("Provenance Identifier: " + str(self.provenanceIdentifier)) # If this class is run from the command line, perform a short little test to see if it is working correctly @@ -249,7 +265,6 @@ def main(): # check to see if parsing is correct print(testUSIsValid) - # if __name__ == "__main__": main() # inp = input("usi: ") # usi = UniversalSpectrumIdentifier(inp) diff --git a/Python/main.py b/Python/main.py index 5d40ae9..7f5987e 100644 --- a/Python/main.py +++ b/Python/main.py @@ -1,11 +1,13 @@ -from Spectrum import Spectrum -from UniversalSpectrumIdentifier import UniversalSpectrumIdentifier +from Python.Spectrum import Spectrum +from Python.UniversalSpectrumIdentifier import UniversalSpectrumIdentifier # USI created -usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2") -# usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2") +# usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2") + + +usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2") # usi = UniversalSpectrumIdentifier("mzspec:PXD005712::20152002_RG_150218_Saita_Ctrl_3XXXXX:scan:5748:AVAAVAATGPASAPGPGGGR/2") -usi.parse(verbose=False) +usi.parse(verbose=True) # if the USI is okay then create a spectrum class to fetch from the online database if usi.valid: # spectrum class just takes in a USI @@ -13,6 +15,7 @@ # fetches the USI from the PeptideAtlas database or whatever database is specified resp = spectrum.fetch('PeptideAtlas') print(resp.code) + print(resp.message) if resp.code == 'OK': spectrum.show() diff --git a/Python/proteome_xml_to_json.py b/Python/proteome_xml_to_json.py new file mode 100644 index 0000000..eb19444 --- /dev/null +++ b/Python/proteome_xml_to_json.py @@ -0,0 +1,14 @@ +# import json + +from xmltodict import parse +import requests +from json import dumps, loads + +id = input("ID: ") +response = requests.get( + "http://proteomecentral.proteomexchange.org/cgi/GetDataset?ID={id}&outputMode=XML".format(id=id)) +if response.status_code == 200: + data = parse(response.text) + jsonData = dumps(data) + print(dumps(loads(jsonData), indent=4, sort_keys=True)) +print("Done") diff --git a/Python/unimodParse.py b/Python/unimodParse.py new file mode 100644 index 0000000..96fcc95 --- /dev/null +++ b/Python/unimodParse.py @@ -0,0 +1,4 @@ +from pyteomics.mass import Unimod + +modTest = Unimod(source="http://www.unimod.org/xml/unimod.xml") +print(modTest.by_title("Phospho"))