Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
\.idea/

Python/__pycache__/

\.vscode/
9 changes: 9 additions & 0 deletions Python/Response.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,14 @@ def code(self):
def code(self, code):
self._code = code

@property
def message(self):
    """Return the message currently stored on this response."""
    return self._message

@message.setter
def message(self, value):
    """Replace the stored message with ``value``."""
    self._message = value

def __init__(self):
    """Create a response in the "NEW" state with no message attached."""
    # Write the backing fields directly; the properties read these names.
    self._message = None
    self._code = "NEW"

14 changes: 9 additions & 5 deletions Python/Spectrum.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@

import requests
import Response

from Response import Response


class Spectrum(object):

# takes a USI object directly and stores its USI string
def __init__(self, usi):
self.usi = usi.usi
self.r = Response.Response()
self.r = Response()
self.results = None
self.name = None
self.precursorMZ = None
Expand All @@ -20,13 +21,13 @@ def fetch(self, source: str) -> Response:
res = requests.get(
"https://db.systemsbiology.net/dev2/sbeams/cgi/{source}/Spectrum?usi={usi}".format(source=source,
usi=self.usi))

if res.status_code == 200:
data = res.json()
# spectrum json tag
if(data["nErrors"]>0):
print(data["message"])

self.r.code = "ERROR"
self.r.message = data["message"]
else:
self.results = data["results"][0]["Spectrum"]

Expand All @@ -36,9 +37,12 @@ def fetch(self, source: str) -> Response:
self.numPeaks = self.results["NumPeaks"]
self.peakList = self.results["PeakList"]
self.r.code = "OK"
self.r.message = data["message"]

else:
self.r.code = "ERROR"

# self.r.message = ""

return self.r

# prints out attributes
Expand Down
81 changes: 48 additions & 33 deletions Python/UniversalSpectrumIdentifier.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import re
import Response
from Python.Response import Response
from pyteomics.mass.mass import Unimod
# Unimod


class UniversalSpectrumIdentifier(object):

# usi object takes usiStr and automatically parses it and stores attributes
Expand All @@ -10,6 +14,7 @@ def __init__(self, usi):

self.valid = False
self.usi = usi
self.usiMzspec = None
self.datasetIdentifier = None
self.datasetSubfolder = None
self.msRunName = None
Expand All @@ -20,10 +25,6 @@ def __init__(self, usi):
self.charge = None
self.provenanceIdentifier = None
self.error = 0

# parse out usi and store response



# Attributes:
# usi
Expand All @@ -38,17 +39,17 @@ def __init__(self, usi):

# parses USI string
def parse(self, verbose):
r = Response.Response()
r = Response()
print()
verboseprint = print if verbose else lambda *a, **k: None
verboseprint("\nINFO: Parsing USI string '" + self.usi + "'")
verbosePrint = print if verbose else lambda *a, **k: None
verbosePrint("\nINFO: Parsing USI string '" + self.usi + "'")
elementOffset = 0
offset = 0

if self.usi.startswith("mzspec:"):
self.usiMzspec = self.usi[len("mzspec:"):]
else:
self.error += 1
verboseprint("ERROR: USI does not begin with prefix 'mszpec:'")
verbosePrint("ERROR: USI does not begin with prefix 'mszpec:'")
r.code = "ERROR"
return r

Expand All @@ -60,7 +61,7 @@ def parse(self, verbose):

# checks if usi has at least 4 colon-separated fields
if nElements < 4:
verboseprint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec")
verbosePrint("ERROR: USI does not have the minimum required 4 colon-separated fields after mzspec")
self.error += 1
r.code = "ERROR"
return r
Expand All @@ -69,32 +70,32 @@ def parse(self, verbose):
# datasetIdentifier field
self.datasetIdentifier = elements[offset]
if self.datasetIdentifier is None:
verboseprint("Dataset identifier is empty. Not permitted.")
verbosePrint("Dataset identifier is empty. Not permitted.")
self.error += 1

# this is the way it has been implemented now, but it can easily be changed to regex for other types of datasets
elif self.datasetIdentifier.startswith("PXD"):
self.datasetIdentifier = elements[offset]
verboseprint("Dataset identifier is PXD compliant. Allowed.")
verbosePrint("Dataset identifier is PXD compliant. Allowed.")
else:
verboseprint("Dataset identifier unknown. Not permitted.")
verbosePrint("Dataset identifier unknown. Not permitted.")
self.error += 1
elementOffset += 1
offset = elementOffset
nextField = elements[offset]
offsetShift = 0
# empty datasetsubfolder
if nextField == '':
verboseprint("old style. empty is ok. Empty datasetsubfolder probably.")
verbosePrint("old style. empty is ok. Empty datasetsubfolder probably.")
offsetShift = 1

offset = elementOffset + offsetShift
self.msRunName = elements[offset]

if self.msRunName:
verboseprint("MS run equals " + self.msRunName)
verbosePrint("MS run equals " + self.msRunName)
else:
verboseprint("MS Run identifier empty. Not permitted.")
verbosePrint("MS Run identifier empty. Not permitted.")
self.error += 1

elementOffset += 1
Expand All @@ -105,7 +106,7 @@ def parse(self, verbose):
if self.indexFlag:
# is it scan or mgfi
if self.indexFlag == "scan" or self.indexFlag == "mgfi":
verboseprint("indexFlag is OK.")
verbosePrint("indexFlag is OK.")
# is there potentially some weird colon escaping in the msRun name?
else:
potentialOffsetShift = offsetShift
Expand All @@ -129,21 +130,21 @@ def parse(self, verbose):

# colon escape fixed and msRun field updated
if repaired:
verboseprint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this")
verboseprint("msRun name revised to '{}'".format(self.msRunName))
verbosePrint("Unescaped colon in msRun name. Hopefully taken care of. Please fix this")
verbosePrint("msRun name revised to '{}'".format(self.msRunName))

# no 'scan' or 'mgfi' fields found later. assume broken index flag
else:
self.error += 1
verboseprint("Index type invalid. Must be 'scan' or 'mgfi'")
verbosePrint("Index type invalid. Must be 'scan' or 'mgfi'")
self.indexFlag = "ERROR"
r.code = "ERROR"
return r

# no index flag
else:
self.error += 1
verboseprint("Index flag empty! Not permitted.")
verbosePrint("Index flag empty! Not permitted.")
self.indexFlag = "ERROR"
r.code = "ERROR"
return r
Expand All @@ -153,9 +154,9 @@ def parse(self, verbose):
# index for index flag if flag is valid. useless if index flag is invalid
self.index = elements[offset]
if self.index:
verboseprint("Index is " + self.index)
verbosePrint("Index is " + self.index)
else:
verboseprint("Index field empty. Not permitted.")
verbosePrint("Index field empty. Not permitted.")
self.error += 1

elementOffset += 1
Expand All @@ -164,25 +165,40 @@ def parse(self, verbose):
# if statement check to see if the USI even has an interpretation field
if offset < nElements:
self.interpretation = elements[offset]
print(self.interpretation)
self.peptidoform = ''
self.charge = ''
if self.interpretation and self.interpretation != '':
find = re.match("^\s*(.+)\/(\d+)\s*$", self.interpretation)
find = re.match("^\s*(.+)(\d+)\s*$", self.interpretation)
# match
if find:
# subfields of interpretation
self.peptidoform = find.group(1)
# print(self.peptidoform)
self.charge = find.group(2)
verboseprint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge))
verbosePrint("Interpreted peptidoform = {}, charge = {}".format(self.peptidoform, self.charge))
# peptido = Unimod(source="http://www.unimod.org/xml/unimod.xml")
find2 = str(re.findall('\[([A-Z].+)\]', str(self.peptidoform))[0])
peptide = Unimod(source="http://www.unimod.org/xml/unimod.xml")
res = peptide.by_title(str(find2))
if res is not None:
verbosePrint("Valid peptidoform {}".format(find2))
verbosePrint(" id= {}".format(res["record_id"]))
verbosePrint(" mono= {}".format(res["mono_mass"]))
else:
verbosePrint("ERROR: Modification " + find2 + " is not found!")



else:
verboseprint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation))
verbosePrint("Unable to parse interpretation {} as peptidoform/charge".format(self.interpretation))
else:
verboseprint("Interpretation field not provided. OK.")
verbosePrint("Interpretation field not provided. OK.")

# provenance identifier
if offset < nElements:
self.provenanceIdentifier = elements[offset]
print("Provenance Identifier = ".format(self.provenanceIdentifier))
verbosePrint("Provenance Identifier = {}".format(self.provenanceIdentifier))
# returns count of errors found in usi. useful for checking if the entire identifier is valid.

if self.error > 0:
Expand All @@ -191,17 +207,16 @@ def parse(self, verbose):
r.code = "OK"
# no errors
if r.code == "OK":
print()

print("Found index '" + self.index
+ "' from USI " + self.usi + "\n")
+ "' from USI " + self.usi + "\n")
# self.show()
self.valid = True
# errors found in usi
else:
print("Number of errors: " + str(self.error))
self.valid = False
print("ERROR: Invalid USI " + self.usi)
print()

# prints out USI attributes
def show(self):
Expand All @@ -213,6 +228,7 @@ def show(self):
print("Index: " + str(self.index))
print("Peptido form: " + str(self.peptidoform))
print("Charge: " + str(self.charge))
print("Provenance Identifier: " + str(self.provenanceIdentifier))


# If this class is run from the command line, perform a short little test to see if it is working correctly
Expand Down Expand Up @@ -249,7 +265,6 @@ def main():
# check to see if parsing is correct
print(testUSIsValid)


# if __name__ == "__main__": main()
# inp = input("usi: ")
# usi = UniversalSpectrumIdentifier(inp)
Expand Down
13 changes: 8 additions & 5 deletions Python/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from Spectrum import Spectrum
from UniversalSpectrumIdentifier import UniversalSpectrumIdentifier
from Python.Spectrum import Spectrum
from Python.UniversalSpectrumIdentifier import UniversalSpectrumIdentifier

# USI created
usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2")
# usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2")
# usi = UniversalSpectrumIdentifier("asdf:PXD000561::Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:VLHPLEGAVVIIFK/2")


usi = UniversalSpectrumIdentifier("mzspec:PXD002437:00261_A06_P001564_B00E_A00_R1:scan:10951:PEPT[Phospho]IDELVISK/2")
# usi = UniversalSpectrumIdentifier("mzspec:PXD005712::20152002_RG_150218_Saita_Ctrl_3XXXXX:scan:5748:AVAAVAATGPASAPGPGGGR/2")
usi.parse(verbose=False)
usi.parse(verbose=True)
# if the USI is okay then create a spectrum class to fetch from the online database
if usi.valid:
# spectrum class just takes in a USI
spectrum = Spectrum(usi)
# fetches the USI from the PeptideAtlas database or whatever database is specified
resp = spectrum.fetch('PeptideAtlas')
print(resp.code)
print(resp.message)
if resp.code == 'OK':
spectrum.show()

Expand Down
14 changes: 14 additions & 0 deletions Python/proteome_xml_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# import json

from xmltodict import parse
import requests
from json import dumps, loads

# Prompt for a dataset ID, download that dataset's XML record from
# ProteomeCentral and print it as indented, key-sorted JSON.
#
# `dataset_id` is used instead of `id` so the builtin is not shadowed.
dataset_id = input("ID: ").strip()
response = requests.get(
    "http://proteomecentral.proteomexchange.org/cgi/GetDataset",
    # Let requests build the query string so the user-supplied ID is
    # URL-encoded instead of being pasted into the URL verbatim.
    params={"ID": dataset_id, "outputMode": "XML"},
    # Without a timeout a stalled server would hang the script forever.
    timeout=30,
)
if response.status_code == 200:
    data = parse(response.text)
    # The parsed document is serialised once; the former
    # dumps -> loads -> dumps round trip produced the same text.
    print(dumps(data, indent=4, sort_keys=True))
    print("Done")
else:
    # Report the failure instead of finishing without any output.
    print("ERROR: request failed with HTTP status {}".format(response.status_code))
4 changes: 4 additions & 0 deletions Python/unimodParse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from pyteomics.mass import Unimod

# Build a Unimod object from the unimod.org XML file, look up the entry
# titled "Phospho" and print whatever the lookup returns.
modTest = Unimod(source="http://www.unimod.org/xml/unimod.xml")
phospho_entry = modTest.by_title("Phospho")
print(phospho_entry)