For calculating peptide and fragment masses.
#!/usr/bin/python
##############################################################################
# file: tools_aahelper.py                                                    #
# Mascot Parser toolkit example code                                         #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved.            #
#                                                                            #
##############################################################################
# $Source: parser/examples/test_python/tools_aahelper.py $
# $Author: villek@matrixscience.com $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $
##############################################################################

import sys

import msparser


def main():
    """Run the ms_aahelper example from start to finish.

    Expects the enzymes file and the mod_file as the first and second
    command-line arguments. The example digests a sample protein with
    Trypsin, calculates a peptide mass, creates a modified peptide, fragments
    it into several ion series and finally prints a Mascot test search.

    Returns:
        1 if the arguments are missing, 'Trypsin' is not in the enzyme file,
        or the modifications cannot be applied; 0 otherwise. Failing to open
        either input file exits the process from the open_* helpers.
    """
    if len(sys.argv) < 3:
        # We need an enzyme to build a list of peptides and mod_file file if we
        # want to apply any modifications.
        print("Location of enzymes file and mod_file has to be specified as parameters")
        return 1

    enzymefile = open_enzymefile(sys.argv[1])

    # Note: enzymefile *must* be kept in scope for as long as you use
    # Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser
    # manual.
    Trypsin = enzymefile.getEnzymeByName('Trypsin')

    if not Trypsin:
        print("Cannot find 'Trypsin' in the enzyme file. Cannot continue.")
        return 1

    modfile = open_modfile(sys.argv[2])

    aahelper = msparser.ms_aahelper()

    # Note: both modfile and Trypsin *must* be kept in scope for as
    # long as you use aahelper. See "Using the toolkit from Perl, Java and
    # Python" in Mascot Parser manual.
    aahelper.setMasses(modfile.getMassFile())
    aahelper.setEnzyme(Trypsin)

    # Now we can generate peptides for a given protein. This is
    # RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain
    # DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
    proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"

    # No missed cleavages are allowed (third parameter).
    aahelper.startIteratePeptides(proteinStr, len(proteinStr), 0)

    print("List of peptides")

    while aahelper.getNextPeptide():
        start = aahelper.getPepStart()
        end = aahelper.getPepEnd()

        # getPepStart() and getPepEnd() return one-based indexes, hence the
        # "- 1" on the slice start.
        peptideStr = proteinStr[start - 1 : end]
        print(peptideStr)

    print("End of list")

    # Create a list of fixed modifications.
    vecFixed = msparser.ms_modvector()
    vecFixed.appendModification(modfile.getModificationByName('Phospho (Y)'))

    # Create a list of variable modifications.
    vecVariable = msparser.ms_modvector()
    vecVariable.appendModification(modfile.getModificationByName('Oxidation (M)'))
    vecVariable.appendModification(modfile.getModificationByName('Acetyl (N-term)'))

    # Note: both vecFixed and vecVariable *must* be kept in scope for as
    # long as you use aahelper. See "Using the toolkit from Perl, Java and
    # Python" in Mascot Parser manual.
    aahelper.setAvailableModifications(vecFixed, vecVariable)

    # ms_aahelper can also contain errors that might happen when applying
    # modifications, for instance when we have a conflict between two
    # modifications (same residue or same peptide end).
    if not aahelper.isValid():
        print("Error while setting available modifications: %s" % aahelper.getLastErrorString())
        return 1

    # We will need also a separate error object for collecting peptide-specific
    # errors.
    err = msparser.ms_errs()

    # Example of how to call calcPeptideMZ(). It will often be more convenient to
    # create an ms_peptide instead, and then call getMrCalc() on that object.
    #
    # One count is supplied per variable modification: one oxidised site and
    # one acetylated N-terminus.
    # NOTE(review): the entries presumably follow the order of vecVariable
    # (Oxidation (M), then Acetyl (N-term)); both counts are 1 here, so the
    # order does not affect this example - confirm against the Parser docs.
    numThatMustBeModded = msparser.vectori()
    numThatMustBeModded.append(1)
    numThatMustBeModded.append(1)

    mr = aahelper.calcPeptideMZ(
        proteinStr,
        len(proteinStr),
        1, 10,                     # peptide ends (1-based)
        numThatMustBeModded,
        0,                         # no charge - i.e. Mr
        msparser.MASS_TYPE_MONO,
        err
    )

    if err.isValid():
        print("Peptide mass calculated using 'calcPeptideMZ' is %8.3f" % mr)
    else:
        print("Error while calculating peptide mass: %s" % err.getLastErrorString())
        # Don't need to halt as they are not fatal errors.
        err.clearAllErrors()

    # Create a peptide - which we can then fragment.
    #
    # Specify which residues are modified by which modification as it has to
    # correspond to a modification string:
    #
    # Nterm modification + 10 residues + Cterm modification
    numModded = msparser.vectori()
    numModded.append(2)  # N-term - modified by "Acetyl (N-term)"
    numModded.append(1)  # M - modified by "Oxidation (M)"
    numModded.append(0)  # A
    numModded.append(0)  # I
    numModded.append(0)  # F
    numModded.append(0)  # R
    numModded.append(0)  # I
    numModded.append(0)  # D
    numModded.append(0)  # E
    numModded.append(0)  # I
    numModded.append(0)  # R
    numModded.append(0)  # C-term

    # We have to specify (or at least supply an empty vector) which neutral loss
    # value to use, in case there are more than one available for a modification.
    whichNl = msparser.vectori()
    whichNl.append(0)  # N-term
    # M - has 2 neutral losses. Specify the first (-98)
    # NOTE(review): the "-98" figure comes from the original example comment
    # and has not been checked against the mod_file entry for Oxidation (M).
    whichNl.append(1)
    whichNl.append(0)  # A
    whichNl.append(0)  # I
    whichNl.append(0)  # F
    whichNl.append(0)  # R
    whichNl.append(0)  # I
    whichNl.append(0)  # D
    whichNl.append(0)  # E
    whichNl.append(0)  # I
    whichNl.append(0)  # R
    whichNl.append(0)  # C-term

    peptide = aahelper.createPeptide(
        proteinStr,
        len(proteinStr),
        1, 10,                     # end positions
        numModded,                 # modification string-like vector
        whichNl,                   # which neutral loss to use
        0,                         # no charge
        msparser.MASS_TYPE_MONO,
        err
    )

    if not err.isValid():
        print("Error while creating a peptide: %s" % err.getLastErrorString())
        # Don't need to halt as they are not fatal errors.
        err.clearAllErrors()
    else:
        print("Peptide has been created successfully: %s" % peptide.getPeptideStr())

    # Keep a list of fragments from all series
    all_fragments = msparser.ms_fragmentvector()

    b_ions = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_B_SERIES,
        'b-ion series',
        0,   # single-charged ions only
        mr   # maximal fragment mass to return
    )

    # copyFrom() can only be used to populate the list for the first time.
    all_fragments.copyFrom(b_ions)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES,
        'y-ion series',
        0,   # single-charged ions only
        mr   # maximal fragment mass to return
    )
    _appendFragments(all_fragments, fragments)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES,
        'y++-ion series',
        2,   # double-charged ions only
        mr   # maximal fragment mass to return
    )
    _appendFragments(all_fragments, fragments)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_INTERNAL_YB,
        'internal yb-ion series',
        0,   # single-charged ions only
        700  # maximal fragment mass to return
    )
    _appendFragments(all_fragments, fragments)

    print("Paste the following into a Mascot search query window to verify this output:")
    displayMascotTestSearch(
        vecFixed,
        vecVariable,
        Trypsin,
        peptide.getMrCalc(),
        b_ions  # or you can use all_fragments
    )

    return 0


def _appendFragments(target, fragments):
    """Append every fragment in `fragments` to the fragment vector `target`."""
    for i in range(fragments.getNumberOfFragments()):
        target.appendFragment(fragments.getFragmentByNumber(i))


def open_enzymefile(filename):
    """Open the enzymes file at `filename` and return the ms_enzymefile.

    Prints the error and exits the process with status 1 if the file is not
    valid.
    """
    enzymefile = msparser.ms_enzymefile(filename)

    # isValid is a method and has to be called: the bare attribute is a bound
    # method object, which is always truthy, so the check could never fail.
    if not enzymefile.isValid():
        print("Error while opening enzyme file: %s " % enzymefile.getLastErrorString())
        sys.exit(1)

    return enzymefile


def open_modfile(filename):
    """Open the mod_file at `filename` and return the ms_modfile.

    Prints the error and exits the process with status 1 if the file is not
    valid or if any modification used by this example is missing from it.
    """
    # We can use the default masses in this example.
    masses = msparser.ms_masses()
    modfile = msparser.ms_modfile(filename, masses)

    if not modfile.isValid():
        print("Error while opening mod file: %s" % modfile.getLastErrorString())
        sys.exit(1)

    # main() looks these up by name later, so fail early if one is absent.
    for mod in ['Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)']:
        if not modfile.getModificationByName(mod):
            print("Cannot find '%s' in the mod file. Cannot continue." % mod)
            sys.exit(1)

    return modfile


def fragmentPeptide(aahelper, peptide, series, series_label, doubleCharged, mass_max):
    """Calculate, print and return the fragments of one ion series.

    Args:
        aahelper: the configured ms_aahelper used for the calculation.
        peptide: the peptide to fragment.
        series: the series constant passed through to calcFragments().
        series_label: text printed as the heading of the fragments table.
        doubleCharged: passed through to calcFragments(); main() uses 0 for
            single-charged and 2 for double-charged ions.
        mass_max: maximal fragment mass to return. The minimal mass is fixed
            at 100.0.

    Returns:
        The ms_fragmentvector filled in by calcFragments().
    """
    fragments = msparser.ms_fragmentvector()
    err = msparser.ms_errs()

    aahelper.calcFragments(
        peptide,
        series,
        doubleCharged,
        100.0,
        mass_max,
        msparser.MASS_TYPE_MONO,
        fragments,
        err
    )

    if not err.isValid():
        # Report the problem but carry on; main() treats peptide-level errors
        # as non-fatal too.
        print("Error while calculating %s fragments: %s"
              % (series_label, err.getLastErrorString()))

    print(series_label + " fragments:")
    printFragmentsTable(fragments)

    return fragments


def printFragmentsTable(fragments):
    """Print the number of fragments followed by one table row per fragment."""
    print("Number of fragments: %d" % fragments.getNumberOfFragments())

    # The two formats differ only in the Mass and NL columns: plain strings
    # for the header, two-decimal floats for the rows.
    headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s"
    fmt = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s"

    print(headerfmt % ("Col", "Start", "End", "Label", "Mass", "NL", "Name", "Immon", "Intern", "Reg"))

    for i in range(fragments.getNumberOfFragments()):
        frag = fragments.getFragmentByNumber(i)

        print(fmt % (
            frag.getColumn(),
            frag.getStart(),
            frag.getEnd(),
            frag.getLabel(),
            frag.getMass(),
            frag.getNeutralLoss(),
            frag.getSeriesName(),
            frag.isImmonium(),
            frag.isInternal(),
            frag.isRegular()
        ))

    print(" ")


def displayMascotTestSearch(vecFixed, vecVariable, enzyme, mr, fragments):
    """Print a test search that can be run on Mascot.

    vecFixed contains a list of fixed mods applied to the peptide
    vecVariable contains a list of variable mods applied to the peptide
    enzyme is the enzyme used in fragmentation
    mr is the peptide Mr(calc)
    fragments contains a list of b-ions from a peptide
    """
    for i in range(vecFixed.getNumberOfModifications()):
        print("MODS=%s" % vecFixed.getModificationByNumber(i).getTitle())

    for i in range(vecVariable.getNumberOfModifications()):
        print("IT_MODS=%s" % vecVariable.getModificationByNumber(i).getTitle())

    print("CHARGE=Mr")
    print("CLE=%s" % enzyme.getTitle())
    print("INSTRUMENT=MALDI-TOF-TOF")

    masses = [
        "%.3f" % fragments.getFragmentByNumber(i).getMass()
        for i in range(fragments.getNumberOfFragments())
    ]

    print("%.3f ions(%s)" % (mr, ", ".join(masses)))


if __name__ == "__main__":
    sys.exit(main())


"""

Running the program as

python tools_aahelper.py /usr/local/mascot/config/enzymes /usr/local/mascot/config/mod_file

will give the following output under Mascot 2.3:


List of peptides
M
MAIFR
AIFR
IDEIR
NMSSEELEEELR
K
LEVELIR
ER
GAVR
AGGAPEKPGR
IR
EIR
R
TIAR
MK
TVQR
ER
VR
K
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.686
Peptide has been created successfully: MAIFRIDEIR
b-ion series fragments:
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    1     1    -1 b(1)        190.05    0.00    b False  False True
    2     2    -1 b(2)        261.09    0.00    b False  False True
    3     3    -1 b(3)        374.17    0.00    b False  False True
    4     4    -1 b(4)        521.24    0.00    b False  False True
    5     5    -1 b(5)        677.34    0.00    b False  False True
    6     6    -1 b(6)        790.43    0.00    b False  False True
    7     7    -1 b(7)        905.45    0.00    b False  False True
    8     8    -1 b(8)       1034.50    0.00    b False  False True
    9     9    -1 b(9)       1147.58    0.00    b False  False True

y-ion series fragments:
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)       1132.65    0.00    y False  False True
    8     8    -1 y(8)       1061.61    0.00    y False  False True
    7     7    -1 y(7)        948.53    0.00    y False  False True
    6     6    -1 y(6)        801.46    0.00    y False  False True
    5     5    -1 y(5)        645.36    0.00    y False  False True
    4     4    -1 y(4)        532.27    0.00    y False  False True
    3     3    -1 y(3)        417.25    0.00    y False  False True
    2     2    -1 y(2)        288.20    0.00    y False  False True
    1     1    -1 y(1)        175.12    0.00    y False  False True

y++-ion series fragments:
Number of fragments: 8
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)++      566.83    0.00    y False  False True
    8     8    -1 y(8)++      531.31    0.00    y False  False True
    7     7    -1 y(7)++      474.77    0.00    y False  False True
    6     6    -1 y(6)++      401.23    0.00    y False  False True
    5     5    -1 y(5)++      323.18    0.00    y False  False True
    4     4    -1 y(4)++      266.64    0.00    y False  False True
    3     3    -1 y(3)++      209.13    0.00    y False  False True
    2     2    -1 y(2)++      144.61    0.00    y False  False True

internal yb-ion series fragments:
Number of fragments: 22
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    2     2     3 AI          185.13    0.00   yb False   True False
    2     2     4 AIF         332.20    0.00   yb False   True False
    2     2     5 AIFR        488.30    0.00   yb False   True False
    2     2     6 AIFRI       601.38    0.00   yb False   True False
    3     3     4 IF          261.16    0.00   yb False   True False
    3     3     5 IFR         417.26    0.00   yb False   True False
    3     3     6 IFRI        530.34    0.00   yb False   True False
    3     3     7 IFRID       645.37    0.00   yb False   True False
    4     4     5 FR          304.18    0.00   yb False   True False
    4     4     6 FRI         417.26    0.00   yb False   True False
    4     4     7 FRID        532.29    0.00   yb False   True False
    4     4     8 FRIDE       661.33    0.00   yb False   True False
    5     5     6 RI          270.19    0.00   yb False   True False
    5     5     7 RID         385.22    0.00   yb False   True False
    5     5     8 RIDE        514.26    0.00   yb False   True False
    5     5     9 RIDEI       627.35    0.00   yb False   True False
    6     6     7 ID          229.12    0.00   yb False   True False
    6     6     8 IDE         358.16    0.00   yb False   True False
    6     6     9 IDEI        471.24    0.00   yb False   True False
    7     7     8 DE          245.08    0.00   yb False   True False
    7     7     9 DEI         358.16    0.00   yb False   True False
    8     8     9 EI          243.13    0.00   yb False   True False

Paste the following into a Mascot search query window to verify this output:
MODS=Phospho (Y)
IT_MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)

"""
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29 |