Matrix Science header

tools_aahelper.py

For calculating peptide and fragment masses.

#!/usr/bin/python
##############################################################################
# file: tools_aahelper.py                                                    #
# Mascot Parser toolkit example code                                         #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: parser/examples/test_python/tools_aahelper.py $
#     $Author: villek@matrixscience.com $
#       $Date: 2018-07-30 16:23:53 +0100 $
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $
##############################################################################

import msparser
import sys

def main() :
    if len(sys.argv) < 3 :
        # We need an enzyme to build a list of peptides and mod_file file if we
        # want to apply any modifications.
        print("Location of enzymes file and mod_file has to be specified as parameters")
        return 1


    enzymefile = open_enzymefile(sys.argv[1])

    # Note: both enzymefile *must* be kept in scope for as long as you use
    # Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser
    # manual.
    Trypsin = enzymefile.getEnzymeByName('Trypsin')

    if not Trypsin :
        print("Cannot find 'Trypsin' in the enzyme file. Cannot continue.")
        return 1


    modfile = open_modfile(sys.argv[2])

    aahelper = msparser.ms_aahelper()
    # Note: both modfile and Trypsin *must* be kept in scope for as
    # long as you use aahelper. See "Using the toolkit from Perl, Java and 
    # Python" in Mascot Parser manual.
    aahelper.setMasses(modfile.getMassFile())
    aahelper.setEnzyme(Trypsin)

    # Now we can generate peptides for a given protein. This is 
    # RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain
    # DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
    proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"

    # No missed cleavages are allowed (third parameter).
    aahelper.startIteratePeptides(proteinStr, len(proteinStr), 0); 

    print("List of peptides")

    while aahelper.getNextPeptide() :
        start = aahelper.getPepStart();  
        end = aahelper.getPepEnd()

        # getPepStart() and getPeptideEnd() return one-based index.
        peptideStr = proteinStr[start - 1 : end]
        print(peptideStr)

    print("End of list")

    # Create a list of fixed modifications.
    vecFixed = msparser.ms_modvector()
    vecFixed.appendModification( modfile.getModificationByName('Phospho (Y)') )

    # Create a list of variable modifications.
    vecVariable = msparser.ms_modvector()
    vecVariable.appendModification( modfile.getModificationByName('Oxidation (M)') )
    vecVariable.appendModification( modfile.getModificationByName('Acetyl (N-term)') )

    # Note: both vecFixed and vecVariable *must* be kept in scope for as
    # long as you use aahelper. See "Using the toolkit from Perl, Java and 
    # Python" in Mascot Parser manual.
    aahelper.setAvailableModifications(vecFixed, vecVariable)

    # ms_aahelper can also contain errors that might happen when applying
    # modifications, for instance when we have a conflict between two
    # modifications (same residue or same peptide end).
    if not aahelper.isValid() :
        print("Error while setting available modifications: %s" % aahelper.getLastErrorString())
        return 1

    # We will need also a separate error object for collecting peptide-specific
    # errors.
    err = msparser.ms_errs()

    # Example of how to call calcPeptideMZ(). It will often be more convenient to
    # create an ms_peptide instead, and then call getMrCalc() on that object.
    numThatMustBeModded = msparser.vectori()
    numThatMustBeModded.append(1);  # 1 acetylNterm modification
    numThatMustBeModded.append(1);  # 1 site is oxidised

    mr = aahelper.calcPeptideMZ(
        proteinStr, 
        len(proteinStr), 
        1, 
        10, # peptide ends (1-based)
        numThatMustBeModded, 
        0, # no charge - i.e. Mr
        msparser.MASS_TYPE_MONO,
        err
        )

    if err.isValid() :
        print("Peptide mass calculated using 'calcPeptideMZ' is %8.3f" % mr)
    else :
        print("Error while calculating peptide mass: %s" % err.getLastErrorString())
        # Don't need to halt as they are not fatal errors.
        err.clearAllErrors(); 

    # Create a peptide - which we can then fragment.
    #
    # Specify which residues are modified by which modification as it has to
    # correspond to a modification string:
    #
    # Nterm modification + 9 residues + Cterm modification
    numModded = msparser.vectori()
    numModded.append(2)  # N-term  - modified by "Acetyl (N-term)"
    numModded.append(1)  # M       - modified by "Oxidation (M)"
    numModded.append(0)  # A
    numModded.append(0)  # I
    numModded.append(0)  # F
    numModded.append(0)  # R
    numModded.append(0)  # I
    numModded.append(0)  # D
    numModded.append(0)  # E
    numModded.append(0)  # I
    numModded.append(0)  # R
    numModded.append(0)  # C-term

    # We have to specify (or at least supply an empty vector) which neutral loss
    # value to use, in case there are more than one available for a modification.
    whichNl = msparser.vectori()
    whichNl.append(0)  # N-term
    whichNl.append(1)  # M      - has 2 neutral losses. Specify the first (-98)
    whichNl.append(0)  # A
    whichNl.append(0)  # I
    whichNl.append(0)  # F
    whichNl.append(0)  # R
    whichNl.append(0)  # I
    whichNl.append(0)  # D
    whichNl.append(0)  # E
    whichNl.append(0)  # I
    whichNl.append(0)  # R
    whichNl.append(0)  # C-term

    peptide = aahelper.createPeptide(
        proteinStr, 
        len(proteinStr),
        1,
        10,          # end positions
        numModded, # modification string-like vector
        whichNl,   # which neutral loss to use
        0,          # no charge
        msparser.MASS_TYPE_MONO,
        err
        )

    if not err.isValid() :
        print("Error while creating a peptide: %s" % err.getLastErrorString())
        # Don't need to halt as they are not fatal errors.
        err.clearAllErrors()
    else :
        print("Peptide has been created successfully: %s" % peptide.getPeptideStr())
        

    # Keep a list of fragments from all series
    all_fragments = msparser.ms_fragmentvector()

    b_ions = fragmentPeptide(
        aahelper,
        peptide, 
        msparser.ms_fragmentationrules.FRAG_B_SERIES,
        'b-ion series',
        0,   # single-charged ions only
        mr  # maximal fragment mass to return
        )

    # copyFrom() can only be used to populate the list for the first time.
    all_fragments.copyFrom(b_ions)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES, 
        'y-ion series',
        0,          # single-charged ions only
        mr         # maximal fragment mass to return 
        )

    for i in range(fragments.getNumberOfFragments()) :
        all_fragments.appendFragment(fragments.getFragmentByNumber(i))


    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES, 
        'y++-ion series',
        2,          # double-charged ions only
        mr          # maximal fragment mass to return 
        ) 

    for i in range(fragments.getNumberOfFragments()) :
        all_fragments.appendFragment(fragments.getFragmentByNumber(i))


    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_INTERNAL_YB,
        'internal yb-ion series',
        0,          # single-charged ions only
        700         # maximal fragment mass to return 
        )

    for i in range(fragments.getNumberOfFragments()) :
        all_fragments.appendFragment(fragments.getFragmentByNumber(i))


    print("Paste the following into a Mascot search query window to verify this output:")

    displayMascotTestSearch(
        vecFixed, vecVariable, Trypsin, peptide.getMrCalc(), 
        b_ions # or you can use all_fragments
        ) 


def open_enzymefile(filename) :
    enzymefile = msparser.ms_enzymefile(filename)

    if not enzymefile.isValid :
        print("Error while opening enzyme file: %s " % enzymefile.getLastErrorString())
        sys.exit(1)

    return enzymefile


def open_modfile(filename) :
    # We can use the default masses in this example.
    masses = msparser.ms_masses()
    modfile = msparser.ms_modfile(filename, masses)

    if not modfile.isValid() :
        print("Error while opening mod file: %s" % modfile.getLastErrorString())
        sys.exit(1)
    
    for mod in ['Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)'] :
        if not modfile.getModificationByName(mod) :
            print("Cannot find '%s' in the mod file. Cannot continue." % mod)
            sys.exit(1)

    return modfile


def fragmentPeptide(aahelper, peptide, series, series_label, doubleCharged, mass_max) :
    fragments = msparser.ms_fragmentvector()
    err = msparser.ms_errs()

    aahelper.calcFragments(
        peptide,
        series,
        doubleCharged,
        100.0,
        mass_max,
        msparser.MASS_TYPE_MONO,
        fragments,
        err
        ) 

    # Check err here.

    print(series_label + " fragments:")
    printFragmentsTable(fragments)

    return fragments


def printFragmentsTable(fragments) :

    print("Number of fragments: %d" % fragments.getNumberOfFragments())

    headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s"
    fmt       = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s"

    print(headerfmt % ("Col", "Start", "End", "Label", "Mass", "NL", "Name", "Immon", "Intern", "Reg"))

    for i in range(fragments.getNumberOfFragments()) :
        frag = fragments.getFragmentByNumber(i)

        print(fmt % (
            frag.getColumn(),
            frag.getStart(),
            frag.getEnd(),
            frag.getLabel(),
            frag.getMass(),
            frag.getNeutralLoss(),
            frag.getSeriesName(),
            frag.isImmonium(),
            frag.isInternal(),
            frag.isRegular()
            ))
    
    print(" ")

    
def displayMascotTestSearch(vecFixed, vecVariable, enzyme, mr, fragments) :
    """
    vecFixed contains a list of fixed mods applied to the peptide
    vecVariable contains a list of variable mods applied to the peptide
    enzyme is the enzyme used in fragmentation
    mr is the peptide Mr(calc)
    fragments contains a list of b-ions from a peptide
    
    Use this information to generate an test search that can be run on Mascot.
    """

    for i in range(vecFixed.getNumberOfModifications()) :
        print("MODS=%s" % vecFixed.getModificationByNumber(i).getTitle())
    

    for i in range(vecVariable.getNumberOfModifications()) :
        print("IT_MODS=%s" % vecVariable.getModificationByNumber(i).getTitle())
    

    print("CHARGE=Mr")
    print("CLE=%s" % enzyme.getTitle())
    print("INSTRUMENT=MALDI-TOF-TOF")

    masses = []
    for i in range(fragments.getNumberOfFragments()) :
        masses.append("%.3f" % fragments.getFragmentByNumber(i).getMass())

    print("%.3f ions(%s)" % (mr, ", ".join(masses)))


if __name__ == "__main__" :
    sys.exit(main())


"""

Running the program as

python tools_aahelper.pl /usr/local/mascot/config/enzymes /usr/local/mascot/config/mod_file

will give the following output under Mascot 2.3:


List of peptides
M
MAIFR
AIFR
IDEIR
NMSSEELEEELR
K
LEVELIR
ER
GAVR
AGGAPEKPGR
IR
EIR
R
TIAR
MK
TVQR
ER
VR
K
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.686
Peptide has been created successfully: MAIFRIDEIR
b-ion series fragments:
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    1     1    -1 b(1)        190.05    0.00    b False  False True
    2     2    -1 b(2)        261.09    0.00    b False  False True
    3     3    -1 b(3)        374.17    0.00    b False  False True
    4     4    -1 b(4)        521.24    0.00    b False  False True
    5     5    -1 b(5)        677.34    0.00    b False  False True
    6     6    -1 b(6)        790.43    0.00    b False  False True
    7     7    -1 b(7)        905.45    0.00    b False  False True
    8     8    -1 b(8)       1034.50    0.00    b False  False True
    9     9    -1 b(9)       1147.58    0.00    b False  False True

y-ion series fragments:
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)       1132.65    0.00    y False  False True
    8     8    -1 y(8)       1061.61    0.00    y False  False True
    7     7    -1 y(7)        948.53    0.00    y False  False True
    6     6    -1 y(6)        801.46    0.00    y False  False True
    5     5    -1 y(5)        645.36    0.00    y False  False True
    4     4    -1 y(4)        532.27    0.00    y False  False True
    3     3    -1 y(3)        417.25    0.00    y False  False True
    2     2    -1 y(2)        288.20    0.00    y False  False True
    1     1    -1 y(1)        175.12    0.00    y False  False True

y++-ion series fragments:
Number of fragments: 8
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)++      566.83    0.00    y False  False True
    8     8    -1 y(8)++      531.31    0.00    y False  False True
    7     7    -1 y(7)++      474.77    0.00    y False  False True
    6     6    -1 y(6)++      401.23    0.00    y False  False True
    5     5    -1 y(5)++      323.18    0.00    y False  False True
    4     4    -1 y(4)++      266.64    0.00    y False  False True
    3     3    -1 y(3)++      209.13    0.00    y False  False True
    2     2    -1 y(2)++      144.61    0.00    y False  False True

internal yb-ion series fragments:
Number of fragments: 22
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    2     2     3 AI          185.13    0.00   yb False   True False
    2     2     4 AIF         332.20    0.00   yb False   True False
    2     2     5 AIFR        488.30    0.00   yb False   True False
    2     2     6 AIFRI       601.38    0.00   yb False   True False
    3     3     4 IF          261.16    0.00   yb False   True False
    3     3     5 IFR         417.26    0.00   yb False   True False
    3     3     6 IFRI        530.34    0.00   yb False   True False
    3     3     7 IFRID       645.37    0.00   yb False   True False
    4     4     5 FR          304.18    0.00   yb False   True False
    4     4     6 FRI         417.26    0.00   yb False   True False
    4     4     7 FRID        532.29    0.00   yb False   True False
    4     4     8 FRIDE       661.33    0.00   yb False   True False
    5     5     6 RI          270.19    0.00   yb False   True False
    5     5     7 RID         385.22    0.00   yb False   True False
    5     5     8 RIDE        514.26    0.00   yb False   True False
    5     5     9 RIDEI       627.35    0.00   yb False   True False
    6     6     7 ID          229.12    0.00   yb False   True False
    6     6     8 IDE         358.16    0.00   yb False   True False
    6     6     9 IDEI        471.24    0.00   yb False   True False
    7     7     8 DE          245.08    0.00   yb False   True False
    7     7     9 DEI         358.16    0.00   yb False   True False
    8     8     9 EI          243.13    0.00   yb False   True False

Paste the following into a Mascot search query window to verify this output:
MODS=Phospho (Y)
IT_MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)

"""


Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29