Matrix Science header

tools_aahelper.cpp

For calculating peptide and fragment masses.

/*
##############################################################################
# File: tools_aahelper.cpp                                                   #
# Mascot Parser toolkit example code                                         #
# Example code for generating peptides, peptide and fragment masses          #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2006 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#    $Source: parser/examples/test_cxx/tools_aahelper.cpp $
#    $Author: villek@matrixscience.com $ 
#      $Date: 2018-07-30 16:23:53 +0100 $ 
#  $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $
##############################################################################
*/

#include "msparser.hpp"

#include <iostream>
#include <iomanip>

// All the classes are part of the matrix_science namespace
using namespace matrix_science;
static void printFragmentsTable(ms_fragmentvector & fragments);
static void displayMascotTestSearch(const ms_modvector      vecFixed,
                                    const ms_modvector      vecVariable,
                                    const ms_enzyme       * enzyme,
                                    const double            mr,
                                    const ms_fragmentvector fragments);

int main(int argc, char * argv[])
{
    // We need an enzyme to build a list of peptides
    // and mod_file file if we want to apply any modifications
    if ( argc < 3 )
    {
        std::cout << "Location of enzymes file and mod_file has to be specified as parameters" << std::endl;
        return 0;
    }

    ms_enzymefile enzymefile(argv[1]);
    if ( !enzymefile.isValid() )
    {
        std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
        std::cout << enzymefile.getLastErrorString() << std::endl;
        return 1;
    }
    const ms_enzyme *enzyme = enzymefile.getEnzymeByName("Trypsin");
    if ( enzyme == NULL )
    {
        std::cout << "Cannot find Trypsin enzyme in the file. Cannot continue." << std::endl;
        return 1;
    }

    // we need masses file, but can use default masses anyway
    ms_masses masses;
    ms_modfile modfile(argv[2], &masses);
    if ( !modfile.isValid() )
    {
        std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
        std::cout << modfile.getLastErrorString() << std::endl;
        return 1;
    }

    const ms_modification *oxidation   = modfile.getModificationByName("Oxidation (M)");
    const ms_modification *acetylNterm = modfile.getModificationByName("Acetyl (N-term)");
    const ms_modification *phospho     = modfile.getModificationByName("Phospho (STY)");

    if ( oxidation == NULL || acetylNterm == NULL || phospho == NULL)
    {
        std::cout << "Cannot find necessary modifications in the mod_file. Cannot continue." << std::endl;
        return 1;
    }

    ms_aahelper aahelper;
    aahelper.setMasses(&masses);
    aahelper.setEnzyme(enzyme);

    // Now we can generate peptides for a given protein
    char proteinStr[] = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK";

    aahelper.startIteratePeptides(proteinStr, static_cast<int>(strlen(proteinStr)), 0);// no missed cleavages are allowed
    std::cout << "List of peptides" << std::endl;
    while(aahelper.getNextPeptide())
    {
        int start = aahelper.getPepStart()-1; // the method returns 1-based position
        int len = aahelper.getPepEnd() - aahelper.getPepStart()+1;
        std::string peptideStr;
        peptideStr.assign(proteinStr+start, len);
        std::cout << peptideStr << std::endl;
    }

    std::cout << "End of list" << std::endl;

    // create a list of fixed modifications
    ms_modvector vecFixed;
    vecFixed.appendModification(phospho);

    // create a list of variable modifications
    ms_modvector vecVariable;
    vecVariable.appendModification(oxidation);
    vecVariable.appendModification(acetylNterm);

    aahelper.setAvailableModifications(&vecFixed, &vecVariable);

    // ms_aahelper can also contain errors that might happen when applying modifications
    // for instance, when we have a conflict between two modifications (same residue or same peptide end)
    if ( !aahelper.isValid() )
    {
        std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
        std::cout << aahelper.getLastErrorString() << std::endl;
        return 1;
    }

    // we will need also a separate error-object for collecting peptide-specific errors
    ms_errs err;

    // Example of how to call calcPeptideMZ
    // It will often be more convenient to create an ms_peptide and then
    // call getMrCalc() on that object.
    std::vector< int > numThatMustBeModded;
    numThatMustBeModded.push_back(1);  // 1 acetylNterm modification
    numThatMustBeModded.push_back(1);  // 1 site is oxidised


    double mr = aahelper.calcPeptideMZ(proteinStr, 
                                       static_cast<int>(strlen(proteinStr)), 
                                       1, 10, // peptide ends (1-based)
                                       numThatMustBeModded, 
                                       0, // no charge - i.e. mr
                                       matrix_science::MASS_TYPE_MONO,
                                       &err);
    if ( !err.isValid() )
    {
        std::cout << "There have been errors while calculating peptide mass: " << std::endl;
        std::cout << err.getLastErrorString() << std::endl;
        // don't need to halt as they are not fatal errors
        err.clearAllErrors(); // prepare to re-use it
    }
    else
    {
        std::cout << "Peptide mass calculated using 'calcPeptideMZ' is " 
                  << std::setprecision(3)
                  << std::fixed
                  << std::setw(8)
                  << mr 
                  << std::endl << std::endl << std::endl;
    }

    // Create a peptide - which we can then fragment
    //
    // Specify which residues are modified by which modification.
    // as it has to correspond to a modification string
    // Nterm modification + 9 residues + Cterm modification
    std::vector< int > numModded;
    numModded.push_back(2);  // N-term  - modified by "Acetyl (N-term)"
    numModded.push_back(1);  // M       - modified by "Oxidation (M)"
    numModded.push_back(0);  // A
    numModded.push_back(0);  // I
    numModded.push_back(0);  // F
    numModded.push_back(0);  // R
    numModded.push_back(0);  // I
    numModded.push_back(0);  // D
    numModded.push_back(0);  // E
    numModded.push_back(0);  // I
    numModded.push_back(0);  // R
    numModded.push_back(0);  // C-term


    // we have to specify (or at least supply an empty vector) which neutral loss value to use
    // in case there are more than one available for a modification
    std::vector< int > whichNl;
    whichNl.push_back(0);  // N-term
    whichNl.push_back(1);  // M - has 2 neutral losses. Specify the first (-98)
    whichNl.push_back(0);  // A
    whichNl.push_back(0);  // I
    whichNl.push_back(0);  // F
    whichNl.push_back(0);  // R
    whichNl.push_back(0);  // I
    whichNl.push_back(0);  // D
    whichNl.push_back(0);  // E
    whichNl.push_back(0);  // I
    whichNl.push_back(0);  // R
    whichNl.push_back(0);  // C-term


    ms_peptide peptide = aahelper.createPeptide(proteinStr, 
                                                static_cast<int>(strlen(proteinStr)),
                                                1,10,      // start, end positions
                                                numModded, // modification string-like vector
                                                whichNl,   // which neutral loss to use
                                                0,         // no charge
                                                matrix_science::MASS_TYPE_MONO,
                                                &err);     // collect errors in it
    if ( !err.isValid() )
    {
        std::cout << "There have been errors while creating a peptide: " << std::endl;
        std::cout << err.getLastErrorString() << std::endl;
        // don't need to halt as they are not fatal errors
        err.clearAllErrors(); // prepare to re-use it
    }
    else
    {
        std::cout 
            << "Peptide has been created successfully: " 
            << peptide.getPeptideStr()
            << std::endl;
    }

    std::vector< double > ions;
    ms_fragmentvector fragments;
    ms_fragmentvector all_fragments; // Keep a list of fragments from all series

    ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first
                                  ms_fragmentationrules::FRAG_B_SERIES, // ions series ID
                                  false,    // single-charged ions only
                                  100.0,    // minimal fragment mass to return
                                  mr,       // maximal fragment mass to return 
                                  matrix_science::MASS_TYPE_MONO,
                                  &fragments,
                                  &err);    // collect peptide-specific errors

    std::cout << "b-ion series fragments: " << std::endl;
    printFragmentsTable(fragments);
    all_fragments.copyFrom(&fragments);
    ms_fragmentvector b_ions; 
    b_ions.copyFrom(&fragments);

    ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first
                                  ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID
                                  false,    // single-charged ions only
                                  100.0,    // minimal fragment mass to return
                                  mr,       // maximal fragment mass to return 
                                  matrix_science::MASS_TYPE_MONO,
                                  &fragments,
                                  &err);    // collect peptide-specific errors

    std::cout << "y-ion series fragments: " << std::endl;
    printFragmentsTable(fragments);
    int i;
    for (i=0; i < fragments.getNumberOfFragments(); i++) 
        all_fragments.appendFragment(fragments.getFragmentByNumber(i));

    aahelper.calcFragmentsEx(&peptide, // that is why we needed to create a peptide object first
                            ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID
                            2,    // double-charged ions only
                            100.0,    // minimal fragment mass to return
                            mr,       // maximal fragment mass to return 
                            matrix_science::MASS_TYPE_MONO,
                            &fragments,
                            &err);    // collect peptide-specific errors

    std::cout << "y++-ion series fragments: " << std::endl;
    printFragmentsTable(fragments);
    for (i=0; i < fragments.getNumberOfFragments(); i++) 
        all_fragments.appendFragment(fragments.getFragmentByNumber(i));

    ions = aahelper.calcFragments(&peptide, // that is why we created a peptide object first
                                  ms_fragmentationrules::FRAG_INTERNAL_YB, // ions series ID
                                  false,    // single-charged ions only
                                  100.0,    // minimal fragment mass to return
                                  700,      // maximal fragment mass to return 
                                  matrix_science::MASS_TYPE_MONO,
                                  &fragments,
                                  &err);    // collect peptide-specific errors

    std::cout << "internal yb-ion series fragments: " << std::endl;
    printFragmentsTable(fragments);
    for (i=0; i < fragments.getNumberOfFragments(); i++) 
        all_fragments.appendFragment(fragments.getFragmentByNumber(i));

    std::cout << "Run a search under Mascot to verify the output above" << std::endl;
    std::cout << "Paste the following into a Mascot search query window:" << std::endl;
    displayMascotTestSearch(vecFixed, vecVariable, 
                            enzyme, 
                            mr, 
                            b_ions);  // Or you can use all_fragments
    return 0;
}


static void printFragmentsTable(ms_fragmentvector & fragments) 
{
    std::cout << "Number of fragments: " << fragments.getNumberOfFragments() << std::endl;

    std::cout << "Col\tStart\tEnd\tLabel\t\t Mass\t  NL\tName\tImmon\tIntern\tReg" << std::endl;
    for (int i=0; i < fragments.getNumberOfFragments(); i++)
    {
        const ms_fragment * frag = fragments.getFragmentByNumber(i);
        std::cout << frag->getColumn()     << "\t";
        std::cout << frag->getStart()      << "\t";
        std::cout << frag->getEnd()        << "\t";
        std::cout << std::setw(10) << frag->getLabel()      << "\t";
        std::cout << std::fixed << std::setprecision(2) << std::setw(7) << frag->getMass()       << "\t";
        std::cout << frag->getNeutralLoss()<< "\t";
        std::cout << frag->getSeriesName() << "\t";
        std::cout << frag->isImmonium()    << "\t";
        std::cout << frag->isInternal()    << "\t";
        std::cout << frag->isRegular()     << std::endl;
    }
    std::cout << std::endl;
}
    

static void displayMascotTestSearch(const ms_modvector      vecFixed,
                                    const ms_modvector      vecVariable,
                                    const ms_enzyme       * enzyme,
                                    const double            mr,
                                    const ms_fragmentvector fragments)
{
    /* fragments contains a list of b-ions from a peptide
     * vecVariable contains a list of variable mods applied to the peptide
     * vecFixed contains a list of fixed mods applied to the peptide
     * Use this information to generate an test search that can be run on Mascot.
     */
    int i;

    for (i=0; i < vecFixed.getNumberOfModifications(); i++)
    {
        std::cout << "MODS="
                  << vecFixed.getModificationByNumber(i)->getTitle()
                  << std::endl;
    }

    for (i=0; i < vecVariable.getNumberOfModifications(); i++)
    {
        std::cout << "IT_MODS="
                  << vecVariable.getModificationByNumber(i)->getTitle()
                  << std::endl;
    }
    std::cout << "CHARGE=Mr" << std::endl;
    std::cout << "CLE=" << enzyme->getTitle() << std::endl;
    std::cout << "INSTRUMENT=MALDI-TOF-TOF" << std::endl;
    std::cout << std::setprecision(3) << mr << " ions(";
    for(i=0; i < fragments.getNumberOfFragments(); i++)
    {
        if (i > 0)
            std::cout << ", ";
        std::cout << fragments.getFragmentByNumber(i)->getMass();
    }
    std::cout << ")" << std::endl;
}


/*
will give the output, for instance: 

# tools_aahelper ../config/enzymes ../config/mod_file

List of peptides
M
MEDYLDELR
EDYLDELR
HK
IPSFIVELLK
NNLK
NR
NLTR
NQLNK
IVNR
VSDLYFGK
KPEDK
K
AAELTNK
INDLSHK
LDALMK
VATVSSATK
VSDDIK
K
EIDNLDELDL
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.494


Peptide has been created successfully: MEDYLDELR
b-ion series fragments:
Number of fragments: 8
Col     Start   End     Label            Mass     NL    Name    Immon   Intern  Reg
1       1       -1            b(1)       190.05 0.00    b       0       0       1
2       2       -1            b(2)       319.10 0.00    b       0       0       1
3       3       -1            b(3)       434.12 0.00    b       0       0       1
4       4       -1        b(4) -97       579.18 97.98   b       0       0       1
5       5       -1        b(5) -97       692.26 97.98   b       0       0       1
6       6       -1        b(6) -97       807.29 97.98   b       0       0       1
7       7       -1        b(7) -97       936.33 97.98   b       0       0       1
8       8       -1        b(8) -97      1049.41 97.98   b       0       0       1

y-ion series fragments:
Number of fragments: 8
Col     Start   End     Label            Mass     NL    Name    Immon   Intern  Reg
8       8       -1        y(8) -97      1034.48 97.98   y       0       0       1
7       7       -1        y(7) -97       905.44 97.98   y       0       0       1
6       6       -1        y(6) -97       790.41 97.98   y       0       0       1
5       5       -1            y(5)       645.36 0.00    y       0       0       1
4       4       -1            y(4)       532.27 0.00    y       0       0       1
3       3       -1            y(3)       417.25 0.00    y       0       0       1
2       2       -1            y(2)       288.20 0.00    y       0       0       1
1       1       -1            y(1)       175.12 0.00    y       0       0       1

internal yb-ion series fragments:
Number of fragments: 18
Col     Start   End     Label            Mass     NL    Name    Immon   Intern  Reg
2       2       3               ED       245.08 0.00    yb      0       1       0
2       2       4          EDY -97       390.13 97.98   yb      0       1       0
2       2       5         EDYL -97       503.21 97.98   yb      0       1       0
2       2       6        EDYLD -97       618.24 97.98   yb      0       1       0
3       3       4           DY -97       261.09 97.98   yb      0       1       0
3       3       5          DYL -97       374.17 97.98   yb      0       1       0
3       3       6         DYLD -97       489.20 97.98   yb      0       1       0
3       3       7        DYLDE -97       618.24 97.98   yb      0       1       0
4       4       5           YL -97       259.14 97.98   yb      0       1       0
4       4       6          YLD -97       374.17 97.98   yb      0       1       0
4       4       7         YLDE -97       503.21 97.98   yb      0       1       0
4       4       8        YLDEL -97       616.30 97.98   yb      0       1       0
5       5       6               LD       229.12 0.00    yb      0       1       0
5       5       7              LDE       358.16 0.00    yb      0       1       0
5       5       8             LDEL       471.24 0.00    yb      0       1       0
6       6       7               DE       245.08 0.00    yb      0       1       0
6       6       8              DEL       358.16 0.00    yb      0       1       0
7       7       8               EL       243.13 0.00    yb      0       1       0

Run a search under Mascot to verify the output above
Paste the following into a Mascot search query window:
MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
IT_MODS=Phospho (STY)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.494 ions(190.053, 319.096, 434.123, 579.176, 692.260, 807.287, 936.329, 1049.413)

*/

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29