For calculating peptide and fragment masses.
/* ############################################################################## # File: tools_aahelper.cpp # # Mascot Parser toolkit example code # # Example code for generating peptides, peptide and fragment masses # ############################################################################## # COPYRIGHT NOTICE # # Copyright 1998-2006 Matrix Science Limited All Rights Reserved. # # # ############################################################################## # $Source: parser/examples/test_cxx/tools_aahelper.cpp $ # $Author: villek@matrixscience.com $ # $Date: 2018-07-30 16:23:53 +0100 $ # $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ ############################################################################## */ #include "msparser.hpp" #include <iostream> #include <iomanip> // All the classes are part of the matrix_science namespace using namespace matrix_science; static void printFragmentsTable(ms_fragmentvector & fragments); static void displayMascotTestSearch(const ms_modvector vecFixed, const ms_modvector vecVariable, const ms_enzyme * enzyme, const double mr, const ms_fragmentvector fragments); int main(int argc, char * argv[]) { // We need an enzyme to build a list of peptides // and mod_file file if we want to apply any modifications if ( argc < 3 ) { std::cout << "Location of enzymes file and mod_file has to be specified as parameters" << std::endl; return 0; } ms_enzymefile enzymefile(argv[1]); if ( !enzymefile.isValid() ) { std::cout << "There are errors. Cannot continue. The last error description:" << std::endl; std::cout << enzymefile.getLastErrorString() << std::endl; return 1; } const ms_enzyme *enzyme = enzymefile.getEnzymeByName("Trypsin"); if ( enzyme == NULL ) { std::cout << "Cannot find Trypsin enzyme in the file. Cannot continue." << std::endl; return 1; } // we need masses file, but can use default masses anyway ms_masses masses; ms_modfile modfile(argv[2], &masses); if ( !modfile.isValid() ) { std::cout << "There are errors. Cannot continue. The last error description:" << std::endl; std::cout << modfile.getLastErrorString() << std::endl; return 1; } const ms_modification *oxidation = modfile.getModificationByName("Oxidation (M)"); const ms_modification *acetylNterm = modfile.getModificationByName("Acetyl (N-term)"); const ms_modification *phospho = modfile.getModificationByName("Phospho (STY)"); if ( oxidation == NULL || acetylNterm == NULL || phospho == NULL) { std::cout << "Cannot find necessary modifications in the mod_file. Cannot continue." << std::endl; return 1; } ms_aahelper aahelper; aahelper.setMasses(&masses); aahelper.setEnzyme(enzyme); // Now we can generate peptides for a given protein char proteinStr[] = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"; aahelper.startIteratePeptides(proteinStr, static_cast<int>(strlen(proteinStr)), 0);// no missed cleavages are allowed std::cout << "List of peptides" << std::endl; while(aahelper.getNextPeptide()) { int start = aahelper.getPepStart()-1; // the method returns 1-based position int len = aahelper.getPepEnd() - aahelper.getPepStart()+1; std::string peptideStr; peptideStr.assign(proteinStr+start, len); std::cout << peptideStr << std::endl; } std::cout << "End of list" << std::endl; // create a list of fixed modifications ms_modvector vecFixed; vecFixed.appendModification(phospho); // create a list of variable modifications ms_modvector vecVariable; vecVariable.appendModification(oxidation); vecVariable.appendModification(acetylNterm); aahelper.setAvailableModifications(&vecFixed, &vecVariable); // ms_aahelper can also contain errors that might happen when applying modifications // for instance, when we have a conflict between two modifications (same residue or same peptide end) if ( !aahelper.isValid() ) { std::cout << "There are errors. Cannot continue. The last error description:" << std::endl; std::cout << aahelper.getLastErrorString() << std::endl; return 1; } // we will need also a separate error-object for collecting peptide-specific errors ms_errs err; // Example of how to call calcPeptideMZ // It will often be more convenient to create an ms_peptide and then // call getMrCalc() on that object. std::vector< int > numThatMustBeModded; numThatMustBeModded.push_back(1); // 1 acetylNterm modification numThatMustBeModded.push_back(1); // 1 site is oxidised double mr = aahelper.calcPeptideMZ(proteinStr, static_cast<int>(strlen(proteinStr)), 1, 10, // peptide ends (1-based) numThatMustBeModded, 0, // no charge - i.e. mr matrix_science::MASS_TYPE_MONO, &err); if ( !err.isValid() ) { std::cout << "There have been errors while calculating peptide mass: " << std::endl; std::cout << err.getLastErrorString() << std::endl; // don't need to halt as they are not fatal errors err.clearAllErrors(); // prepare to re-use it } else { std::cout << "Peptide mass calculated using 'calcPeptideMZ' is " << std::setprecision(3) << std::fixed << std::setw(8) << mr << std::endl << std::endl << std::endl; } // Create a peptide - which we can then fragment // // Specify which residues are modified by which modification. // as it has to correspond to a modification string // Nterm modification + 9 residues + Cterm modification std::vector< int > numModded; numModded.push_back(2); // N-term - modified by "Acetyl (N-term)" numModded.push_back(1); // M - modified by "Oxidation (M)" numModded.push_back(0); // A numModded.push_back(0); // I numModded.push_back(0); // F numModded.push_back(0); // R numModded.push_back(0); // I numModded.push_back(0); // D numModded.push_back(0); // E numModded.push_back(0); // I numModded.push_back(0); // R numModded.push_back(0); // C-term // we have to specify (or at least supply an empty vector) which neutral loss value to use // in case there are more than one available for a modification std::vector< int > whichNl; whichNl.push_back(0); // N-term whichNl.push_back(1); // M - has 2 neutral losses. Specify the first (-98) whichNl.push_back(0); // A whichNl.push_back(0); // I whichNl.push_back(0); // F whichNl.push_back(0); // R whichNl.push_back(0); // I whichNl.push_back(0); // D whichNl.push_back(0); // E whichNl.push_back(0); // I whichNl.push_back(0); // R whichNl.push_back(0); // C-term ms_peptide peptide = aahelper.createPeptide(proteinStr, static_cast<int>(strlen(proteinStr)), 1,10, // start, end positions numModded, // modification string-like vector whichNl, // which neutral loss to use 0, // no charge matrix_science::MASS_TYPE_MONO, &err); // collect errors in it if ( !err.isValid() ) { std::cout << "There have been errors while creating a peptide: " << std::endl; std::cout << err.getLastErrorString() << std::endl; // don't need to halt as they are not fatal errors err.clearAllErrors(); // prepare to re-use it } else { std::cout << "Peptide has been created successfully: " << peptide.getPeptideStr() << std::endl; } std::vector< double > ions; ms_fragmentvector fragments; ms_fragmentvector all_fragments; // Keep a list of fragments from all series ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first ms_fragmentationrules::FRAG_B_SERIES, // ions series ID false, // single-charged ions only 100.0, // minimal fragment mass to return mr, // maximal fragment mass to return matrix_science::MASS_TYPE_MONO, &fragments, &err); // collect peptide-specific errors std::cout << "b-ion series fragments: " << std::endl; printFragmentsTable(fragments); all_fragments.copyFrom(&fragments); ms_fragmentvector b_ions; b_ions.copyFrom(&fragments); ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID false, // single-charged ions only 100.0, // minimal fragment mass to return mr, // maximal fragment mass to return matrix_science::MASS_TYPE_MONO, &fragments, &err); // collect peptide-specific errors std::cout << "y-ion series fragments: " << std::endl; printFragmentsTable(fragments); int i; for (i=0; i < fragments.getNumberOfFragments(); i++) all_fragments.appendFragment(fragments.getFragmentByNumber(i)); aahelper.calcFragmentsEx(&peptide, // that is why we needed to create a peptide object first ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID 2, // double-charged ions only 100.0, // minimal fragment mass to return mr, // maximal fragment mass to return matrix_science::MASS_TYPE_MONO, &fragments, &err); // collect peptide-specific errors std::cout << "y++-ion series fragments: " << std::endl; printFragmentsTable(fragments); for (i=0; i < fragments.getNumberOfFragments(); i++) all_fragments.appendFragment(fragments.getFragmentByNumber(i)); ions = aahelper.calcFragments(&peptide, // that is why we created a peptide object first ms_fragmentationrules::FRAG_INTERNAL_YB, // ions series ID false, // single-charged ions only 100.0, // minimal fragment mass to return 700, // maximal fragment mass to return matrix_science::MASS_TYPE_MONO, &fragments, &err); // collect peptide-specific errors std::cout << "internal yb-ion series fragments: " << std::endl; printFragmentsTable(fragments); for (i=0; i < fragments.getNumberOfFragments(); i++) all_fragments.appendFragment(fragments.getFragmentByNumber(i)); std::cout << "Run a search under Mascot to verify the output above" << std::endl; std::cout << "Paste the following into a Mascot search query window:" << std::endl; displayMascotTestSearch(vecFixed, vecVariable, enzyme, mr, b_ions); // Or you can use all_fragments return 0; } static void printFragmentsTable(ms_fragmentvector & fragments) { std::cout << "Number of fragments: " << fragments.getNumberOfFragments() << std::endl; std::cout << "Col\tStart\tEnd\tLabel\t\t Mass\t NL\tName\tImmon\tIntern\tReg" << std::endl; for (int i=0; i < fragments.getNumberOfFragments(); i++) { const ms_fragment * frag = fragments.getFragmentByNumber(i); std::cout << frag->getColumn() << "\t"; std::cout << frag->getStart() << "\t"; std::cout << frag->getEnd() << "\t"; std::cout << std::setw(10) << frag->getLabel() << "\t"; std::cout << std::fixed << std::setprecision(2) << std::setw(7) << frag->getMass() << "\t"; std::cout << frag->getNeutralLoss()<< "\t"; std::cout << frag->getSeriesName() << "\t"; std::cout << frag->isImmonium() << "\t"; std::cout << frag->isInternal() << "\t"; std::cout << frag->isRegular() << std::endl; } std::cout << std::endl; } static void displayMascotTestSearch(const ms_modvector vecFixed, const ms_modvector vecVariable, const ms_enzyme * enzyme, const double mr, const ms_fragmentvector fragments) { /* fragments contains a list of b-ions from a peptide * vecVariable contains a list of variable mods applied to the peptide * vecFixed contains a list of fixed mods applied to the peptide * Use this information to generate an test search that can be run on Mascot. */ int i; for (i=0; i < vecFixed.getNumberOfModifications(); i++) { std::cout << "MODS=" << vecFixed.getModificationByNumber(i)->getTitle() << std::endl; } for (i=0; i < vecVariable.getNumberOfModifications(); i++) { std::cout << "IT_MODS=" << vecVariable.getModificationByNumber(i)->getTitle() << std::endl; } std::cout << "CHARGE=Mr" << std::endl; std::cout << "CLE=" << enzyme->getTitle() << std::endl; std::cout << "INSTRUMENT=MALDI-TOF-TOF" << std::endl; std::cout << std::setprecision(3) << mr << " ions("; for(i=0; i < fragments.getNumberOfFragments(); i++) { if (i > 0) std::cout << ", "; std::cout << fragments.getFragmentByNumber(i)->getMass(); } std::cout << ")" << std::endl; } /* will give the output, for instance: # tools_aahelper ../config/enzymes ../config/mod_file List of peptides M MEDYLDELR EDYLDELR HK IPSFIVELLK NNLK NR NLTR NQLNK IVNR VSDLYFGK KPEDK K AAELTNK INDLSHK LDALMK VATVSSATK VSDDIK K EIDNLDELDL End of list Peptide mass calculated using 'calcPeptideMZ' is 1320.494 Peptide has been created successfully: MEDYLDELR b-ion series fragments: Number of fragments: 8 Col Start End Label Mass NL Name Immon Intern Reg 1 1 -1 b(1) 190.05 0.00 b 0 0 1 2 2 -1 b(2) 319.10 0.00 b 0 0 1 3 3 -1 b(3) 434.12 0.00 b 0 0 1 4 4 -1 b(4) -97 579.18 97.98 b 0 0 1 5 5 -1 b(5) -97 692.26 97.98 b 0 0 1 6 6 -1 b(6) -97 807.29 97.98 b 0 0 1 7 7 -1 b(7) -97 936.33 97.98 b 0 0 1 8 8 -1 b(8) -97 1049.41 97.98 b 0 0 1 y-ion series fragments: Number of fragments: 8 Col Start End Label Mass NL Name Immon Intern Reg 8 8 -1 y(8) -97 1034.48 97.98 y 0 0 1 7 7 -1 y(7) -97 905.44 97.98 y 0 0 1 6 6 -1 y(6) -97 790.41 97.98 y 0 0 1 5 5 -1 y(5) 645.36 0.00 y 0 0 1 4 4 -1 y(4) 532.27 0.00 y 0 0 1 3 3 -1 y(3) 417.25 0.00 y 0 0 1 2 2 -1 y(2) 288.20 0.00 y 0 0 1 1 1 -1 y(1) 175.12 0.00 y 0 0 1 internal yb-ion series fragments: Number of fragments: 18 Col Start End Label Mass NL Name Immon Intern Reg 2 2 3 ED 245.08 0.00 yb 0 1 0 2 2 4 EDY -97 390.13 97.98 yb 0 1 0 2 2 5 EDYL -97 503.21 97.98 yb 0 1 0 2 2 6 EDYLD -97 618.24 97.98 yb 0 1 0 3 3 4 DY -97 261.09 97.98 yb 0 1 0 3 3 5 DYL -97 374.17 97.98 yb 0 1 0 3 3 6 DYLD -97 489.20 97.98 yb 0 1 0 3 3 7 DYLDE -97 618.24 97.98 yb 0 1 0 4 4 5 YL -97 259.14 97.98 yb 0 1 0 4 4 6 YLD -97 374.17 97.98 yb 0 1 0 4 4 7 YLDE -97 503.21 97.98 yb 0 1 0 4 4 8 YLDEL -97 616.30 97.98 yb 0 1 0 5 5 6 LD 229.12 0.00 yb 0 1 0 5 5 7 LDE 358.16 0.00 yb 0 1 0 5 5 8 LDEL 471.24 0.00 yb 0 1 0 6 6 7 DE 245.08 0.00 yb 0 1 0 6 6 8 DEL 358.16 0.00 yb 0 1 0 7 7 8 EL 243.13 0.00 yb 0 1 0 Run a search under Mascot to verify the output above Paste the following into a Mascot search query window: MODS=Oxidation (M) IT_MODS=Acetyl (N-term) IT_MODS=Phospho (STY) CHARGE=Mr CLE=Trypsin INSTRUMENT=MALDI-TOF-TOF 1320.494 ions(190.053, 319.096, 434.123, 579.176, 692.260, 807.287, 936.329, 1049.413) */
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29 |