For calculating peptide and fragment masses.
#!/usr/local/bin/perl ############################################################################## # file: tools_aahelper.pl # # Mascot Parser toolkit example code # ############################################################################## # COPYRIGHT NOTICE # # Copyright 1998-2010 Matrix Science Limited All Rights Reserved. # # # ############################################################################## # $Source: parser/examples/test_perl/tools_aahelper.pl $ # $Author: villek@matrixscience.com $ # $Date: 2018-07-30 16:23:53 +0100 $ # $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ ############################################################################## use strict; ############################################################################## use msparser; if (!defined($ARGV[0]) || !defined($ARGV[1])) { # We need an enzyme to build a list of peptides and mod_file file if we # want to apply any modifications. print "Location of enzymes file and mod_file has to be specified as parameters\n"; exit 1; } my $enzymefile = open_enzymefile($ARGV[0]); # Note: both $enzymefile *must* be kept in scope for as long as you use # $Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser # manual. my $Trypsin = $enzymefile->getEnzymeByName('Trypsin'); if (!$Trypsin) { print "Cannot find 'Trypsin' in the enzyme file. Cannot continue.\n"; exit 1; } my $modfile = open_modfile($ARGV[1]); my $aahelper = new msparser::ms_aahelper; # Note: both $modfile and $Trypsin *must* be kept in scope for as # long as you use $aahelper. See "Using the toolkit from Perl, Java and # Python" in Mascot Parser manual. $aahelper->setMasses($modfile->getMassFile); $aahelper->setEnzyme($Trypsin); # Now we can generate peptides for a given protein. This is # RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain # DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09. my $proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"; # No missed cleavages are allowed (third parameter). $aahelper->startIteratePeptides($proteinStr, length($proteinStr), 0); print "List of peptides\n"; while ($aahelper->getNextPeptide) { my $start = $aahelper->getPepStart(); my $len = $aahelper->getPepEnd() - $aahelper->getPepStart() + 1; # getPepStart() returns one-based index. my $peptideStr = substr($proteinStr, $start - 1, $len); print $peptideStr, "\n"; } print "End of list\n\n"; # Create a list of fixed modifications. my $vecFixed = new msparser::ms_modvector(); $vecFixed->appendModification( $modfile->getModificationByName('Phospho (Y)') ); # Create a list of variable modifications. my $vecVariable = new msparser::ms_modvector(); $vecVariable->appendModification( $modfile->getModificationByName('Oxidation (M)') ); $vecVariable->appendModification( $modfile->getModificationByName('Acetyl (N-term)') ); # Note: both $vecFixed and $vecVariable *must* be kept in scope for as # long as you use $aahelper. See "Using the toolkit from Perl, Java and # Python" in Mascot Parser manual. $aahelper->setAvailableModifications($vecFixed, $vecVariable); # ms_aahelper can also contain errors that might happen when applying # modifications, for instance when we have a conflict between two # modifications (same residue or same peptide end). if (!$aahelper->isValid()) { print "Error while setting available modifications: "; print $aahelper->getLastErrorString(), "\n"; exit(1); } # We will need also a separate error object for collecting peptide-specific # errors. my $err = new msparser::ms_errs(); # Example of how to call calcPeptideMZ(). It will often be more convenient to # create an ms_peptide instead, and then call getMrCalc() on that object. my $numThatMustBeModded = new msparser::vectori(); $numThatMustBeModded->push(1); # 1 acetylNterm modification $numThatMustBeModded->push(1); # 1 site is oxidised my $mr = $aahelper->calcPeptideMZ( $proteinStr, length($proteinStr), 1, 10, # peptide ends (1-based) $numThatMustBeModded, 0, # no charge - i.e. Mr $msparser::MASS_TYPE_MONO, $err ); if (!$err->isValid()) { print "Error while calculating peptide mass: "; print $err->getLastErrorString(), "\n"; # Don't need to halt as they are not fatal errors. $err->clearAllErrors(); } else { printf "Peptide mass calculated using 'calcPeptideMZ' is %8.3f\n", $mr; } # Create a peptide - which we can then fragment. # # Specify which residues are modified by which modification as it has to # correspond to a modification string: # # Nterm modification + 9 residues + Cterm modification my $numModded = new msparser::vectori(); $numModded->push(2); # N-term - modified by "Acetyl (N-term)" $numModded->push(1); # M - modified by "Oxidation (M)" $numModded->push(0); # A $numModded->push(0); # I $numModded->push(0); # F $numModded->push(0); # R $numModded->push(0); # I $numModded->push(0); # D $numModded->push(0); # E $numModded->push(0); # I $numModded->push(0); # R $numModded->push(0); # C-term # We have to specify (or at least supply an empty vector) which neutral loss # value to use, in case there are more than one available for a modification. my $whichNl = new msparser::vectori(); $whichNl->push(0); # N-term $whichNl->push(1); # M - has 2 neutral losses. Specify the first (-98) $whichNl->push(0); # A $whichNl->push(0); # I $whichNl->push(0); # F $whichNl->push(0); # R $whichNl->push(0); # I $whichNl->push(0); # D $whichNl->push(0); # E $whichNl->push(0); # I $whichNl->push(0); # R $whichNl->push(0); # C-term my $peptide = $aahelper->createPeptide( $proteinStr, length($proteinStr), 1, 10, # end positions $numModded, # modification string-like vector $whichNl, # which neutral loss to use 0, # no charge $msparser::MASS_TYPE_MONO, $err ); if (!$err->isValid()) { print "Error while creating a peptide: "; print $err->getLastErrorString(), "\n"; # Don't need to halt as they are not fatal errors. $err->clearAllErrors(); } else { print "\n\nPeptide has been created successfully: "; print $peptide->getPeptideStr(), "\n"; } # Keep a list of fragments from all series my $all_fragments = new msparser::ms_fragmentvector; my $b_ions = fragmentPeptide( $aahelper, $peptide, $msparser::ms_fragmentationrules::FRAG_B_SERIES, 'b-ion series', 0, # single-charged ions only $mr, # maximal fragment mass to return ); # copyFrom() can only be used to populate the list for the first time. $all_fragments->copyFrom($b_ions); my $fragments = fragmentPeptide( $aahelper, $peptide, $msparser::ms_fragmentationrules::FRAG_Y_SERIES, 'y-ion series', 0, # single-charged ions only $mr, # maximal fragment mass to return ); for my $i (0 .. $fragments->getNumberOfFragments - 1) { $all_fragments->appendFragment($fragments->getFragmentByNumber($i)); } $fragments = fragmentPeptide( $aahelper, $peptide, $msparser::ms_fragmentationrules::FRAG_Y_SERIES, 'y++-ion series', 2, # double-charged ions only $mr, # maximal fragment mass to return ); for my $i (0 .. $fragments->getNumberOfFragments - 1) { $all_fragments->appendFragment($fragments->getFragmentByNumber($i)); } $fragments = fragmentPeptide( $aahelper, $peptide, $msparser::ms_fragmentationrules::FRAG_INTERNAL_YB, 'internal yb-ion series', 0, # single-charged ions only 700, # maximal fragment mass to return ); for my $i (0 .. $fragments->getNumberOfFragments - 1) { $all_fragments->appendFragment($fragments->getFragmentByNumber($i)); } print "Paste the following into a Mascot search query window to verify this output:\n"; displayMascotTestSearch( $vecFixed, $vecVariable, $Trypsin, $peptide->getMrCalc, $b_ions # or you can use $all_fragments ); sub open_enzymefile { my ($filename) = @_; my $enzymefile = new msparser::ms_enzymefile($filename); if (!$enzymefile->isValid) { print "Error while opening enzyme file: "; print $enzymefile->getLastErrorString() . "\n"; exit 1; } return $enzymefile; } sub open_modfile { my ($filename) = @_; # We can use the default masses in this example. my $masses = new msparser::ms_masses; my $modfile = new msparser::ms_modfile($filename, $masses); if (!$modfile->isValid) { print "Error while opening mod file: "; print $modfile->getLastErrorString . "\n"; exit 1; } for ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)') { if (not $modfile->getModificationByName($_)) { print "Cannot find '$_' in the mod file. Cannot continue.\n"; exit(1); } } return $modfile; } sub fragmentPeptide { my ($aa_helper, $peptide, $series, $series_label, $doubleCharged, $mass_max) = @_; my $fragments = new msparser::ms_fragmentvector; my $err = new msparser::ms_errs(); $aahelper->calcFragments( $peptide, $series, $doubleCharged ? 1 : 0, 100.0, $mass_max, $msparser::MASS_TYPE_MONO, $fragments, $err ); # Check $err here. print $series_label, " fragments: \n"; printFragmentsTable($fragments); return $fragments; } sub printFragmentsTable { my ($fragments) = @_; print "Number of fragments: ", $fragments->getNumberOfFragments(), "\n"; my $headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s\n"; my $fmt = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s\n"; printf $headerfmt, qw(Col Start End Label Mass NL Name Immon Intern Reg); for my $i (0 .. $fragments->getNumberOfFragments - 1) { my $frag = $fragments->getFragmentByNumber($i); printf $fmt, $frag->getColumn(), $frag->getStart(), $frag->getEnd(), $frag->getLabel(), $frag->getMass(), $frag->getNeutralLoss(), $frag->getSeriesName(), $frag->isImmonium(), $frag->isInternal(), $frag->isRegular(), ; } print "\n"; } # $vecFixed contains a list of fixed mods applied to the peptide # $vecVariable contains a list of variable mods applied to the peptide # $enzyme is the enzyme used in fragmentation # $mr is the peptide Mr(calc) # $fragments contains a list of b-ions from a peptide # # Use this information to generate an test search that can be run on Mascot. sub displayMascotTestSearch { my ($vecFixed, $vecVariable, $enzyme, $mr, $fragments) = @_; for my $i (0 .. $vecFixed->getNumberOfModifications-1) { printf "MODS=%s\n", $vecFixed->getModificationByNumber($i)->getTitle(); } for my $i (0 .. $vecVariable->getNumberOfModifications-1) { printf "IT_MODS=%s\n", $vecVariable->getModificationByNumber($i)->getTitle(); } print "CHARGE=Mr\n"; print "CLE=", $enzyme->getTitle(), "\n"; print "INSTRUMENT=MALDI-TOF-TOF\n"; printf "%.3f ions(", $mr; print join(', ', map { sprintf("%.3f", $fragments->getFragmentByNumber($_)->getMass) } 0 .. $fragments->getNumberOfFragments-1 ); print ")\n"; } =pod Running the program as perl -I../bin tools_aahelper.pl ../config/enzymes ../config/mod_file will give the following output under Mascot 2.3: List of peptides M MAIFR AIFR IDEIR NMSSEELEEELR K LEVELIR ER GAVR AGGAPEKPGR IR EIR R TIAR MK TVQR ER VR K End of list Peptide mass calculated using 'calcPeptideMZ' is 1320.686 Peptide has been created successfully: MAIFRIDEIR b-ion series fragments: Number of fragments: 9 Col Start End Label Mass NL Name Immon Intern Reg 1 1 -1 b(1) 190.05 0.00 b 1 2 2 -1 b(2) 261.09 0.00 b 1 3 3 -1 b(3) 374.17 0.00 b 1 4 4 -1 b(4) 521.24 0.00 b 1 5 5 -1 b(5) 677.34 0.00 b 1 6 6 -1 b(6) 790.43 0.00 b 1 7 7 -1 b(7) 905.45 0.00 b 1 8 8 -1 b(8) 1034.50 0.00 b 1 9 9 -1 b(9) 1147.58 0.00 b 1 y-ion series fragments: Number of fragments: 9 Col Start End Label Mass NL Name Immon Intern Reg 9 9 -1 y(9) 1132.65 0.00 y 1 8 8 -1 y(8) 1061.61 0.00 y 1 7 7 -1 y(7) 948.53 0.00 y 1 6 6 -1 y(6) 801.46 0.00 y 1 5 5 -1 y(5) 645.36 0.00 y 1 4 4 -1 y(4) 532.27 0.00 y 1 3 3 -1 y(3) 417.25 0.00 y 1 2 2 -1 y(2) 288.20 0.00 y 1 1 1 -1 y(1) 175.12 0.00 y 1 y++-ion series fragments: Number of fragments: 8 Col Start End Label Mass NL Name Immon Intern Reg 9 9 -1 y(9)++ 566.83 0.00 y 1 8 8 -1 y(8)++ 531.31 0.00 y 1 7 7 -1 y(7)++ 474.77 0.00 y 1 6 6 -1 y(6)++ 401.23 0.00 y 1 5 5 -1 y(5)++ 323.18 0.00 y 1 4 4 -1 y(4)++ 266.64 0.00 y 1 3 3 -1 y(3)++ 209.13 0.00 y 1 2 2 -1 y(2)++ 144.61 0.00 y 1 internal yb-ion series fragments: Number of fragments: 22 Col Start End Label Mass NL Name Immon Intern Reg 2 2 3 AI 185.13 0.00 yb 1 2 2 4 AIF 332.20 0.00 yb 1 2 2 5 AIFR 488.30 0.00 yb 1 2 2 6 AIFRI 601.38 0.00 yb 1 3 3 4 IF 261.16 0.00 yb 1 3 3 5 IFR 417.26 0.00 yb 1 3 3 6 IFRI 530.34 0.00 yb 1 3 3 7 IFRID 645.37 0.00 yb 1 4 4 5 FR 304.18 0.00 yb 1 4 4 6 FRI 417.26 0.00 yb 1 4 4 7 FRID 532.29 0.00 yb 1 4 4 8 FRIDE 661.33 0.00 yb 1 5 5 6 RI 270.19 0.00 yb 1 5 5 7 RID 385.22 0.00 yb 1 5 5 8 RIDE 514.26 0.00 yb 1 5 5 9 RIDEI 627.35 0.00 yb 1 6 6 7 ID 229.12 0.00 yb 1 6 6 8 IDE 358.16 0.00 yb 1 6 6 9 IDEI 471.24 0.00 yb 1 7 7 8 DE 245.08 0.00 yb 1 7 7 9 DEI 358.16 0.00 yb 1 8 8 9 EI 243.13 0.00 yb 1 Paste the following into a Mascot search query window to verify this output: MODS=Phospho (Y) IT_MODS=Oxidation (M) IT_MODS=Acetyl (N-term) CHARGE=Mr CLE=Trypsin INSTRUMENT=MALDI-TOF-TOF 1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582) =cut
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29 |