Matrix Science header

tools_aahelper.pl

For calculating peptide and fragment masses.

#!/usr/local/bin/perl
##############################################################################
# file: tools_aahelper.pl                                                    #
# Mascot Parser toolkit example code                                         #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: /vol/cvsroot/parser/examples/test_perl/tools_aahelper.pl,v $
#     $Author: villek $
#       $Date: 2010/08/31 08:48:07 $
#   $Revision: 1.8 $ 
##############################################################################
use strict;
##############################################################################

use msparser;

if (!defined($ARGV[0]) || !defined($ARGV[1])) {
    # We need an enzyme to build a list of peptides and mod_file file if we
    # want to apply any modifications.
    print "Location of enzymes file and mod_file has to be specified as parameters\n";
    exit 1;
}

my $enzymefile = open_enzymefile($ARGV[0]);

# Note: both $enzymefile *must* be kept in scope for as long as you use
# $Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser
# manual.
my $Trypsin = $enzymefile->getEnzymeByName('Trypsin');

if (!$Trypsin) {
    print "Cannot find 'Trypsin' in the enzyme file. Cannot continue.\n";
    exit 1;
}

my $modfile = open_modfile($ARGV[1]);

my $aahelper = new msparser::ms_aahelper;
# Note: both $modfile and $Trypsin *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and 
# Python" in Mascot Parser manual.
$aahelper->setMasses($modfile->getMassFile);
$aahelper->setEnzyme($Trypsin);

# Now we can generate peptides for a given protein. This is 
# RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain
# DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
my $proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK";

# No missed cleavages are allowed (third parameter).
$aahelper->startIteratePeptides($proteinStr, length($proteinStr), 0); 

print "List of peptides\n";

while ($aahelper->getNextPeptide) {
    my $start = $aahelper->getPepStart();  
    my $len = $aahelper->getPepEnd() - $aahelper->getPepStart() + 1;

    # getPepStart() returns one-based index.
    my $peptideStr = substr($proteinStr, $start - 1, $len);
    print $peptideStr, "\n"; 
}

print "End of list\n\n";

# Create a list of fixed modifications.
my $vecFixed = new msparser::ms_modvector();
$vecFixed->appendModification(
    $modfile->getModificationByName('Phospho (Y)')
);

# Create a list of variable modifications.
my $vecVariable = new msparser::ms_modvector();
$vecVariable->appendModification(
    $modfile->getModificationByName('Oxidation (M)')
);
$vecVariable->appendModification(
    $modfile->getModificationByName('Acetyl (N-term)')
);

# Note: both $vecFixed and $vecVariable *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and 
# Python" in Mascot Parser manual.
$aahelper->setAvailableModifications($vecFixed, $vecVariable);

# ms_aahelper can also contain errors that might happen when applying
# modifications, for instance when we have a conflict between two
# modifications (same residue or same peptide end).
if (!$aahelper->isValid()) {
    print "Error while setting available modifications: ";
    print $aahelper->getLastErrorString(), "\n";
    exit(1);
}

# We will need also a separate error object for collecting peptide-specific
# errors.
my $err = new msparser::ms_errs();

# Example of how to call calcPeptideMZ(). It will often be more convenient to
# create an ms_peptide instead, and then call getMrCalc() on that object.
my $numThatMustBeModded = new msparser::vectori();
$numThatMustBeModded->push(1);  # 1 acetylNterm modification
$numThatMustBeModded->push(1);  # 1 site is oxidised

my $mr = $aahelper->calcPeptideMZ(
    $proteinStr, 
    length($proteinStr), 
    1, 
    10, # peptide ends (1-based)
    $numThatMustBeModded, 
    0, # no charge - i.e. Mr
    $msparser::MASS_TYPE_MONO,
    $err
);

if (!$err->isValid()) {
    print "Error while calculating peptide mass: ";
    print $err->getLastErrorString(), "\n";
    # Don't need to halt as they are not fatal errors.
    $err->clearAllErrors(); 
} else {
    printf "Peptide mass calculated using 'calcPeptideMZ' is %8.3f\n", $mr;
}


# Create a peptide - which we can then fragment.
#
# Specify which residues are modified by which modification as it has to
# correspond to a modification string:
#
# Nterm modification + 9 residues + Cterm modification
my $numModded = new msparser::vectori();
$numModded->push(2);  # N-term  - modified by "Acetyl (N-term)"
$numModded->push(1);  # M       - modified by "Oxidation (M)"
$numModded->push(0);  # A
$numModded->push(0);  # I
$numModded->push(0);  # F
$numModded->push(0);  # R
$numModded->push(0);  # I
$numModded->push(0);  # D
$numModded->push(0);  # E
$numModded->push(0);  # I
$numModded->push(0);  # R
$numModded->push(0);  # C-term

# We have to specify (or at least supply an empty vector) which neutral loss
# value to use, in case there are more than one available for a modification.
my $whichNl = new msparser::vectori();
$whichNl->push(0);  # N-term
$whichNl->push(1);  # M      - has 2 neutral losses. Specify the first (-98)
$whichNl->push(0);  # A
$whichNl->push(0);  # I
$whichNl->push(0);  # F
$whichNl->push(0);  # R
$whichNl->push(0);  # I
$whichNl->push(0);  # D
$whichNl->push(0);  # E
$whichNl->push(0);  # I
$whichNl->push(0);  # R
$whichNl->push(0);  # C-term

my $peptide = $aahelper->createPeptide(
    $proteinStr, 
    length($proteinStr),
    1,
    10,          # end positions
    $numModded, # modification string-like vector
    $whichNl,   # which neutral loss to use
    0,          # no charge
    $msparser::MASS_TYPE_MONO,
    $err
); 

if (!$err->isValid()) {
    print "Error while creating a peptide: ";
    print $err->getLastErrorString(), "\n";
    # Don't need to halt as they are not fatal errors.
    $err->clearAllErrors(); 
} else {
    print "\n\nPeptide has been created successfully: ";
    print $peptide->getPeptideStr(), "\n";
}

# Keep a list of fragments from all series
my $all_fragments = new msparser::ms_fragmentvector;  

my $b_ions = fragmentPeptide(
    $aahelper,
    $peptide, 
    $msparser::ms_fragmentationrules::FRAG_B_SERIES,
    'b-ion series',
    0,   # single-charged ions only
    $mr, # maximal fragment mass to return
);

# copyFrom() can only be used to populate the list for the first time.
$all_fragments->copyFrom($b_ions);

my $fragments = fragmentPeptide(
    $aahelper,
    $peptide,
    $msparser::ms_fragmentationrules::FRAG_Y_SERIES, 
    'y-ion series',
    0,          # single-charged ions only
    $mr,        # maximal fragment mass to return 
); 

for my $i (0 .. $fragments->getNumberOfFragments - 1) {
    $all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}

$fragments = fragmentPeptide(
    $aahelper,
    $peptide,
    $msparser::ms_fragmentationrules::FRAG_Y_SERIES, 
    'y++-ion series',
    2,          # double-charged ions only
    $mr,        # maximal fragment mass to return 
); 

for my $i (0 .. $fragments->getNumberOfFragments - 1) {
    $all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}

$fragments = fragmentPeptide(
    $aahelper,
    $peptide,
    $msparser::ms_fragmentationrules::FRAG_INTERNAL_YB,
    'internal yb-ion series',
    0,          # single-charged ions only
    700,        # maximal fragment mass to return 
); 

for my $i (0 .. $fragments->getNumberOfFragments - 1) {
    $all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}


print "Paste the following into a Mascot search query window to verify this output:\n";

displayMascotTestSearch(
    $vecFixed, $vecVariable, $Trypsin, $peptide->getMrCalc, 
    $b_ions # or you can use $all_fragments
); 


sub open_enzymefile {
    my ($filename) = @_;

    my $enzymefile = new msparser::ms_enzymefile($filename);

    if (!$enzymefile->isValid) {
        print "Error while opening enzyme file: ";
        print $enzymefile->getLastErrorString() . "\n";
        exit 1;
    }

    return $enzymefile;
}

sub open_modfile {
    my ($filename) = @_;

    # We can use the default masses in this example.
    my $masses = new msparser::ms_masses;
    my $modfile = new msparser::ms_modfile($filename, $masses);

    if (!$modfile->isValid) {
        print "Error while opening mod file: ";
        print $modfile->getLastErrorString . "\n";
        exit 1;
    }

    for ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)') {
        if (not $modfile->getModificationByName($_)) {
            print "Cannot find '$_' in the mod file. Cannot continue.\n";
            exit(1);
        }
    }

    return $modfile;
}

sub fragmentPeptide {
    my ($aa_helper, $peptide, $series, $series_label, $doubleCharged, $mass_max) = @_;

    my $fragments = new msparser::ms_fragmentvector;
    my $err = new msparser::ms_errs();

    $aahelper->calcFragments(
        $peptide,
        $series,
        $doubleCharged ? 1 : 0,
        100.0,
        $mass_max,
        $msparser::MASS_TYPE_MONO,
        $fragments,
        $err
    ); 

    # Check $err here.

    print $series_label, " fragments: \n";
    printFragmentsTable($fragments);

    return $fragments;
}

sub printFragmentsTable  {
    my ($fragments) = @_;

    print "Number of fragments: ", $fragments->getNumberOfFragments(), "\n";

    my $headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s\n";
    my $fmt       = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s\n";

    printf $headerfmt, qw(Col Start End Label Mass NL Name Immon Intern Reg);

    for my $i (0 .. $fragments->getNumberOfFragments - 1) {
        my $frag = $fragments->getFragmentByNumber($i);

        printf $fmt,
            $frag->getColumn(),
            $frag->getStart(),
            $frag->getEnd(),
            $frag->getLabel(),
            $frag->getMass(),
            $frag->getNeutralLoss(),
            $frag->getSeriesName(),
            $frag->isImmonium(),
            $frag->isInternal(),
            $frag->isRegular(),
            ;
    }
    
    print "\n";
}
    
# $vecFixed contains a list of fixed mods applied to the peptide
# $vecVariable contains a list of variable mods applied to the peptide
# $enzyme is the enzyme used in fragmentation
# $mr is the peptide Mr(calc)
# $fragments contains a list of b-ions from a peptide
#
# Use this information to generate an test search that can be run on Mascot.
sub displayMascotTestSearch {
    my ($vecFixed, $vecVariable, $enzyme, $mr, $fragments) = @_;
    
    for my $i (0 .. $vecFixed->getNumberOfModifications-1) {
        printf "MODS=%s\n", $vecFixed->getModificationByNumber($i)->getTitle();
    }

    for my $i (0 .. $vecVariable->getNumberOfModifications-1) {
        printf "IT_MODS=%s\n", $vecVariable->getModificationByNumber($i)->getTitle();
    }

    print "CHARGE=Mr\n";
    print "CLE=", $enzyme->getTitle(), "\n";
    print "INSTRUMENT=MALDI-TOF-TOF\n";

    printf "%.3f ions(", $mr;

    print join(', ', map {
            sprintf("%.3f", $fragments->getFragmentByNumber($_)->getMass)
        } 0 .. $fragments->getNumberOfFragments-1
    );

    print ")\n";
}



=pod 

Running the program as

perl -I../bin tools_aahelper.pl ../config/enzymes ../config/mod_file

will give the following output under Mascot 2.3:


List of peptides
M
MAIFR
AIFR
IDEIR
NMSSEELEEELR
K
LEVELIR
ER
GAVR
AGGAPEKPGR
IR
EIR
R
TIAR
MK
TVQR
ER
VR
K
End of list

Peptide mass calculated using 'calcPeptideMZ' is 1320.686


Peptide has been created successfully: MAIFRIDEIR
b-ion series fragments: 
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    1     1    -1 b(1)        190.05    0.00    b                 1
    2     2    -1 b(2)        261.09    0.00    b                 1
    3     3    -1 b(3)        374.17    0.00    b                 1
    4     4    -1 b(4)        521.24    0.00    b                 1
    5     5    -1 b(5)        677.34    0.00    b                 1
    6     6    -1 b(6)        790.43    0.00    b                 1
    7     7    -1 b(7)        905.45    0.00    b                 1
    8     8    -1 b(8)       1034.50    0.00    b                 1
    9     9    -1 b(9)       1147.58    0.00    b                 1

y-ion series fragments: 
Number of fragments: 9
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)       1132.65    0.00    y                 1
    8     8    -1 y(8)       1061.61    0.00    y                 1
    7     7    -1 y(7)        948.53    0.00    y                 1
    6     6    -1 y(6)        801.46    0.00    y                 1
    5     5    -1 y(5)        645.36    0.00    y                 1
    4     4    -1 y(4)        532.27    0.00    y                 1
    3     3    -1 y(3)        417.25    0.00    y                 1
    2     2    -1 y(2)        288.20    0.00    y                 1
    1     1    -1 y(1)        175.12    0.00    y                 1

y++-ion series fragments: 
Number of fragments: 8
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    9     9    -1 y(9)++      566.83    0.00    y                 1
    8     8    -1 y(8)++      531.31    0.00    y                 1
    7     7    -1 y(7)++      474.77    0.00    y                 1
    6     6    -1 y(6)++      401.23    0.00    y                 1
    5     5    -1 y(5)++      323.18    0.00    y                 1
    4     4    -1 y(4)++      266.64    0.00    y                 1
    3     3    -1 y(3)++      209.13    0.00    y                 1
    2     2    -1 y(2)++      144.61    0.00    y                 1

internal yb-ion series fragments: 
Number of fragments: 22
  Col Start   End Label         Mass      NL Name Immon Intern  Reg
    2     2     3 AI          185.13    0.00   yb            1     
    2     2     4 AIF         332.20    0.00   yb            1     
    2     2     5 AIFR        488.30    0.00   yb            1     
    2     2     6 AIFRI       601.38    0.00   yb            1     
    3     3     4 IF          261.16    0.00   yb            1     
    3     3     5 IFR         417.26    0.00   yb            1     
    3     3     6 IFRI        530.34    0.00   yb            1     
    3     3     7 IFRID       645.37    0.00   yb            1     
    4     4     5 FR          304.18    0.00   yb            1     
    4     4     6 FRI         417.26    0.00   yb            1     
    4     4     7 FRID        532.29    0.00   yb            1     
    4     4     8 FRIDE       661.33    0.00   yb            1     
    5     5     6 RI          270.19    0.00   yb            1     
    5     5     7 RID         385.22    0.00   yb            1     
    5     5     8 RIDE        514.26    0.00   yb            1     
    5     5     9 RIDEI       627.35    0.00   yb            1     
    6     6     7 ID          229.12    0.00   yb            1     
    6     6     8 IDE         358.16    0.00   yb            1     
    6     6     9 IDEI        471.24    0.00   yb            1     
    7     7     8 DE          245.08    0.00   yb            1     
    7     7     9 DEI         358.16    0.00   yb            1     
    8     8     9 EI          243.13    0.00   yb            1     

Paste the following into a Mascot search query window to verify this output:
MODS=Phospho (Y)
IT_MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)

=cut
Copyright © 2016 Matrix Science Ltd.  All Rights Reserved. Generated on Fri Jun 2 2017 01:44:50