Large example / test program that illustrates most functionality.
#!/usr/local/bin/perl
##############################################################################
# file: msparser.pl #
# This file illustrates how to call the Mascot perl module 'msparser' #
# It mirrors the C++ test harness #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved. #
# #
# This program may be used and modified within the licensee's organisation #
# provided that this copyright notice remains intact. Distribution of this #
# program or parts thereof outside the licensee's organisation without the #
# prior written consent of Matrix Science Limited is expressly forbidden. #
##############################################################################
# $Source: parser/examples/test_perl/msparser.pl $ #
# $Author: robertog@matrixscience.com $ #
# $Date: 2024-09-04 10:23:46 +0100 $ #
# $Revision: 526921a73137894bb1eae0b0fc8ccb4bb52ea662 | MSPARSER_REL_3_1_0-2025-07-27-0-gea47708fac $ #
# $NoKeywords:: $ #
##############################################################################
use strict;
##############################################################################
use msparser;
# ---------------------------------------------------------------------------
# Main program.
# Opens the results file named by the first command line argument, runs the
# error-handling demonstration, dumps the search information, the search
# parameters and the input data, and finally prints the summary reports.
# ---------------------------------------------------------------------------
defined $ARGV[0]
    or die "Must specify results filename as parameter";

# File-scoped lexical: it stays visible to the subroutines defined below.
my $resfile = msparser::ms_mascotresfilebase::createResfile($ARGV[0]);

unless ($resfile->isValid) {
    print STDERR "Cannot process file '$ARGV[0]':\n",
                 "Error number: ", $resfile->getLastError(), "\n",
                 "Error string: ", $resfile->getLastErrorString(), "\n";
    exit(1);
}

checkErrorHandler($resfile);
searchInformation($resfile);
searchParameters($resfile);
inputData($resfile);

# Horizontal rule printed above and below every report title.
my $report_rule = "-------------------------------------------------------------\n";

# Flag combination shared by the concise protein and the peptide reports.
my $grouped_with_subsets = $msparser::ms_mascotresults::MSRES_GROUP_PROTEINS
                         | $msparser::ms_mascotresults::MSRES_SHOW_SUBSETS;

print $report_rule, "--- Full Protein summary report ---\n", $report_rule;
show_results(
    $resfile,
    0,                                          # protein summary
    $msparser::ms_mascotresults::MSRES_NOFLAG,
    1.0 / 20.0,                                 # minProbability
    5                                           # Number of hits
);

print $report_rule, "--- Concise Protein summary report ---\n", $report_rule;
show_results(
    $resfile,
    0,                                          # protein summary
    $grouped_with_subsets,
    0,                                          # minProbability
    3                                           # Number of hits
);

if ($resfile->isMSMS()) {
    print $report_rule, "--- Peptide summary report ---\n", $report_rule;
    show_results(
        $resfile,
        1,                                      # peptide summary
        $grouped_with_subsets,
        0,                                      # minProbability
        8,                                      # number of hits
        0,                                      # ignoreIonsScoreBelow
        0                                       # minimum peptide length
    );
} else {
    print "Not an MS-MS results file - cannot show peptide summary report\n";
}
##############################################################################
# show_results
# Builds a peptide or protein summary from a results file and prints every
# protein hit with its peptides, optionally the similar / subset proteins,
# then the unassigned list and (peptide summary only) the popup information.
#
# - parameter 0 is ms_mascotresfilebase
# - parameter 1 is true for peptide summary, false for protein
# - parameter 2 is the flags for display
# - parameter 3 is the minimum protein probability to display
# - parameter 4 is the maximum number of hits to display
# - parameter 5 is the minimum ions score to use (peptide summary only)
# - parameter 6 is the minPepLenInPepSummary to use (peptide summary only)
#
# Returns nothing useful. If the results file reports an error after the
# summary has been created, the error string is written to STDERR and the
# sub returns without printing a report.
##############################################################################
sub show_results {
    my ($file, $usePeptideSummary, $flags, $minProteinProb, $maxHits,
        $minIonsScore, $minPepLenInPepSummary) = @_;

    # Always build the summary from the $file argument, never from a
    # variable of the enclosing file scope, so the sub reports on whatever
    # results file the caller passes in.
    my $results;
    if ($usePeptideSummary) {
        $results = msparser::ms_peptidesummary->new(
            $file, $flags, $minProteinProb, $maxHits, "",
            $minIonsScore, $minPepLenInPepSummary
        );
    } else {
        $results = msparser::ms_proteinsummary->new(
            $file, $flags, $minProteinProb, $maxHits
        );
    }

    # The error state is queried on the same object that is used to print
    # the error string.
    if ($file->getLastError()) {
        print STDERR "Error : ", $file->getLastErrorString(), "\n";
        return;
    }

    my $hit  = 1;
    my $prot = $results->getHit($hit);
    while (defined($prot)) {
        my $accession   = $prot->getAccession();
        my $description = $results->getProteinDescription($accession);
        my $mass        = $results->getProteinMass($accession);

        print "Protein Hit ", $hit, "\n===================\n";
        print "Accession : "   , $accession, "\n";
        print "Description : " , $description , "\n";
        print "Score : "       , $prot->getScore() , "\n";
        print "Mass : "        , $mass , "\n";
        print "Frame : "       , $prot->getFrame() , "\n";
        print "Coverage : "    , $prot->getCoverage() , "\n";
        print "RMS error : "   , $prot->getRMSDeltas($results), "\n";
        print "Peptides : "    , $prot->getNumDisplayPeptides(),"\n";

        # Each protein has a number of peptides that matched - list them:
        my $num_peps = $prot->getNumPeptides();
        for my $i (1 .. $num_peps) {
            my $query = $prot->getPeptideQuery($i);
            my $p     = $prot->getPeptideP($i);

            # -1 marks an entry without a usable query / rank.
            next if $p == -1 or $query == -1;

            # Duplicates within the same query are not listed at all.
            my $dupe = $prot->getPeptideDuplicate($i);
            next if $dupe == $msparser::ms_protein::DUPE_DuplicateSameQuery;

            my $pep = $results->getPeptide($query, $p);
            next unless defined $pep;

            displayPeptideInfo(
                0, $pep, $results,
                $dupe == $msparser::ms_protein::DUPE_Duplicate,
                $prot->getPeptideIsBold($i),
                $prot->getPeptideShowCheckbox($i)
            );
        }

        # Now display the list of all proteins that matched a subset of, or
        # an identical list of, the matching peptides
        if ($flags & $msparser::ms_mascotresults::MSRES_GROUP_PROTEINS) {
            print "Proteins matching the same set of peptides:\n";
            my $i = 1;
            while (my $similar = $results->getNextSimilarProtein($hit, $i)) {
                print $similar->getAccession(),
                      " Total score: ", $similar->getScore();
                print " Peptides matched: ",
                      $similar->getNumDisplayPeptides(), "\n";
                $i++;
            }

            if ($flags & $msparser::ms_mascotresults::MSRES_SHOW_SUBSETS) {
                print "Proteins matching a subset of these peptides:\n";
                my $j = 1;
                # A fresh lexical is used so the outer $prot, which drives
                # the enclosing while loop, is never overwritten here.
                while (my $subset = $results->getNextSubsetProtein($hit, $j)) {
                    print $subset->getAccession(),
                          " Total score: ", $subset->getScore();
                    print " Peptides matched: ",
                          $subset->getNumDisplayPeptides(), "\n";
                    $j++;
                }
            }
        }

        print "\n\n";
        $hit++;
        $prot = $results->getHit($hit);
    }

    $results->createUnassignedList($msparser::ms_mascotresults::SCORE);
    if ($results->getNumberOfUnassigned()) {
        print "Unassigned list\n";
        print "---------------\n";
        for my $u (1 .. $results->getNumberOfUnassigned) {
            my $pep = $results->getUnassigned($u);
            # Same guard as for assigned peptides: displayPeptideInfo calls
            # methods on the peptide and would die on an undefined value.
            next unless defined $pep;
            displayPeptideInfo(0, $pep, $results, 0, 1, 1);
        }
    }

    if ($usePeptideSummary) {
        print "\n";
        # NOTE: the query number shown in the popup example is hard-coded.
        displayYellowPopupInfo($results, 97);
    }
}
##############################################################################
# displayYellowPopupInfo
# Shows the equivalent of the yellow popup box for a given query: one table
# row for each of the ranks 1 .. 10 that has a peptide with a sequence,
# followed by the accessions that matched rank 1.
# - parameter 0 is the results object
# - parameter 1 is the query number
##############################################################################
sub displayYellowPopupInfo {
    my ($results, $q) = @_;

    # The same layout is used for the heading and for the data rows.
    my $row_format = "%5s %5s %9s %7s %7s\n";
    printf $row_format, 'Score', 'Delta', 'Hit', 'Protein', 'Peptide';

    RANK:
    foreach my $rank (1 .. 10) {
        my $peptide = $results->getPeptide($q, $rank);
        defined $peptide or next RANK;

        my $sequence = $peptide->getPeptideStr();
        defined $sequence or next RANK;

        my $hit_label = '';
        my $accession = '';

        my $matches = $results->getProteinsWithThisPepMatch($q, $rank);
        if ($matches) {
            # "<hit>:<accession>" comes first; anything after it means that
            # further proteins matched, which is flagged with a trailing "+".
            my ($first_hit, $first_accession, $remainder)
                = $matches =~ /(\d+):([^ ]*)[ ]*(.*)/;
            ($hit_label, $accession) = ($first_hit, $first_accession);
            $hit_label .= "+" if $remainder;
        }

        printf $row_format,
            $peptide->getIonsScore(), $peptide->getDelta(),
            $hit_label, $accession, $sequence;
    }

    my $top_rank = 1;
    print "Accessions that matched query ", $q, " rank ", $top_rank, ":- ",
          $results->getProteinsWithThisPepMatch($q, $top_rank), "\n";
}
##############################################################################
# displayPeptideInfo
# Prints either a one-line summary or a full, labelled dump of one peptide.
# - parameter 0 showFullDetails?
# - parameter 1 peptide object (as returned by getPeptide / getUnassigned)
# - parameter 2 ms_mascotresults
# - parameter 3 isDuplicate?   (score is shown in brackets when true)
# - parameter 4 isBold?
# - parameter 5 showCheckBox?
# Parameters 3 .. 5 are only used by the one-line summary.
##############################################################################
sub displayPeptideInfo {
    my ($showFullDetails, $p, $r, $isDuplicate, $isBold, $showCheckBox) = @_;
    my $q = $p->getQuery();

    unless ($showFullDetails) {
        # Compact form: all columns on a single line.
        my @columns = (
            ($showCheckBox ? "CB"   : "--"),
            ($isBold       ? "BOLD" : "dim"),
            $q,
            $p->getObserved,
            $p->getRank,
            $p->getPrettyRank,
            $p->getPeptideStr,
            ($isDuplicate ? '(' : ''),
            $p->getIonsScore,
            ($isDuplicate ? ')' : ''),
            $r->getReadableVarMods($q, $p->getRank),
        );
        printf "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s\n", @columns;
        return;
    }

    print "Peptide hit\n";
    if (!$p->getAnyMatch()) {
        print " No match\n";
    } else {
        # Label / value pairs, printed in exactly this order.
        my @details = (
            [ 'Query'       , $q ],
            [ 'Rank'        , $p->getRank ],
            [ 'Matched'     , $p->getAnyMatch ],
            [ 'missedCleave', $p->getMissedCleavages ],
            [ 'mrCalc'      , $p->getMrCalc ],
            [ 'delta'       , $p->getDelta ],
            [ 'observed'    , $p->getObserved ],
            [ 'charge'      , $p->getCharge ],
            [ 'mrExp'       , $p->getMrExperimental ],
            [ 'ionsMatched' , $p->getNumIonsMatched ],
            [ 'peptideStr'  , $p->getPeptideStr ],
            [ 'peaksUsed1'  , $p->getPeaksUsedFromIons1 ],
            [ 'varModsStr'  , $p->getVarModsStr ],
            [ 'readable mod', $r->getReadableVarMods($q, $p->getRank) ],
            [ 'ionsScore'   , $p->getIonsScore ],
            [ 'seriesUsedS' , $p->getSeriesUsedStr ],
            [ 'peaksUsed2'  , $p->getPeaksUsedFromIons2 ],
            [ 'peaksUsed3'  , $p->getPeaksUsedFromIons3 ],
            [ 'idth, hth, p', join(', ',
                $r->getPeptideIdentityThreshold($q, 20),
                $r->getHomologyThreshold($q, 20),
                $r->getProbOfPepBeingRandomMatch($p->getIonsScore, $q)
              ) ],
        );
        foreach my $detail (@details) {
            printf " %-12s: %s\n", @$detail;
        }
        print "\n";
    }
}
##############################################################################
# checkErrorHandler
# - parameter 0 ms_mascotresfilebase
# Demonstrates the error reporting calls: asks for the observed charge of
# query numbers beyond the last query, prints the resulting error(s) and
# clears the error list again before returning.
##############################################################################
sub checkErrorHandler {
    my ($file) = @_;

    print "Testing the error handling... \n",
          "=========================================\n";

    my $query_total = $file->getNumQueries();

    # One request past the end of the file - should fail.
    $file->getObservedCharge($query_total + 40);
    print "Error number: ", $file->getLastError(), "\n",
          "Error string: ", $file->getLastErrorString(), "\n";

    $file->clearAllErrors();
    print "Cleared all errors - should have no errors left: ",
          $file->getNumberOfErrors(), " errors left\n\n";

    # Twenty more requests that should all fail.
    foreach my $offset (1 .. 20) {
        $file->getObservedCharge($query_total + $offset);
    }

    # Now, the best way, print out all errors.
    print "More errors added - there are now ",
          $file->getNumberOfErrors(), " errors\n";
    foreach my $index (1 .. $file->getNumberOfErrors) {
        print "Error number: ", $file->getErrorNumber($index),
              " : ", $file->getErrorString($index), "\n";
    }
    print "\n";

    $file->clearAllErrors();
}
##############################################################################
# searchInformation
# - parameter 0 ms_mascotresfilebase
# Display parameters from the ms_mascotresfilebase object. The functions
# anyPMF, anyMSMS, anySQ should normally be used in preference to isPMF etc
# because some people submit MSMS though the sequence query window etc.
##############################################################################
sub searchInformation {
    my ($file) = @_;

    my $row_format  = "%-20s: %s\n";
    my @day_names   = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    print "Search information from ms_mascotresfilebase", "\n";
    print "========================================", "\n";

    my @leading_rows = (
        [ "Number of queries"  , $file->getNumQueries() ],
        [ "Number of sequences", $file->getNumSeqs() ],
        [ "Sequences after tax", $file->getNumSeqsAfterTax() ],
        [ "Number of residues" , $file->getNumResidues() ],
        [ "Execution time"     , $file->getExecTime() ],
        [ "Date (seconds)"     , $file->getDate() ],
    );
    foreach my $row (@leading_rows) {
        printf $row_format, @$row;
    }

    # Build the equivalent of a 'C' asctime string by hand from the
    # broken-down local time of the search date.
    my ($sec, $min, $hour, $mday, $mon, $year, $wday)
        = localtime($file->getDate);
    my $date_text = sprintf("%s %s %d %02d:%02d:%02d %d",
        $day_names[$wday], $month_names[$mon], $mday,
        $hour, $min, $sec, $year+1900);
    printf $row_format, "Date", $date_text;

    my @trailing_rows = (
        [ "Mascot version"     , $file->getMascotVer() ],
        [ "Fasta version"      , $file->getFastaVer() ],
        [ "Is PMF?"            , $file->isPMF() ],
        [ "Is MSMS?"           , $file->isMSMS() ],
        [ "Is SQ?"             , $file->isSQ() ],
        [ "Is Error tolerant"  , $file->isErrorTolerant() ],
        [ "Any PMF?"           , $file->anyPMF() ],
        [ "Any MSMS?"          , $file->anyMSMS() ],
        [ "Any SQ?"            , $file->anySQ() ],
        [ "Any peptide matches", $file->anyFastaMatches() ],
    );
    foreach my $row (@trailing_rows) {
        printf $row_format, @$row;
    }

    print "\n";
}
##############################################################################
# searchParameters
# - parameter 0 ms_mascotresfilebase
# Display parameters from the ms_searchparams object obtained through the
# params method of the results file.
# The values come from the parameters and the masses sections of the file
##############################################################################
sub searchParameters {
    my ($resfile) = @_;
    my $params = $resfile->params;
    my $fmt    = "%-20s: %s\n";

    print "Search parameters from ms_searchparams" , "\n";
    print "=========================================" , "\n";

    printf $fmt, "License"             , $params->getLICENSE();
    printf $fmt, "Search title"        , $params->getCOM();
    printf $fmt, "SEG mass"            , $params->getSEG();
    printf $fmt, "Peptide tol"         , $params->getTOL();
    printf $fmt, "Peptide tol units"   , $params->getTOLU();
    printf $fmt, "Fragment tol"        , $params->getITOL();
    printf $fmt, "Fragment tol units"  , $params->getITOLU();
    printf $fmt, "Missed cleavages"    , $params->getPFA();
    printf $fmt, "Database"            , $params->getDB();
    printf $fmt, "Static mods"         , $params->getMODS();
    printf $fmt, "Average/monoisotopic", $params->getMASS();
    printf $fmt, "Enzyme"              , $params->getCLE();
    printf $fmt, "Raw data file name"  , $params->getFILENAME();
    printf $fmt, "Input data"          , $params->getQUE();
    printf $fmt, "Type of search"      , $params->getSEARCH();
    printf $fmt, "User name"           , $params->getUSERNAME();
    printf $fmt, "User email"          , $params->getUSEREMAIL();
    printf $fmt, "Charge state"        , $params->getCHARGE();
    printf $fmt, "Repeat search file"  , $params->getINTERMEDIATE();
    printf $fmt, "Num hits to display" , $params->getREPORT();
    printf $fmt, "Show overview"       , $params->getOVERVIEW();
    printf $fmt, "Data file format"    , $params->getFORMAT();
    printf $fmt, "Form version"        , $params->getFORMVER();
    printf $fmt, "Variable mods"       , $params->getIT_MODS();

    # User fields 0 .. 12 are printed as User00 .. User12.
    for my $i (0 .. 12) {
        printf $fmt, sprintf("User%02d", $i), $params->getUSERField($i);
    }

    printf $fmt, "Precursor mass"      , $params->getPRECURSOR();
    printf $fmt, "Taxonomy filter"     , $params->getTAXONOMY();
    printf $fmt, "Type of report"      , $params->getREPTYPE();
    printf $fmt, "Accessions to search", $params->getACCESSION();
    printf $fmt, "Subcluster used"     , $params->getSUBCLUSTER();
    printf $fmt, "ICAT search?"        , $params->getICAT();
    printf $fmt, "Instrument type"     , $params->getINSTRUMENT();
    printf $fmt, "Error tolerant?"     , $params->getERRORTOLERANT();
    printf $fmt, "Rules (ions series)" , $params->getRULES();

    # One line per residue letter A .. Z.
    for my $residue ('A' .. 'Z') {
        printf $fmt, "Residue $residue", $params->getResidueMass($residue);
    }

    printf $fmt, "C terminus mass" , $params->getCTermMass();
    printf $fmt, "N terminus mass" , $params->getNTermMass();
    printf $fmt, "Mass of hydrogen", $params->getHydrogenMass();
    printf $fmt, "Mass of oxygen"  , $params->getOxygenMass();
    printf $fmt, "Mass of carbon"  , $params->getCarbonMass();
    printf $fmt, "Mass of nitrogen", $params->getNitrogenMass();
    printf $fmt, "Mass of electron", $params->getElectronMass();

    # Variable modifications are numbered from 1; the list ends at the first
    # number whose name is empty (false). The name is fetched once per
    # modification, and every printf passes exactly the two values consumed
    # by the format (which already ends in a newline).
    my $mod_number = 1;
    while (my $mod_name = $params->getVarModsName($mod_number)) {
        printf $fmt, "Variable mod name"   , $mod_name;
        printf $fmt, "Variable mod delta"  , $params->getVarModsDelta($mod_number);
        printf $fmt, "Variable mod neutral", $params->getVarModsNeutralLoss($mod_number);
        $mod_number++;
    }

    print "\n\n";
}
##############################################################################
# inputData
# - parameter 0 ms_mascotresfilebase
# Display the input data of a query through the ms_inputquery get functions,
# followed by the mass / intensity pairs of its first ions series.
##############################################################################
sub inputData {
    my ($resfile) = @_;
    my $row_format = " %-16s: %s\n";

    # Only query 1 is shown. Replace 1 .. 1 with
    # 1 .. $resfile->getNumQueries() to get all input data.
    foreach my $query_number (1 .. 1) {
        print "Input data for query number ", $query_number , "\n";
        print "=========================================", "\n";

        my $query = msparser::ms_inputquery->new($resfile, $query_number);

        # Label / value pairs, printed in exactly this order.
        my @rows = (
            [ "title"       , $query->getStringTitle(1) ],
            [ "mass_min"    , $query->getMassMin() ],
            [ "mass_max"    , $query->getMassMax() ],
            [ "int_min"     , $query->getIntMin() ],
            [ "int_max"     , $query->getIntMax() ],
            [ "num_vals"    , $query->getNumVals() ],
            [ "num_used1"   , $query->getNumUsed() ],
            [ "ions1"       , $query->getStringIons1() ],
            [ "ions2"       , $query->getStringIons2() ],
            [ "ions3"       , $query->getStringIons3() ],
            [ "peptol"      , $query->getPepTol() ],
            [ "peptol units", $query->getPepTolUnits() ],
            [ "peptol str"  , $query->getPepTolString() ],
            [ "repeat srch" , $resfile->getRepeatSearchString($query_number) ],
        );
        foreach my $row (@rows) {
            printf $row_format, @$row;
        }

        # One "mass, intensity" line per peak of ions series 1.
        my $peak_total = $query->getNumberOfPeaks(1);
        foreach my $peak (1 .. $peak_total) {
            print $query->getPeakMass(1, $peak), ", ",
                  $query->getPeakIntensity(1, $peak), "\n";
        }
    }

    print "\n\n";
}