Matrix Science header
Public Types | Public Member Functions | Static Public Member Functions | Protected Member Functions

ms_peptidesummary Class Reference
[Mascot results file module]

Use this class to get peptide summary results. More...

#include <ms_mascotrespeptidesum.hpp>

Inheritance diagram for ms_peptidesummary:
Inheritance graph
[legend]
Collaboration diagram for ms_peptidesummary:
Collaboration graph
[legend]

List of all members.

Public Types

enum  BUGFIX_NUM {
  BUGFIX_10780 = 10780,
  BUGFIX_10995 = 10995,
  BUGFIX_11002 = 11002,
  BUGFIX_11018 = 11018,
  BUGFIX_11235 = 11235,
  BUGFIX_11254 = 11254,
  BUGFIX_11344 = 11344,
  BUGFIX_11411 = 11411,
  BUGFIX_11425 = 11425,
  BUGFIX_11438 = 11438,
  BUGFIX_11483 = 11483,
  BUGFIX_11499 = 11499,
  BUGFIX_11856 = 11856,
  BUGFIX_12123 = 12123,
  BUGFIX_12317 = 12317,
  BUGFIX_11481 = 11481,
  BUGFIX_12538 = 12538,
  BUGFIX_12729 = 12729,
  BUGFIX_12740 = 12740
}
 

bugNum values for isDataCached().

More...
enum  CACHE_STATUS {
  CACHE_MISSING_RESFILE = 0x10000,
  RESFILE_CACHE_FILE_NOT_PRESENT = 0x00001,
  RESFILE_CACHE_BEING_CREATED = 0x00002,
  RESFILE_CACHE_DISABLED_IN_OPTIONS = 0x00004,
  RESFILE_CACHE_VALID = 0x00008,
  RESFILE_CACHE_CAN_CREATE = 0x00010,
  PEPSUMMARY_CACHE_FILE_NOT_PRESENT = 0x00100,
  PEPSUMMARY_CACHE_BEING_CREATED = 0x00200,
  PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS = 0x00400,
  PEPSUMMARY_CACHE_VALID = 0x00800,
  PEPSUMMARY_CACHE_CAN_CREATE = 0x01000,
  PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE = 0x02000,
  PEPSUMMARY_CACHE_NOT_FOR_PMF = 0x04000
}
 

Returned by the willCreateCache function.

More...
enum  DB_MATCH_TYPE {
  DM_FASTA = 0,
  DM_SPECTRAL_LIBRARY = 1,
  DM_BOTH = 2
}
 

Type of matches counted by getNumHitsAboveIdentity() and friends, and type of score for getIonsScoreHistogram().

More...
enum  DECOY_STATS_COUNT_TYPE {
  DS_COUNT_PSM = 0,
  DS_COUNT_SEQUENCE = 1
}
 

Type of object counted by getNumHitsAboveIdentity() and friends.

More...
enum  FIND_COMPARE_FLAGS {
  FC_COMPLETESTR = 0x00000001,
  FC_SUBSTR = 0x00000002,
  FC_STARTSTR = 0x00000003,
  FC_STRTOK = 0x00000004,
  FC_MASK_STR_PART = 0x0000000F,
  FC_CASE_INSENSITIVE = 0x00000000,
  FC_CASE_SENSITIVE = 0x00000010,
  FC_MASK_CASE = 0x000000F0,
  FC_FORWARD = 0x00000000,
  FC_REVERSE = 0x00000100,
  FC_MASK_DIRECTION = 0x00000F00,
  FC_RESTRICT_TO_HIT = 0x00001000,
  FC_LOOP_INTO_UNASSIGNED = 0x00002000,
  FC_LOOP_FROM_UNASSIGNED = 0x00004000,
  FC_UNASSIGNED_MASK = 0x00006000,
  FC_SEARCH_ALL_RANKS = 0x00008000,
  FC_ALL_PEPTIDES = 0x00000000,
  FC_ALL_PEPTITDES = 0x00000000,
  FC_SIGNIFICANT_PEPTIDES = 0x00010000,
  FC_SEARCH_IGNORED_RANKS = 0x00020000,
  FC_SCORING_MASK = 0x000F0000,
  FC_PROTEIN_IGN_SAMESETS = 0x00100000,
  FC_PROTEIN_IGN_SUBSETS = 0x00200000,
  FC_PROTEIN_IGN_FAMILY = 0x00400000,
  FC_PROTEIN_IGN_MASK = 0x00F00000,
  FC_PROTEIN_INC_ALT_ACC = 0x01000000
}
 

Flags to specify how comparisons are performed in the find functions.

More...
enum  FIND_FLAGS {
  FT_PEPTIDE_EXP_MZ = 0x00000001,
  FT_PEPTIDE_EXP_MR = 0x00000002,
  FT_PEPTIDE_CALC_MZ = 0x00000004,
  FT_PEPTIDE_CALC_MR = 0x00000008,
  FT_PEPTIDE_STRING = 0x00000010,
  FT_PEPTIDE_QUERY = 0x00000020,
  FT_PEPTIDE_VARMOD = 0x00000040,
  FT_PEPTIDE_FIXMOD = 0x00000080,
  FT_PEPTIDE_ETMOD = 0x00000100,
  FT_PEPTIDE_SLMOD = 0x00000200,
  FT_PEPTIDE_VARMOD_BYNAME = 0x00000400,
  FT_PEPTIDE_FIND_MASK = 0x00000FFF,
  FT_PROTEIN_ACCESSION = 0x00001000,
  FT_PROTEIN_DESCRIPTION = 0x00002000
}
 

Flags for findPeptides() and findProteins().

More...
enum  FLAGS {
  MSRES_NOFLAG = 0x00000000,
  MSRES_GROUP_PROTEINS = 0x00000001,
  MSRES_SHOW_SUBSETS = 0x00000002,
  MSRES_SUBSETS_DIFF_PROT = 0x00000004,
  MSRES_REQUIRE_BOLD_RED = 0x00000008,
  MSRES_SHOW_ALL_FROM_ERR_TOL = 0x00000010,
  MSRES_IGNORE_PMF_MIXTURE = 0x00000020,
  MSRES_MUDPIT_PROTEIN_SCORE = 0x00000040,
  MSRES_DECOY = 0x00000080,
  MSRES_INTEGRATED_ERR_TOL = 0x00000100,
  MSRES_ERR_TOL = 0x00000200,
  MSRES_MAXHITS_OVERRIDES_MINPROB = 0x00000400,
  MSRES_CLUSTER_PROTEINS = 0x00000800,
  MSRES_DUPE_INCL_IN_SCORE_NONE = 0x00000000,
  MSRES_DUPE_INCL_IN_SCORE_A = 0x00002000,
  MSRES_DUPE_INCL_IN_SCORE_B = 0x00004000,
  MSRES_DUPE_INCL_IN_SCORE_C = 0x00008000,
  MSRES_DUPE_INCL_IN_SCORE_D = 0x00010000,
  MSRES_DUPE_INCL_IN_SCORE_E = 0x00020000,
  MSRES_DUPE_INCL_IN_SCORE_F = 0x00040000,
  MSRES_DUPE_INCL_IN_SCORE_G = 0x00080000,
  MSRES_DUPE_INCL_IN_SCORE_H = 0x00100000,
  MSRES_DUPE_INCL_IN_SCORE_I = 0x00200000,
  MSRES_DUPE_REMOVE_NONE = 0x00400000,
  MSRES_DUPE_REMOVE_A = 0x00800000,
  MSRES_DUPE_REMOVE_B = 0x01000000,
  MSRES_DUPE_REMOVE_C = 0x02000000,
  MSRES_DUPE_REMOVE_D = 0x04000000,
  MSRES_DUPE_REMOVE_E = 0x08000000,
  MSRES_DUPE_REMOVE_F = 0x10000000,
  MSRES_DUPE_REMOVE_G = 0x20000000,
  MSRES_DUPE_REMOVE_H = 0x40000000,
  MSRES_DUPE_REMOVE_I = 0x80000000,
  MSRES_DUPE_DEFAULT = 0x04800000
}
 

Flags for the type of results.

More...
enum  HOMOLOGY_THRESHOLD_SOURCE {
  HOMTHR_FASTA = -1,
  HOMTHR_SL = -2
}
 

Flags for getHomologyThreshold()

More...
enum  IONS_HISTOGRAM {
  IH_INCLUDE_TOP_MATCHES = 0x0000,
  IH_INCLUDE_TOP_10_MATCHES = 0x0001
}
 

Flags for getIonsScoreHistogram().

More...
enum  MSPEPSUM {
  MSPEPSUM_NONE = 0x0000,
  MSPEPSUM_PERCOLATOR = 0x0001,
  MSPEPSUM_USE_CACHE = 0x0002,
  MSPEPSUM_SINGLE_HIT_DBIDX = 0x0004,
  MSPEPSUM_USE_HOMOLOGY_THRESH = 0x0008,
  MSPEPSUM_NO_PROTEIN_GROUPING = 0x0010,
  MSPEPSUM_DISCARD_RELOADABLE = 0x0020,
  MSPEPSUM_DEFERRED_CREATE = 0x0040,
  MSPEPSUM_CACHE_IGNORE_DATE_CHANGE = 0x0080,
  MSPEPSUM_REMOVE_CHIMERIC_DUPES = 0x0100,
  MSPEPSUM_SL_INTEGRATED = 0x0200,
  MSPEPSUM_SL_ONLY = 0x0400
}
 

flags2 for ms_peptidesummary introduced in Mascot Parser 2.3.

More...
enum  QL_FLAG {
  QL_ALL = 0x0000,
  QL_UNASSIGNED = 0x0001,
  QL_BELOW_IDENTITY = 0x0002,
  QL_BELOW_HOMOLOGY = 0x0003,
  QL_IGNORE_IONS_SCORE_BELOW = 0x0004,
  QL_LAST = 0x0004
}
 

Flags for getQueryList().

More...
enum  QUANT_COMPONENT_STATUS {
  QCS_OK,
  QCS_OK_NO_MATCH,
  QCS_ERROR_NO_METHOD,
  QCS_ERROR_NO_COMPONENTS,
  QCS_ERROR_BAD_COMPONENT_NAME,
  QCS_OK_MULTIPLE_MATCHES
}
 

Return codes for getQuantitationComponentForPeptide().

More...
enum  sortBy {
  QUERY,
  SCORE,
  INTENSITY
}
 

Flags for createUnassignedList().

More...
enum  THRESHOLD_TYPE {
  TT_HOMOLOGY = 0x0000,
  TT_IDENTITY = 0x0001,
  TT_PEPSUM_DEFAULT = 0x0002
}
 

Flags for getPeptideThreshold()

More...
enum  TREE_CLUSTER_METHOD {
  TCM_PAIRWISE_SINGLE = 0x0001,
  TCM_PAIRWISE_MAXIMUM = 0x0002,
  TCM_PAIRWISE_AVERAGE = 0x0003
}
 

Flags for getTreeClusterNodes().

More...
enum  UNIQUE_PEP_RULES {
  UPR_WITHIN_FAMILY = 0x0001,
  UPR_WITHIN_FAMILY_MEMBER = 0x0002,
  UPR_IGNORE_SUBSET_PROTEINS = 0x0004,
  UPR_DEFAULT = (UPR_WITHIN_FAMILY_MEMBER + UPR_IGNORE_SUBSET_PROTEINS)
}
 

Flags for isPeptideUnique().

More...

Public Member Functions

 ms_peptidesummary (ms_mascotresfile &resfile, const unsigned int flags=MSRES_GROUP_PROTEINS, double minProbability=0.0, int maxHits=50, const char *unigeneIndexFile=0, double ignoreIonsScoreBelow=0.0, int minPepLenInPepSummary=0, const char *singleHit=0, const unsigned int flags2=MSPEPSUM_NONE)
 Call this constructor once to create peptide summary results.
 ms_peptidesummary (ms_mascotresfile &resfile, const ms_distiller_data &distillerData, int searchIndex)
 Call this constructor once to create peptide summary results.
 ms_peptidesummary (ms_mascotresfile &resfile, const ms_mascotresults_params &parameters)
 Call this constructor once to create peptide summary results.
 ms_peptidesummary (ms_mascotresfile &resfile, const ms_datfile &datfile, const ms_distiller_data_search &search)
 Call this constructor once to create peptide summary results.
virtual bool anyEmPAI () const
virtual bool anyNumDiscoveredMods () const
 Return true if modification discovery statistics are available.
void cancelCreateSummary (bool newValue=true)
 Cancel the call to createSummary()
virtual bool createSummary ()
 Create the summary using a separate call after the ms_peptidesummary object has been created.
bool createUnassignedList (sortBy s=QUERY)
 This must be called first in order to have a list of unassigned peptides.
bool dumpCDB (const std::string dumpFileName)
virtual int findPeptides (const int startHit, const std::string &str, FIND_FLAGS item, FIND_COMPARE_FLAGS compareFlags, std::vector< int > &q, std::vector< int > &p) const
 Find the next hit that contains peptides with the specified attribute.
virtual int findProteins (const int startHit, const std::string &str, const int dbIdx, FIND_FLAGS item, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const
 Find the next hit that contains proteins with the specified attributes.
virtual int findProteinsByAccession (const int startHit, const std::string &str, const int dbIdx, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const
 Find the next hit that contains proteins with the specified accession.
virtual int findProteinsByDescription (const int startHit, const std::string &str, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const
 Find the next hit that contains proteins with the specified description.
virtual void freeHit (const int hit)
 Frees any memory associated with the passed hit number.
virtual int getAllFamilyMembersWithThisPepMatch (const int hit, const int q, const int p, std::vector< int > &db, std::vector< std::string > &acc, std::vector< int > &dupe_status) const
 Return a list of (top level) family proteins that have a match to the specified q and p.
virtual std::vector< std::string > getAllProteinsWithThisPepMatch (const int q, const int p, std::vector< int > &start, std::vector< int > &end, std::vector< std::string > &pre, std::vector< std::string > &post, std::vector< int > &frame, std::vector< int > &multiplicity, std::vector< int > &db) const
 Returns a complete list of all the accessions that contained the peptide matched by this result.
virtual int getAvePeptideIdentityThreshold (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
 Return the average threshold value for all MS-MS data sets.
std::string getCacheFileName () const
 Returns the filename of the cache file.
virtual bool getComponentIntensity (const int q, const int p, const std::string &componentName, double &value, double &rawValue) const
 Returns the component intensity for reporter or multiplex methods.
virtual const ms_protein * getComponentProtein (const char *accession, const int dbIdx=1) const
 Return a pointer to the protein entry given an accession.
virtual std::string getComponentString (const int q, const int p) const
 Return q1_p2_comp string value.
bool getCreateSummaryProgress (int *cspTotalPercentComplete, unsigned int *cspCurrTask, int *cspCurrTaskPercentageComplete, std::string *cspAccession, int *cspHit, int *cspQuery, std::string *cspKeepAliveText) const
 Return progress for the createSummary() call.
virtual std::vector< double > getDiscoveredErrTolModDeltas (std::vector< std::string > *vecDeltaStrings=NULL) const
 Return the deltas of all error tolerant modifications discovered in this search.
virtual std::vector< std::string > getDiscoveredErrTolModNames () const
 Return the names of all error tolerant modifications discovered in this search.
virtual std::vector< std::string > getDiscoveredLocalModNames () const
 Return the names of all query-level modifications discovered in this search.
virtual double getErrTolModDelta (const int q, const int p, std::string *modString=NULL, std::string *deltaAsString=NULL) const
 Return the error tolerant mod delta from h1_q2_et_mods or q1_p1_et_mods.
virtual std::vector< double > getErrTolModMasterNeutralLoss (const int q, const int p) const
 Return the error tolerant mod additional primary neutral losses from h1_q2_et_mods_master or q1_p1_et_mods_master.
virtual std::string getErrTolModMasterString (const int q, const int p) const
 Return the complete error tolerant mod master neutral loss string from q1_p1_et_mods_master.
virtual std::string getErrTolModName (const int q, const int p, std::string *modString=NULL) const
 Return the error tolerant mod name from h1_q2_et_mods or q1_p1_et_mods.
virtual double getErrTolModNeutralLoss (const int q, const int p) const
 Return the error tolerant mod neutral loss from h1_q2_et_mods or q1_p1_et_mods.
virtual std::vector< double > getErrTolModPepNeutralLoss (const int q, const int p) const
 Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_pep or q1_p1_et_mods_pep.
virtual std::string getErrTolModPepString (const int q, const int p) const
 Return the complete error tolerant mod peptide neutral loss string from q1_p1_et_mods_pep.
virtual std::vector< double > getErrTolModReqPepNeutralLoss (const int q, const int p) const
 Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_reqpep or q1_p1_et_mods_reqpep.
virtual std::string getErrTolModReqPepString (const int q, const int p) const
 Return the complete error tolerant mod required peptide neutral loss string from q1_p1_et_mods_reqpep.
virtual std::vector< double > getErrTolModSlaveNeutralLoss (const int q, const int p) const
 Return the error tolerant mod slave neutral losses from h1_q2_et_mods_slave or q1_p1_et_mods_slave.
virtual std::string getErrTolModSlaveString (const int q, const int p) const
 Return the complete error tolerant mod slave neutral loss string from q1_p1_et_mods_slave.
virtual std::string getErrTolModString (const int q, const int p) const
 Return the complete error tolerant mod string from h1_q2_et_mods or q1_p1_et_mods.
unsigned int getFlags () const
 Returns the flags value passed to the constructor.
unsigned int getFlags2 () const
 Return the flags2 value passed to the ms_peptidesummary constructor.
virtual ms_protein * getHit (const int hit, const int memberNumber=0) const
 Return the ms_protein hit - returns null if hit > number of hits.
virtual int getHomologyThreshold (const int query, double OneInXprobRnd, const int rank=1) const
 Returns the 'homology' threshold.
virtual int getHomologyThresholdForHistogram (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
 Returns the value for the 'yellow section' in the histogram.
virtual double getIonsScore (const int q, const int p, const bool decoy) const
 Returns an ions score quickly without having to load an ms_peptide object.
virtual std::vector< int > getIonsScoreHistogram (IONS_HISTOGRAM flags=IH_INCLUDE_TOP_MATCHES, DB_MATCH_TYPE dbType=DM_FASTA) const
 Returns a list of counts for binned ions scores.
void getLibraryEntryId (const int q, const int p, std::vector< int > &dbIdx, std::vector< int > &offset, std::vector< std::string > &checksum, std::vector< std::string > &mods) const
 Return the database number, MSP file offset, checksum and modifications of the matched spectral library entry.
virtual std::string getLibraryModString (const int q, const int p) const
 Return the modification string of the spectral library match from q1_p1_SLmod.
virtual int getMaxPeptideIdentityThreshold (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
 Return the max threshold value for all MS-MS data sets.
virtual int getMaxRankValue () const
 Returns the maximum 'rank' or 'hit' or 'p' value.
int getMinPepLenInPepSummary () const
 Peptides shorter than this are ignored when putting proteins into groups.
virtual ms_protein * getNextFamilyProtein (const int masterHit, const int id) const
 Find the next protein in the family masterHit.
virtual ms_protein * getNextSimilarProtein (const int masterHit, const int id) const
 Return the next protein that contains all the peptides in the 'master' protein.
virtual ms_protein * getNextSimilarProteinOf (const char *masterAccession, const int masterDB, const int id) const
 Return the next protein that contains all the peptides in the 'master' protein.
virtual ms_protein * getNextSubsetProtein (const int masterHit, const int id, const bool searchWholeFamily=true) const
 Return the next protein that contains some of the peptides in the 'master' protein.
virtual ms_protein * getNextSubsetProteinOf (const char *masterAccession, const int masterDB, const int id) const
 Return the next protein that contains some of the peptides in the 'master' protein.
std::vector< int > getNumberOfAccessionDBTypes (int q, int p) const
 Return a count of protein accessions by database type of all the accessions that contained the peptide match.
std::vector< int > getNumberOfFamilyAccessionDBTypes (int hit, int q, int p, bool includeSamesets=true) const
 Return a count of protein accessions by database type of all the family members that contained the peptide match.
virtual int getNumberOfFamilyMembers () const
 Return the total number of family members.
virtual int getNumberOfHits () const
 Returns the number of hits in the results.
int getNumberOfUnassigned () const
 Return the number of peptides in the unassigned list.
virtual long getNumDecoyHitsAboveHomology (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
 Return the number of hits from the decoy search with a score at or above the homology threshold.
virtual long getNumDecoyHitsAboveIdentity (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
 Return the number of hits from the decoy search with a score at or above the identity threshold.
virtual std::vector< int > getNumDiscoveredErrTolMods (const std::string modName, std::vector< std::string > &positions, std::vector< std::string > &sites) const
 Return the number of instances the error tolerant modification was discovered in this search.
virtual std::vector< int > getNumDiscoveredFixedMods (const int num, std::vector< std::string > &positions, std::vector< std::string > &sites) const
 Return the number of instances the fixed modification was discovered in this search.
std::vector< int > getNumDiscoveredLibraryMods (const int modId, std::vector< std::string > &positions, std::vector< std::string > &sites) const
 Return the number of instances the library modification was discovered in this search.
virtual std::vector< int > getNumDiscoveredLocalMods (const std::string modName, std::vector< std::string > &positions, std::vector< std::string > &sites) const
 Return the number of instances the query-level modification was discovered in this search.
virtual int getNumDiscoveredNonSpecCleavage () const
 Return the number of times non-specific cleavage occurred in an error tolerant search.
virtual std::vector< int > getNumDiscoveredVariableMods (const int num, std::vector< std::string > &positions, std::vector< std::string > &sites) const
 Return the number of instances the variable modification was discovered in this search.
virtual long getNumHitsAboveHomology (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
 Return the number of hits with a score at or above the homology threshold.
virtual long getNumHitsAboveIdentity (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
 Return the number of hits with a score at or above the identity threshold.
virtual std::vector< int > getPepsWithSameScore (const int q, const int p) const
 Returns a list of 'p' values for peptides with the same score.
virtual bool getPeptide (const int q, const int p, ms_peptide *&pep) const
 Return a peptide object for the specified query / rank.
virtual ms_peptide getPeptide (const int q, const int p) const
 Return a peptide object for the specified query / rank.
std::vector< int > getPeptideAmbiguityRanks (const int q, const int p)
 Return a vector of ranks that have the same ambiguity group.
virtual double getPeptideExpectationValue (const double score, const int query, const int rank=0) const
 Returns the expectation value for the given peptide score and query.
virtual int getPeptideIdentityThreshold (const int query, double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
 Return the threshold value for this ms-ms data being a random match.
double getPeptideThreshold (const int query, double OneInXprobRnd, const int rank=1, const THRESHOLD_TYPE thresholdType=TT_PEPSUM_DEFAULT) const
 Return either the identity or the homology threshold.
virtual double getProbabilityThreshold () const
 Return the minProbability value passed to the ms_mascotresults::ms_mascotresults constructor.
virtual int getProbFromScore (const double score) const
 Returns a probability value given a score.
virtual double getProbOfPepBeingRandomMatch (const double score, const int query) const
virtual double getProbOfProteinBeingRandomMatch (const double score) const
virtual const ms_protein * getProtein (const char *accession, const int dbIdx=1) const
 Return a pointer to the protein entry given an accession.
std::string getProteinDescription (const char *accession, const int dbIdx=1) const
 Return protein description if available.
virtual double getProteinEmPAI (const char *accession, const int dbIdx=1, const int length=-1) const
 Return protein emPAI if available.
virtual double getProteinExpectationValue (const double score) const
 Returns the expectation value for the given protein score.
double getProteinMass (const char *accession, const int dbIdx=1) const
 Return protein mass if available.
virtual int getProteinScoreCutoff (double OneInXprobRnd) const
 Return the 'protein' score value for cutting off results. Different for peptide and protein summary.
virtual double getProteinScoreForHistogram (const int num) const
 Returns scores for the top 50 proteins, even if there are fewer in the peptidesummary or proteinsummary.
virtual std::string getProteinsWithThisPepMatch (const int q, const int p, const bool quotes=false)
 Return a partial list of proteins that matched the same peptide.
void getProteinTaxonomyIDs (const char *accession, const int dbIdx, std::vector< int > &gpt_ids, std::vector< std::string > &gpt_accessions) const
 Return the taxonomy ID(s), if any, from the results file.
virtual int getProteinThreshold (double OneInXprobRnd) const
 Return a threshold value for the protein summary report.
virtual int getQmatch (const int query) const
 Return the number of peptides with masses that matched this query.
virtual int getQmatch (const int query, const ms_mascotresfile::section sec) const
 Return the number of peptides with masses that matched this query.
virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide (const matrix_science::ms_peptide &peptide, matrix_science::ms_quant_component &component, const matrix_science::ms_quant_method *method=NULL) const
 Get the component in the quantitation method that matches the peptide.
const
matrix_science::ms_quant_method *
getQuantMethod () const
 Returns the quantitation method from the results file.
std::string getQueryList (QL_FLAG flag, bool outputListOfQueries=true)
 Returns a list of query numbers that can be used for a repeat search.
virtual std::string getReadableVarMods (const int q, const int p, const int numDecimalPlaces=2) const
 Return a 'human readable' string with the variable, summed and error tolerant mods.
const ms_mascotresfile & getResfile () const
 Return a reference to the resfile.
ms_mascotresults_params getResultsParameters ()
 Returns an ms_mascotresults_params object containing the parameter and flag settings used to generate the report.
double getSequenceMass (const char *seq) const
 Return the mass of a sequence (protein or peptide).
int getSrcRank (int q, int p) const
 Returns the 'source' rank for a given peptide match.
ms_mascotresfile::section getSrcSection (int q, int p) const
 Returns the 'source' section for a given peptide match.
virtual int getTagDeltaRangeEnd (const int q, const int p) const
 Return the second number from q1_p2_drange=0,256.
virtual int getTagDeltaRangeStart (const int q, const int p) const
 Return the first number from q1_p2_drange=0,256.
virtual int getTagEnd (const int q, const int p, const int tagNumber) const
 Return the end position for the tag-match from h1_q2_tag or q1_p1_tag.
virtual int getTagSeries (const int q, const int p, const int tagNumber) const
 Return the series ID for the tag-match from h1_q2_tag or q1_p1_tag.
virtual int getTagStart (const int q, const int p, const int tagNumber) const
 Return the start position for the tag-match from h1_q2_tag or q1_p1_tag.
virtual std::string getTagString (const int q, const int p) const
 Return the complete tag string from q1_p1_tag.
virtual std::string getTerminalResiduesString (const int q, const int p) const
 Return the complete terminal residue string from q1_p1_terms.
bool getThresholdForFDRAboveHomology (double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
 Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the homology threshold.
bool getThresholdForFDRAboveHomology (double targetFDR, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
 Given a target FDR, return the probability threshold that gives the desired FDR using the homology threshold.
bool getThresholdForFDRAboveIdentity (double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
 Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the identity threshold.
bool getThresholdForFDRAboveIdentity (double targetFDR, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
 Given a target FDR, return the probability threshold that gives the desired FDR using the identity threshold.
double getToleranceInDalton (bool &needMass, const double *const pMass=NULL) const
 Returns the tolerance in dalton, and whether a mass is needed if the unit is in % or ppm.
virtual bool getTreeClusterNodes (const int hit, std::vector< int > &left, std::vector< int > &right, std::vector< double > &distance, TREE_CLUSTER_METHOD tcm=TCM_PAIRWISE_MAXIMUM, double ***reserved1=0, unsigned int *reserved2=0) const
 Return distances and structure suitable for a dendrogram plot.
ms_peptide getUnassigned (const int num) const
 Need to call createUnassignedList() before calling this.
bool getUnassignedIsBold (const int num) const
 Returns true if the item indexed by num in the unassigned list should be bold.
bool getUnassignedShowCheckbox (const int num) const
 Returns true if the item indexed by num in the unassigned list should have a check box next to it.
bool hasQuantMethod () const
 Returns true if the results file has a quantitation method.
bool isDataCached (BUGFIX_NUM bugNum) const
 Returns true if the cache file contains data relevant to the given bug number.
bool isEmPAIallowed () const
 Return true if emPAI could be calculated using data in this results file.
bool isNA () const
 Returns TRUE for a search against a nucleic acid database.
bool isPeptideComponentMatch (const matrix_science::ms_quant_component &component, const matrix_science::ms_peptide &peptide) const
 Tests whether the peptide is a match for all the modifications of the quantitation component.
bool isPeptideModificationMatch (const matrix_science::ms_quant_modgroup &group, const matrix_science::ms_peptide &peptide) const
 Tests whether the peptide is a match for the quantitation modification group.
virtual bool isPeptideUnique (const int q, const int p, const UNIQUE_PEP_RULES rules=UPR_DEFAULT) const
 Returns true if this peptide match is unique to one protein or one protein family.
ms_mascotresfile & mascotresfile () const
 Return the attached results file.
virtual void setSubsetsThreshold (const double scoreFraction)
 Specifies which subset proteins should be reported.

Static Public Member Functions

static std::string getCacheFilename (ms_mascotresfile &resfile, const unsigned int flags, double minProbability=0.0, int maxHitsToReport=50, const char *unigeneIndexFile=0, double ignoreIonsScoreBelow=0.0, int minPepLenInPepSummary=0, const char *singleHit=0, const unsigned int flags2=MSPEPSUM_NONE)
 Call this to retrieve the name of the file that the peptide summary will use for its cache.
static std::string getCacheFilename (ms_mascotresfile &resfile, const ms_distiller_data &distillerData, int searchIndex)
 Call this to retrieve the name of the file that the peptide summary will use for its cache.
static std::string getCacheFilename (ms_mascotresfile &resfile, const ms_datfile &datfile, const ms_distiller_data_search &search)
 Call this to retrieve the name of the file that the peptide summary will use for its cache.
static double getMinProbabilityForSLScore (double score)
 Return the minProbability value that sets library score threshold to the given value in SL-only mode.
static double getSLThresholdFromMinProbability (double minProbability)
 Return the library score threshold in SL-only mode corresponding to the given value of minProbability.
static bool willCreateCache (const char *resultsFileName, const ms_mascotoptions &opts, const char *unigeneIndexFile, const char *singleHit, const char *applicationName, std::string &resfileCacheFileName, std::string &peptideSummaryCacheFileName, unsigned int &cacheStatus)
 Returns true if either cache file will be created when the ms_mascotresfile and ms_peptidesummary objects are created.
static bool willCreateCache (ms_mascotresfile &resfile, const unsigned int flags=MSRES_GROUP_PROTEINS, double minProbability=0.0, int maxHits=50, const char *unigeneIndexFile=0, double ignoreIonsScoreBelow=0.0, int minPepLenInPepSummary=0, const char *singleHit=0, const unsigned int flags2=MSPEPSUM_NONE)
 Returns true if a cache file will be created when the ms_peptidesummary constructor is called.

Protected Member Functions

double getIonsScoreCorrected (const double ionsScore, const long multiplicity) const
virtual double getPepIdentThreshProtected (const int query, double OneInXprobRnd, ms_mascotresfile::section sec, DB_MATCH_TYPE dbType, double *pQmatch=0) const
 Return the threshold value for this ms-ms data being a random match.

Detailed Description

Use this class to get peptide summary results.

The following functions provide threshold values:

This class inherits from ms_mascotresults, and all the class functions except for getPeptide() are documented in ms_mascotresults.

Examples:

peptide_list.cpp, repeat_search.cpp, and resfile_summary.cpp.


Member Enumeration Documentation

enum BUGFIX_NUM

bugNum values for isDataCached().

If caching is in use and the cache file has been created with a previous version of Parser, then the cache file may not contain data for later improvements. While this does not affect functionality, it may impact performance. For example, the addition of getProteinEmPAI() (bug 11235) causes the number of observed peptides to be saved in new cache files. If the numbers are missing from the cache file, they are computed on the fly, which may be slow with very large cache files.

See Using enumerated values in Perl, Java, Python and C# and ms_peptidesummary::isDataCached(), and also Caching Mascot Results.

Enumerator:
BUGFIX_10780 

The definition of peptide uniqueness changed in Parser 2.4.0. If isDataCached() returns false, isPeptideUnique() may be slower than expected when using the default setting UPR_DEFAULT.

BUGFIX_10995 

Since Parser 2.4.1, score thresholds never drop below 13. If isDataCached() returns false, score thresholds below 13 may be observed.

BUGFIX_11002 

Parser 2.4.1 filters out proteins whose longest significant peptide match is less than MinPepLenInPepSummary. If isDataCached() returns false, families may contain proteins with significant peptide matches shorter than MinPepLenInPepSummary.

BUGFIX_11018 

The scores of rank 1 matches have been saved in the cache file since Parser 2.4.01. If isDataCached() returns false, ms_mascotresults::getThresholdForFDRAboveIdentity() and ms_mascotresults::getThresholdForFDRAboveHomology() may be slower than expected.

BUGFIX_11235 

emPAI calculation was moved to Parser in version 2.5.0, which includes saving the number of observed peptides for each protein in the cache file. If isDataCached() returns false, ms_mascotresults::getProteinEmPAI() may be slower than expected.

BUGFIX_11254 

Parser 2.5.0 introduced modification discovery statistics (getNumDiscoveredVariableMods() et al.). If isDataCached() returns false, these statistics are computed by iterating over all queries in the file, which may be slow with large files.

BUGFIX_11344 

Reporter and Multiplex quantitation were moved to Parser in version 2.5.0. If isDataCached() returns false, component intensities will be extracted from queries on the fly, which may make ms_ms2quantitation and ms_mascotresults::getComponentIntensity() slower than expected.

BUGFIX_11411 

Parser 2.5.0 saves the qmatch and qplughole values of each match in new cache files. If isDataCached() returns false, peptide threshold calculation may be slower than expected.

BUGFIX_11425 

Parser 2.5.0 introduced ms_mascotresults::getIonsScore() for fetching match scores without a peptide object. If isDataCached() returns false, the cache file does not contain the saved scores and getIonsScore() and ms_ms2quantitation may be slower than expected.

BUGFIX_11438 

Parser 2.5.0 saves the charge of each query in new cache files. If isDataCached() returns false, ms_mascotresfile::getObservedCharge() and ms_ms2quantitation may be slower than expected.

BUGFIX_11483 

The number of unique sequences per protein has been saved in new cache files since Parser 2.5.0. If isDataCached() returns false, ms_protein::getNumDistinctPeptides() may be slower than expected when using the DPF_UNIQUE flag.

BUGFIX_11499 

Parser 2.5.0 started saving the number of hits above identity and homology for all searches, not just decoy searches. If the numbers are not available, getNumHitsAboveIdentity() and getNumHitsAboveHomology() can be slow, especially in large files.

BUGFIX_11856 

In Parser 2.5.0, getNumDiscoveredErrTolMods() excluded some error tolerant modifications when the queries were below the requested probability threshold. When calling isDataCached(), use the BUGFIX_11254 enumeration instead. Refer to bug 11254 above for more information.

BUGFIX_12123 

Parser 2.6 added two new types of decoy statistics: counting peptide sequences, and counting library matches. If isDataCached() returns false, only counts for PSMs in FASTA sequences are saved in the cache file. Otherwise, all counts are cached. See getNumHitsAboveIdentity() for information about the different count types.

BUGFIX_12317 

Parser 2.6 (see also bug 12430): the average identity threshold must be calculated with a number of qmatches of at least 20.

BUGFIX_11481 

Parser 2.6 added decoy peptide strings to the cache so that they can be accessed in a non-decoy summary environment.

BUGFIX_12538 

Parser 2.6.1 added the ambiguity group id for peptides to the cache to speed up report loading.

BUGFIX_12729 

Improved protein inference for multi-library searches with only one reference database; fixed in Parser 2.6.1.

BUGFIX_12740 

Parser 2.6.0 supports reranking matches when Percolator is enabled, based on the new Percolated scores. Some attributes required for reranking were not being written correctly in the cache file. This is fixed in Parser 2.6.1.

enum CACHE_STATUS

Returned by the willCreateCache function.

See Using enumerated values in Perl, Java, Python and C# and Caching Mascot Results

The function may bitwise OR multiple values together

Enumerator:
CACHE_MISSING_RESFILE 

Either the results file is missing or an empty string is passed to the function.

RESFILE_CACHE_FILE_NOT_PRESENT 

The ms_mascotresfile cache file is not present and is not being created.

RESFILE_CACHE_BEING_CREATED 

The ms_mascotresfile cache file is being created.

RESFILE_CACHE_DISABLED_IN_OPTIONS 

Use of the ms_mascotresfile cache file is disabled in the options section of mascot.dat.

RESFILE_CACHE_VALID 

The ms_mascotresfile cache file has already been created and is valid.

RESFILE_CACHE_CAN_CREATE 

The ms_mascotresfile cache file can be (re-)created. It either doesn't exist or is out of date or invalid for some reason.

PEPSUMMARY_CACHE_FILE_NOT_PRESENT 

The ms_peptidesummary cache file is not present and is not being created.

PEPSUMMARY_CACHE_BEING_CREATED 

The ms_peptidesummary cache file is being created.

PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS 

Use of the ms_peptidesummary cache file is disabled in the options section of mascot.dat.

PEPSUMMARY_CACHE_VALID 

The ms_peptidesummary cache file has already been created and is valid.

PEPSUMMARY_CACHE_CAN_CREATE 

The ms_peptidesummary cache file can be (re-)created. It either doesn't exist or is out of date or invalid for some reason.

PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE 

If the ms_mascotresfile cache file is not present, then no attempt is made to find out about the ms_peptidesummary cache file.

PEPSUMMARY_CACHE_NOT_FOR_PMF 

A ms_peptidesummary cache file is not created if the results file just contains PMF matches.

enum DB_MATCH_TYPE [inherited]

Type of matches counted by getNumHitsAboveIdentity() and friends, and type of score for getIonsScoreHistogram().

Enumerator:
DM_FASTA 

Mascot matches to FASTA sequences. This is the default.

DM_SPECTRAL_LIBRARY 

Matches to spectral library entries. This was introduced in Parser 2.6.

DM_BOTH 

Only valid for count of matches (getNumHitsAboveIdentity() and friends): the combined count for FASTA and spectral library entries.

enum DECOY_STATS_COUNT_TYPE [inherited]

Type of object counted by getNumHitsAboveIdentity() and friends.

Enumerator:
DS_COUNT_PSM 

Count peptide-spectrum matches (PSMs). This is the default.

DS_COUNT_SEQUENCE 

Count distinct peptide sequences. This was introduced in Parser 2.6.

enum FIND_COMPARE_FLAGS [inherited]

Flags to specify how comparisons are performed in the find functions.

See Using enumerated values in Perl, Java, Python and C#.

These flags are used in findProteins(), findPeptides() and the deprecated functions: findProteinsByAccession() and findProteinsByDescription(). The flags are used to specify how the comparison is performed and whether it should be a forward or reverse search. Typically, three values will be combined together, using an OR operator; however, any default values do not need to be specifically specified.

  • Choose 1 of: FC_COMPLETESTR, FC_SUBSTR, FC_STARTSTR, FC_STRTOK
  • Choose 1 of: FC_CASE_INSENSITIVE, FC_CASE_SENSITIVE
  • Choose 1 of: FC_FORWARD, FC_REVERSE
  • Optionally choose FC_RESTRICT_TO_HIT or (FC_LOOP_INTO_UNASSIGNED and/or FC_LOOP_FROM_UNASSIGNED)
  • Optionally choose FC_SEARCH_ALL_RANKS
  • Optionally choose FC_ALL_PEPTIDES or FC_SIGNIFICANT_PEPTIDES
  • Optionally choose one or more of the FC_PROTEIN_IGN_ flags when calling findProteins()
Enumerator:
FC_COMPLETESTR 

Search for the complete string. If search item is a decimal number then match the number exactly.

FC_SUBSTR 

Search for any substring. Not valid if search item is a decimal number.

FC_STARTSTR 

String must match to start of target string. If search item is a decimal number then match to the same decimal precision of the filter value but rounded. For example a filter value of 1234.56 will return items with values in the range (1234.555 <= value < 1234.565).

FC_STRTOK 

Supplied string is a set of tokens, for example "STY" could be used to search for S or T or Y in a peptide sequence.

FC_MASK_STR_PART 

Bit mask to extract which one of FC_COMPLETESTR, FC_SUBSTR, FC_STARTSTR, FC_STRTOK has been specified.

FC_CASE_INSENSITIVE 

Case insensitive search (default).

FC_CASE_SENSITIVE 

Case sensitive search.

FC_MASK_CASE 

Bit mask to extract which one of FC_CASE_INSENSITIVE, FC_CASE_SENSITIVE has been specified.

FC_FORWARD 

Forward search. The returned hit number will be the same as or higher than the start hit number. (default).

FC_REVERSE 

Reverse search. The returned hit number will be the same as or lower than the start hit number.

FC_MASK_DIRECTION 

Bit mask to extract which one of FC_FORWARD, FC_REVERSE has been specified.

FC_RESTRICT_TO_HIT 

Don't search beyond the specified hit number. Cannot be used with FC_LOOP_INTO_UNASSIGNED or FC_LOOP_FROM_UNASSIGNED.

FC_LOOP_INTO_UNASSIGNED 

If no matches are found in the passed hit, or any subsequent hit, then search the unassigned list.

FC_LOOP_FROM_UNASSIGNED 

If the passed hit number is 0, and no match is found in the unassigned list, then start searching at 1 if FC_FORWARD is specified or start searching at 'numHits' if FC_REVERSE is specified.

FC_UNASSIGNED_MASK 

Bit mask to extract FC_LOOP_INTO_UNASSIGNED or FC_LOOP_FROM_UNASSIGNED.

FC_SEARCH_ALL_RANKS 

For use with findPeptides() only. Ordinarily only those queries and ranks are searched that are assigned to a protein hit. Use this flag to search all ranks in such queries instead.

FC_ALL_PEPTIDES 

Search all peptides regardless of score (default).

FC_ALL_PEPTITDES 
Deprecated:
Unfortunate spelling error, please use FC_ALL_PEPTIDES which has an identical value.
FC_SIGNIFICANT_PEPTIDES 

Only search peptides above identity or homology threshold.

FC_SEARCH_IGNORED_RANKS 

In integrated library mode, also search peptides that are part of a family member's list of ignored peptides.

FC_SCORING_MASK 

Bit mask to extract FC_ALL_PEPTIDES or FC_SIGNIFICANT_PEPTIDES.

FC_PROTEIN_IGN_SAMESETS 

Ignore sameset proteins - only used for findProteins()

FC_PROTEIN_IGN_SUBSETS 

Ignore subset proteins - only used for findProteins()

FC_PROTEIN_IGN_FAMILY 

Ignore family member proteins - only used for findProteins()

FC_PROTEIN_IGN_MASK 

Ignore proteins flags.

FC_PROTEIN_INC_ALT_ACC 

For use with findProteins() only. If protein entries in the database are representative (i.e. contain multiple accessions) and those accessions are saved in the results file (Mascot 2.4 or later), setting this flag will search all accessions of each protein entry, not just the first accession.

enum FIND_FLAGS [inherited]

Flags for findPeptides() and findProteins().

Details what needs to be searched for. Any of the FT_PEPTIDE_ flags may be used for either findPeptides() or findProteins(), but the FT_PROTEIN flags may only be used for findProteins()

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
FT_PEPTIDE_EXP_MZ 

Find an experimental m/z value. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.

FT_PEPTIDE_EXP_MR 

Find an experimental relative mass. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.

FT_PEPTIDE_CALC_MZ 

Find a calculated m/z value. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.

FT_PEPTIDE_CALC_MR 

Find a calculated relative mass. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.

FT_PEPTIDE_STRING 

Find a peptide string.

FT_PEPTIDE_QUERY 

Find a query number.

FT_PEPTIDE_VARMOD 

Find a variable modification. Specify the modification 'number' as the string.

FT_PEPTIDE_FIXMOD 

Find a fixed modification. Specify the modification 'number' as the string.

FT_PEPTIDE_ETMOD 

Find an error tolerant modification. Specify a delta mass as the string.

FT_PEPTIDE_SLMOD 

Find a spectral library modification. Specify any substring of the mod name as the string.

FT_PEPTIDE_VARMOD_BYNAME 

Find a variable or query level modification. Specify any substring of the mod name as the string.

FT_PEPTIDE_FIND_MASK 

Bit mask for any of the peptide values to be found.

FT_PROTEIN_ACCESSION 

Find an accession - findProteins() only.

FT_PROTEIN_DESCRIPTION 

Find a description - findProteins() only.

enum FLAGS [inherited]

Flags for the type of results.

See Using enumerated values in Perl, Java, Python and C#.

Not all of the flags are applicable for protein summary (e.g. MSRES_REQUIRE_BOLD_RED); see ms_proteinsummary.

Enumerator:
MSRES_NOFLAG 

Does nothing.

MSRES_GROUP_PROTEINS 

Group proteins with same peptide matches. See Grouping proteins together.

MSRES_SHOW_SUBSETS 

Show proteins that only match a subset of peptides. See Grouping proteins together.

MSRES_SUBSETS_DIFF_PROT 

Proteins that contain a subset of peptides are treated as a unique protein. See Grouping proteins together.

MSRES_REQUIRE_BOLD_RED 

Only proteins that have a top scoring peptide not seen before will be returned.

MSRES_SHOW_ALL_FROM_ERR_TOL 

If this flag is set, then all hits from error tolerant search are shown. See Error tolerant searches.

MSRES_IGNORE_PMF_MIXTURE 

If this flag is set, then PMF mixtures are ignored. See Peptide mass fingerprint mixtures.

MSRES_MUDPIT_PROTEIN_SCORE 

Protein scoring for the peptide summary was changed at Mascot 2.0 for large (MudPIT) searches. See ms_protein::getScore().

MSRES_DECOY 

If this flag is set, then use the results from searching against the decoy database. See Retrieving the results of a decoy search.

MSRES_INTEGRATED_ERR_TOL 

If this flag is set, then create a ms_peptidesummary object that contains results from the summary and et_summary section. See Error tolerant searches.

MSRES_ERR_TOL 

If this flag is set, then create a ms_peptidesummary object that contains results from the et_summary section. See Error tolerant searches.

MSRES_MAXHITS_OVERRIDES_MINPROB 

If minProbability and maxHitsToReport are both non zero, then minProbability is ignored when determining the number of proteins to be displayed. See ms_mascotresults::ms_mascotresults.

MSRES_CLUSTER_PROTEINS 

Protein clustering introduced in Mascot 2.3. See Using MSRES_CLUSTER_PROTEINS.

MSRES_DUPE_INCL_IN_SCORE_NONE 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_A 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_B 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_C 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_D 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_E 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_F 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_G 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_H 

See Treatment of duplicate peptides.

MSRES_DUPE_INCL_IN_SCORE_I 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_NONE 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_A 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_B 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_C 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_D 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_E 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_F 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_G 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_H 

See Treatment of duplicate peptides.

MSRES_DUPE_REMOVE_I 

See Treatment of duplicate peptides.

MSRES_DUPE_DEFAULT 

Default parameter for treatment of duplicates. See Treatment of duplicate peptides.

enum HOMOLOGY_THRESHOLD_SOURCE [inherited]

Flags for getHomologyThreshold()

The method getHomologyThreshold() accepts either a rank argument or one of the special enumerated values listed here. For backwards compatibility, the integers 1-20 have their own enumerated values.

Enumerator:
HOMTHR_FASTA 

Retrieve the homology threshold for FASTA matches.

HOMTHR_SL 

Retrieve the homology threshold for spectral library matches.

enum IONS_HISTOGRAM [inherited]

Flags for getIonsScoreHistogram().

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
IH_INCLUDE_TOP_MATCHES 

The default. Just include the top match to each spectrum.

IH_INCLUDE_TOP_10_MATCHES 

Instead of just the top match, use the top 10 matches to each spectrum.

enum MSPEPSUM

flags2 for ms_peptidesummary introduced in Mascot Parser 2.3.

See Using enumerated values in Perl, Java, Python and C# and ms_peptidesummary::ms_peptidesummary.

Enumerator:
MSPEPSUM_NONE 

Default.

MSPEPSUM_PERCOLATOR 

See Using Percolator scores for details.

MSPEPSUM_USE_CACHE 

See Using the ms_peptidesummary cache.

MSPEPSUM_SINGLE_HIT_DBIDX 

The singleHit parameter string must start with a database index and a colon, e.g. 3:CH60_SHEON. See Using the singleHit parameter to return a single protein.

MSPEPSUM_USE_HOMOLOGY_THRESH 

Expect values and cutoffs will use homology thresholds rather than identity thresholds.

MSPEPSUM_NO_PROTEIN_GROUPING 

Used for when a ms_peptidesummary object is required, but no protein grouping is needed. ms_peptidesummary::getNumberOfHits() will return zero. The only functions that are guaranteed to work are ms_peptidesummary::getPeptide(), ms_peptidesummary::getAllProteinsWithThisPepMatch(), ms_peptidesummary::getQmatch(). Will not work with error tolerant searches.

MSPEPSUM_DISCARD_RELOADABLE 

Specify this flag to use less memory. However, calls to getPeptide() will be slower because the data will always be loaded again from the results file.

MSPEPSUM_DEFERRED_CREATE 

Useful if it is necessary to be able to cancel creation of a ms_peptidesummary() object (which can take a long time). Call createSummary() manually after creating the ms_peptidesummary object.

MSPEPSUM_CACHE_IGNORE_DATE_CHANGE 

Opening the peptide summary cache CDB file should ignore the last modified timestamp on the .dat file.

MSPEPSUM_REMOVE_CHIMERIC_DUPES 

Specify this flag to remove duplicate matches from chimeric spectra (spectra with more than one precursor mass). See Chimeric duplicates.

MSPEPSUM_SL_INTEGRATED 

Merge spectral library matches with Mascot matches, if any exist in the results file. If the flag is not specified, only Mascot matches are available.

MSPEPSUM_SL_ONLY 

Use only the spectral library match sections and ignore Mascot matches.

enum QL_FLAG

Flags for getQueryList().

See Using enumerated values in Perl, Java, Python and C# and getQueryList().

Enumerator:
QL_ALL 

Returns "All".

QL_UNASSIGNED 

Returns a comma separated list of the query numbers in the unassigned list. If createUnassignedList() has not been called, getQueryList() function will call it, possibly causing some delay.

QL_BELOW_IDENTITY 

Returns a comma separated list of the query numbers which have scores below the identity threshold calculated using the minProbability threshold specified in the ms_peptidesummary constructor. If minProbability <= 0 or >= 1, then an empty string is returned.

QL_BELOW_HOMOLOGY 

Returns a comma separated list of the query numbers which have scores below the homology threshold calculated using the minProbability threshold specified in the ms_peptidesummary constructor. If minProbability <= 0 or >= 1, then an empty string is returned.

QL_IGNORE_IONS_SCORE_BELOW 

Uses the threshold specified by the ignoreIonsScoreBelow parameter. If ignoreIonsScoreBelow is zero, then an empty string is returned.

QL_LAST 

For looping through all possible values.

enum QUANT_COMPONENT_STATUS [inherited]

Return codes for getQuantitationComponentForPeptide().

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
QCS_OK 

The component was retrieved successfully.

QCS_OK_NO_MATCH 

The quantitation method's components were searched successfully, no matching component was found.

QCS_ERROR_NO_METHOD 

There is no quantitation method available (e.g. for protein summary).

QCS_ERROR_NO_COMPONENTS 

There are no components specified in the quantitation method (e.g. for non-precursor method).

QCS_ERROR_BAD_COMPONENT_NAME 

The peptide's component name in the Mascot results does not match any component in the quantitation method.

QCS_OK_MULTIPLE_MATCHES 

The quantitation method's components were searched successfully, multiple matching components were found.

enum sortBy [inherited]

Flags for createUnassignedList().

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
QUERY 

Sort the unassigned list by ascending query number - this is the same as ascending relative mass order.

SCORE 

Sort the unassigned list by descending score.

INTENSITY 

Sort the unassigned list by descending intensity. Intensity values are taken from the qintensity value in the results file if they are available (from PKL files, or some MGF files). If these values are not available, then the intensity is calculated from the sum of all the ions values. For a very large MS-MS file, this option can take some time to process unless there are qintensity values in the results file.

enum THRESHOLD_TYPE [inherited]

Flags for getPeptideThreshold()

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
TT_HOMOLOGY 

Homology threshold.

TT_IDENTITY 

Identity threshold.

TT_PEPSUM_DEFAULT 

If ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH is specified in the constructor, then this will resolve to TT_HOMOLOGY, otherwise it will resolve to TT_IDENTITY.

enum TREE_CLUSTER_METHOD [inherited]

Flags for getTreeClusterNodes().

See Using enumerated values in Perl, Java, Python and C#.

Enumerator:
TCM_PAIRWISE_SINGLE 

's': pairwise single-linkage clustering.

TCM_PAIRWISE_MAXIMUM 

'm': pairwise maximum- (or complete-) linkage clustering.

TCM_PAIRWISE_AVERAGE 

'a': pairwise average-linkage clustering.

enum UNIQUE_PEP_RULES [inherited]

Flags for isPeptideUnique().

See Using enumerated values in Perl, Java, Python and C#.

Choose UPR_WITHIN_FAMILY or UPR_WITHIN_FAMILY_MEMBER and then optionally 'or' UPR_IGNORE_SUBSET_PROTEINS

Enumerator:
UPR_WITHIN_FAMILY 

The peptide is unique if it occurs in proteins that are part of a single family.

UPR_WITHIN_FAMILY_MEMBER 

The peptide is unique if it occurs in proteins that just belong to a single family member.

UPR_IGNORE_SUBSET_PROTEINS 

Ignore any subset proteins that contain the match when deciding if a peptide is unique. However, if the peptide just belongs to subset proteins for the same hit, then it is still considered to be unique.

UPR_DEFAULT 

Set to UPR_WITHIN_FAMILY_MEMBER | UPR_IGNORE_SUBSET_PROTEINS.


Constructor & Destructor Documentation

ms_peptidesummary ( ms_mascotresfile resfile,
const unsigned int  flags = MSRES_GROUP_PROTEINS,
double  minProbability = 0.0,
int  maxHitsToReport = 50,
const char *  unigeneIndexFile = 0,
double  ignoreIonsScoreBelow = 0.0,
int  minPepLenInPepSummary = 0,
const char *  singleHit = 0,
const unsigned int  flags2 = MSPEPSUM_NONE 
)

Call this constructor once to create peptide summary results.

Creating an ms_peptidesummary object may take some time for a very large file with a large number of protein hits.

Parameters:
resfileAn ms_mascotresfile object. See Maintaining object references: two rules of thumb.
flagscan be one or more of the ms_mascotresults::FLAGS values OR'ed together
minProbabilityspecifies a cutoff point for protein scores, a cutoff for an Integrated error tolerant search and a threshold for calculating mudpit scores.

  • A value of 0 indicates no cutoff - report maxHitsToReport proteins, whatever the protein scores.
  • A value between 0 and 1 is used to calculate a protein score cut-off based on peptide match expect values. With standard protein scoring, 0.05 would discard all proteins with a score less than the average score of a peptide with an expect value of 0.05. With MudPIT protein scoring, 0.05 would be the peptide expect value threshold used for calculating the protein scores, and only proteins with non-zero scores are reported. This value is also used to select proteins for an Integrated error tolerant search.
  • A value of less than 0 is not permitted and will be replaced with the default of 0.05.
  • In Mascot Parser 2.2 and earlier, a value between 0.1 and 1.0 was replaced with the default value of 0.05. In Mascot Parser 2.3, any value between 1e-18 and 1.0 is acceptable.
  • A value greater than 1 indicates an absolute score cutoff for proteins, but this is not valid for an Integrated error tolerant search and is not permitted in the standard Mascot Search reports, so is not recommended.

Other points to note:

  • If the flag ms_mascotresults::MSRES_GROUP_PROTEINS is set, then this value applies to the top level proteins - the similar and subset proteins are not 'cutoff' by this value.
  • The cutoff point is the lower of maxHitsToReport and this value, unless the ms_mascotresults::MSRES_MAXHITS_OVERRIDES_MINPROB flag is set. With versions of parser before Mascot 2.2, this flag was not available so AUTO in the standard Mascot results reports was implemented by setting this probability to a (user) defined value and setting maxHitsToReport to a very large number.
maxHitsToReportis a maximum value. There may be fewer than this number if minProbability is low (or > 1 and high), or if there are insufficient results.

  • If the ms_mascotresults::MSRES_MAXHITS_OVERRIDES_MINPROB flag is set, then the minProbability value is ignored when determining the number of hits to display.
  • With versions of parser before Mascot 2.2, this flag was not available so a value of 0 was passed as minProbability in the standard Mascot results reports. Also, if this flag is specified, a value of zero can be used for maxHitsToReport and the cutoff is then specified by minProbability.
  • If the flag ms_mascotresults::MSRES_GROUP_PROTEINS is set, then this value applies to the top level proteins -- the similar and subset proteins are not included in the count.
unigeneIndexFileis an optional file path to a UniGene Index file. This only makes sense if ms_mascotresults::MSRES_GROUP_PROTEINS is set.

  • From programming languages other than C++, pass an empty string object to specify no UniGene index file.
ignoreIonsScoreBelowis an optional flag for the peptide summary only. Any peptides with a score less than this will be ignored. If the value is greater than 0 and less than 1 then this is assumed to be a probability rather than an absolute score; the reciprocal of this value is passed to getPeptideThreshold() to determine the score cutoff.
minPepLenInPepSummaryis an optional flag for the peptide summary only. Any peptides shorter than this will be ignored when grouping proteins (see Grouping proteins together).
singleHitshould only be used for getting the results for a single protein quickly. This is used for example by the protein view report in Mascot, which just shows the matches for a single protein. See Using the singleHit parameter to return a single protein.

  • From programming languages other than C++, pass an empty string object to specify that all proteins should be used.
  • For Mascot 2.3 and later, a database index needs to be specified because the search may have been against multiple fasta files. If the database index is to be specified, the string must be of the form: "3:CH60_SHEON" and the ms_peptidesummary::MSPEPSUM_SINGLE_HIT_DBIDX must be specified in flags2. Otherwise, a database index of 1 is assumed.
flags2should be one or more of the ms_peptidesummary::MSPEPSUM flags OR'd together
ms_peptidesummary ( ms_mascotresfile resfile,
const ms_distiller_data distillerData,
int  searchIndex 
)

Call this constructor once to create peptide summary results.

Creating an ms_peptidesummary object may take some time for a very large file with a large number of protein hits.

Parameters:
resfileAn ms_mascotresfile object. See Maintaining object references: two rules of thumb.
distillerDataThe Distiller project parameters.
searchIndexThe index (1..n) to the search to use in the Distiller project parameters.
ms_peptidesummary ( ms_mascotresfile resfile,
const ms_datfile datfile,
const ms_distiller_data_search search 
)

Call this constructor once to create peptide summary results.

Creating an ms_peptidesummary object may take some time for a very large file with a large number of protein hits.

Parameters:
resfileAn ms_mascotresfile object. See Maintaining object references: two rules of thumb.
datfileThe Mascot search results data.
searchThe Distiller search results data.
ms_peptidesummary ( ms_mascotresfile resfile,
const ms_mascotresults_params parameters 
)

Call this constructor once to create peptide summary results.

Creating an ms_peptidesummary object may take some time for a very large file with a large number of protein hits.

Parameters:
resfileAn ms_mascotresfile object. See Maintaining object references: two rules of thumb.
parametersAn ms_mascotresults_params object which should contain all of the required values and flags to generate the peptide summary.

Member Function Documentation

bool anyEmPAI (  ) const [virtual]

emPAI is not available unless all of the following are true:

  • Results file has MS/MS data.
  • Results file is not an old-style error tolerant search.
  • Results file has not been opened in decoy mode (ms_mascotresults::MSRES_DECOY).
  • There are at least 100 queries.
Returns:
true if above conditions are true and at least one protein has a non-zero emPAI value.

Reimplemented from ms_mascotresults.

bool anyNumDiscoveredMods (  ) const [virtual, inherited]

Return true if modification discovery statistics are available.

Modification discovery counts for fixed and variable modifications (and error tolerant modifications in an error tolerant search) are only available if the results file contains a Unimod section. Since only significant rank 1 peptide matches assigned to a protein hit are inspected for modification counting purposes, it is possible for the counts to be zero even when a Unimod section exists.

Returns:
True if number of modifications could be counted and there is at least one modification with a non-zero count; false otherwise.
void cancelCreateSummary ( bool  newValue = true ) [inherited]

Cancel the call to createSummary()

It is normally recommended that an application should delete the ms_peptidesummary object after calling this function because some objects will still be in memory (no cleanup is performed).

Cancelling may not occur immediately, and the calling application needs to wait for the createSummary() call to return.

See also getCreateSummaryProgress() and ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE

Parameters:
newValueshould be set to true to cancel the createSummary call
bool createSummary (  ) [virtual]

Create the summary using a separate call after the ms_peptidesummary object has been created.

Creating an ms_peptidesummary object for a large results file can take a considerable amount of time. The standard Mascot reports do not have an option to 'cancel' the creation of the report and rely on the ms_mascotresfile::outputKeepAlive() function to provide progress reports and keep the connection alive. This is not so desirable for a client application that doesn't want to redirect stdout, and wants to allow the user to cancel a long operation. Specify the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag when creating the ms_peptidesummary object, and then use one thread to call this function and a separate thread to call getCreateSummaryProgress() and report progress to the end user.

This function is called directly by the ms_mascotresults constructor unless the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag is specified. If that flag is specified, then the constructor returns 'immediately' and this function must be called by the client code. Until this function returns, the client code in the separate thread can only reliably call the getCreateSummaryProgress(), ms_mascotresfile::outputKeepAlive() or cancelCreateSummary() functions. Calling any other function for the ms_mascotresults object may result in undefined behaviour and cause the ms_mascotresfile::ERR_RESULTS_NOT_CREATED error to be set.

This function should not be called for a ms_proteinsummary.

Returns:
true if successful

Reimplemented from ms_mascotresults.

bool createUnassignedList ( sortBy  s = QUERY ) [inherited]

To have a list of unassigned peptides, need to call this first.

See Unassigned peptides list for details of creating an unassigned list.

Parameters:
sorder to sort the unassigned list
Returns:
Currently always returns true unless createSummary() has not completed
Examples:
resfile_summary.cpp.
bool dumpCDB ( const std::string  dumpFileName )

Debugging aid. Exports the cdb file as text.

Parameters:
dumpFileNameis the path of the text file to dump information to.
Returns:
true if the file can be created and written to.
int findPeptides ( const int  startHit,
const std::string &  str,
FIND_FLAGS  item,
FIND_COMPARE_FLAGS  compareFlags,
std::vector< int > &  q,
std::vector< int > &  p 
) const [virtual]

Find the next hit that contains peptides with the specified attribute.

Note:
For a protein summary, this function always returns -1.

Can be used to find hits with a specified query number, sequence, mass, or fixed or variable modification.

Returns the hit number and a list of q,p values that contains one or more matches to the passed string.

All subset, sameset and family member proteins are searched, and it is possible that the returned q and p values only correspond to peptides in a single subset protein. The FC_PROTEIN_IGN_SAMESETS, FC_PROTEIN_IGN_SUBSETS and FC_PROTEIN_IGN_FAMILY flags cannot be used with this function. To find proteins which contain particular peptide matches, or to control which proteins to search, see findProteins().

For FT_PEPTIDE_VARMOD, str should contain the modification 'number' as passed to ms_searchparams::getVarModsName(). Modification numbers between 1 and 9 correspond to strings "1" through "9", and numbers between 10 and 32 correspond to "A" through "W". To find all error tolerant modifications, specify "X" and then use ms_peptidesummary::getErrTolModName() to retrieve the name of the modification.

For FT_PEPTIDE_FIXMOD, str should contain the modification 'number' as passed to ms_searchparams::getFixedModsName(). This will be in the range 1-20.

For both types of modification searches, str should only contain a single character. The value for compareFlags should include FC_SUBSTR, because this function is implemented by searching the return value from ms_peptide::getVarModsStr(). Specifying incorrect flags does not generate an error and results in undefined behaviour.

There is an alternative way to search for fixed mods: specify FT_PEPTIDE_STRING and FC_STRTOK as item and compareFlags, respectively, and pass the return value of ms_searchparams::getFixedModsResidues() as the string to search for, str. Although this may seem equivalent to the combination FT_PEPTIDE_FIXMOD and FC_SUBSTR, there are two corner cases where the latter works while the former doesn't:

  • ms_searchparams::getFixedModsResidues() can return "N_term" or "C_term", which are clearly not strings of amino acids.
  • The case where a fixed mod is 'replaced' with a variable mod. For example, if Carbamidomethyl (C) is specified as a fixed mod and Propionamide (C) as variable mod, then it is not sufficient to just check if a peptide contains a cysteine.
Parameters:
startHitis the hit number at which to start searching. To start searching at the first hit, pass a value of 1. To search the unassigned list, pass a value of 0.
stris the string to be matched.
itemis the peptide item to be found (for example, the peptide sequence or mass). Any of the FT_PEPTIDE_ values may be used.
compareFlagsspecifies whether the str value is required to match all or just part of the target string. It can also be used to specify a reverse search.
qis a vector of query values for peptides that match. See Using STL classes in Perl, Java, Python and C#.
pis a vector of corresponding rank values for peptides that match. See Using STL classes in Perl, Java, Python and C#.
Returns:
the single hit number that contains all the returned matched items, or 0 if the match is in the unassigned list, or -1 if there is no match found.

Implements ms_mascotresults.

int findProteins ( const int  startHit,
const std::string &  str,
const int  dbIdx,
FIND_FLAGS  item,
FIND_COMPARE_FLAGS  compareFlags,
std::vector< std::string > &  accessions,
std::vector< int > &  dbIndexes 
) const [virtual]

Find the next hit that contains proteins with the specified attributes.

Note:
For a protein summary, this function always returns -1.

Used to find accessions, protein descriptions or proteins that have peptide matches with a specified query number, sequence, mass, or fixed or variable modification. Any of the FIND_FLAGS may be used.

The findPeptides() function does not return any information about which subset/sameset/family proteins in a hit contain the found matches. findProteins(), however, returns a list of the protein accessions that contained a peptide match that fits the search criteria, and also allows the searched proteins to exclude samesets (FC_PROTEIN_IGN_SAMESETS), subsets (FC_PROTEIN_IGN_SUBSETS) or family members (FC_PROTEIN_IGN_FAMILY).

If protein entries in the database are representative (i.e. contain multiple accessions), by default only the first accession will be considered when FT_PROTEIN_ACCESSION is specified as a search item. If alternative accessions are saved in the results file (Mascot 2.4 or later), setting FC_PROTEIN_INC_ALT_ACC will search all accessions of each protein entry. Note that this will make the search slower.

Parameters:
startHitis the hit number at which to start searching. To start searching at the first hit, pass a value of 1.
stris the string to be matched and depends on the value of item. If item is FT_PROTEIN_ACCESSION, then str will be the accession (or part of one) to be matched. If item is one of the FT_PEPTIDE_ values, then see FIND_FLAGS and findPeptides() for operational details.
dbIdxis the database ID to be matched. To search all databases, pass a value of -1. Note that the value 0 corresponds to a UniGene database.
itemis the accession, description or peptide item to be found. For example, return all proteins with a given peptide sequence or mass.
compareFlagsspecifies whether the str value is required to match all or just part of the accession. It can also be used to specify a reverse search.
accessionsis a vector of protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
dbIndexesis a symmetric vector of database indexes for the protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
Returns:
the single hit number that contains the matches, or -1 if there is no match found.

Implements ms_mascotresults.

int findProteinsByAccession ( const int  startHit,
const std::string &  str,
const int  dbIdx,
FIND_COMPARE_FLAGS  compareFlags,
std::vector< std::string > &  accessions,
std::vector< int > &  dbIndexes 
) const [virtual]

Find the next hit that contains proteins with the specified accession.

Deprecated:
See findProteins() with the ms_mascotresults::FT_PROTEIN_ACCESSION flag.
Note:
For a protein summary, this function always returns -1.

Return the hit number and a list of proteins with accessions that match the passed string. If protein grouping has been specified, then multiple proteins within the same hit may match, and these are all returned.

Parameters:
startHitis the number at which to start searching. To start searching at the first hit, pass a value of 1.
stris the accession (or part of accession) to be matched.
dbIdxis the database ID to be matched. To search all databases, pass a value of -1. Note that the value 0 corresponds to a UniGene database.
compareFlagsspecifies whether the str value is required to match all or just part of the accession. It can also be used to specify a reverse search.
accessionsis a vector of protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
dbIndexesis a symmetric vector of database indexes for the protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
Returns:
the single hit number that contains all the returned proteins, or -1 if there is no match found.

Implements ms_mascotresults.

int findProteinsByDescription ( const int  startHit,
const std::string &  str,
FIND_COMPARE_FLAGS  compareFlags,
std::vector< std::string > &  accessions,
std::vector< int > &  dbIndexes 
) const [virtual]

Find the next hit that contains proteins with the specified description.

Deprecated:
See findProteins() with the ms_mascotresults::FT_PROTEIN_DESCRIPTION flag.
Note:
For a protein summary, this function always returns -1.

Return the hit number and a list of proteins with descriptions that match the passed string. If protein grouping has been specified, then multiple proteins within the same hit may match, and these are all returned.

Descriptions are just those included in the results file. See getProteinDescription() for details of what is loaded into the results.

Parameters:
startHitis the number at which to start searching. To start searching at the first hit, pass a value of 1.
stris the description (or part of description) to be matched.
compareFlagsspecifies whether the str value is required to match all or just part of the description. It can also be used to specify a reverse search.
accessionsis a vector of protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
dbIndexesis a symmetric vector of database indexes for the protein accessions that match. See Using STL classes in Perl, Java, Python and C#.
Returns:
the single hit number that contains all the returned proteins, or -1 if there is no match found.

Implements ms_mascotresults.

void freeHit ( const int  hit ) [virtual]

Frees any memory associated with the passed hit number.

Has no effect except when Using the ms_peptidesummary cache. This function frees the ms_protein objects for the specified hit. The return values from getHit(), getNextFamilyProtein(), getNextSimilarProtein(), getNextSimilarProteinOf(), getNextSubsetProtein() etc. for this hit will then point to deleted objects and hence should not be used.

In addition, any ms_peptide objects loaded into memory using the getPeptide(const int, const int, ms_peptide *&)const function will be 'unloaded' if the ms_peptidesummary::MSPEPSUM_DISCARD_RELOADABLE flag has been specified. This function has no effect on the ms_peptide objects returned from the alternative getPeptide(const int, const int)const function.

Parameters:
hitshould be in the range 1.. getNumberOfHits().

Reimplemented from ms_mascotresults.

int getAllFamilyMembersWithThisPepMatch ( const int  hit,
const int  q,
const int  p,
std::vector< int > &  db,
std::vector< std::string > &  acc,
std::vector< int > &  dupe_status 
) const [virtual]

Return a list of (top level) family proteins that have a match to the specified q and p.

Only the top level proteins are returned by this function, that is, the proteins that would be returned by getHit() and getNextFamilyProtein(). It will be considerably faster than calling ms_protein::getPeptideQuery() and related functions, because that involves loading more information from the cache and from the .dat file.

This function will return the values rapidly when caching is enabled; see Using the ms_peptidesummary cache. It also functions correctly when not using the cache.

The returned vectors are all guaranteed to have the same number of elements. See Using STL classes in Perl, Java, Python and C# for how to use them from programming languages other than C++.

See also:
getAllProteinsWithThisPepMatch() and getProteinsWithThisPepMatch()
Parameters:
hitShould be in the range 1.. getNumberOfHits().
qis the query number in the range 1 to ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
dbis a vector of database IDs.
accis the corresponding vector of accessions.
dupe_statusis the corresponding vector of ms_protein::DUPLICATE values for the returned proteins. Note that ms_protein::DUPE_Ignored will only appear in integrated library searches where the peptide match was removed due to IgnoreIonsScoreBelow.
Returns:
The number of proteins in the db/acc/dupe_status vectors.

Implements ms_mascotresults.

std::vector< std::string > getAllProteinsWithThisPepMatch ( const int  q,
const int  p,
std::vector< int > &  start,
std::vector< int > &  end,
std::vector< std::string > &  pre,
std::vector< std::string > &  post,
std::vector< int > &  frame,
std::vector< int > &  multiplicity,
std::vector< int > &  db 
) const [virtual]

Returns a complete list of all the accessions that contained the peptide matched by this result.

In a peptide summary, this function returns the complete list of proteins in the q1_p1= line of the peptides, et_peptides or decoy_peptides section as appropriate.

In a protein summary, the return value contains the complete list of proteins that had the same peptide match to the one specified in the h1_q1= line.

See Using STL classes in Perl, Java, Python and C# for how to use the parameters start, end, pre, post, frame, multiplicity, db.

See also:
getProteinsWithThisPepMatch()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
startReturns a vector of the start residues -- one for each accession
endReturns a vector of the end residues -- one for each accession
preReturns a vector of the residue before the peptide -- one for each accession
postReturns a vector of the residue after the peptide -- one for each accession
frameReturns a vector of the frames -- one for each accession. For a protein sequence, the frame will always be 0
multiplicityReturns a vector of the multiplicities -- one for each accession. This is the number of precursor matches for each accession
dbReturns a vector of the database index numbers for searches against multiple databases. For searches against a single database, these values will always be 1.
Returns:
A list of all the accessions that contained the peptide matched by this result.

Implements ms_mascotresults.

int getAvePeptideIdentityThreshold ( double  OneInXprobRnd,
DB_MATCH_TYPE  dbType = DM_FASTA 
) const [virtual, inherited]

Return the average threshold value for all MS-MS data sets.

The average value is calculated by taking the mean value from calling getPeptideIdentityThreshold() for each query. This is the value that used to be shown at the top of the standard Mascot reports in versions before Mascot 2.0. For example, the threshold in the following string could be calculated by calling getAvePeptideIdentityThreshold(20):

Individual ions scores > 47 indicate identity or extensive homology (p<0.05).

Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
dbTypeIf given, should be DM_FASTA for Mascot score threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. (DM_BOTH is nonsensical; if given, -1 is returned.)
Returns:
The average score threshold. If there was no MS-MS data then -1 will be returned.
std::string getCacheFilename ( ms_mascotresfile resfile,
const unsigned int  flags,
double  minProbability = 0.0,
int  maxHitsToReport = 50,
const char *  unigeneIndexFile = 0,
double  ignoreIonsScoreBelow = 0.0,
int  minPepLenInPepSummary = 0,
const char *  singleHit = 0,
const unsigned int  flags2 = MSPEPSUM_NONE 
) [static]

Call this to retrieve the name of the file that the peptide summary will use for its cache.

Parameters:
resfileThe ms_mascotresfile for the project
flagscollection of values, bitwise or-ed together, from the FLAGS enumeration of ms_mascotresults.
minProbabilityThe minimum significance probability threshold
maxHitsToReportThe number of protein hits to include in the results
unigeneIndexFilepath to the Unigene index file
ignoreIonsScoreBelowThe score cutoff threshold value
minPepLenInPepSummaryThe minimum peptide length used to differentiate different proteins
singleHitThe accession string to retrieve a single protein
flags2collection of values, bitwise or-ed together, from the MSPEPSUM enumeration of ms_peptidesummary.
Returns:
cache file name
std::string getCacheFilename ( ms_mascotresfile resfile,
const ms_distiller_data distillerData,
int  searchIndex 
) [static]

Call this to retrieve the name of the file that the peptide summary will use for its cache.

Extracts the required parameters from the Distiller data and generates the name and path of the cache file.

Parameters:
resfileThe ms_mascotresfile for the project.
distillerDataThe Distiller project parameters.
searchIndexThe index number (1..n) of the search in the Distiller data.
Returns:
The relative path and filename of the cache file.
std::string getCacheFileName (  ) const

Returns the filename of the cache file.

If the cacheDirectory supplied to the ms_mascotresfile::ms_mascotresfile constructor is not empty, then a filename will be returned. This does not guarantee that the file exists, or is being used.

See also Caching Mascot Results.

Returns:
The filename for the cache file.
std::string getCacheFilename ( ms_mascotresfile resfile,
const ms_datfile datfile,
const ms_distiller_data_search search 
) [static]

Call this to retrieve the name of the file that the peptide summary will use for its cache.

Extracts the required parameters from the Distiller data and generates the name and path of the cache file.

Parameters:
resfileThe ms_mascotresfile for the project.
datfileThe Mascot search results data.
searchThe Distiller search results data.
Returns:
The relative path and filename of the cache file.
bool getComponentIntensity ( const int  q,
const int  p,
const std::string &  componentName,
double &  value,
double &  rawValue 
) const [virtual]

Returns the component intensity for reporter or multiplex methods.

This function will only succeed for MS2 based quantitation methods such as "reporter" and "multiplex" where the required data is in the .dat file. It is up to the calling application to check that this is the case using ms_mascotresfile::getQuantitationMethod and ms_quant_method::getProtocol.

This function was added in Mascot Parser 2.5. If the calling application is Using the ms_peptidesummary cache and the cache files were created with Mascot Parser 2.4 or earlier, then this function will calculate the intensities on demand. The first call to this function causes values for the whole file to be calculated, which may be slow. With new cache files, all the values are saved in the cache file.

This function will only calculate values for cases where ms_quant_helper::isPeptideQuantifiable returns ms_quant_helper::PEPTIDE_IS_QUANTIFIABLE.

For a particular q,p pair there will always be values for all components or for no components. There can never be a case where there is just a value available for a subset of the components.

Parameters:
qis the query number.
pis the 'hit' or 'rank' number in the range 1.. getMaxRankValue(). For "reporter" protocols, this must be 1.
componentNameis the name of the component as defined in the method. For example, for iTRAQ this might be "114", and for one of the SILAC methods this might be "Heavy".
[out]valueis the corrected intensity value. Corrections are applied as specified in the method included in the results file.
[out]rawValueis the uncorrected intensity value.
Returns:
false if the method is unable to calculate or determine an intensity value for the specified q and p values

Implements ms_mascotresults.

const ms_protein * getComponentProtein ( const char *  accession,
const int  dbIdx = 1 
) const [virtual]

Return a pointer to the protein entry given an accession.

Only use this function for the "components" of a UniGene search or PMF mixture. It will generally be more convenient for client applications to call ms_protein::getComponent() to get an ms_protein object.

Parameters:
accessionis the accession string.
dbIdxa database index (1..ms_searchparams::getNumberOfDatabases()).
Returns:
If the protein cannot be found in the results, then a null value is returned. See Maintaining object references: two rules of thumb.

Reimplemented from ms_mascotresults.

std::string getComponentString ( const int  q,
const int  p 
) const [virtual]

Return q1_p2_comp string value.

Note:
This function will always return the empty string for a protein summary.

The entry in the results file might be, for example:

   q1_p2_comp=light 

See also ms_peptide::getComponentStr

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'rank' number, which should be in the range 1..10.
Returns:
the full component string for a peptide summary, and the empty string for a protein summary.

Implements ms_mascotresults.

bool getCreateSummaryProgress ( int *  cspTotalPercentComplete,
unsigned int *  cspCurrTask,
int *  cspCurrTaskPercentageComplete,
std::string *  cspAccession,
int *  cspHit,
int *  cspQuery,
std::string *  cspKeepAliveText 
) const [inherited]

Return progress for the createSummary() call.

This function is most useful when using the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag and calling createSummary() from a separate thread. The ms_mascotresfile::outputKeepAlive() outputs progress text to stdout, and this may not be convenient for some applications.

Any of the passed parameters may be null.

Parameters:
cspTotalPercentCompleteis simply calculated from: 100 * cspCurrTask / ms_mascotresfile::KA_LAST + cspCurrTask / ms_mascotresfile::KA_LAST
cspCurrTaskis the ms_mascotresfile::KA_TASK currently being performed by Mascot Parser. If Parser is doing nothing, then this will be the last task that was completed and cspCurrTaskPercentageComplete will be 100
cspCurrTaskPercentageCompleteis the percentage (0..100) complete for the current cspCurrTask.
cspAccessionis the current 'accession' being processed. See ms_mascotresfile::outputKeepAlive() for details of which tasks set this value. For languages other than C++, this will be a reference rather than a pointer to a std::string.
cspHitis the current hit being processed. See ms_mascotresfile::outputKeepAlive() for details of which tasks set this value
cspQueryis the current 'query' being processed. See ms_mascotresfile::outputKeepAlive() for details of which tasks set this value
cspKeepAliveTextis the text that would be output by ms_mascotresfile::outputKeepAlive(). To use this, specify a suitable keepAliveText to the ms_mascotresfile constructor but set keepAliveInterval to zero to prevent any output to stdout. For languages other than C++, this will be a reference rather than a pointer to a std::string.
Returns:
true if the createSummary() call has completed.
std::vector< double > getDiscoveredErrTolModDeltas ( std::vector< std::string > *  vecDeltaStrings = NULL ) const [virtual, inherited]

Return the deltas of all error tolerant modifications discovered in this search.

Only rank 1 significant matches assigned to a protein hit contribute towards the list of error tolerant modification deltas. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted.

The list of error tolerant modification names is most useful when used in combination with getNumDiscoveredErrTolMods().

See Using STL classes in Perl, Java, Python and C#.

Parameters:
vecDeltaStringsPointer to a vector of strings to retrieve the deltas as strings instead of as doubles. If this pointer is valid then the returned vector is empty.
Returns:
A vector of error tolerant modification deltas discovered in rank 1 significant matches assigned to a protein hit in this search. The vector could be empty.
std::vector< std::string > getDiscoveredErrTolModNames (  ) const [virtual, inherited]

Return the names of all error tolerant modifications discovered in this search.

Only rank 1 significant matches assigned to a protein hit contribute towards the list of error tolerant modification names. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted. Amino acid insertion, deletion and substitution have the usual special names AA_INSERTION, AA_DELETION and AA_SUBSTITUTION. Otherwise error tolerant modification names follow Unimod names.

The list of error tolerant modification names is most useful when used in combination with getNumDiscoveredErrTolMods().

See Using STL classes in Perl, Java, Python and C#.

Returns:
A vector of error tolerant modification names discovered in rank 1 significant matches assigned to a protein hit in this search. The vector could be empty.
std::vector< std::string > getDiscoveredLocalModNames (  ) const [virtual, inherited]

Return the names of all query-level modifications discovered in this search.

The list of query-level modification names is most useful when used in combination with getNumDiscoveredLocalMods().

See Using STL classes in Perl, Java, Python and C#.

Returns:
A vector of query-level modification names discovered in rank 1 significant matches assigned to a protein hit in this search. The vector could be empty.
double getErrTolModDelta ( const int  q,
const int  p,
std::string *  modString = NULL,
std::string *  deltaAsString = NULL 
) const [virtual, inherited]

Return the error tolerant mod delta from h1_q2_et_mods or q1_p1_et_mods.

The entry in, for example, q1_p2_et_mods might be

   q1_p2_et_mods=0.984020,0.000000,Citrullination 

where the modification delta will be 0.984020.

See also:
getErrTolModName(), getErrTolModNeutralLoss() and getReadableVarMods()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
modStringOptional pointer to a modString. If the pointer is NULL or the pointed-to string is empty, the modString is looked up in the result file; if the pointed-to string is empty, its value is also set from the result file. If it is already set to a value, this value is used to retrieve the delta directly.
deltaAsString
Returns:
The modification delta.
std::vector< double > getErrTolModMasterNeutralLoss ( const int  q,
const int  p 
) const [virtual, inherited]

Return the error tolerant mod additional primary neutral losses from h1_q2_et_mods_master or q1_p1_et_mods_master.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than ten of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_master might be

   q1_p1_et_mods_master=1.0,2.0

The returned list will not contain the dominant neutral loss -- that can be retrieved using getErrTolModNeutralLoss().

See also:
getErrTolModSlaveNeutralLoss() and getErrTolModNeutralLoss()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
Returns:
The modification additional primary neutral loss values as a vector of doubles. See Using STL classes in Perl, Java, Python and C#.
std::string getErrTolModMasterString ( const int  q,
const int  p 
) const [virtual]

Return the complete error tolerant mod master neutral loss string from q1_p1_et_mods_master.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in q1_p2_et_mods_master might be
       q1_p2_et_mods_master=1.0,2.0 
    
  • The entry in h1_q2_et_mods_master might be
       h1_q2_et_mods_master=1.0,2.0 
    

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModMasterNeutralLoss().

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
complete error tolerant master neutral loss string.

Implements ms_mascotresults.

std::string getErrTolModName ( const int  q,
const int  p,
std::string *  modString = NULL 
) const [virtual, inherited]

Return the error tolerant mod name from h1_q2_et_mods or q1_p1_et_mods.

The entry in, for example, q1_p2_et_mods might be

   q1_p2_et_mods=0.984020,0.000000,Citrullination 

where the modification name will be Citrullination.

See also:
getErrTolModDelta(), getErrTolModNeutralLoss() and getReadableVarMods()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
modStringOptional pointer to a modString. If the pointer is NULL or the pointed-to string is empty, the modString is looked up in the result file; if the pointed-to string is empty, its value is also set from the result file. If it is already set to a value, this value is used to retrieve the name directly.
Returns:
The modification name.
double getErrTolModNeutralLoss ( const int  q,
const int  p 
) const [virtual, inherited]

Return the error tolerant mod neutral loss from h1_q2_et_mods or q1_p1_et_mods.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than 10 of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods might be

   q1_p1_et_mods=79.966330,97.976898,Phospho (STY) 

where the highest scoring modification neutral loss, returned by this function, is 97.976898.

See also:
getErrTolModName(), getErrTolModDelta(), getReadableVarMods(), getErrTolModMasterNeutralLoss()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
Returns:
The modification neutral loss value.
std::vector< double > getErrTolModPepNeutralLoss ( const int  q,
const int  p 
) const [virtual, inherited]

Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_pep or q1_p1_et_mods_pep.

Some modifications specify one or more peptide neutral losses. PepNeutralLoss allows a neutral loss from the precursor to be specified so that this peak is not treated as a noise peak when being scored by Mascot. The ability to specify peptide neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_pep might be

   q1_p1_et_mods_pep=97.976896, 79.966330 
See also:
getErrTolModReqPepNeutralLoss()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
Returns:
The modification peptide neutral loss values as a vector of doubles. See Using STL classes in Perl, Java, Python and C#.
std::string getErrTolModPepString ( const int  q,
const int  p 
) const [virtual]

Return the complete error tolerant mod peptide neutral loss string from q1_p1_et_mods_pep.

ms_mascotresults::getErrTolModPepString(const int, const int) const

Parameters:
qquery
prank
Returns:
complete error tolerant peptide neutral loss string

Implements ms_mascotresults.

std::vector< double > getErrTolModReqPepNeutralLoss ( const int  q,
const int  p 
) const [virtual, inherited]

Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_reqpep or q1_p1_et_mods_reqpep.

Some modifications specify one or more required peptide neutral losses. ReqPepNeutralLoss performs the same function as PepNeutralLoss but with the additional condition that the peak must be present in the spectrum. The ability to specify peptide neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_reqpep might be

   q1_p1_et_mods_reqpep=97.976896, 79.966330 
See also:
getErrTolModPepNeutralLoss()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
Returns:
The modification required peptide neutral loss values as a vector of doubles. See Using STL classes in Perl, Java, Python and C#.
std::string getErrTolModReqPepString ( const int  q,
const int  p 
) const [virtual]

Return the complete error tolerant mod required peptide neutral loss string from q1_p1_et_mods_reqpep.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in q1_p2_et_mods_reqpep might be
       q1_p2_et_mods_reqpep=1.0,2.0 
    
  • The entry in h1_q2_et_mods_reqpep might be
       h1_q2_et_mods_reqpep=1.0,2.0 
    

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModReqPepNeutralLoss().

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
complete error tolerant required peptide neutral loss string.

Implements ms_mascotresults.

std::vector< double > getErrTolModSlaveNeutralLoss ( const int  q,
const int  p 
) const [virtual, inherited]

Return the error tolerant mod slave neutral losses from h1_q2_et_mods_slave or q1_p1_et_mods_slave.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than ten of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p2_et_mods_slave might be

   q1_p2_et_mods_slave=1.0,2.0
See also:
getErrTolModMasterNeutralLoss(), getErrTolModNeutralLoss()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10
Returns:
The modification slave neutral loss values as a vector of doubles. See Using STL classes in Perl, Java, Python and C#.
std::string getErrTolModSlaveString ( const int  q,
const int  p 
) const [virtual]

Return the complete error tolerant mod slave neutral loss string from q1_p1_et_mods_slave.

ms_mascotresults::getErrTolModSlaveString(const int, const int) const

Parameters:
qquery
prank
Returns:
complete error tolerant slave neutral loss string

Implements ms_mascotresults.

std::string getErrTolModString ( const int  q,
const int  p 
) const [virtual]

Return the complete error tolerant mod string from h1_q2_et_mods or q1_p1_et_mods.

ms_mascotresults::getErrTolModString(const int, const int) const

Parameters:
qquery
prank
Returns:
complete error tolerant modification string.

Implements ms_mascotresults.

unsigned int getFlags (  ) const [inline, inherited]

Returns the flags value passed to the constructor.

Returns:
the ms_mascotresults::FLAGS value.
unsigned int getFlags2 (  ) const [inline, inherited]

Return the flags2 value passed to the ms_peptidesummary constructor.

For an ms_proteinsummary, will always return 0 as there is no option to set the flags2 value.

Returns:
the ms_peptidesummary::MSPEPSUM value.
ms_protein * getHit ( const int  hit,
const int  memberNumber = 0 
) const [virtual]

Return the ms_protein hit - returns null if hit > number of hits.

If MSPEPSUM_USE_CACHE is set, this function will load the hit results 'on demand'.

If MSRES_GROUP_PROTEINS is specified when creating the ms_peptidesummary, then memberNumber should be set to 1 (or 0) and the 'main' hit is returned.

If MSRES_CLUSTER_PROTEINS is specified when creating the ms_peptidesummary, then there may be multiple members for each family. See Using MSRES_CLUSTER_PROTEINS for more details.

Parameters:
hitMust be in the range 1.. getNumberOfHits().
memberNumberShould be 1 (or 0) for a protein not in a family or for the main protein in a hit family. It should be 2 or more for subsequent proteins in the family. If this number is greater than the number of proteins in the family then the function returns a null value.
Returns:
If the hit number is outside the range above, then this function will return a null value. See Maintaining object references: two rules of thumb.

Reimplemented from ms_mascotresults.

int getHomologyThreshold ( const int  query,
double  OneInXprobRnd,
const int  rank = 1 
) const [virtual, inherited]

Returns the 'homology' threshold.

For MS-MS data only, this returns a threshold value for significant homology rather than identity. This value appears in the yellow popup box for an MS-MS result: Score greater than XX indicates homology.

In an error tolerant (ET) search, if the query contains error tolerant matches, the homology threshold is only valid for the non-ET matches. In a spectral library (SL) search, library matches only have identity threshold.

The return value will be zero if the threshold is not available. The value cannot be determined if the query contains error tolerant matches, or if the query contains a tag or etag, or if the value of qmatch is less than or equal to 100.

The value returned will not be higher than the identity threshold.

In versions prior to 2.2, the homology threshold was not affected by OneInXprobRnd -- this was corrected in version 2.2.

The rank parameter is mandatory in an integrated error tolerant (ET) search and integrated spectral library (SL) search. You can either pass ms_peptide::getRank() as the rank parameter, or use one of the enumerated values for HOMOLOGY_THRESHOLD_SOURCE. The rank determines the data source:

  • If rank = HOMTHR_FASTA, the method returns the FASTA homology threshold.
  • If the peptide at the given rank is from a FASTA database and is not an ET match, the method returns the FASTA homology threshold.
  • If rank = HOMTHR_SL, the method returns zero (since library searches don't have a homology threshold).
  • If the peptide at the given rank is from a spectral library, the method returns zero.
Parameters:
querySpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
rankSpecifies the rank of the match of interest.
Returns:
The homology threshold. If there was no MS-MS data for this query then 0 will be returned.
Examples:
resfile_summary.cpp.
int getHomologyThresholdForHistogram ( double  OneInXprobRnd,
DB_MATCH_TYPE  dbType = DM_FASTA 
) const [virtual, inherited]

Returns the value for the 'yellow section' in the histogram.

For MS-MS data only, this returns a homology (rather than identity) threshold value that is shown at the top of the report: Individual ions scores > 19 indicate peptides with significant homology (p<0.05).

The yellow area on the histogram also corresponds to this value.

Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
dbTypeIf given, should be DM_FASTA for Mascot homology threshold. If dbType is DM_SPECTRAL_LIBRARY, the method returns zero; spectral library matches have only identity threshold. (DM_BOTH is nonsensical; if given, -1 is returned.)
Returns:
The homology threshold. Note that this value is only available for an MS-MS search with just one MS-MS data set, otherwise the return value will be zero.
double getIonsScore ( const int  q,
const int  p,
const bool  decoy 
) const [virtual]

Returns an ions score quickly without having to load an ms_peptide object.

If other values are required from the ms_peptide object, then it is normally faster to call getPeptide() and then ms_peptide::getIonsScore().

To get the q and p values for a peptide in an ms_protein object, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

Parameters:
qis the query number in the range 1 to ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.
decoyis a flag to indicate if the score should be taken from the decoy section. This should only be set to true for searches with decoy matches.
Returns:
the score, or 0 if there was no score for these q / p values.

Implements ms_mascotresults.

double getIonsScoreCorrected ( const double  ionsScore,
const long  multiplicity 
) const [protected, inherited]
Note:
It shouldn't be necessary to call this function from outside the library.

This function is required here rather than as part of the ms_peptide object because one ms_peptide object can be used from many different proteins -- each with a different multiplicity factor.

This function subtracts the factor:

  • -10 * log(multiplicity) * tolFact / log(10); where
  • tolFact = sqrt(ITOL * ITOL + 0.0625);
Parameters:
ionsScoreIs the uncorrected ions score.
multiplicitycan be obtained from ms_protein::getPeptideMultiplicity.
Returns:
The corrected value.
std::vector< int > getIonsScoreHistogram ( IONS_HISTOGRAM  flags = IH_INCLUDE_TOP_MATCHES,
DB_MATCH_TYPE  dbType = DM_FASTA 
) const [virtual, inherited]

Returns a list of counts for binned ions scores.

  • The first element of the array returns the count of matches with a score between 0 and 1.
  • The second element of the array returns the count of matches with a score between 1 and 2.
  • The third element of the array returns the count of matches with a score between 2 and 3. etc. etc.
Parameters:
flagsCurrently only IH_INCLUDE_TOP_MATCHES and IH_INCLUDE_TOP_10_MATCHES are supported.
dbTypeIf given, should be DM_FASTA for Mascot scores and DM_SPECTRAL_LIBRARY for spectral library scores. (DM_BOTH is nonsensical; if given, empty vector is returned.)
Returns:
a vector of integers. See Using STL classes in Perl, Java, Python and C#.
void getLibraryEntryId ( const int  q,
const int  p,
std::vector< int > &  dbIdx,
std::vector< int > &  offset,
std::vector< std::string > &  checksum,
std::vector< std::string > &  mods 
) const

Return the database number, MSP file offset, checksum and modifications of the matched spectral library entry.

If the peptide match q,p is a spectral library match, this method returns the source database index, an offset into the MSP file that was searched, a checksum of the peak list of the library entry and the modifications in the library entry. Typically each of the dbIdx, offset and checksum vectors contains only one element -- the library entry that matched the query. However, under some circumstances Mascot may merge matches from different library entries or libraries in the same peptide match (for example, if it has same sequence, same mods and same score). In that case, the vectors may contain multiple elements, but each vector is guaranteed to have the same number of elements. Mascot 2.6 does not merge library matches.

The database index, offset, checksum and mods can be used for looking up the correct library entry; see example code in ms_spectral_lib_file::findEntries(). Once the correct entry is found, you can access its attributes, peak list, etc. with the corresponding methods in ms_spectral_lib_entry.

Note that the library ID returned by MSPepSearch is not stored in the Mascot results file, as the NIST library compression tool can renumber library entries arbitrarily if the MSP file changes.

If q,p is not a spectral library match, the returned vectors are empty.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
qquery number of the spectral library match, in range 1..ms_mascotresfile::getNumQueries().
prank of the spectral library match, in range 1..getMaxRankValue().
dbIdxreturns a list of database indices.
offsetreturns a list of integer offsets.
checksumreturns a list of peak list checksums.
modsreturns a list of the modifications for each entry. The format of the mods is as described in ms_spectral_lib_entry::getMods() and can be passed verbatim to ms_spectral_lib_file::findEntries().
std::string getLibraryModString ( const int  q,
const int  p 
) const [virtual]

Return the modification string of the spectral library match from q1_p1_SLmod.

ms_mascotresults::getLibraryModString(const int, const int) const

Parameters:
qquery
prank
Returns:
modification string of the match if it is a spectral library match and has modifications, an empty string otherwise.

Implements ms_mascotresults.

int getMaxPeptideIdentityThreshold ( double  OneInXprobRnd,
DB_MATCH_TYPE  dbType = DM_FASTA 
) const [virtual, inherited]

Return the max threshold value for all MS-MS data sets.

The maximum value is calculated by taking the maximum value returned by calling getPeptideIdentityThreshold() for each query. This is the value that is shown at the top of the standard Mascot peptide summary report (version 2.0 and later). For example, threshold in the following string could be calculated by calling getMaxPeptideIdentityThreshold(20):

Individual ions scores > 47 indicate identity or extensive homology (p<0.05).

Versions of Mascot prior to 2.0 displayed the equivalent of getAvePeptideIdentityThreshold().

Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
dbTypeIf given, should be DM_FASTA for Mascot identity threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. (DM_BOTH is nonsensical; if given, -1 is returned.)
Returns:
The maximum score threshold. If there was no MS-MS data then -1 will be returned.
int getMaxRankValue (  ) const [virtual, inherited]

Returns the maximum 'rank' or 'hit' or 'p' value.

For functions such as getPeptide() a 'rank' or 'hit' value is required. The allowable range for this value depends on whether it is a protein summary or peptide summary.

For a peptide summary the value returned will normally be 10, but for an integrated error tolerant search this can be in the range 1..20.

For a protein summary the return value will be ms_mascotresfile::getNumHits() -- with the appropriate parameter passed for the case of a decoy or error tolerant search.

Returns:
max rank value
int getMinPepLenInPepSummary (  ) const [inherited]

Peptides shorter than this are ignored when putting proteins into groups.

minPepLenInPepSummary is an optional flag for the peptide summary only. Any peptides shorter than this will be ignored when grouping proteins together.

Specify this value when creating an ms_peptidesummary::ms_peptidesummary object. This value will always be zero for ms_proteinsummary.

Returns:
The minimum peptide length considered when creating the peptide summary.
double getMinProbabilityForSLScore ( double  score ) [static]

Return the minProbability value that sets library score threshold to the given value in SL-only mode.

The significance threshold (minProbability parameter) that corresponds to the library score threshold score is calculated from the expect value formula:

 E(s) = 0.05 * 10 ** (-(s - 300)/100)
 

This method should only be used in SL-only mode (MSPEPSUM_SL_ONLY). In integrated mode (MSPEPSUM_SL_INTEGRATED), library score significance is calculated based on Mascot matches; see getPeptideExpectationValue().

Note that due to how library scores are converted to expect values, the maximum minProbability (1.0) corresponds to score 170. If you give a score less than 170 as argument, the function returns just a bit less than 1.0.

Parameters:
scoreDesired library score threshold. If it is not within the range (0, 1000), the function returns 0.05.
Returns:
Value for the minProbability parameter that sets library score threshold to score in SL-only mode.
ms_protein * getNextFamilyProtein ( const int  masterHit,
const int  id 
) const [virtual, inherited]

Find the next protein in the family masterHit.

Call this function multiple times for each masterHit, incrementing id each time. When there are no more protein families this function will return a null value.

The main protein for the hit should be obtained by calling ms_mascotresults::getHit()

See Using MSRES_CLUSTER_PROTEINS for further information.

See also:
ms_protein::GROUP
Parameters:
masterHitis the id for the main hit (1..n).
idis a 'one based' number.
Returns:
The next protein family or a null value if there is no such group. See Maintaining object references: two rules of thumb.
Examples:
resfile_summary.cpp.
ms_protein * getNextSimilarProtein ( const int  masterHit,
const int  id 
) const [virtual, inherited]

Return the next protein that contains all the peptides in the 'master' protein.

Find the next protein that contains exactly the same set of peptides as the 'master' one. See Grouping proteins together for further information. Call this function multiple times for each masterHit, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also:
ms_protein::GROUP
Parameters:
masterHitis a 'one based' number for the main hit.
idis a 'one based' number.
Returns:
The next protein with the same set of peptides as the master hit or a null value if there are no such proteins. See Maintaining object references: two rules of thumb.
ms_protein * getNextSimilarProteinOf ( const char *  masterAccession,
const int  masterDB,
const int  id 
) const [virtual, inherited]

Return the next protein that contains all the peptides in the 'master' protein.

Find the next protein that contains exactly the same set of peptides as the 'master' one. See Grouping proteins together for further information. Call this function multiple times for each masterAccession, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also:
ms_protein::GROUP
Parameters:
masterAccessionis the accession for the main hit.
masterDBis the database (FASTA) ID for the main hit.
idis a 'one based' number.
Returns:
The next protein with the same set of peptides as the masterAccession or a null value if there are no such proteins. See Maintaining object references: two rules of thumb.
Examples:
resfile_summary.cpp.
ms_protein * getNextSubsetProtein ( const int  masterHit,
const int  id,
const bool  searchWholeFamily = true 
) const [virtual, inherited]

Return the next protein that contains some of the peptides in the 'master' protein.

Find the next protein that contains a subset of the same peptides as the 'master' one. See Grouping proteins together and setSubsetsThreshold() for further information. Call this function multiple times for each masterHit, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also:
ms_protein::GROUP, getNextSubsetProteinOf()
Parameters:
masterHitis a 'one based' number for the main hit.
idis a 'one based' number.
searchWholeFamilyonly has meaning when MSRES_CLUSTER_PROTEINS is specified. See Using MSRES_CLUSTER_PROTEINS.
Returns:
A protein that contains a subset of peptides in the master protein, or a null value if there is no such protein. See Maintaining object references: two rules of thumb.
ms_protein * getNextSubsetProteinOf ( const char *  masterAccession,
const int  masterDB,
const int  id 
) const [virtual, inherited]

Return the next protein that contains some of the peptides in the 'master' protein.

Find the next protein that contains a subset of the same peptides as the 'master' one. See Grouping proteins together and setSubsetsThreshold() for further information.

Call this function multiple times for each masterAccession, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also:
ms_protein::GROUP, getNextSubsetProtein()
Parameters:
masterAccessionis the accession for the master hit or a family hit.
masterDBis the database (fasta) ID for the main hit.
idis a 'one based' number.
Returns:
A protein that contains a subset of peptides in the master protein, or a null value if there is no such protein. See Maintaining object references: two rules of thumb.
Examples:
resfile_summary.cpp.
std::vector< int > getNumberOfAccessionDBTypes ( int  q,
int  p 
) const

Return a count of protein accessions by database type of all the accessions that contained the peptide match.

For the full list of data about protein accessions, see getAllProteinsWithThisPepMatch(). This method only counts the number of times each type of database (AA, NA, SL, SLREF) appears in the list of protein accessions.

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) or an integrated spectral library search, this can be in the range 1 to 20.
Returns:
A vector of counts by database type. The vector has four elements, where the indices are AA (0), NA (1), SL (2) and SLREF (3).
std::vector< int > getNumberOfFamilyAccessionDBTypes ( int  hit,
int  q,
int  p,
bool  includeSamesets = true 
) const

Return a count of protein accessions by database type of all the family members that contained the peptide match.

For the full list of data about family members, see getAllFamilyMembersWithThisPepMatch(). This method only counts the number of times each type of database (AA, NA, SL, SLREF) appears in the list of protein accessions.

What is counted: family members and optionally their samesets.

What is not counted: subset and intersection proteins and their samesets.

Parameters:
hitShould be in the range 1.. getNumberOfHits().
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) or an integrated spectral library search, this can be in the range 1 to 20.
includeSamesetsIf true (default), include samesets of family members in the counts. Otherwise count only lead proteins of family members.
Returns:
A vector of counts by database type. The vector has four elements, where the indices are AA (0), NA (1), SL (2) and SLREF (3).
int getNumberOfFamilyMembers (  ) const [virtual, inherited]

Return the total number of family members.

Returns:
This function returns the number of 'top level' hits and family members, not taking into account any proteins that are identical or subsets of these hits. The value will be identical to that returned by getNumberOfHits() unless the ms_mascotresults::MSRES_CLUSTER_PROTEINS flag has been specified. In that case the number of values returned by getNextFamilyProtein() will be included.
int getNumberOfHits (  ) const [virtual, inherited]

Returns the number of hits in the results.

Returns:
This function returns the number of 'top level' hits -- not taking into account any proteins that are identical or subsets of the 'top level' hits. ms_mascotresults::getHit() can be called with any number from 1 to the value returned by this function.
int getNumberOfUnassigned (  ) const [inherited]

Return the number of peptides in the unassigned list.

See Unassigned peptides list for details of creating and using an unassigned list.

If there is no cache file for the results, then createUnassignedList() must be called before calling this function or the ms_mascotresfile::ERR_UNASSIGNED_PROG error will be set, and this function will return 0.

If a cache file has been created, then there is no requirement to call createUnassignedList() before calling this function.

Returns:
the number of peptides in the unassigned list, or 0 in the case of an error.
Examples:
resfile_summary.cpp.
long getNumDecoyHitsAboveHomology ( double  OneInXprobRnd,
DECOY_STATS_COUNT_TYPE  countType = DS_COUNT_PSM,
DB_MATCH_TYPE  dbType = DM_FASTA 
) [virtual, inherited]

Return the number of hits from the decoy search with a score at or above the homology threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The homology threshold is retrieved using getPeptideHomologyThreshold(). If there is no homology threshold, then the identity threshold is used, which is retrieved using getPeptideIdentityThreshold(). These values may be slightly different from the threshold for the non-decoy matches.

For a protein summary, this function will always return -1 because there is never a homology threshold for the protein summary.

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology() 
Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
countTypeIn peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbTypeIn peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
Returns:
The number of decoy hits with a score at or above the homology threshold.
long getNumDecoyHitsAboveIdentity ( double  OneInXprobRnd,
DECOY_STATS_COUNT_TYPE  countType = DS_COUNT_PSM,
DB_MATCH_TYPE  dbType = DM_FASTA 
) [virtual, inherited]

Return the number of hits from the decoy search with a score at or above the identity threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The identity threshold is retrieved using getPeptideIdentityThreshold(). This value may be slightly different from the threshold for the non-decoy matches.

For a protein summary, the number of hits is the number of protein matches. This number includes the number of protein mixture matches plus the number of single protein matches. The returned value will be affected by the minProbability value passed to the ms_proteinsummary constructor. A value of -1 will be returned if the search contained any MS-MS, sequence query or tag data. The identity threshold is retrieved using getProteinThreshold().

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology() 
Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
countTypeIn peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbTypeIn peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
Returns:
The number of decoy hits with a score at or above the identity threshold.
std::vector< int > getNumDiscoveredErrTolMods ( const std::string  modName,
std::vector< std::string > &  positions,
std::vector< std::string > &  sites 
) const [virtual, inherited]

Return the number of instances the error tolerant modification was discovered in this search.

The number of times an error tolerant modification has been discovered in the search is the number of times it appears in rank 1 matches in the search results. Since the number and kind of error tolerant modifications is (practically) unlimited, you should call getDiscoveredErrTolModNames() first to find out which error tolerant modifications appear in the search. (The count of error tolerant modifications that do not appear in the search is, of course, zero.) Amino acid insertion, deletion and substitution have the usual special names AA_INSERTION, AA_DELETION and AA_SUBSTITUTION. Otherwise error tolerant modification names follow Unimod names.

Only rank 1 significant matches assigned to a protein hit are counted. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the error tolerant modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
modNameError tolerant modification name (as returned by getDiscoveredErrTolModNames()).
positionsA vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sitesA vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
Returns:
The number of times the error tolerant modification was observed in rank 1 significant matches assigned to a protein hit in this search at each specificity, returned in positions and sites.
std::vector< int > getNumDiscoveredFixedMods ( const int  num,
std::vector< std::string > &  positions,
std::vector< std::string > &  sites 
) const [virtual, inherited]

Return the number of instances the fixed modification was discovered in this search.

The number of times a fixed modification has been discovered in the search is the number of times the terminus or residue matching its specificity appears in rank 1 significant matches assigned to a protein hit in the search results. For example, if Carbamidomethyl (C) is specified as a fixed modification, then the number of times it has been discovered in the search is the number of C residues in peptide sequences in rank 1 significant matches assigned to a protein hit.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the fixed modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that if the same modification is specified as a fixed modification with two different specificities in the search, discovery counts will be returned separately. For example, if fixed modification 1 is Carbamidomethyl (C) and 2 is Carbamidomethyl (N-term), then calling getNumDiscoveredFixedMods() with num = 1 returns the Carbamidomethyl count for specificity "Anywhere" and "C", while num = 2 returns the count for specificity "Any N-term" and "N-term". In both cases each vector contains a single item.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
numFixed modification number (same value as passed to ms_searchparams::getFixedModsName()).
positionsA vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sitesA vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
Returns:
The number of times the fixed modification was observed in rank 1 significant matches assigned to a protein hit in this search at each specificity, returned in positions and sites.
std::vector< int > getNumDiscoveredLibraryMods ( const int  modId,
std::vector< std::string > &  positions,
std::vector< std::string > &  sites 
) const [inherited]

Return the number of instances the library modification was discovered in this search.

The number of times a library modification has been discovered in the search is the number of times it appears in the library mods string in significant rank 1 spectral library matches assigned to a protein hit in the search results.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the library modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity.

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that modifications are part of the metadata of spectral library entries. Unlike in database searches, they are not specified as part of the search parameters. Some caveats apply:

  • A library modification may have the same name as a variable or fixed modification in the database search, and they may well be the same modification -- but you need to know how the library was constructed to conclude either way.
  • Library modification name is usually a free-text string, which may or may not be a Unimod modification name.
  • The MSP format for spectral libraries does not support specificities in the same way as Mascot. The "position" of the modification is not encoded in the library and is thus unknown; it will always be Anywhere.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
modIdmodification ID, same as passed to ms_searchparams::getLibraryModName(). Must be in range 1..ms_searchparams::getNumberOfLibraryMods().
positionsA vector of positions, always "Anywhere".
sitesA vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
Returns:
The number of times the library modification was observed in significant rank 1 spectral library matches assigned to a protein hit in this search at each specificity, returned in positions and sites.
std::vector< int > getNumDiscoveredLocalMods ( const std::string  modName,
std::vector< std::string > &  positions,
std::vector< std::string > &  sites 
) const [virtual, inherited]

Return the number of instances the query-level modification was discovered in this search.

The number of times a query-level modification has been discovered in the search is the number of times it appears in rank 1 significant matches assigned to a protein hit in the search results. Query-level modifications are counted separately from variable modifications (specified at search form level), because each individual query may contain up to 32 query-level variable modifications. This means the search as a whole may contain more than 32 different query-level modifications. You should call getDiscoveredLocalModNames() first to find out which query-level modifications appear in the search.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the query-level modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that only rank 1 significant matches assigned to a protein hit are counted.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
modNameQuery-level modification name (as returned by getDiscoveredLocalModNames()).
positionsA vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sitesA vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
Returns:
The number of times the query-level modification was observed in rank 1 significant matches assigned to a protein hit in this search at each specificity, returned in positions and sites.
int getNumDiscoveredNonSpecCleavage (  ) const [virtual, inherited]

Return the number of times non-specific cleavage occurred in an error tolerant search.

An error tolerant match is the product of non-specific cleavage when the residue before or after the peptide terminal residue does not match the cleavage specificity of the enzyme. For example, trypsin cleaves at C-term K or R (unless followed by P), so all tryptic peptide sequences have K or R as the (N-term) residue before the sequence. An error tolerant match with non-specific cleavage would have a residue other than K or R at N-terminus.

Only rank 1 significant matches assigned to a protein hit contribute towards the count of non-specific cleavages. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the match is not counted.

Returns:
The number of times non-specific cleavage occurred in rank 1 significant matches assigned to a protein hit, or -1 for non-error tolerant search.
std::vector< int > getNumDiscoveredVariableMods ( const int  num,
std::vector< std::string > &  positions,
std::vector< std::string > &  sites 
) const [virtual, inherited]

Return the number of instances the variable modification was discovered in this search.

The number of times a variable modification has been discovered in the search is the number of times it appears in the variable mods string in rank 1 significant matches assigned to a protein hit in the search results.

Note that error tolerant matches are not included in these counts; see getNumDiscoveredErrTolMods(). Query-level variable modifications are counted separately as well; see getNumDiscoveredLocalMods().

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the variable modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that if the same modification is specified as a variable modification with two different specificities in the search, discovery counts will be returned separately. For example, if variable modification 1 is Phospho (ST) and 2 is Phospho (Y), then calling getNumDiscoveredVariableMods() with num = 1 returns the Phospho counts for specificities "Anywhere" "S" and "Anywhere" "T", while num = 2 returns the count for specificity "Anywhere" "Y".

Note also that since error tolerant modifications are not included in counts of variable modifications, it is possible (though unlikely) for a modification to appear both as variable (specified explicitly in the search) and as error tolerant, especially if the two instances have different specificities.

See Using STL classes in Perl, Java, Python and C#.

Parameters:
numVariable modification number (same value as passed to ms_searchparams::getVarModsName()).
positionsA vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sitesA vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
Returns:
The number of times the variable modification was observed in rank 1 significant matches assigned to a protein hit in this search at each specificity, returned in positions and sites.
long getNumHitsAboveHomology ( double  OneInXprobRnd,
DECOY_STATS_COUNT_TYPE  countType = DS_COUNT_PSM,
DB_MATCH_TYPE  dbType = DM_FASTA 
) [virtual, inherited]

Return the number of hits with a score at or above the homology threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The homology threshold is retrieved using getPeptideHomologyThreshold(). If there is no homology threshold, then the identity threshold is used, which is retrieved using getPeptideIdentityThreshold().

For a protein summary, this function will always return -1 because there is never a homology threshold for the protein summary.

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology() 
Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
countTypeIn peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbTypeIn peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
Returns:
The number of hits with a score at or above the homology threshold.
long getNumHitsAboveIdentity ( double  OneInXprobRnd,
DECOY_STATS_COUNT_TYPE  countType = DS_COUNT_PSM,
DB_MATCH_TYPE  dbType = DM_FASTA 
) [virtual, inherited]

Return the number of hits with a score at or above the identity threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The identity threshold is retrieved using getPeptideIdentityThreshold().

For a protein summary, the number of hits is the number of protein matches. The number includes the number of protein mixture matches plus the number of single protein matches. The returned value will be affected by the minProbability value passed to the ms_proteinsummary constructor. A value of -1 will be returned if the search contained any MS-MS, sequence query or tag data. The identity threshold is retrieved using getProteinThreshold().

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology() 
Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
countTypeIn peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbTypeIn peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
Returns:
The number of hits with a score at or above the identity threshold.
std::vector< int > getPepsWithSameScore ( const int  q,
const int  p 
) const [virtual]

Returns a list of 'p' values for peptides with the same score.

Peptides with the same score will have the same pretty rank. See ms_peptide::getPrettyRank() for details.

It is preferable to call this function rather than to loop through all the ranks calling getPeptide() for each one because the cached index stores the pretty ranks and this should therefore be faster.

For a protein summary, the returned list always contains a single value that is the same as the passed p value.

Parameters:
qis the query number.
pis the 'hit' or 'rank' number in the range 1.. getMaxRankValue().
Returns:
a list of the ranks that have the same pretty value. See Using STL classes in Perl, Java, Python and C#.

Implements ms_mascotresults.

ms_peptide getPeptide ( const int  q,
const int  p 
) const [virtual]

Return a peptide object for the specified query / rank.

To get the q and p values for a peptide in an ms_protein object, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

An 'empty' peptide object will be returned if

To test for an 'empty' peptide, use ms_peptide::getAnyMatch().

Parameters:
qis the query number in the range 1 to ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.
Returns:
an ms_peptide object.

Implements ms_mascotresults.

bool getPeptide ( const int  q,
const int  p,
ms_peptide *&  pep 
) const [virtual]

Return a peptide object for the specified query / rank.

Note:
This function can not be used in programming languages other than C++.

To get the q and p values for a peptide in an ms_protein, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

An 'empty' peptide object will be returned if

To test for an 'empty' peptide, use ms_peptide::getAnyMatch().

Parameters:
qis the query number in the range 1 to ms_mascotresfile::getNumQueries()
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.
pepis a reference to a pointer to an ms_peptide object that will be returned.
Returns:
true if the peptide exists.

Implements ms_mascotresults.

std::vector< int > getPeptideAmbiguityRanks ( const int  q,
const int  p 
)

Return a vector of ranks that have the same ambiguity group.

Ambiguity group ids start at 1. Two peptides are in the same ambiguity group when either 1) they have the same query and the same pretty rank, or 2) they have the same query, the same sequence and a different search engine.

Note: q and p have to be within limits, otherwise an empty vector is returned. Additional note: these values are in the cache file only from ms_peptidesummary::BUGFIX_12538 onwards. Calling this method with an older cache may be considerably slower.

Parameters:
qquery number
prank number
Returns:
A vector of ranks that have the same ambiguity group id.
double getPeptideExpectationValue ( const double  score,
const int  query,
const int  rank = 0 
) const [virtual, inherited]

Returns the expectation value for the given peptide score and query.

Returns the number of peptides that you would expect to get this score or higher from this database. This value is displayed as the expectation value in Mascot 2.0 and later reports.

For Mascot matches, the expectation value is calculated from

    E = Pthreshold * (10 ** ((Sthreshold - score) / 10))

For a score that is exactly on the default significance threshold (p<0.05), the expectation value is also 0.05.

The Sthreshold value is the value returned by getHomologyThreshold() if ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH was specified in the ms_peptidesummary constructor, otherwise the value used is the one returned from getPeptideIdentityThreshold().

For spectral library matches, the expectation value formula depends on report mode.

  • In integrated mode (MSPEPSUM_SL_INTEGRATED), library scores are scaled internally so that their mean and standard deviation match the mean and standard deviation of significant rank 1 Mascot matches with the same sequence in the same query. See Library scores and thresholds.
  • In SL-only mode (MSPEPSUM_SL_ONLY), the expect value formula is
     E = 0.05 * 10 ** (-(s - 300)/100).
     
    For example, E = 0.05 for score 300.

Spectral library support was added in Mascot Parser 2.6. If rank is zero, which is the default in Parser 2.5 and earlier, the score is assumed to be a Mascot ions score. If rank is non-zero, the correct formula is chosen based on the type of the match at rank rank in this query.

See also:
getProteinExpectationValue()
Parameters:
scoreis the Mascot score to be converted.
queryspecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
rankis the rank of the match of interest in this query.
Returns:
The number of peptides that you would expect to get this score or higher from this database.
int getPeptideIdentityThreshold ( const int  query,
double  OneInXprobRnd,
DB_MATCH_TYPE  dbType = DM_FASTA 
) const [virtual, inherited]

Return the threshold value for this ms-ms data being a random match.

See also:
getAvePeptideIdentityThreshold()
Parameters:
querySpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
dbTypeIf given, should be DM_FASTA for Mascot identity threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. (DM_BOTH is nonsensical; if given, -1 is returned.)
Returns:
A score threshold. If query is < 1 or > ms_mascotresfile::getNumQueries() then -1 will be returned. If there was no MS-MS data or no match for this query, then -1 will be returned.
Examples:
peptide_list.cpp, and resfile_summary.cpp.
double getPeptideThreshold ( const int  query,
double  OneInXprobRnd,
const int  rank = 1,
const THRESHOLD_TYPE  thresholdType = TT_PEPSUM_DEFAULT 
) const [inherited]

Return either the identity or the homology threshold.

The default is to use identity thresholds for the original Peptide Summary and Select Summary reports. For the Family Summary grouping, homology thresholds are used, and this needs to be specified by using the ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH flag. (This flag is automatically set when using the ms_mascotresfile::get_ms_mascotresults_params helper function).

This function calls getPeptideIdentityThreshold() or getHomologyThreshold() as appropriate. If it calls getHomologyThreshold() and this returns 0, then the identity threshold is returned.

The rank parameter is mandatory in an integrated error tolerant (ET) search and integrated spectral library (SL) search. In an ET search, if the query contains error tolerant matches, the homology threshold is only valid for the non-ET matches. In an SL search, Mascot and library matches have different score thresholds.

Parameters:
querySpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries()
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
rankSpecifies the rank of the match of interest.
thresholdTypecan be any one of the ms_mascotresults::THRESHOLD_TYPE values
Returns:
the threshold.
double getProbabilityThreshold (  ) const [virtual, inherited]

Return the minProbability value passed to the ms_mascotresults::ms_mascotresults constructor.

This value is used when calculating the MudPIT scores and is used as a cutoff for an Integrated error tolerant search.

If a value of <= 1e-18 or >= 1 is passed to the constructor, then a value of 1/20 will be returned by this function, otherwise it will just return the value passed to the ms_mascotresults::ms_mascotresults constructor.

Returns:
probability threshold
int getProbFromScore ( const double  score ) const [virtual, inherited]

Returns a probability value given a score.

Warning:
Do not use this without fully understanding what the function returns. It is more likely that you will want to use getPeptideExpectationValue() or getProteinExpectationValue().
Parameters:
scoreis the Mascot score.
Returns:
probability calculated from p = pow(10.0, (score/10.0)).
double getProbOfPepBeingRandomMatch ( const double  score,
const int  query 
) const [virtual, inherited]
Deprecated:
Use getPeptideExpectationValue().

Returns the inverse of getPeptideExpectationValue(). If, for example, the identity threshold (p<0.05) for a particular query is 30, and a value of 30 is passed to this function, then the return value will be 1/0.05. If, for the same search, a peptide of interest gets a score of 50, then this function will return a value of 1 / 0.00067, which is the inverse of the p value corresponding to that score.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also:
getProbOfProteinBeingRandomMatch()
Parameters:
scoreis the Mascot score
queryspecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
Returns:
The probability.
Examples:
resfile_summary.cpp.
double getProbOfProteinBeingRandomMatch ( const double  score ) const [virtual, inherited]
Deprecated:
Use getProteinExpectationValue().

Returns the inverse of getProteinExpectationValue(). If, for example, the identity threshold (p<0.05) for a PMF search is 56, and a value of 56 is passed to this function, then the return value will be 1/0.05. If, for the same search, a protein of interest gets a score of 90, then this function will return a value of 1 / 0.000025, which is the inverse of the p value corresponding to that score.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also:
getProbOfPepBeingRandomMatch()
Parameters:
scoreis the Mascot score to be converted.
Returns:
The inverse of getProteinExpectationValue().
const ms_protein * getProtein ( const char *  accession,
const int  dbIdx = 1 
) const [virtual]

Return a pointer to the protein entry given an accession.

Parameters:
accessionis the accession string.
dbIdxis the database index (1..ms_searchparams::getNumberOfDatabases()).

  • For a PMF mixture in protein summary (see Peptide mass fingerprint mixtures), use the default value of 1 even if the component proteins are all from other databases.
Returns:
If the protein cannot be found in the results, then a null value is returned. See Maintaining object references: two rules of thumb.

Reimplemented from ms_mascotresults.

std::string getProteinDescription ( const char *  accession,
const int  dbIdx = 1 
) const [inherited]

Return protein description if available.

This method attempts to return the 'description line' for the given accession. The description line originated from the FASTA file that was searched. An empty string will be returned if the protein is not listed in the proteins section or the summary section of the results file.

Format in the proteins section is, for example:

   "gi|4838561"=79480.13,"(AF144646) heat shock protein 70 [Crassostrea gigas]" 

When multiple databases are used for the search, all protein entries, except those from the first database, will have a database index as a prefix:

   "02::gi|483561"=...

Format in the summary section is, for example:

   h1=gi|4838561,1.89e+002,1.00,79480.13
   h1_db=01
   h1_text=(AF144646) heat shock protein 70 [Crassostrea gigas] 

Again the protein entry will include the database index if there is more than one database.

With the default settings, Mascot only saves description lines in the proteins section for accessions that are most likely to be used in a report. If description lines for all proteins with a match were saved, then the results files could potentially be huge. You can control which description lines are put into the proteins section by changing the entry for

ProteinsInResultsFile 

in the options section of mascot.dat (see chapter 6 of the Mascot installation and setup manual).

The alternative way to get the description line is to call

     ms-getseq.exe [db] [accession] title

when nothing is returned by getProteinDescription(). See chapter 7 of the Mascot installation and setup manual for details of calling ms-getseq.exe.

If the 'protein' is actually a UniGene entry, then the description line is taken by concatenating the Gene and the Title line for the relevant entry.

If the protein is a PMF mixture, then the description line will be of the format:

 * Mixture from proteins:"gi|1234","gi|5432","gi|9999" 

If the protein comes from the reference database of a spectral library, then its description is not saved in the results file and this method returns the empty string. If the protein comes from the spectral library, it may or may not have a description, depending on how the library was created.

For a search against a decoy database, if the result is from the decoy section, then the description is prefixed with "Random sequence, was ", or if there is no description in the file, "Random sequence.". Depending on the configuration of the search, Random may also be Reversed or Shuffled.

Parameters:
accessionis the accession string.
dbIdxdatabase index used for the search.
Returns:
The description line or an empty string.
Examples:
resfile_summary.cpp.
double getProteinEmPAI ( const char *  accession,
const int  dbIdx = 1,
const int  length = -1 
) const [virtual]

Return protein emPAI if available.

The Exponentially Modified Protein Abundance Index (emPAI) offers approximate, label-free, relative quantitation of the proteins in a mixture based on protein coverage by the peptide matches in a database search result. The key publication is

Ishihama, Y., et al.: Exponentially modified protein abundance index (emPAI) for estimation of absolute protein amount in proteomics by the number of sequenced peptides per protein (2005). Molecular & Cellular Proteomics 4:1265-1272.

The number of observed peptides is determined using the function ms_protein::getNumObservedForEmPAI().

Protein sequence length is needed for emPAI calculation. You have two options:

  1. Give the sequence length as an argument to this method. You will need to fetch it with e.g. ms-getseq.exe on the Mascot Server machine.
  2. Give no argument to the method. If the protein has a mass in the results file, the mass will be used to calculate an approximate sequence length.

If no sequence length is given and the protein has no mass in the results file, the method returns -1. If you know the mass of the protein but not its sequence length, you can use the following approximate formula:

length = floor(0.5 + mass / 111)

Here 111 Da is the average mass of an amino acid residue.

If ms_mascotresults::isEmPAIallowed() is false, this method returns -1.

Parameters:
accessionProtein accession.
dbIdxProtein database index number.
lengthProtein sequence length (optional). If the length is not given, protein mass from the results file will be used to approximate it. If protein mass is not available, the method returns -1.
Returns:
emPAI or -1, as described above.

Reimplemented from ms_mascotresults.

double getProteinExpectationValue ( const double  score ) const [virtual, inherited]

Returns the expectation value for the given protein score.

Returns the number of proteins that you would expect to get this score or higher from random matches for this search. For a score that is exactly on the default significance threshold (p<0.05), the expectation value is also 0.05.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also:
getPeptideExpectationValue()
Parameters:
scoreis the Mascot protein score to be converted.
Returns:
The expectation value.
double getProteinMass ( const char *  accession,
const int  dbIdx = 1 
) const [inherited]

Return protein mass if available.

This method attempts to return the mass for the given accession. The protein mass is calculated during the Mascot search, and some, but not all, of these masses are saved in the results file. A value of zero will be returned if the protein is not listed in the proteins section or the summary section of the results file.

The format in the proteins section is, for example, for a protein of mass 79480.13 daltons:

   "gi|4838561"=79480.13,"(AF144646) heat shock protein 70 [Crassostrea gigas]" 

When multiple databases are used for the search, all protein entries, except those from the first database, will have a database index as a prefix:

   "02::gi|483561"=...

Format in the summary section is, for example:

   h1=gi|4838561,1.89e+002,1.00,79480.13 
   h1_db=01
   h1_text=(AF144646) heat shock protein 70 [Crassostrea gigas] 

Again the protein entry will include the database index if there is more than one database.

With the default settings, Mascot only saves description lines and masses in the proteins section for accessions that are most likely to be used in a report. If description lines for all proteins with a match were saved, then the results files could potentially be huge. You can control which description lines and masses are put into the proteins section by changing the entry for

ProteinsInResultsFile 

in the options section of mascot.dat (see chapter 6 of the Mascot installation and setup manual).

If a mass of zero is returned by this function, then the alternative way to get the mass is to call

     ms-getseq.exe [db] [accession] seq

to get the sequence, and then call getSequenceMass(). See chapter 7 of the Mascot installation and setup manual for details of calling ms-getseq.exe.

Parameters:
accessionis the accession string.
dbIdxdatabase number (1..ms_searchparams::getNumberOfDatabases()).
Returns:
The protein mass or 0.0.
Examples:
resfile_summary.cpp.
int getProteinScoreCutoff ( double  OneInXprobRnd ) const [virtual]

Return the 'protein' score value for cutting off results. Different for peptide and protein summary.

The protein probability cutoff is calculated differently for a protein summary and a peptide summary.

  • For a peptide summary, we want to display any proteins that contain even one significant peptide, which means the threshold is the same as getAvePeptideIdentityThreshold().

    For MudPIT scoring, any protein with a score greater than zero must have one or more peptides above the homology threshold, so this function returns a value of 1.

  • For a protein summary, we simply use the protein score that corresponds to the threshold.
Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
Returns:
The protein score cutoff threshold.

Implements ms_mascotresults.

double getProteinScoreForHistogram ( const int  num ) const [virtual, inherited]

Returns scores for the top 50 proteins, even if there are fewer in the peptidesummary or proteinsummary.

The 'well known' green and red histogram at the top of the Mascot reports is created from the top 50 protein scores in the results. However, if the number of hits requested for a peptide or protein summary is 'AUTO' or is less than 50, then there won't be 50 hits in the ms_mascotresults object so it won't be possible to create the histogram.

This simple method can reliably be used to return the score for a hit number between 1 and 50, regardless of the number of entries in the protein or peptide summary. To create the histogram, it will be necessary to call this method 50 times - with num being 1..50.

In the rare case of there being fewer than 50 proteins in the results file, a value of -1 will be returned for values of num that have no protein hit. If a protein is rejected because it only has peptides shorter than minPepLenInPepSummary or because the MSRES_REQUIRE_BOLD_RED flag is set, then this protein will not be included in the list.

Parameters:
numIs a value in the range 1..50.
Returns:
The score.
std::string getProteinsWithThisPepMatch ( const int  q,
const int  p,
const bool  quotes = false 
) [virtual]

Return a partial list of proteins that matched the same peptide.

The returned string is of the following form:

    1:Q9XZJ2  2:100K_RAT  3:AAF55150 

The list is the hit number / accession of all proteins that matched the same peptide. There is no indication of which database the hit is from, but this can of course be determined by calling getHit().

In ms_peptidesummary, this is taken from the list of accessions on the q1_p1= line. The returned string only includes proteins that are in the current peptide summary. This means for example that if you choose to only see the top 5 hits, and this query/rank is for a peptide that occurs in protein 6, then protein number 6 will not appear in the resulting string.

In ms_proteinsummary, the string is also constructed from the list of accessions on the q1_h1= line. There's an additional check that the query number is the same, and that the peptide is the same string.

If you want a list of accession strings rather than a single concatenated string, use the function getAllProteinsWithThisPepMatch().

Parameters:
qis the query number in the range 1 to ms_mascotresfile::getNumQueries().
pis the 'hit' number (in ms_proteinsummary) or 'rank' number (in ms_peptidesummary).

  • For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.
  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
quotesspecifies whether the accessions will be quoted, which should allow accessions with spaces and colons. If true, the string returned would then be

1:"Q9XZJ2" 2:"100K_RAT" 3:"AAF55150"
Returns:
list of proteins matching the peptide, as a string

Implements ms_mascotresults.

void getProteinTaxonomyIDs ( const char *  accession,
const int  dbIdx,
std::vector< int > &  gpt_ids,
std::vector< std::string > &  gpt_accessions 
) const [inherited]

Return the taxonomy ID(s), if any, from the results file.

In Mascot 2.4.0 and later, taxonomy IDs are saved in the proteins section of the results files in the form:

   [DB::]"accession"_tax=taxId[:"accession",taxID[:"accession",taxID[:...]]]

In Mascot 2.4.1 and later, taxonomy IDs are also saved in the summary section of the results files in the form:

   hN_tax=taxId[:"accession",taxID[:"accession",taxID[:...]]]

For example, for a single database, with a single accession/taxonomy:

   "gi|209490801"_tax=87229

   h1=gi|209490801, ...
   h1_tax=87229
 

and for a multi-database search, with a single accession/taxonomy:

   02::"gi|209490801"_tax=87229

   h1_db=2
   h1=gi|209490801, ...
   h1_tax=87229
 

and for a single database, with multiple accession/taxonomy:

   "gi|182438931"_tax=455632:"gi|282872348",649189:gi|178467447,455632:gi|282574201,649189

   h1=gi|182438931, ...
   h1_tax=455632:"gi|282872348",649189:gi|178467447,455632:gi|282574201,649189
 

If none of the databases used for the search contain taxonomy information, then no _tax lines will be output.

If any of the databases used for the search contain taxonomy information, then a _tax line will be output for all relevant proteins. This is governed by the same rules that determine for which proteins to output the mass and description lines. If a protein has no taxonomy information (for example, the database has no taxonomy), -1 will be used as the taxonomy ID.

Therefore, to determine if taxonomy information is available in the results file, simply take any protein in the proteins section, and see if there is a corresponding _tax entry.

For most FASTA files, there is a single accession and taxonomy for each sequence entry. For the NCBInr database, identical sequences are only stored once, but there are multiple accession/description lines separated by a "Control-A" character. Each accession may have a different taxonomy. This function returns the list of taxonomy IDs and corresponding accessions. For the last example shown above, this function will return the arrays of IDs

  [455632, 649189, 455632, 649189]
 

and accessions

  ["gi|182438931", "gi|282872348", "gi|178467447", "gi|282574201"]
 

See Using STL classes in Perl, Java, Python and C#.

Parameters:
accessionis the primary accession
dbIdxis the 1 based database ID
gpt_idsthe taxonomy IDs
gpt_accessionsthe list of accessions
int getProteinThreshold ( double  OneInXprobRnd ) const [virtual, inherited]

Return a threshold value for the protein summary report.

This is simply calculated by using:

   10.0 * log10(seqsAfterTax * OneInXprobRnd) 

where seqsAfterTax is obtained from ms_mascotresfile::getNumSeqsAfterTax().

The threshold value is displayed at the top of the protein summary report: Protein scores greater than 72 are significant (p<0.05).

Parameters:
OneInXprobRndFor a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.
Returns:
The protein threshold.
int getQmatch ( const int  query,
const ms_mascotresfile::section  sec 
) const [virtual, inherited]

Return the number of peptides with masses that matched this query.

See also:
getQmatch(const int) const

If the value is greater than INT_MAX, then INT_MAX will be returned.

Parameters:
querySpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
secis the section number and can be one of:

Returns:
the number of peptides with masses that matched this query. If the specified sec doesn't exist, then 0 will be returned.
int getQmatch ( const int  query ) const [virtual, inherited]

Return the number of peptides with masses that matched this query.

See also:
getPeptideIdentityThreshold()
getQmatch(const int, const ms_mascotresfile::section) const

If the value is greater than INT_MAX, then INT_MAX will be returned.

This function calls the getQmatch(query, section) function with the 'relevant' section, which will normally be SEC_SUMMARY. However, if the MSRES_DECOY flag was specified in the constructor, then the 'relevant' section will be SEC_DECOYSUMMARY. If the search is an error tolerant search, then the 'relevant' section will be SEC_ERRTOLPEPTIDES. However, to get the spectral library qmatch values, it is necessary to explicitly call: getQmatch(query, ms_mascotresfile::SEC_LIBRARYSUMMARY)

Parameters:
querySpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
Returns:
the number of peptides with masses that matched this query
ms_mascotresults::QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide ( const matrix_science::ms_peptide peptide,
matrix_science::ms_quant_component component,
const matrix_science::ms_quant_method method = NULL 
) const [virtual]

Get the component in the quantitation method that matches the peptide.

If the component name has already been determined (see ms_peptide::getComponentStr()) then that component is retrieved from the method.

If the component name has not yet been determined for the peptide, each component in the method is checked in turn to determine if it is the correct component for the peptide (see ms_peptidesummary::isPeptideComponentMatch()). A component only matches the peptide if every modification group of the component matches the peptide.

This is only applicable if there is a quantitation method, the method uses the Precursor protocol and the method has components defined. If an error occurs then an error message is appended to the matrix_science::ms_mascotresfile and a matching status code is returned.

Parameters:
peptideThe peptide to find the component for.
componentThis is set to the first matching component found, if any.
methodAn optional parameter that overrides the quantitation method used to look for matching components. If no method is specified here then the method from the ms_mascotresfile is used.
Returns:
One of the return status codes in ms_mascotresults::QUANT_COMPONENT_STATUS.

Implements ms_mascotresults.

const matrix_science::ms_quant_method * getQuantMethod (  ) const

Returns the quantitation method from the results file.

Returns:
The quantitation method from the ms_resfile if any, otherwise 0.
std::string getQueryList ( QL_FLAG  flag,
bool  outputListOfQueries = true 
)

Returns a list of query numbers that can be used for a repeat search.

The list returned by this function should be used as the querylist parameter in a repeat search in Mascot 2.3 or later.

Parameters:
flagSee the ms_peptidesummary::QL_FLAG definition for a description of the different values.
outputListOfQueriesis used to specify whether a comma separated list of values is returned, or an encoded short string that will be interpreted by nph-mascot. Setting this value to false is not supported in Mascot 2.3.
Returns:
A string that can be used for the querylist value when repeating a search.
std::string getReadableVarMods ( const int  q,
const int  p,
const int  numDecimalPlaces = 2 
) const [virtual, inherited]

Return a 'human readable' string with the variable, summed and error tolerant mods.

This function creates a string of the form

2 Oxidation (M); Phosphorylation (ST) 

It works by getting the variable mods string, e.g. 0110030, using ms_peptide::getVarModsStr(). It then converts the string to a human readable form. Each variable modification is assigned a number 1 to 32 (up to 32 variable modifications are allowed). In the results, a string is assigned for each matched peptide, and each character in the string will be 0..9A..W.

This function will work whether or not there are variable and/or error tolerant mods, and can be called for either the peptide or the protein summary. Variable modification names are obtained using ms_searchparams::getVarModsName() and error tolerant modification names are found using ms_mascotresults::getErrTolModName(). Library modification names, in peptide summary, are found using ms_searchparams::getLibraryModName(). See the Mascot installation and setup manual (configuration and log files chapter) for more information about variable modifications.

Error tolerant mods are shown with the delta in square brackets after the modification or substitution. For example:

   R->G [-99.08] 

The delta is shown to numDecimalPlaces.

Summed mods, from ms_peptide::getSummedModsStr(), and query-level mods, from ms_peptide::getLocalModsStr(), are also included in the returned string.

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10.
numDecimalPlacesUsed for the display of the delta.
Returns:
The formatted string.
Examples:
resfile_summary.cpp.
const ms_mascotresfile & getResfile (  ) const [inherited]

Return a reference to the resfile.

Returns:
A reference to the resfile given in the constructor.
ms_mascotresults_params getResultsParameters (  ) [inherited]

Returns an ms_mascotresults_params object containing the parameter and flag settings used to generate the report.

Returns:
An ms_mascotresults_params object containing the parameter and flag values used to generate the report.
double getSequenceMass ( const char *  seq ) const [inherited]

Return the mass of a sequence (protein or peptide).

The sequence mass is calculated by summing the mass of each residue, using ms_searchparams::getResidueMass, and adding terminus masses using ms_searchparams::getCTermMass and ms_searchparams::getNTermMass.

The mass is therefore the 'Mr' (uncharged) mass, and is calculated using any static modifications.

Parameters:
seqis the protein sequence.
Returns:
The relative mass of the sequence.
double getSLThresholdFromMinProbability ( double  minProbability ) [static]

Return the library score threshold in SL-only mode corresponding to the given value of minProbability.

This is the inverse of getMinProbabilityForSLScore(). Note that due to how library scores are converted to expect values, the minimum value returned by this function is 170, for minProbability >= 1.0.

Parameters:
minProbabilitybetween 0 (exclusive) and 1 (exclusive).
Returns:
The library score threshold.
int getSrcRank ( int  q,
int  p 
) const

Returns the 'source' rank for a given peptide match.

When ms_mascotresults::MSRES_INTEGRATED_ERR_TOL is specified for an integrated error tolerant search (see Integrated error tolerant search), the results from the ms_mascotresfile::SEC_PEPTIDES and ms_mascotresfile::SEC_ERRTOLPEPTIDES sections are combined at the query level, so there will be up to 20 matches for each query. ms_peptide::getRank() or ms_protein::getPeptideP() will therefore return a number in the range 1 to 20. This rank value can be used for any ms_peptidesummary function that requires a 'p' (rank) value. If you want to access data directly from the results file using functions like ms_mascotresfile::getQuerySectionValueStr(), then you need to call this function and getSrcSection() to determine which entry to retrieve.

For any search other than an integrated error tolerant search, the return value from this function will be identical to the passed p value.

Parameters:
qis the query number.
pis the rank number.
Returns:
the rank number in the results file.
ms_mascotresfile::section getSrcSection ( int  q,
int  p 
) const

Returns the 'source' section for a given peptide match.

This function returns the peptide section that the peptide, specified by q and p, is obtained from. It will be one of:

When ms_mascotresults::MSRES_INTEGRATED_ERR_TOL is specified for an integrated error tolerant search (see Integrated error tolerant search), the results from the ms_mascotresfile::SEC_PEPTIDES and ms_mascotresfile::SEC_ERRTOLPEPTIDES sections are combined at the query level, so there will be up to 20 matches for each query. ms_peptide::getRank() or ms_protein::getPeptideP() will therefore return a number in the range 1 to 20. This rank value can be used for any ms_peptidesummary function that requires a 'p' (rank) value. If you want to access data directly from the results file using functions like ms_mascotresfile::getQuerySectionValueStr(), then you need to call this function and getSrcRank() to determine which entry to retrieve.

Parameters:
qis the query number.
pis the rank number.
Returns:
the section in the results file.
int getTagDeltaRangeEnd ( const int  q,
const int  p 
) const [virtual]

Return the second number from q1_p2_drange=0,256.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in q1_p2_drange might be
       q1_p2_drange=0,256 
    
  • The entry in h1_q2_drange might be
       h1_q2_drange=0,256 
    
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
the second number from the string or -1 if there is no tag match.

Implements ms_mascotresults.

int getTagDeltaRangeStart ( const int  q,
const int  p 
) const [virtual]

Return the first number from q1_p2_drange=0,256.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in q1_p2_drange might be
       q1_p2_drange=0,256 
    
  • The entry in h1_q2_drange might be
       h1_q2_drange=0,256 
    
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
the first number from the string or -1 if there is no tag match.

Implements ms_mascotresults.

int getTagEnd ( const int  q,
const int  p,
const int  tagNumber 
) const [virtual, inherited]

Return the end position for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,... 

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also:
getTagStart(), getTagSeries()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10 for a peptide summary.
tagNumberIs a one based value.
Returns:
The residue end position of the tag.
Examples:
resfile_summary.cpp.
int getTagSeries ( const int  q,
const int  p,
const int  tagNumber 
) const [virtual, inherited]

Return the series ID for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,... 

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also:
getTagStart(), getTagEnd()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10 for a peptide summary.
tagNumberIs a one based value.
Returns:
The series that contained a match to the tag. Possible ion series numbers are
  • -1 means no matches for the tag
  • 0 "a" series (single charge)
  • 1 "a-NH3" series (single charge)
  • 2 "a" series (double charge)
  • 3 "b" series (single charge)
  • 4 "b-NH3" series (single charge)
  • 5 "b" series (double charge)
  • 6 "y" series (single charge)
  • 7 "y-NH3" series (single charge)
  • 8 "y" series (double charge)
  • 9 "c" series (single charge)
  • 10 "c" series (double charge)
  • 11 "x" series (single charge)
  • 12 "x" series (double charge)
  • 13 "z" series (single charge)
  • 14 "z" series (double charge)
  • 15 "a-H2O" series (single charge)
  • 16 "a-H2O" series (double charge)
  • 17 "b-H2O" series (single charge)
  • 18 "b-H2O" series (double charge)
  • 19 "y-H2O" series (single charge)
  • 20 "y-H2O" series (double charge)
  • 21 "a-NH3" series (double charge)
  • 22 "b-NH3" series (double charge)
  • 23 "y-NH3" series (double charge)
  • 25 "internal yb" series (single charge)
  • 26 "internal ya" series (single charge)
  • 27 "z+1" series (single charge)
  • 28 "z+1" series (double charge)
  • 29 high-energy "d" and "d'" series (single charge)
  • 31 high-energy "v" series (single charge)
  • 32 high-energy "w" and "w'" series (single charge)
  • 33 "z+2" series (single charge)
  • 34 "z+2" series (double charge)
Examples:
resfile_summary.cpp.
int getTagStart ( const int  q,
const int  p,
const int  tagNumber 
) const [virtual, inherited]

Return the start position for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,... 

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also:
getTagEnd(), getTagSeries()
Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pShould be in the range 1..10 for a peptide summary.
tagNumberIs a one based value.
Returns:
The residue start position of the tag.
Examples:
resfile_summary.cpp.
std::string getTagString ( const int  q,
const int  p 
) const [virtual]

Return the complete tag string from q1_p1_tag.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in q1_p2_tag might be
       q1_p2_tag=1:10:12:6,3:1:12:1,... 
    
  • The entry in h1_q2_tag might be
       h1_q2_tag=1:10:12:6,3:1:12:1,... 
    

This function returns the complete string value. Every group of four numbers between commas is tagNumber:startPos:endPos:series.

More useful functions are

  • getTagStart()
  • getTagEnd()
  • getTagSeries()

all of which will work for both the protein summary and the peptide summary.

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
complete tag string.

Implements ms_mascotresults.

std::string getTerminalResiduesString ( const int  q,
const int  p 
) const [virtual]

Return the complete terminal residue string from q1_p1_terms.

Consider the following two examples for peptide summary and protein summary, respectively:

  • The entry in the results file might be
       q1_p2_terms=-,A:K,I:-,-:Q,-:@,R:... 
    
  • The entry in the results file might be
       h1_q2_terms=A,Q 
    

This function returns the complete string value. More useful functions are ms_protein::getPeptideResidueBefore() and ms_protein::getPeptideResidueAfter(), which will work for both the protein summary and the peptide summary.

Parameters:
qSpecifies the query number. This should be in the range 1 .. ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
Returns:
the full terminal residues string.

Implements ms_mascotresults.

bool getThresholdForFDRAboveHomology ( double  targetFDR,
double *  closestFDR,
double *  minProbability,
int *  numTargetMatches = 0,
int *  numDecoyMatches = 0 
) [inherited]

Given a target FDR, return the probability threshold that gives the desired FDR using the homology threshold.

For example, to find out which probability threshold gives 1% FDR above identity or homology threshold, call this function with targetFDR = 0.01, and feed the returned threshold as the minProbability parameter to the constructor. The FDR can be calculated as described in getNumHitsAboveHomology() and getNumDecoyHitsAboveHomology().

Otherwise functionality is the same as getThresholdForFDRAboveIdentity(double, double*, double*, int*, int*).

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#.

Parameters:
[in]targetFDRan FDR to aim for.
[out]closestFDRan FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]minProbabilitythe probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]numTargetMatchesreturns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]numDecoyMatchesreturns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.
Returns:
true if the FDR was reached, false otherwise.
bool getThresholdForFDRAboveHomology ( double  targetFDR,
DECOY_STATS_COUNT_TYPE  countType,
DB_MATCH_TYPE  dbType,
double *  closestFDR,
double *  minProbability,
int *  numTargetMatches = 0,
int *  numDecoyMatches = 0 
) [inherited]

Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the homology threshold.

In Parser 2.5 and earlier, the FDR was calculated based on peptide-spectrum matches (PSMs). Parser 2.6 and later support calculating the FDR based on distinct peptide sequences. You can choose between PSMs and sequences with countType. The default, when countType is not given, is to count PSMs.

Parser 2.6 also added support for spectral libraries. However, there is no support for decoy spectral libraries, and thus there is no support for library FDR. If dbType is not DM_FASTA, the method returns false.

Otherwise functionality is the same as getThresholdForFDRAboveHomology(double, double*, double*, int*, int*).

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#.

Parameters:
[in]targetFDRan FDR to aim for.
[in]countTypewhether to count PSMs or sequences.
[in]dbTypemust be DM_FASTA.
[out]closestFDRan FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]minProbabilitythe probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]numTargetMatchesreturns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]numDecoyMatchesreturns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.
Returns:
true if the FDR was reached, false otherwise.
bool getThresholdForFDRAboveIdentity ( double  targetFDR,
double *  closestFDR,
double *  minProbability,
int *  numTargetMatches = 0,
int *  numDecoyMatches = 0 
) [inherited]

Given a target FDR, return the probability threshold that gives the desired FDR using the identity threshold.

For example, to find out which probability threshold gives 1% FDR above identity threshold, call this function with targetFDR = 0.01, and feed the returned threshold as the minProbability parameter to the constructor. The FDR can be calculated as described in getNumHitsAboveIdentity() and getNumDecoyHitsAboveIdentity().

The function returns a value that maximises the number of peptide-spectrum matches while giving an FDR below the targetFDR value. In some cases, it would be possible to get an FDR closer to the supplied targetFDR, but with fewer target matches.

For a large file, this function can take some time to return. Feedback can be obtained by using ms_mascotresfile::outputKeepAlive with the fd= tag.

This function was first introduced in version 2.4.0, and the method for calculating the value was changed between versions 2.4.0 and 2.4.1.

If this function returns false, there are several possibilities:

  • The function is being called for a ms_proteinsummary object
  • The supplied targetFDR is 0, or negative
  • It is not possible to get an FDR less than the targetFDR. In this case, the lowest possible FDR is given

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#.

Parameters:
[in]targetFDRan FDR to aim for.
[out]closestFDRan FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]minProbabilitythe probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]numTargetMatchesreturns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]numDecoyMatchesreturns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.
Returns:
true if the FDR was reached, false otherwise.
bool getThresholdForFDRAboveIdentity ( double  targetFDR,
DECOY_STATS_COUNT_TYPE  countType,
DB_MATCH_TYPE  dbType,
double *  closestFDR,
double *  minProbability,
int *  numTargetMatches = 0,
int *  numDecoyMatches = 0 
) [inherited]

Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the identity threshold.

In Parser 2.5 and earlier, the FDR was calculated based on peptide-spectrum matches (PSMs). Parser 2.6 and later support calculating the FDR based on distinct peptide sequences. You can choose between PSMs and sequences with countType. The default, when countType is not given, is to count PSMs.

Parser 2.6 also added support for spectral libraries. However, there is no support for decoy spectral libraries, and thus there is no support for library FDR. If dbType is not DM_FASTA, the method returns false.

Otherwise functionality is the same as getThresholdForFDRAboveIdentity(double, double*, double*, int*, int*).

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#.

Parameters:
[in]targetFDRan FDR to aim for.
[in]countTypewhether to count PSMs or sequences.
[in]dbTypemust be DM_FASTA.
[out]closestFDRan FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]minProbabilitythe probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]numTargetMatchesreturns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]numDecoyMatchesreturns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.
Returns:
true if the FDR was reached, false otherwise.
double getToleranceInDalton ( bool &  needMass,
const double *const   pMass = NULL 
) const [inherited]

Returns the tolerance in dalton, and whether a mass is needed if the unit is in % or ppm.

Returns the tolerance of the result file in dalton.

Parameters:
needMassReference to a boolean to determine if the mass is needed to calculate the delta, namely when the tolerance unit is in % or ppm
pMassOptional parameter to calculate the tolerance from the mass for % and ppm
Returns:
The tolerance in dalton
bool getTreeClusterNodes ( const int  hit,
std::vector< int > &  left,
std::vector< int > &  right,
std::vector< double > &  distance,
TREE_CLUSTER_METHOD  tcm = TCM_PAIRWISE_MAXIMUM,
double ***  reserved1 = 0,
unsigned int *  reserved2 = 0 
) const [virtual]

Return distances and structure suitable for a dendrogram plot.

The 'distances' between each protein in the family are calculated using a tree clustering routine which, by default, implements pairwise maximum (or complete) linkage clustering. It is used for the dendrogram display in the reports introduced in Mascot 2.3.

The vectors returned by this function only contain values when MSRES_CLUSTER_PROTEINS has been specified and where there are at least two family members.

The code uses the cluster algorithm described at http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/cluster/software.htm

The License agreement for using this excellent Open Source code is included with Mascot Parser; see C Clustering library.

A value of 0 returned in the left or right vector corresponds to the lead protein as returned by getHit(). To find the protein associated with a positive node value returned in the left or right vector, call getNextFamilyProtein() with the id value set to the value in the vector.

The distances passed to the cluster algorithm are calculated by determining which peptides need to be excluded to make one protein a subset of another. Each unique peptide sequence is considered in turn, and the distance is calculated by taking the maximum difference between the score and the value returned by getPeptideThreshold(). Additionally, where a query has matches to different peptides above threshold in each protein, the delta of the scores is added to the distance.

For details of how to use the left, right and distance parameters, see Using STL classes in Perl, Java, Python and C#.

Parameters:
hitis the hit number for the lead protein.
leftreturns a vector of 'left' values.
rightreturns a vector of 'right' values.
distancereturns a vector of 'distance' values.
tcmis the method to be used. See documentation referenced above
reserved1is for Matrix Science use only
reserved2is for Matrix Science use only
Returns:
true unless the code causes an out of memory error.

Reimplemented from ms_mascotresults.

ms_peptide getUnassigned ( const int  num ) const [inherited]

Need to call createUnassignedList() before calling this.

See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_mascotresfile::ERR_UNASSIGNED_PROG will be set, and this function will return an empty ms_peptide.

Parameters:
numMust be in the range 1..getNumberOfUnassigned().
Returns:
If num is outside the range 1..getNumberOfUnassigned(), then an error ms_mascotresfile::ERR_UNASSIGNED_RANGE will be set, and this function will return an empty ms_peptide.
Examples:
resfile_summary.cpp.
bool getUnassignedIsBold ( const int  num ) const [inherited]

Returns true if the item indexed by num in the unassigned list should be bold.

This function returns true if this peptide should be displayed in bold in a Mascot report. Bold is used for the first time a query is shown in a report. See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_mascotresfile::ERR_UNASSIGNED_PROG will be set, and this function will return true.

Parameters:
numMust be in the range 1..getNumberOfUnassigned().
Returns:
If num is outside the range 1..getNumberOfUnassigned(), then an error ms_mascotresfile::ERR_UNASSIGNED_RANGE will be set, and this function will return true.
bool getUnassignedShowCheckbox ( const int  num ) const [inherited]

Returns true if the item indexed by num in the unassigned list should have a check box next to it.

A check box is displayed if this is the first rank 1 match that has been displayed for this query. See also ms_peptide::getRank() and ms_peptide::getPrettyRank().

By definition, all unassigned queries will need a check box. See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_mascotresfile::ERR_UNASSIGNED_PROG will be set, and this function will return false.

Parameters:
numMust be in the range 1..getNumberOfUnassigned().
Returns:
If num is outside the range 1..getNumberOfUnassigned(), then an error ms_mascotresfile::ERR_UNASSIGNED_RANGE will be set, and this function will return an empty ms_peptide.
bool hasQuantMethod (  ) const

Returns true if the results file has a quantitation method.

Returns:
True if there is a quantitation method, otherwise false.
bool isDataCached ( BUGFIX_NUM  bugNum ) const

Returns true if the cache file contains data relevant to the given bug number.

If caching is in use and the cache file exists, it may have been created with a previous version of Parser. Parser does not always recreate the cache file when new features are added, which means that if newer versions would save more data in the cache file, some of this data may need to be computed on the fly when using an old cache file.

If caching is in use and the cache file contains data relevant to bugNum, the method returns true. Otherwise it returns false. If caching is not in use, the method always returns true, as the data related to the given bug number will always be computed on the fly.

See also Caching Mascot Results.

Parameters:
bugNumBug number of the relevant feature or bugfix.
Returns:
True if data related to the bug number is available in the cache file or if caching is not in use. False otherwise.
bool isEmPAIallowed (  ) const [inherited]

Return true if emPAI could be calculated using data in this results file.

emPAI is not available unless all of the following are true:

  • Results file has MS/MS data.
  • Results file is not an old-style error tolerant search.
  • Results file has not been opened in decoy mode (ms_mascotresults::MSRES_DECOY).
  • There are at least 100 queries.
Returns:
true if emPAI is allowed.
bool isNA (  ) const [inherited]

Returns TRUE for a search against a nucleic acid database.

There is no specific information in the results file for this. It is implemented by looking to see if there is a frame number that is non-zero in any result. If there are no matches to any query, then this method will return false and therefore may be wrong. (However, if there are no matches then it is unlikely that the outcome of this function is important!)

In versions prior to 2.3.01, this was determined by looking in protein matches. In version 2.3.01 and later, this is determined by looking in all peptide matches, regardless of score.

Returns:
True if the search was against a nucleic acid database.
bool isPeptideComponentMatch ( const matrix_science::ms_quant_component component,
const matrix_science::ms_peptide peptide 
) const

Tests whether the peptide is a match for all the modifications of the quantitation component.

Test whether the peptide is a match for all modification groups in the component using isPeptideModificationMatch().

Parameters:
componentThe component to check for a match to the peptide.
peptideThe peptide to check for a match to the component.
Returns:
True if all modification groups are matched, or there are no modification groups.
bool isPeptideModificationMatch ( const matrix_science::ms_quant_modgroup group,
const matrix_science::ms_peptide peptide 
) const

Tests whether the peptide is a match for the quantitation modification group.

The peptide matches the modification group if, and only if, the peptide has each and every applicable variable modification within the group.

Whether the Mascot search is constrained or not makes no difference to the modification matching.

Modification groups have a mode that can be 'exclusive' or 'variable'. Each modification is tested in turn until one is found that does not match, or they have all been checked.

Modifications within the group are only applicable if the peptide has the type of site specified (e.g. "n-term (k)"). If the peptide has no sites specified by any of the individual modifications, then it will still match the group if the group is not required. If the group is required then a peptide that has none of the specified sites will not be a match.

An individual modification is matched if the peptide has the modification at every specified site (e.g. every R, the N-terminus) in the peptide string. If the peptide string contains the type of site specified but does not have the modification at that site then the peptide does not match the modification group.

Parameters:
groupThe group the peptide is checked against.
peptideThe peptide to check for a match to the group.
Returns:
True if all modifications in the group are matched, and if required then at least one modification site was found.
bool isPeptideUnique ( const int  q,
const int  p,
const UNIQUE_PEP_RULES  rules = UPR_DEFAULT 
) const [virtual]

Returns true if this peptide match is unique to one protein or one protein family.

Note:
This function currently returns false for ms_proteinsummary. This function is only likely to be useful for a peptide summary.

This function is used, for example, in quantitation to only report ratios for peptide sequences that are unique to one protein hit. This can be specified as part of a Mascot quantitation method; see ms_quant_quality::isUniquePepseq().

No attempt is made to collapse peptides that just differ by, for example, I or L.

The specified q and p values must be for a peptide that has a protein match in the current report, i.e. not in the unassigned list. The function searches all proteins and not just those in the current report. For example, if peptide 'ABCDEFK' appears in protein hit one and protein hit 100, then it will return false even if only the top 10 hits have been requested.

If grouping is not selected when creating the ms_peptidesummary, (i.e. neither ms_mascotresults::MSRES_GROUP_PROTEINS nor ms_mascotresults::MSRES_CLUSTER_PROTEINS), then it is likely that this function will return false for most peptides.

At version 2.3, this function did not have the optional third parameter and the default value was the same as using ms_mascotresults::UPR_WITHIN_FAMILY, which is not the default value in version 2.4 and later. At version 2.3, the function would therefore return true for all peptides with a score above the homology threshold when ms_mascotresults::MSRES_CLUSTER_PROTEINS is specified.

Parameters:
qis the query number in the range 1 to ms_mascotresfile::getNumQueries().
pis the 'hit' or 'rank' number.

  • For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
  • For a protein summary, this will be in the range 1 to 50.
rulesshould be passed appropriate ms_mascotresults::UNIQUE_PEP_RULES flags to specify what rules are to be used for considering when a peptide is unique.
Returns:
true if the peptide match is unique. Always false in ms_proteinsummary.
(const int, const int, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const

Implements ms_mascotresults.

ms_mascotresfile & mascotresfile (  ) const

Return the attached results file.

Returns:
The ms_mascotresfile object passed in the constructor.
void setSubsetsThreshold ( const double  scoreFraction ) [virtual, inherited]

Specifies which subset proteins should be reported.

If you have a primary hit with (say) 100 peptide matches, you are very interested in sub-set proteins with 99 matches but not if they only have 1 or 2 matches. These are the sub-set hits that clutter up the report if Show Sub-sets is checked in Mascot 2.1 and earlier. If you have a primary hit with (say) 2 peptide matches, you are more likely to be interested in sub-set proteins with just 1 match.

A scoreFraction of 1 indicates that all proteins with a subset of matches should be reported. A value of 0 indicates that no subsets will be reported. The flag MSRES_SHOW_SUBSETS must have been specified if a value > 0 is used, or no subsets will be shown.

See also:
getNextSubsetProtein(), getNextSubsetProteinOf()
Parameters:
scoreFractionis the fractional score required for a protein to be counted as a subset. Its score must be greater than or equal to main_protein_score * (1-scoreFraction). The default value is 1.0.
bool willCreateCache ( const char *  resultsFileName,
const ms_mascotoptions opts,
const char *  unigeneIndexFile,
const char *  singleHit,
const char *  applicationName,
std::string &  resfileCacheFileName,
std::string &  peptideSummaryCacheFileName,
unsigned int &  cacheStatus 
) [static]

Returns true if either cache file will be created when the ms_mascotresfile and ms_peptidesummary objects are created.

This static function can be called without creating an ms_mascotresfile or an ms_peptidesummary object. It can be used in advance of creating the object to see if there will be a delay while (re)creating the cache file(s). The function has some of the same parameters as the ms_peptidesummary constructor, but takes a pointer to the options section of the mascot.dat file which will have the default parameters for reports.

The function may be more convenient than calling ms_mascotresfile::willCreateCache and then creating an ms_mascotresfile object and then calling the other willCreateCache.

See Multiple return values in Perl, Java, Python and C#

See Static functions in Perl, Java, Python and C#

Parameters:
[in]resultsFileNameis the absolute or relative path to the Fxxxxx.dat file
[in]optsnormally loaded from the mascot.dat file using ms_datfile::getMascotOptions()
[in]unigeneIndexFileSee ms_peptidesummary::ms_peptidesummary
[in]singleHitSee ms_peptidesummary::ms_peptidesummary
[in]applicationNameis the name of the application or script that is calling this function. The applicationName is searched for in the return value from ms_mascotoptions::getResultsCache and ms_mascotoptions::getResfileCache to determine if the application should be using cache files. If it is not found then the function returns false and sets the cacheStatus to ms_peptidesummary::RESFILE_CACHE_DISABLED_IN_OPTIONS and/or ms_peptidesummary::PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS. If null, or an empty string is passed, no check is made.
[out]resfileCacheFileNamereturns the name of the ms_mascotresfile cache file if one exists or would be created
[out]peptideSummaryCacheFileNamereturns the name of the ms_peptidesummary cache file if one exists or would be created
[out]cacheStatusis the ms_peptidesummary::CACHE_STATUS enumeration which gives more details about why the cache file may or may not be created. Multiple values may be bitwise OR'd together.
Returns:
true if either of the cache files will be (re-)created because they do not exist or are not complete or not up to date. It will return false if the options specify that the applicationName shouldn't create or use cache files.
bool willCreateCache ( ms_mascotresfile resfile,
const unsigned int  flags = MSRES_GROUP_PROTEINS,
double  minProbability = 0.0,
int  maxHitsToReport = 50,
const char *  unigeneIndexFile = 0,
double  ignoreIonsScoreBelow = 0.0,
int  minPepLenInPepSummary = 0,
const char *  singleHit = 0,
const unsigned int  flags2 = MSPEPSUM_NONE 
) [static]

Returns true if a cache file will be created when the ms_peptidesummary constructor is called.

This static function can be called without creating an ms_peptidesummary object, and can be used in advance of creating an object to see if there will be a delay while (re)creating a cache file. The function has the exact same parameters as the ms_peptidesummary constructor -- see the documentation for ms_peptidesummary::ms_peptidesummary for details.

Parameters:
resfile-- see ms_peptidesummary::ms_peptidesummary
flags-- see ms_peptidesummary::ms_peptidesummary
minProbability-- see ms_peptidesummary::ms_peptidesummary
maxHitsToReport-- see ms_peptidesummary::ms_peptidesummary
unigeneIndexFile-- see ms_peptidesummary::ms_peptidesummary
ignoreIonsScoreBelow-- see ms_peptidesummary::ms_peptidesummary
minPepLenInPepSummary-- see ms_peptidesummary::ms_peptidesummary
singleHit-- see ms_peptidesummary::ms_peptidesummary
flags2-- see ms_peptidesummary::ms_peptidesummary
Returns:
true if the cache file will be (re-)created because it does not exist or is not complete or not up to date. If the MSPEPSUM_USE_CACHE flag has not been specified, it will always return false.

The documentation for this class was generated from the following files:
Copyright © 2016 Matrix Science Ltd.  All Rights Reserved. Generated on Fri Jun 2 2017 01:44:53