Matrix Science header

ms_mascotrespeptidesum.hpp

00001 /*
00002 ##############################################################################
00003 # file: ms_mascotrespeptidesum.hpp                                           #
00004 # 'msparser' toolkit                                                         #
00005 # Encapsulates the peptide summary report from the mascot results file       #
00006 ##############################################################################
00007 # COPYRIGHT NOTICE                                                           #
00008 # Copyright 1998-2016 Matrix Science Limited  All Rights Reserved.           #
00009 #                                                                            #
00010 ##############################################################################
00011 #    $Archive:: /MowseBranches/ms_mascotresfile_1.2/include/ms_mascotrespe $ #
00012 #     $Author: villek $ #
00013 #       $Date: 2017/05/02 15:57:31 $ #
00014 #   $Revision: 1.131 $ #
00015 # $NoKeywords::                                                            $ #
00016 ##############################################################################
00017 */
00018 
00019 #ifndef MS_MASCOTRESPEPTIDESUM_HPP
00020 #define MS_MASCOTRESPEPTIDESUM_HPP
00021 
// Export/import decoration for the classes declared in this header.
//
//  * Windows, DLL build of the library (MS_MASCOTRESFILE_EXPORTS defined):
//        MS_MASCOTRESFILE_API -> __declspec(dllexport)
//  * Windows, client of the DLL:
//        MS_MASCOTRESFILE_API -> __declspec(dllimport)
//  * Windows with _MATRIX_USE_STATIC_LIB, and every non-Windows platform:
//        MS_MASCOTRESFILE_API -> (empty)
//
// NOTE(review): the two warning pragmas are not wrapped in push/pop, so they
// stay disabled for the rest of any translation unit that includes this file.
#ifdef _WIN32
#pragma warning(disable:4251)   // Don't want all classes to be exported
#pragma warning(disable:4786)   // Debug symbols too long
#   ifndef _MATRIX_USE_STATIC_LIB
#       ifdef MS_MASCOTRESFILE_EXPORTS
#           define MS_MASCOTRESFILE_API __declspec(dllexport)
#       else
#           define MS_MASCOTRESFILE_API __declspec(dllimport)
#       endif
#   else
#       define MS_MASCOTRESFILE_API
#   endif
#else
#   define MS_MASCOTRESFILE_API
#endif
00037 
00038 #ifdef __ALPHA_UNIX__
00039 #include <ctype.h>
00040 #endif
00041 
00042 // Includes from the standard template library
00043 #include <list>
00044 #include <map>
00045 #include <set>
00046 #include <string>
00047 #include <vector>
00048 
// Forward declarations of library-internal implementation classes. They are
// only named in this header (friend declarations and pointer members), so
// their definitions are not needed here.
namespace msparser_internal {
    class ms_peptidesumcdb;
    class ms_peptide_impl;
    class ms_peptide_impl_reloadable;
}
00054 
00055 namespace matrix_science {
00056     class ms_tinycdb;
00057     class ms_protein;
00058     class ms_datfile;
00059     class ms_distiller_data;
00060     class ms_distiller_data_search;
00061     class ms_quant_component;
00062     class ms_quant_modgroup;
00063 
00069     struct ms_percolator_score {
00070         double percolatorScore; 
00071         double qValue;
00072         double percolatorPEP;
00073         double mascotScore;
00074         bool foundInPopFile;
00075         ms_percolator_score()
00076             : percolatorScore(0.0)
00077             , qValue(1.0)
00078             , percolatorPEP(1.0)
00079             , mascotScore(0.0)
00080             , foundInPopFile(false)
00081         {}
00082     };
00083     typedef std::vector<ms_percolator_score> ms_percolator_scores_vector;
00084 
00085     typedef struct structExpectTargetDecoy //Bug 11481, alternative algorithm by sorting and counting to adjust FDR
00086     {
00087         double expect;
00088         int target;
00089         int decoy;
00090         
00091         bool operator<(const structExpectTargetDecoy &other) const
00092         {
00093             return expect > other.expect;//we want the smallest values first
00094         }
00095     } structExpectTargetDecoy;
00096 
00098 
00104     class MS_MASCOTRESFILE_API ms_peptidesummary : public matrix_science::ms_mascotresults
00105     {
00106         friend class msparser_internal::ms_peptide_impl;
00107         friend class ms_protein;
00108         friend class msparser_internal::ms_peptidesumcdb;
00109         friend class ms_ms2quantitation;
00110         friend class ms_customquantitation;
00111         friend class ms_quant_helper;
00112         public:
00114 
00117             enum QL_FLAG
00118             { 
00119                 QL_FIRST                    = 0x0000, 
00120                 QL_ALL                      = 0x0000, 
00121                 QL_UNASSIGNED               = 0x0001, 
00122                 QL_BELOW_IDENTITY           = 0x0002, 
00123                 QL_BELOW_HOMOLOGY           = 0x0003, 
00124                 QL_IGNORE_IONS_SCORE_BELOW  = 0x0004, 
00125 
00126                 QL_LAST                     = 0x0004  
00127             };
00128 
00130 
00133             enum MSPEPSUM
00134             { 
00135                 MSPEPSUM_NONE               = 0x0000, 
00136                 MSPEPSUM_PERCOLATOR         = 0x0001, 
00137                 MSPEPSUM_USE_CACHE          = 0x0002, 
00138                 MSPEPSUM_SINGLE_HIT_DBIDX   = 0x0004, 
00139                 MSPEPSUM_USE_HOMOLOGY_THRESH= 0x0008, 
00140                 MSPEPSUM_NO_PROTEIN_GROUPING= 0x0010, 
00141                 MSPEPSUM_DISCARD_RELOADABLE = 0x0020, 
00142                 MSPEPSUM_DEFERRED_CREATE            = 0x0040, 
00143                 MSPEPSUM_CACHE_IGNORE_DATE_CHANGE   = 0x0080, 
00144                 MSPEPSUM_REMOVE_CHIMERIC_DUPES      = 0x0100, 
00145                 MSPEPSUM_SL_INTEGRATED              = 0x0200, 
00146                 MSPEPSUM_SL_ONLY                    = 0x0400, 
00147             };
00148 
00150 
00164             enum BUGFIX_NUM
00165             { 
00166                 BUGFIX_10780 = 10780, 
00167                 BUGFIX_10995 = 10995, 
00168                 BUGFIX_11002 = 11002, 
00169                 BUGFIX_11018 = 11018, 
00170                 BUGFIX_11235 = 11235, 
00171                 BUGFIX_11254 = 11254, 
00172                 BUGFIX_11344 = 11344, 
00173                 BUGFIX_11411 = 11411, 
00174                 BUGFIX_11425 = 11425, 
00175                 BUGFIX_11438 = 11438, 
00176                 BUGFIX_11483 = 11483, 
00177                 BUGFIX_11499 = 11499, 
00178                 BUGFIX_11856 = 11856, 
00179                 BUGFIX_12123 = 12123, 
00180                 BUGFIX_12317 = 12317, 
00181                 BUGFIX_11481 = 11481,  
00182                 BUGFIX_12538 = 12538, 
00183                 BUGFIX_12729 = 12729, 
00184                 BUGFIX_12740 = 12740  
00185             };
00186 
00188 
00193             enum CACHE_STATUS {
00194             CACHE_MISSING_RESFILE                = 0x10000, 
00195 
00196             RESFILE_CACHE_FILE_NOT_PRESENT       = 0x00001, 
00197             RESFILE_CACHE_BEING_CREATED          = 0x00002, 
00198             RESFILE_CACHE_DISABLED_IN_OPTIONS    = 0x00004, 
00199             RESFILE_CACHE_VALID                  = 0x00008, 
00200             RESFILE_CACHE_CAN_CREATE             = 0x00010, 
00201 
00202             PEPSUMMARY_CACHE_FILE_NOT_PRESENT    = 0x00100, 
00203             PEPSUMMARY_CACHE_BEING_CREATED       = 0x00200, 
00204             PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS = 0x00400, 
00205             PEPSUMMARY_CACHE_VALID               = 0x00800, 
00206             PEPSUMMARY_CACHE_CAN_CREATE          = 0x01000, 
00207             PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE= 0x02000, 
00208             PEPSUMMARY_CACHE_NOT_FOR_PMF         = 0x04000  
00209             };
00210 
00212             static std::string getCacheFilename(
00213                     ms_mascotresfile & resfile,
00214                     const unsigned int flags,
00215                     double             minProbability = 0.0,
00216                     int                maxHitsToReport = 50,
00217                     const char *       unigeneIndexFile = 0,
00218                     double             ignoreIonsScoreBelow = 0.0,
00219                     int                minPepLenInPepSummary = 0,
00220                     const char *       singleHit = 0,
00221                     const unsigned int flags2 = MSPEPSUM_NONE);
00222 
00224             static std::string getCacheFilename(
00225                     ms_mascotresfile & resfile,
00226                     const ms_distiller_data & distillerData,
00227                     int searchIndex);
00228 
00230             static std::string getCacheFilename(
00231                     ms_mascotresfile & resfile,
00232                     const ms_datfile & datfile,
00233                     const ms_distiller_data_search & search);
00234 
00236             ms_peptidesummary(ms_mascotresfile  &resfile,
00237                               const unsigned int flags = MSRES_GROUP_PROTEINS,
00238                               double             minProbability = 0.0,
00239                               int                maxHits = 50,
00240                               const char *       unigeneIndexFile = 0,
00241                               double             ignoreIonsScoreBelow = 0.0,
00242                               int                minPepLenInPepSummary = 0,
00243                               const char *       singleHit = 0,
00244                               const unsigned int flags2 = MSPEPSUM_NONE);
00245 
00247             ms_peptidesummary(
00248                     ms_mascotresfile & resfile,
00249                     const ms_distiller_data & distillerData,
00250                     int searchIndex);
00251 
00253             ms_peptidesummary(
00254                     ms_mascotresfile & resfile,
00255                     const ms_datfile & datfile,
00256                     const ms_distiller_data_search & search);
00257 
00259             ms_peptidesummary(
00260                     ms_mascotresfile & resfile,
00261                     const ms_mascotresults_params & parameters);
00262 
00263             virtual ~ms_peptidesummary();
00264             
00266             ms_mascotresfile & mascotresfile() const;
00267 
00269             virtual bool createSummary();
00270 
00271             virtual bool anyEmPAI() const;
00272 
00274             virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;
00275 
00277             virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;
00278 
00280             virtual void freeHit(const int hit);
00281 
00282             // This is 'hard-coded' to 10 in Mascot
00283             enum { PEPS_PER_QUERY = 10 };
00284 
00286             virtual ms_peptide getPeptide(const int q, const int p) const;
00287 
00289             virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const;
00290 
00292             virtual double getIonsScore(const int q, const int p, const bool decoy) const;
00293 
00295             virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const;
00296 
00298             virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false);
00299 
00301             virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p, 
00302                                                                             std::vector<int> & start, 
00303                                                                             std::vector<int> & end,
00304                                                                             std::vector<std::string> &pre,
00305                                                                             std::vector<std::string> &post,
00306                                                                             std::vector<int> & frame,
00307                                                                             std::vector<int> & multiplicity,
00308                                                                             std::vector<int> & db) const;
00309 
00311             std::vector<int> getNumberOfAccessionDBTypes(int q, int p) const;
00312 
00314             virtual int getAllFamilyMembersWithThisPepMatch(const int hit,
00315                                                             const int q,
00316                                                             const int p,
00317                                                             std::vector< int >& db,
00318                                                             std::vector< std::string >& acc,
00319                                                             std::vector< int >& dupe_status) const;
00320 
00322             std::vector<int> getNumberOfFamilyAccessionDBTypes(int hit, int q, int p, bool includeSamesets = true) const;
00323 
00325             virtual std::string getErrTolModString(const int q, const int p) const;
00326 
00328             virtual std::string getLibraryModString(const int q, const int p) const;
00329 
00331             virtual std::string getErrTolModMasterString(const int q, const int p) const;
00332 
00334             virtual std::string getErrTolModSlaveString(const int q, const int p) const;
00335 
00337             virtual std::string getErrTolModPepString(const int q, const int p) const;
00338 
00340             virtual std::string getErrTolModReqPepString(const int q, const int p) const;
00341 
00343             virtual std::string getTagString(const int q, const int p) const;
00344 
00346             virtual int getTagDeltaRangeStart(const int q, const int p) const;
00347 
00349             virtual int getTagDeltaRangeEnd(const int q, const int p) const;
00350 
00352             virtual std::string getTerminalResiduesString(const int q, const int p) const;
00353 
00355             virtual std::string getComponentString(const int q, const int p) const;
00356 
00358             virtual int getProteinScoreCutoff(double OneInXprobRnd) const;
00359 
00361             int getSrcRank(int q, int p) const;
00362 
00364             ms_mascotresfile::section getSrcSection(int q, int p)const;
00365 
00367             std::string getQueryList(QL_FLAG flag, bool outputListOfQueries = true);
00368 
00370             virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const;
00371 
00373             virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const;
00374 
00376             virtual int findProteins(const int startHit, 
00377                                      const std::string & str, 
00378                                      const int dbIdx,
00379                                      FIND_FLAGS item,
00380                                      FIND_COMPARE_FLAGS compareFlags,
00381                                      std::vector<std::string> & accessions,
00382                                      std::vector<int> & dbIndexes) const;
00383 
00385             virtual int findProteinsByAccession(const int startHit, 
00386                                                 const std::string & str, 
00387                                                 const int dbIdx,
00388                                                 FIND_COMPARE_FLAGS compareFlags,
00389                                                 std::vector<std::string> & accessions,
00390                                                 std::vector<int> & dbIndexes) const;
00391 
00393             virtual int findProteinsByDescription(const int startHit, 
00394                                                   const std::string & str, 
00395                                                   FIND_COMPARE_FLAGS compareFlags,
00396                                                   std::vector<std::string> & accessions,
00397                                                   std::vector<int> & dbIndexes) const;
00398 
00400             virtual int findPeptides(const int startHit, 
00401                                      const std::string & str, 
00402                                      FIND_FLAGS item,
00403                                      FIND_COMPARE_FLAGS compareFlags,
00404                                      std::vector<int> & q,
00405                                      std::vector<int> & p) const;
00406 
00408             virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide(
00409                     const matrix_science::ms_peptide & peptide,
00410                     matrix_science::ms_quant_component & component,
00411                     const matrix_science::ms_quant_method * method = NULL) const;
00412 
00414             virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;
00415 
00417             virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;
00418 
00420             virtual bool getTreeClusterNodes(const int hit,
00421                                              std::vector<int>    &left, 
00422                                              std::vector<int>    &right, 
00423                                              std::vector<double> &distance,
00424                                              TREE_CLUSTER_METHOD  tcm = TCM_PAIRWISE_MAXIMUM,
00425                                              double           *** reserved1 = 0,
00426                                              unsigned int       * reserved2 = 0) const;
00427 
00429             static bool willCreateCache(ms_mascotresfile  &resfile,
00430                                         const unsigned int flags = MSRES_GROUP_PROTEINS,
00431                                         double             minProbability = 0.0,
00432                                         int                maxHits = 50,
00433                                         const char *       unigeneIndexFile = 0,
00434                                         double             ignoreIonsScoreBelow = 0.0,
00435                                         int                minPepLenInPepSummary = 0,
00436                                         const char *       singleHit = 0,
00437                                         const unsigned int flags2 = MSPEPSUM_NONE);
00438 
00440             static bool willCreateCache(const char             * resultsFileName,
00441                                         const ms_mascotoptions & opts,
00442                                         const char             * unigeneIndexFile,
00443                                         const char             * singleHit,
00444                                         const char             * applicationName,
00445                                               std::string      & resfileCacheFileName,
00446                                               std::string      & peptideSummaryCacheFileName,
00447                                               unsigned int     & cacheStatus);
00448 
00450             bool isPeptideModificationMatch(
00451                     const matrix_science::ms_quant_modgroup & group, 
00452                     const matrix_science::ms_peptide & peptide) const;
00453 
00455             bool isPeptideComponentMatch(
00456                     const matrix_science::ms_quant_component & component,
00457                     const matrix_science::ms_peptide & peptide) const;
00458 
00460             bool hasQuantMethod() const;
00462             const matrix_science::ms_quant_method * getQuantMethod() const;
00463 
00465             std::string getCacheFileName() const;
00466 
00468             bool isDataCached(BUGFIX_NUM bugNum) const;
00469 
00471             virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated = true);
00472 
00474             virtual bool isValidQandP(const int q, const int p) const;
00475 
00477             bool dumpCDB(const std::string dumpFileName);
00478 
00480             void getLibraryEntryId(const int q, const int p, std::vector<int> &dbIdx, std::vector<int> &offset, std::vector<std::string> &checksum, std::vector<std::string> &mods) const;
00481 
00483             static double getMinProbabilityForSLScore(double score);
00484 
00486             static double getSLThresholdFromMinProbability(double minProbability);
00487             
00489             std::vector<int> getPeptideAmbiguityRanks(const int q, const int p);
00490 
00491         protected:  
00492             // Not safe to copy or assign this object.
00493 #ifndef SWIG
00494             ms_peptidesummary(const ms_peptidesummary & rhs);
00495             ms_peptidesummary & operator=(const ms_peptidesummary & rhs);
00496 #endif
00497             virtual bool getThresholdForFDR(bool homology, double targetFDR, 
00498                     DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType,
00499                     double * closestFDR, double * minProbability,
00500                     int * pNumTargetMatches, int * pNumDecoyMatches);
00501             void calculateDecoyStats(double dOneInXprobRnd);
00502             virtual void getQmatchValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
00503             virtual void getPlugholeValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
00504             virtual ms_mascotresfile::section getSrcSectionProtected(const int query, const int rank) const;
00505 
00506             virtual bool cacheModificationCounts();
00507 
00508         private:
00509             void initialise(
00510                     double ignoreIonsScoreBelow,
00511                     int maxHitsToReport,
00512                     const char * unigeneIndexFile,
00513                     int minPepLenInPepSummary,
00514                     const char * singleHit,
00515                     const unsigned int flags2);
00516 
00517             void loadQuery(int q, std::vector<ms_peptide*> &peptides,
00518                     ms_mascotresfile::section secPeptides,
00519                     ms_mascotresfile::section secPeptidesCompanion,
00520                     std::vector<short> *offsetToAccession,
00521                     std::vector< std::pair<double, double> > *observed_rank_1_same_seq,
00522                     std::vector<double> *library_rank_1_scores);
00523 
00524             void detectChimericDuplicates(const std::list<int> &query_set,
00525                     const std::vector<ms_peptide*> &peptides, 
00526                     std::set<int> &is_chimeric_duplicate) const;
00527             void massageQuery(int q, std::vector<ms_peptide*> &peptides,
00528                     ms_mascotresfile::section secPeptides, 
00529                     std::vector<unsigned char> &srcRank, 
00530                     std::vector<short> *offsetToAccession,
00531                     const std::set<int> &is_chimeric_duplicate,
00532                     bool load_proteins);
00533             void determinePrettyRanksAndAmbiguityGroupIds(const int q, std::vector<ms_peptide*> &peptides);
00534             void loadIonsScoresIntoCache(const bool decoy, const int p=1);
00535 
00536             ms_peptide * loadPepRes(const ms_mascotresfile::section sec,
00537                                     int q, int p, int rank,
00538                                     std::string::size_type & idx,
00539                                     msparser_internal::ms_peptide_impl_reloadable * * pReloadable = 0,
00540                                     bool *canLoadIntoProteins = 0);
00541             void determineSortingScore(ms_peptide *pep) const;
00542             void loadIntoProteins(const ms_mascotresfile::section sec,
00543                                   std::string str,
00544                                   std::string strDB,
00545                                   std::string::size_type idx,
00546                                   int q, int p, int rank,
00547                                   double ionsScore,
00548                                   acc_dbidx_set_t * pAccessions,
00549                                   bool isIgnored);
00550             double minIonsScoreForMatch(const int q,
00551                                         const ms_mascotresfile::section sec,
00552                                         const int srcP) const;
00553             int               singleHitDbIdx_;
00554 
00555             struct componentIdentifier_t {
00556                 int q_;
00557                 short p_;
00558                 short componentID_;
00559                 componentIdentifier_t(int q, short p, short cid) : q_(q), p_(p), componentID_(cid) { };
00560                 componentIdentifier_t(const std::string & q, const std::string & p, const std::string & componentID);
00561                 std::string asString() const;
00562                 bool operator<(const componentIdentifier_t & right) const;
00563              };
00564             typedef std::map<componentIdentifier_t, std::pair<double, double> > componentIntensities_t;
00565             componentIntensities_t cachedComponentIntensities_;
00566             bool cacheComponentIntensities(bool & isUsable);
00567             typedef std::map<std::string, short> componentNames_t;
00568             componentNames_t componentNames_;
00569             bool componentIntensitiesLoaded_;
00570 
00571             bool isPeptideModificationMatch(
00572                     const matrix_science::ms_quant_modgroup &group,
00573                     const std::string &varModStr,
00574                     const std::string &summedModStr,
00575                     const std::vector<int> &slModifiedPositions,
00576                     const std::string &pepStr,
00577                     const bool isAnyProteinNterminus,
00578                     const bool isAnyProteinCterminus) const;
00579 
00580             // Some private variables and functions for error tolerant search
00581             bool checkErrorTolerantStatus(int q, int p, double ionsScore, 
00582                                           bool fromET,
00583                                           const std::string & pepStr);
00584             ms_mascotresfile  * errTolSource_;
00585             ms_peptidesummary * errTolPepSummary_;
00586             unsigned int        errTolType_;
00587             bool                missingErrTolParent_;
00588 
00589             ms_mascotresfile::section secPeptidesInactive_, secPeptidesActiveCompanion_, secPeptidesInactiveCompanion_;
00590             std::vector<unsigned char> srcRankActiveSection_, srcRankInactiveSection_;
00591             bool srcRankInitialised_;
00592             std::vector<short> offsetToAccession_;
00593 
00594             bool rerankingCouldHappen() const;
00595 
00596             void setSrcRank(int q, int p, int srcRank, ms_mascotresfile::section peptideSec, bool rejected, std::vector<unsigned char> &srcRankVec) const;
00597             int getSrcRank(int q, int p, ms_mascotresfile::section & peptideSec, bool * pRejected = 0, bool inactiveSection = false) const;
00598             enum PEP_SECTIONS { PEP_SEC_INVALID         = 0, 
00599                                 PEP_SEC_PEPTIDES        = 1, 
00600                                 PEP_SEC_DECOYPEPTIDES   = 2, 
00601                                 PEP_SEC_ERRTOLPEPTIDES  = 3, 
00602                                 PEP_SEC_LAST            = 4};
00603             // For each of these, use SR_MASK_ and then, if required, >> SR_SHIFT_
00604             enum SRC_RANK_MASKS { SR_MASK_RANK      = 0x0F,  // bits 0..3
00605                                   SR_MASK_SEC       = 0x30,  // bits 4..5
00606                                   SR_MASK_REJECT    = 0x40,  // bit  6     If discarded because below threshold or other rule
00607                                   SR_MASK_LIBRARY   = 0x80,  // bit  7
00608                                   SR_SHIFT_RANK     = 0x00,  // bits 0..3 - no need to shift
00609                                   SR_SHIFT_SEC      = 0x04,  // bits 4..5
00610                                   SR_SHIFT_REJECT   = 0x06,  // bit  6     If discarded because below threshold or other rule
00611                                   SR_SHIFT_LIBRARY  = 0x07}; // bit  7
00612             unsigned char secLookupFwd_[ms_mascotresfile::SEC_NUMSECTIONS];
00613             int secLookupRev_[PEP_SEC_LAST];
00614 
00615             void getUnassignedListAsString(std::string & str);
00616             bool findCompareProtein(const std::string & accRequired, 
00617                                     const std::string & accToTest, 
00618                                     const int dbIdxRequired,
00619                                     const int dbIdxToTest,
00620                                     FIND_FLAGS item,
00621                                     FIND_COMPARE_FLAGS compareFlags,
00622                                     const ms_protein * prot,
00623                                     const std::vector<int> & q, 
00624                                     const std::vector<int> & p) const;
00625             bool findCompare(const std::string & find, 
00626                              const std::string & findIn, 
00627                              FIND_COMPARE_FLAGS compareFlags) const;
00628             bool findCompare(const double find_value, 
00629                              const double find_range,
00630                              const double value,
00631                              FIND_COMPARE_FLAGS compareFlags) const;
00632 
00633             struct distanceInfo_t {
00634                 double scoreExcess;
00635                 std::string peptideStr;
00636                 bool isFromLibrary;
00637                 distanceInfo_t():
00638                     scoreExcess(0.0), peptideStr(), isFromLibrary(false)
00639                 {}
00640                 distanceInfo_t(double se, std::string & pep, bool fromLib):
00641                     scoreExcess(se), peptideStr(pep), isFromLibrary(fromLib)
00642                 {}
00643             };
00644             typedef std::map<std::pair<int, int>, distanceInfo_t> distanceInfoMap_t;
00645             double getDistance(const distanceInfoMap_t & a, const distanceInfoMap_t & b) const;
00646             bool readPercolatorOutputFile(bool decoyResults,
00647                                           ms_percolator_scores_vector * pVector = 0,
00648                                           long * pNumAboveIdentity = 0, 
00649                                           double threshold = 0.05);
00650 
00651             typedef std::set<std::pair<int,int> > hitAndFamily_t;
00652             bool getHitAndFamilyMember(const ms_protein * prot, hitAndFamily_t & hitAndFamily, const UNIQUE_PEP_RULES rules) const;
00653 
00654             ms_percolator_scores_vector targetPercolatorScores_;
00655             ms_percolator_scores_vector decoyPercolatorScores_;
00656 
00657             // percolatorScores_ references targetPercolatorScores_ unless 
00658             // MSRES_DECOY is specified, in which case it references 
00659             // decoyPercolatorScores_
00660             ms_percolator_scores_vector & percolatorScores_;   
00661 
00662             msparser_internal::ms_peptidesumcdb * pCacheFile_;
00663             ms_tinycdb  * pTmpCache_;
00664 
00665             mutable std::vector< std::vector<double> > cachedScores_[2]; // Use cachedScores_[decoy][p][q]. Order important for performance with FDR
00666             mutable std::vector<bool> attemptedLoadScoresFromCache_[2];  // Use attemptedLoadScoresFromCache_[decoy][p]. 
00667 
00668             /* emPAI related values and helper methods */
00669 
00670             bool emPAIminmaxDone_;
00671             int emPAIminCleavageN_;
00672             int emPAImaxCleavageN_;
00673             bool emPAInetCleavageDone_;
00674             double netCleavageAbundance_;
00675 
00676             void init_emPAImzRange();
00677             void init_netCleavageAbundance();
00678             double calculate_AA_abundance(std::string) const;
00679             double calculate_emPAI_num_observable(int) const;
00680 
00681             mutable std::map<std::string, char> modfileLookup_;
00682             mutable bool methodCached_;
00683             mutable matrix_science::ms_quant_method * method_;
00684 
00685             mutable std::map< std::pair<int, int>, bool > uniquePepLookup_; // only used when no cache file
00686 
00687             std::vector<int> parseDBstr(const std::string &strDB, bool &has_fasta_acc, bool &has_sl_ref_acc) const;
00688             
00689             int addExpectationValue(bool homology, DECOY_STATS_COUNT_TYPE countType, bool decoy, int q,
00690                                      std::map<std::string, size_t> & mapSequenceToIndex, std::vector<structExpectTargetDecoy> & vecExpectTargetDecoy);
00691             double truncateExpectationValue(double dValue, int sigfig);
00692 
00693             bool getQPsFromMapValueToQP(const std::string & strSearch, const map_mod_value_to_qp & mapValueToQP, std::set<std::pair<int, int> > & setQP, bool bCaseSensitive = true) const;
00694 
00695             void countDBTypes(const std::vector<int> &db, std::vector<int> &counts) const;
00696 
00697             std::string getDecoyPeptideString(int q, int p);
00698     }; // end of resfile_group
00700 }   // matrix_science namespace
00701 
00702 #endif // MS_MASCOTRESPEPTIDESUM_HPP
00703 
00704 /*------------------------------- End of File -------------------------------*/
Copyright © 2016 Matrix Science Ltd.  All Rights Reserved. Generated on Fri Jun 2 2017 01:44:51