Matrix Science header

ms_mascotrespeptidesum.hpp

00001 /*
00002 ##############################################################################
00003 # file: ms_mascotrespeptidesum.hpp                                           #
00004 # 'msparser' toolkit                                                         #
00005 # Encapsulates the peptide summary report from the mascot results file       #
00006 ##############################################################################
00007 # COPYRIGHT NOTICE                                                           #
00008 # Copyright 1998-2016 Matrix Science Limited  All Rights Reserved.           #
00009 #                                                                            #
00010 ##############################################################################
00011 #    $Archive:: /MowseBranches/ms_mascotresfile_1.2/include/ms_mascotrespe $ #
00012 #     $Author: villek@matrixscience.com $ #
00013 #       $Date: 2021-04-22 10:00:41 +0100 $ #
00014 #   $Revision: 7bc84c21208ba1258516c93fb7b0917dc734c462 | MSPARSER_REL_2_8_1-0-gea32989045 $ #
00015 # $NoKeywords::                                                            $ #
00016 ##############################################################################
00017 */
00018 
00019 #ifndef MS_MASCOTRESPEPTIDESUM_HPP
00020 #define MS_MASCOTRESPEPTIDESUM_HPP
00021 
00022 
00023 // Includes from the standard template library
00024 #include <list>
00025 #include <map>
00026 #include <set>
00027 #include <string>
00028 #include <vector>
00029 
00030 namespace msparser_internal {
00031     class ms_peptidesumcdb;
00032     class ms_peptide_impl;
00033     class ms_peptide_impl_reloadable;
00034     class ms_SEQINFO;
00035     class ms_peptidesummary_fdr_query_data_extractor;
00036     class ms_peptidesummary_fdr_peptides_vector_extractor;
00037 }
00038 
00039 namespace matrix_science {
00040     class ms_tinycdb;
00041     class ms_mascotresults_params;
00042     class ms_protein;
00043     class ms_datfile;
00044     class ms_distiller_data;
00045     class ms_distiller_data_search;
00046     class ms_quant_component;
00047     class ms_quant_modgroup;
00048     class ms_linker_site;
00049 
00055     struct ms_percolator_score { // Plain value record holding the Percolator-related figures for one match; all members are public.
00056         double percolatorScore; 
00057         double qValue;
00058         double percolatorPEP;
00059         double mascotScore;
00060         bool foundInPopFile; // presumably "found in the Percolator output file" -- TODO confirm against the code that fills this in
              // Default state: both scores 0.0, qValue and percolatorPEP 1.0, foundInPopFile false.
00061         ms_percolator_score()
00062             : percolatorScore(0.0)
00063             , qValue(1.0)
00064             , percolatorPEP(1.0)
00065             , mascotScore(0.0)
00066             , foundInPopFile(false)
00067         {}
00068     };
00069     typedef std::vector<ms_percolator_score> ms_percolator_scores_vector;
00070 
00071     typedef struct structExpectTargetDecoy // Bug 11481: record used by the alternative FDR adjustment algorithm (sorting and counting).
00072     {
          // No constructor is declared, so all three members are uninitialised until the creator assigns them.
00073         double expect;
00074         int target; // presumably a count of target matches -- TODO confirm
00075         int decoy;  // presumably a count of decoy matches -- TODO confirm
00076         
00077 #ifndef SWIG
          // Deliberately inverted ordering: this object is "less than" other when its expect value is LARGER.
00078         bool operator<(const structExpectTargetDecoy &other) const
00079         {
              // NOTE(review): "smallest first" holds for containers that surface the greatest element
              // (e.g. std::priority_queue); a plain std::sort with this operator puts the largest first -- confirm the caller.
00080             return expect > other.expect; // we want the smallest values first
00081         }
00082 #endif
00083     } structExpectTargetDecoy;
00084 
00086 
00092     class MS_MASCOTRESFILE_API ms_peptidesummary : public matrix_science::ms_mascotresults
00093     {
00094         friend class msparser_internal::ms_peptide_impl;
00095         friend class ms_protein;
00096         friend class msparser_internal::ms_peptidesumcdb;
00097         friend class ms_ms2quantitation;
00098         friend class ms_customquantitation;
00099         friend class ms_quant_helper;
00100         friend class msparser_internal::ms_peptidesummary_fdr_query_data_extractor;
00101         friend class msparser_internal::ms_peptidesummary_fdr_peptides_vector_extractor;
00102         public:
00104 
00107             enum QL_FLAG // Selector passed to getQueryList(). Values are consecutive (0..4), not bit masks, despite the hex notation.
00108             { 
00109                 QL_FIRST                    = 0x0000, 
00110                 QL_ALL                      = 0x0000, 
00111                 QL_UNASSIGNED               = 0x0001, 
00112                 QL_BELOW_IDENTITY           = 0x0002, 
00113                 QL_BELOW_HOMOLOGY           = 0x0003, 
00114                 QL_IGNORE_IONS_SCORE_BELOW  = 0x0004, 
00115 
                      // QL_FIRST / QL_LAST bracket the range: QL_FIRST aliases QL_ALL, QL_LAST aliases QL_IGNORE_IONS_SCORE_BELOW.
00116                 QL_LAST                     = 0x0004  
00117             };
00118 
00120 
00123             enum MSPEPSUM // Single-bit option flags (0x0001 .. 0x1000); MSPEPSUM_NONE is the default for the 'flags2' arguments in this class.
00124             { 
00125                 MSPEPSUM_NONE               = 0x0000, 
00126                 MSPEPSUM_PERCOLATOR         = 0x0001, 
00127                 MSPEPSUM_USE_CACHE          = 0x0002, 
00128                 MSPEPSUM_SINGLE_HIT_DBIDX   = 0x0004, 
00129                 MSPEPSUM_USE_HOMOLOGY_THRESH= 0x0008, 
00130                 MSPEPSUM_NO_PROTEIN_GROUPING= 0x0010, 
00131                 MSPEPSUM_DISCARD_RELOADABLE = 0x0020, 
00132                 MSPEPSUM_DEFERRED_CREATE            = 0x0040, 
00133                 MSPEPSUM_CACHE_IGNORE_DATE_CHANGE   = 0x0080, 
00134                 MSPEPSUM_REMOVE_CHIMERIC_DUPES      = 0x0100, 
00135                 MSPEPSUM_SL_INTEGRATED              = 0x0200, 
00136                 MSPEPSUM_SL_ONLY                    = 0x0400, 
00137                 MSPEPSUM_CROSSLINK_INTEGRATED       = 0x0800, 
                      // NOTE(review): the trailing comma below is only standard from C++11 on; the other enums in this
                      // class omit it, and pre-C++11 pedantic builds warn about it.
00138                 MSPEPSUM_CROSSLINK_ONLY             = 0x1000, 
00139             };
00140 
00142 
00156             enum BUGFIX_NUM // Identifiers accepted by isDataCached(); each enumerator's value equals the bug number in its name.
00157             { 
00158                 BUGFIX_10780 = 10780, 
00159                 BUGFIX_10995 = 10995, 
00160                 BUGFIX_11002 = 11002, 
00161                 BUGFIX_11018 = 11018, 
00162                 BUGFIX_11235 = 11235, 
00163                 BUGFIX_11254 = 11254, 
00164                 BUGFIX_11344 = 11344, 
00165                 BUGFIX_11411 = 11411, 
00166                 BUGFIX_11425 = 11425, 
00167                 BUGFIX_11438 = 11438, 
00168                 BUGFIX_11483 = 11483, 
00169                 BUGFIX_11499 = 11499, 
00170                 BUGFIX_11856 = 11856, 
00171                 BUGFIX_12123 = 12123, 
00172                 BUGFIX_12317 = 12317, 
                      // The list is not in numeric order from here on (11481 and 12447 appear late).
00173                 BUGFIX_11481 = 11481,  
00174                 BUGFIX_12538 = 12538, 
00175                 BUGFIX_12729 = 12729, 
00176                 BUGFIX_12740 = 12740, 
00177                 BUGFIX_13324 = 13324, 
00178                 BUGFIX_12447 = 12447, 
00179                 BUGFIX_13759 = 13759  
00180             };
00181 
00183 
00188             enum CACHE_STATUS { // Single-bit status values in three ranges; presumably OR-ed into the 'cacheStatus' output of willCreateCache() -- TODO confirm.
                  // Bit 0x10000 stands alone.
00189             CACHE_MISSING_RESFILE                = 0x10000, 
00190 
                  // RESFILE_* values occupy bits 0x00001 .. 0x00010.
00191             RESFILE_CACHE_FILE_NOT_PRESENT       = 0x00001, 
00192             RESFILE_CACHE_BEING_CREATED          = 0x00002, 
00193             RESFILE_CACHE_DISABLED_IN_OPTIONS    = 0x00004, 
00194             RESFILE_CACHE_VALID                  = 0x00008, 
00195             RESFILE_CACHE_CAN_CREATE             = 0x00010, 
00196 
                  // PEPSUMMARY_* values occupy bits 0x00100 .. 0x04000; the first five mirror the RESFILE_* names.
00197             PEPSUMMARY_CACHE_FILE_NOT_PRESENT    = 0x00100, 
00198             PEPSUMMARY_CACHE_BEING_CREATED       = 0x00200, 
00199             PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS = 0x00400, 
00200             PEPSUMMARY_CACHE_VALID               = 0x00800, 
00201             PEPSUMMARY_CACHE_CAN_CREATE          = 0x01000, 
00202             PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE= 0x02000, 
00203             PEPSUMMARY_CACHE_NOT_FOR_PMF         = 0x04000  
00204             };
00205 
00207             static std::string getCacheFilename( // Overload taking the individual settings; 'flags' is mandatory here, everything after it has a default.
00208                     const ms_mascotresfile & resfile,
00209                     const unsigned int flags,
00210                     double             minProbability = 0.0,
00211                     int                maxHitsToReport = 50,
00212                     const char *       unigeneIndexFile = 0,
00213                     double             ignoreIonsScoreBelow = 0.0,
00214                     int                minPepLenInPepSummary = 0,
00215                     const char *       singleHit = 0,
00216                     const unsigned int flags2 = MSPEPSUM_NONE);
00217 
                  // Overload taking the same kind of settings bundled in an ms_mascotresults_params object.
00219             static std::string getCacheFilename(
00220                     const ms_mascotresfile & resfile,
00221                     const ms_mascotresults_params & parameters);
00222 
00224             ms_peptidesummary(const ms_mascotresfile  &resfile, // Constructor from individual settings; the defaults match those of the first willCreateCache() overload.
00225                               const unsigned int flags = MSRES_GROUP_PROTEINS,
00226                               double             minProbability = 0.0,
00227                               int                maxHits = 50,
00228                               const char *       unigeneIndexFile = 0,
00229                               double             ignoreIonsScoreBelow = 0.0,
00230                               int                minPepLenInPepSummary = 0,
00231                               const char *       singleHit = 0,
00232                               const unsigned int flags2 = MSPEPSUM_NONE);
00233 
                  // Constructor from a bundled ms_mascotresults_params object.
00235             ms_peptidesummary(
00236                     const ms_mascotresfile & resfile,
00237                     const ms_mascotresults_params & parameters);
00238 
                  // Virtual destructor: the class derives from ms_mascotresults and declares virtual members.
00239             virtual ~ms_peptidesummary();
00240             
00242             const ms_mascotresfile & mascotresfile() const; // Read-only access to a results-file object (presumably the one given at construction -- TODO confirm).
00243 
                  // NOTE(review): likely the call that builds the summary when MSPEPSUM_DEFERRED_CREATE was requested -- confirm.
00245             virtual bool createSummary();
00246 
00247             virtual bool anyEmPAI() const;
00248 
                  // dbIdx defaults to 1 and length to -1; what length == -1 selects is not visible in this header.
00250             virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;
00251 
                  // memberNumber defaults to 0. Returns a raw ms_protein pointer; ownership is not stated here.
00253             virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;
00254 
00256             virtual void freeHit(const int hit);
00257 
00258             // Anonymous enum used as an integral class constant. This is 'hard-coded' to 10 in Mascot
00259             enum { PEPS_PER_QUERY = 10 };
00260 
00262             virtual ms_peptide getPeptide(const int q, const int p) const; // Returns the ms_peptide for (q, p) by value; q/p are presumably query number and match rank -- TODO confirm.
00263 
                  // Same arguments, but the result is delivered through the pointer reference 'pep'; the bool presumably reports success.
00265             virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const;
00266 
                  // 'decoy' is a mandatory selector (presumably target vs decoy results -- TODO confirm).
00268             virtual double getIonsScore(const int q, const int p, const bool decoy) const;
00269 
                  // 'rules' defaults to UPR_DEFAULT.
00271             virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const;
00272 
                  // Non-const, unlike the neighbouring accessors. 'quotes' defaults to false.
00274             virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false);
00275 
00277             virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p, // Returns a string vector and takes seven caller-supplied vectors by non-const reference (presumably filled in parallel -- TODO confirm).
00278                                                                             std::vector<int> & start, 
00279                                                                             std::vector<int> & end,
00280                                                                             std::vector<std::string> &pre,
00281                                                                             std::vector<std::string> &post,
00282                                                                             std::vector<int> & frame,
00283                                                                             std::vector<int> & multiplicity,
00284                                                                             std::vector<int> & db) const;
00285 
                  // Second overload: identical to the first plus one extra vector, 'psmComponent'.
00287             virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p, 
00288                                                                             std::vector<int> & start, 
00289                                                                             std::vector<int> & end,
00290                                                                             std::vector<std::string> &pre,
00291                                                                             std::vector<std::string> &post,
00292                                                                             std::vector<int> & frame,
00293                                                                             std::vector<int> & multiplicity,
00294                                                                             std::vector<int> & db,
00295                                                                             std::vector<int> & psmComponent) const;
00296 
00298             std::vector<int> getNumberOfAccessionDBTypes(int q, int p) const; // Non-virtual; returns its result by value.
00299 
                  // Takes three vectors (db, acc, dupe_status) by non-const reference; the meaning of the int result is not visible here.
00301             virtual int getAllFamilyMembersWithThisPepMatch(const int hit,
00302                                                             const int q,
00303                                                             const int p,
00304                                                             std::vector< int >& db,
00305                                                             std::vector< std::string >& acc,
00306                                                             std::vector< int >& dupe_status) const;
00307 
                  // Per-hit counterpart of getNumberOfAccessionDBTypes(); includeSamesets defaults to true.
00309             std::vector<int> getNumberOfFamilyAccessionDBTypes(int hit, int q, int p, bool includeSamesets = true) const;
00310 
00312             virtual std::string getErrTolModString(const int q, const int p) const; // This group: const virtual accessors keyed by (q, p); all return by value.
00313 
00315             virtual std::string getLibraryModString(const int q, const int p) const;
00316 
00318             virtual std::string getErrTolModMasterString(const int q, const int p) const;
00319 
00321             virtual std::string getErrTolModSlaveString(const int q, const int p) const;
00322 
00324             virtual std::string getErrTolModPepString(const int q, const int p) const;
00325 
00327             virtual std::string getErrTolModReqPepString(const int q, const int p) const;
00328 
00330             virtual std::string getTagString(const int q, const int p) const;
00331 
                  // The two tag-delta accessors return int rather than a string.
00333             virtual int getTagDeltaRangeStart(const int q, const int p) const;
00334 
00336             virtual int getTagDeltaRangeEnd(const int q, const int p) const;
00337 
                  // The last two accessors also take a psmComponent selector that defaults to ms_peptide::PSM_COMPLETE.
00339             virtual std::string getTerminalResiduesString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00340 
00342             virtual std::string getComponentString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00343 
00345             virtual int getProteinScoreCutoff(double OneInXprobRnd) const; // Takes a "1 in X" probability figure, going by the parameter name -- TODO confirm units.
00346 
00348             int getSrcRank(int q, int p) const;
00349 
00351             ms_mascotresfile::section getSrcSection(int q, int p)const;
00352 
                  // Non-const. 'flag' is a QL_FLAG selector; outputListOfQueries defaults to true.
00354             std::string getQueryList(QL_FLAG flag, bool outputListOfQueries = true);
00355 
00357             virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const;
00358 #ifndef SWIG
00359 
                  // C++ view: 'value' and 'rawValue' are passed by non-const reference.
00360             virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const;
00361 #else // SWIG Multiple return values
                  // SWIG view of the same function: the two double references are renamed OUTPUT so the wrapper treats them as extra return values.
00362             virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & OUTPUT, double & OUTPUT) const;
00363 #endif
00364 
00365             virtual int findProteins(const int startHit, // Most general protein search: takes both a dbIdx and a FIND_FLAGS 'item'; fills 'accessions' and 'dbIndexes'.
00366                                      const std::string & str, 
00367                                      const int dbIdx,
00368                                      FIND_FLAGS item,
00369                                      FIND_COMPARE_FLAGS compareFlags,
00370                                      std::vector<std::string> & accessions,
00371                                      std::vector<int> & dbIndexes) const;
00372 
                  // Same parameter list as findProteins() minus the 'item' selector.
00374             virtual int findProteinsByAccession(const int startHit, 
00375                                                 const std::string & str, 
00376                                                 const int dbIdx,
00377                                                 FIND_COMPARE_FLAGS compareFlags,
00378                                                 std::vector<std::string> & accessions,
00379                                                 std::vector<int> & dbIndexes) const;
00380 
                  // Same parameter list as findProteins() minus both 'dbIdx' and 'item'.
00382             virtual int findProteinsByDescription(const int startHit, 
00383                                                   const std::string & str, 
00384                                                   FIND_COMPARE_FLAGS compareFlags,
00385                                                   std::vector<std::string> & accessions,
00386                                                   std::vector<int> & dbIndexes) const;
00387 
                  // Peptide counterpart: takes 'q' and 'p' vectors by non-const reference instead of accessions / dbIndexes.
00389             virtual int findPeptides(const int startHit, 
00390                                      const std::string & str, 
00391                                      FIND_FLAGS item,
00392                                      FIND_COMPARE_FLAGS compareFlags,
00393                                      std::vector<int> & q,
00394                                      std::vector<int> & p) const;
00395 
00397             virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide( // 'component' is a non-const reference (presumably filled in); 'method' is optional and defaults to NULL.
00398                     const ms_peptide & peptide,
00399                     ms_quant_component & component,
00400                     const ms_quant_method * method = NULL) const;
00401 
                  // Both lookups below take an accession plus a dbIdx that defaults to 1 and return a pointer to const ms_protein.
00403             virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;
00404 
00406             virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;
00407 
                  // Takes 'left', 'right' and 'distance' by non-const reference. 'tcm' defaults to TCM_PAIRWISE_MAXIMUM;
                  // the two 'reserved' pointer arguments default to 0.
00409             virtual bool getTreeClusterNodes(const int hit,
00410                                              std::vector<int>    &left, 
00411                                              std::vector<int>    &right, 
00412                                              std::vector<double> &distance,
00413                                              TREE_CLUSTER_METHOD  tcm = TCM_PAIRWISE_MAXIMUM,
00414                                              double           *** reserved1 = 0,
00415                                              unsigned int       * reserved2 = 0) const;
00416 
00418             static bool willCreateCache(const ms_mascotresfile  &resfile, // Overload 1: same parameter list and defaults as the settings-based constructor.
00419                                         const unsigned int flags = MSRES_GROUP_PROTEINS,
00420                                         double             minProbability = 0.0,
00421                                         int                maxHits = 50,
00422                                         const char *       unigeneIndexFile = 0,
00423                                         double             ignoreIonsScoreBelow = 0.0,
00424                                         int                minPepLenInPepSummary = 0,
00425                                         const char *       singleHit = 0,
00426                                         const unsigned int flags2 = MSPEPSUM_NONE);
00427 
                  // Overload 2: same parameter list as the ms_mascotresults_params constructor.
00429             static bool willCreateCache(const ms_mascotresfile  &resfile,
00430                                         const ms_mascotresults_params & parameters);
00431 
00432 #ifndef SWIG
00433 
                  // Overload 3 (C++ view): works from a results file name and ms_mascotoptions; the two cache file names
                  // and 'cacheStatus' are non-const references (presumably CACHE_STATUS bits -- TODO confirm).
00434             static bool willCreateCache(const char             * resultsFileName,
00435                                         const ms_mascotoptions & opts,
00436                                         const char             * unigeneIndexFile,
00437                                         const char             * singleHit,
00438                                         const char             * applicationName,
00439                                               std::string      & resfileCacheFileName,
00440                                               std::string      & peptideSummaryCacheFileName,
00441                                               unsigned int     & cacheStatus);
00442 
                  // Overload 4 (C++ view): like overload 2 plus options, with the peptide summary cache file name and status as reference arguments.
00444             static bool willCreateCache(const ms_mascotresfile        & resfile,
00445                                         const ms_mascotresults_params & parameters,
00446                                         const ms_mascotoptions        & opts,
00447                                               std::string             & peptideSummaryCacheFileName,
00448                                               unsigned int            & cacheStatus);
00449 #else // SWIG Multiple return values
                  // SWIG view of overloads 3 and 4: the reference arguments are renamed OUTPUT so the wrapper treats them as extra return values.
00450             static bool willCreateCache(const char             * resultsFileName,
00451                                         const ms_mascotoptions & opts,
00452                                         const char             * unigeneIndexFile,
00453                                         const char             * singleHit,
00454                                         const char             * applicationName,
00455                                               std::string      & OUTPUT,
00456                                               std::string      & OUTPUT,
00457                                               unsigned int     & OUTPUT);
00458 
00459             static bool willCreateCache(const ms_mascotresfile        & resfile,
00460                                         const ms_mascotresults_params & parameters,
00461                                         const ms_mascotoptions        & opts,
00462                                               std::string             & OUTPUT,
00463                                               unsigned int            & OUTPUT);
00464 #endif
00465 
00467             bool isPeptideModificationMatch( // Non-virtual const predicate over a quantitation mod group and a peptide.
00468                     const ms_quant_modgroup & group, 
00469                     const ms_peptide & peptide) const;
00470 
                  // Non-virtual const predicate over a quantitation component and a peptide.
00472             bool isPeptideComponentMatch(
00473                     const ms_quant_component & component,
00474                     const ms_peptide & peptide) const;
00475 
                  // has*/get* pairs: the getters return a pointer to const; presumably null when the matching has*() is false -- TODO confirm.
00477             bool hasQuantMethod() const;
00479             const ms_quant_method * getQuantMethod() const;
00480 
00482             bool hasCrosslinkingMethod() const;
00484             const ms_crosslinking_method * getCrosslinkingMethod() const;
00485 
                  // Instance accessor; note the different capitalisation from the static getCacheFilename().
00487             std::string getCacheFileName() const;
00488 
                  // Takes one of the BUGFIX_NUM identifiers.
00490             bool isDataCached(BUGFIX_NUM bugNum) const;
00491 
                  // Non-const; 'prot' is a non-const pointer and loadRelated defaults to true.
00493             virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated = true);
00494 
00496             virtual bool isValidQandP(const int q, const int p) const;
00497 
                  // NOTE(review): dumpFileName is taken by const value, so the string is copied on each call.
00499             bool dumpCDB(const std::string dumpFileName);
00500 
                  // Returns nothing; takes four vectors (dbIdx, offset, checksum, mods) by non-const reference.
00502             void getLibraryEntryId(const int q, const int p, std::vector<int> &dbIdx, std::vector<int> &offset, std::vector<std::string> &checksum, std::vector<std::string> &mods) const;
00503 
                  // Two static conversions between a score / threshold and a minimum probability; the formula is not visible in this header.
00505             static double getMinProbabilityForSLScore(double score);
00506 
00508             static double getSLThresholdFromMinProbability(double minProbability);
00509             
                  // Non-const, unlike most (q, p) accessors in this class.
00511             std::vector<int> getPeptideAmbiguityRanks(const int q, const int p);
00512 
00513         protected:  
00514             // Not safe to copy or assign this object: the copy operations are kept out of the public interface and hidden from SWIG.
00515 #ifndef SWIG
00516             ms_peptidesummary(const ms_peptidesummary & rhs);
00517             ms_peptidesummary & operator=(const ms_peptidesummary & rhs);
00518 #endif
00519 
00520             virtual bool queryHasSignificantRank1Match(ms_mascotresfile::section secSummary,
00521                                                        int query) const;
00522 
00523             double       getFirstPassRank1Score(int query, bool decoy) const;
00524 
                  // Takes four non-const pointers (closestFDR, minProbability, pNumTargetMatches, pNumDecoyMatches),
                  // presumably as outputs; whether null is accepted is not visible here -- TODO confirm.
00525             virtual bool getThresholdForFDR(bool homology, double targetFDR, 
00526                     DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType,
00527                     double * closestFDR, double * minProbability,
00528                     int * pNumTargetMatches, int * pNumDecoyMatches);
00529             void calculateDecoyStats(double dOneInXprobRnd);
                  // Both cache readers are declared const even though their names suggest they load data (presumably into mutable members -- TODO confirm).
00530             virtual void getQmatchValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
00531             virtual void getPlugholeValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
                  // Protected, virtual counterpart of the public getSrcSection(q, p).
00532             virtual ms_mascotresfile::section getSrcSectionProtected(const int query, const int rank) const;
00533 
00534             virtual bool cacheModificationCounts();
00535 
00536         private:
00537 
00538             ms_errs* getLogger() const;
00539             
                  // Static helper returning an ms_mascotresults_params by value; minProbabilityForCache is an optional pointer that defaults to 0.
00540             static ms_mascotresults_params getValidatedResultsParameters(const ms_mascotresfile &resfile, const ms_mascotresults_params &params,
00541                                                                          double * minProbabilityForCache = 0);
00542             
00543             void initialise(const ms_mascotresults_params& parameters, double minProbabilityForCache);
00544 
                  // Takes a caller-supplied vector of ms_peptide pointers by non-const reference, a rank range and two results-file sections.
00545             void loadQuery(int q, std::vector<ms_peptide*> &peptides,
00546                     int rankStart, int rankEnd,
00547                     ms_mascotresfile::section secPeptides,
00548                     ms_mascotresfile::section secPeptidesCompanion);
00549 
                  // The two result containers are passed as pointers; whether null is permitted is not visible here.
00550             void accumulateSameSequenceScores(int q, const std::vector<ms_peptide*> &peptides,
00551                     std::vector< std::pair<double, double> > *observed_rank_1_same_seq,
00552                     std::vector<double> *library_rank_1_scores) const;
00553 
00554             void determineFirstPassSignificance(const std::vector<ms_peptide*> &peptides,
00555                     ms_mascotresfile::section secSummary,
00556                     std::vector<bool> &queryHasSignificantFirstPass,
00557                     std::vector<double> &memoFirstPassRank1Score) const;
00558 
                  // Takes is_chimeric_duplicate by non-const reference; massageQuery() below takes a set of the same name by const reference.
00559             void detectChimericDuplicates(const std::list<int> &query_set,
00560                     const std::vector<ms_peptide*> &peptides, 
00561                     std::set<int> &is_chimeric_duplicate) const;
00562             void massageQuery(int q, std::vector<ms_peptide*> &peptides,
00563                     ms_mascotresfile::section secPeptides, 
00564                     ms_mascotresfile::section secPeptidesCompanion, 
00565                     std::vector<unsigned short> &srcRank,
00566                     const std::set<int> &is_chimeric_duplicate,
00567                     bool load_proteins);
00568             void determinePrettyRanksAndAmbiguityGroupIds(const int q, std::vector<ms_peptide*> &peptides);
                  // p defaults to 1.
00569             void loadIonsScoresIntoCache(const bool decoy, const int p=1);
00570 
                  // Returns a raw ms_peptide pointer (ownership not stated here); the two trailing pointer arguments are optional and default to 0.
00571             ms_peptide * loadPepRes(const ms_mascotresfile::section sec,
00572                                     int q, int srcRank, int p,
00573                                     msparser_internal::ms_peptide_impl_reloadable * * pReloadable = 0,
00574                                     bool *canLoadIntoProteins = 0);
00575 
00576             typedef struct { // Private bundle of nine strings plus a linker-site vector; passed by non-const reference to loadAdditionalMatchAttributes() and loadCrossLinkedSequenceData().
00577                 std::string primaryNlStr;
00578                 std::string substStr;
00579                 std::string componentStr;
00580                 std::string summedModsStr;
00581                 std::string summedModsNlStr;
00582                 std::string localModsStr;
00583                 std::string parsedLocalModsStr;
00584                 std::string localModsNlStr;
00585                 std::string monoLinkStr;
                      // ms_linker_site_vector is not declared in the visible part of this header (only ms_linker_site is forward-declared).
00586                 ms_linker_site_vector loopLinks;
00587             } qp_attribute_lines_t;
00588 
00589             struct psm_seq_attributes_t { // Private record passed to loadCrossLinkedSequenceData() as its 'seq' argument.
                      // Defaults: mrCalc 0.0, missedCleavages -1, both strings empty.
00590                 psm_seq_attributes_t()
00591                     : mrCalc(0.0)
00592                     , missedCleavages(-1)
00593                     , peptideStr()
00594                     , varModsStr()
00595                     {}
00596                 double mrCalc;
00597                 int missedCleavages; // -1 is the constructed default; presumably means "not set" -- TODO confirm
00598                 std::string peptideStr, varModsStr;
00599             };
00600 
00601             void loadAdditionalMatchAttributes(const ms_mascotresfile::section sec, // Const member; 'lines' is the only non-const argument.
00602                                                const int q, const int srcRank,
00603                                                const ms_peptide::PSM psmComponent,
00604                                                const std::string &varModsStr,
00605                                                qp_attribute_lines_t &lines) const;
00606 
00607             int loadPepResCharge(const ms_mascotresfile::section chargeSection, int q) const;
00608 
                  // loadPepResLinear() and loadPepResCrossLinked() share one parameter list: the loadPepRes() arguments
                  // (here without defaults) plus 'observed', 'searchPhase' and a bool reference 'etOK'.
00609             ms_peptide * loadPepResLinear(const ms_mascotresfile::section sec,
00610                                          int q, int srcRank, int p,
00611                                          double observed,
00612                                          ms_peptide::SEARCH_PHASE searchPhase,
00613                                          msparser_internal::ms_peptide_impl_reloadable * * pReloadable,
00614                                          bool *canLoadIntoProteins,
00615                                          bool &etOK);
00616             ms_peptide * loadPepResCrossLinked(const ms_mascotresfile::section sec,
00617                                                int q, int srcRank, int p,
00618                                                double observed,
00619                                                ms_peptide::SEARCH_PHASE searchPhase,
00620                                                msparser_internal::ms_peptide_impl_reloadable * * pReloadable,
00621                                                bool *canLoadIntoProteins,
00622                                                bool &etOK);
00623 
                  // Const member; 'seq' and 'attributeLines' are the non-const arguments.
00624             bool loadCrossLinkedSequenceData(const int q,
00625                                              const int srcRank,
00626                                              const ms_mascotresfile::section sec,
00627                                              const std::string &str,
00628                                              const ms_peptide::PSM psmComponent,
00629                                              psm_seq_attributes_t &seq,
00630                                              qp_attribute_lines_t &attributeLines) const;
00631 
                  // Returns an ms_linker_site by value; takes the lengths of the alpha and beta variable-modification strings.
00632             ms_linker_site parseLinkerSite(const int q,
00633                                            const int srcRank,
00634                                            const ms_mascotresfile::section sec,
00635                                            const int alphaVarModsLen,
00636                                            const int betaVarModsLen) const;
                  // 'psmComponent', 'pos' and 'varmodIdx' are non-const references: the three values the "triplet" in the name presumably refers to -- TODO confirm.
00637             bool parseLinkedSiteTriplet(const int q,
00638                                         const int srcRank,
00639                                         const ms_mascotresfile::section sec,
00640                                         const std::string &str,
00641                                         const int alphaVarModsLen,
00642                                         const int betaVarModsLen,
00643                                         ms_peptide::PSM &psmComponent,
00644                                         int &pos,
00645                                         int &varmodIdx) const;
00646 
                  // 'loopLinks' is the only non-const argument; its type matches qp_attribute_lines_t::loopLinks.
00647             bool parseLoopLinkedSites(const int q,
00648                                       const int srcRank,
00649                                       const ms_mascotresfile::section sec,
00650                                       const std::string &loopLinkedSitesStr,
00651                                       const ms_peptide::PSM psmComponent,
00652                                       const std::string &varModsStr,
00653                                       ms_linker_site_vector &loopLinks) const;
00654 
00655             void determineSortingScore(ms_peptide *pep) const;
00656             void loadIntoProteins(const ms_mascotresfile::section sec,
00657                                   int q, int srcRank, int p,
00658                                   double ionsScore,
00659                                   acc_dbidx_set_t * pAccessions,
00660                                   bool isIgnored);
00661 
00662             typedef std::pair<acc_dbidx_t, msparser_internal::ms_SEQINFO> acc_dbidx_match_tuple_t;  // Pairs an acc_dbidx_t with an ms_SEQINFO.
00663             typedef std::list<acc_dbidx_match_tuple_t> acc_dbidx_tuple_list_t;  // std::list of those pairs; the type of proteinInputOrder below.
00664 
00665             void gatherProteinMatchData(acc_dbidx_tuple_list_t &proteinInputOrder,  // Non-const member, declaration only. The list is a non-const reference, so the callee can fill or modify it.
00666                                         const ms_mascotresfile::section sec,
00667                                         std::string str,  // Passed by value: the callee works on its own copy.
00668                                         const int q, const int srcRank, const int p,
00669                                         const ms_peptide::PSM thisPsmComponent,
00670                                         const ms_peptide::SEARCH_PHASE searchPhase,
00671                                         const acc_dbidx_set_t * pAccessions);  // Pointer-to-const: the set is not modified through this parameter.
00672             void insertIntoProtein(const int q,  // Non-const member, declaration only. Every argument is passed by value or const reference, so nothing is written back to the caller.
00673                                    const int p,
00674                                    const std::string &accession,
00675                                    const int dbIdx,
00676                                    const msparser_internal::ms_protein_match_data &proteinMatchData,
00677                                    const ms_peptide::SEARCH_PHASE searchPhase,
00678                                    const double ionsScore,
00679                                    const bool isIgnored);
00680 
00681             double minIonsScoreForMatch(const int q,  // Const member returning a double; declaration only.
00682                                         const ms_mascotresfile::section sec) const;
00683             int               singleHitDbIdx_;  // NOTE(review): by its name a database index used for single-hit handling; exact meaning is not visible here.
00684 
00685             struct componentIdentifier_t {  // Key type of componentIntensities_t (a std::map), which is why operator< is declared below.
00686                 int q_;
00687                 short p_;
00688                 short componentID_;
00689                 componentIdentifier_t(int q, short p, short cid) : q_(q), p_(p), componentID_(cid) { }  // Inline member-wise constructor (no trailing ';' - it would be an empty declaration).
00690                 componentIdentifier_t(const std::string & q, const std::string & p, const std::string & componentID);  // String-based constructor; declaration only.
00691                 std::string asString() const;  // Declaration only; NOTE(review): presumably the inverse of the string-based constructor - confirm.
00692                 bool operator<(const componentIdentifier_t & right) const;  // Ordering used by std::map; declaration only.
00693             };
00694             typedef std::map<componentIdentifier_t, std::pair<double, double> > componentIntensities_t;  // Ordered map keyed by componentIdentifier_t (ordering from its operator<); each value is a pair of doubles.
00695             componentIntensities_t cachedComponentIntensities_;
00696             bool cacheComponentIntensities(bool & isUsable);  // Non-const member, declaration only; isUsable is a non-const reference the callee can set.
00697             typedef std::map<std::string, short> componentNames_t;  // String-to-short lookup; NOTE(review): the short presumably corresponds to componentIdentifier_t::componentID_ - confirm.
00698             componentNames_t componentNames_;
00699             bool componentIntensitiesLoaded_;  // NOTE(review): presumably set once cachedComponentIntensities_ has been populated - confirm.
00700 
00701             bool isPeptideModificationMatch(  // Const member returning bool; declaration only. All inputs are read-only (const reference or const value).
00702                     const ms_quant_modgroup &group,
00703                     const std::string &varModStr,
00704                     const std::string &summedModStr,
00705                     const std::vector<int> &slModifiedPositions,
00706                     const std::string &pepStr,
00707                     const bool isAnyProteinNterminus,
00708                     const bool isAnyProteinCterminus) const;
00709 
00710             // Some private variables and functions for error tolerant search
00711             bool checkErrorTolerantStatus(ms_mascotresfile::section sec,  // Non-const member returning bool; declaration only.
00712                                           int q, int p, double ionsScore, 
00713                                           bool fromET,
00714                                           const std::string & pepStr);
00715             const ms_mascotresfile  * errTolSource_;  // Pointers to const objects (read-only access through them); ownership is not visible here.
00716             const ms_peptidesummary * errTolPepSummary_;
00717             unsigned int        errTolType_;
00718             bool                missingErrTolParent_;
00719             bool                acceptAllErrTolMatches_;
00720 
00721             std::vector<bool>   queryHasSignificant1stPassTarget_, queryHasSignificant1stPassDecoy_;  // NOTE(review): presumably one flag per query - confirm.
00722             mutable std::vector<double> memoFirstPassRank1ScoreTarget_, memoFirstPassRank1ScoreDecoy_;  // mutable, so const member functions can update these memo vectors.
00723             
00724             ms_mascotresfile::section secPeptidesInactive_, secPeptidesActiveCompanion_, secPeptidesInactiveCompanion_;
00725             std::vector<unsigned short> srcRankActiveSection_, srcRankInactiveSection_;  // Entries are unsigned short; NOTE(review): presumably packed with the SR_MASK_* bit fields declared below - confirm.
00726             bool srcRankInitialised_;
00727 
00728             bool rerankingCouldHappen() const;  // Const predicate; declaration only.
00729 
00730             void setSrcRank(int q, int p, int srcRank, ms_mascotresfile::section peptideSec, bool rejected, std::vector<unsigned short> &srcRankVec) const;  // Const member; the target vector is supplied by the caller as a non-const reference.
00731             int getSrcRank(int q, int p, ms_mascotresfile::section & peptideSec, bool * pRejected = 0, bool inactiveSection = false) const;  // peptideSec is a non-const reference (the callee can set it); pRejected defaults to a null pointer and inactiveSection to false.
00732             enum PEP_SECTIONS { PEP_SEC_INVALID         = 0,   // Compact section codes 0..3.
00733                                 PEP_SEC_PEPTIDES        = 1, 
00734                                 PEP_SEC_DECOYPEPTIDES   = 2, 
00735                                 PEP_SEC_ERRTOLPEPTIDES  = 3,
00736                                 // Do not ever add more enumerators here: PEP_SEC_LAST must stay 4. SR_MASK_SEC (0x30) is only two bits wide, so a code above 3 would not fit - NOTE(review): assumes these codes are what is stored in that field; confirm.
00737                                 PEP_SEC_LAST            = 4};  // One past the last valid code; also the length of secLookupRev_.
00738             // To read a field: AND the value with SR_MASK_x and then, if the numeric value is required, shift right by the matching SR_SHIFT_x.
00739             enum SRC_RANK_MASKS { SR_MASK_RANK      = 0x0F,  // bits 0..3 (4-bit field, values 0..15)
00740                                   SR_MASK_SEC       = 0x30,  // bits 4..5 (2-bit field, values 0..3)
00741                                   SR_MASK_REJECT    = 0x40,  // bit  6     Set if discarded because below threshold or other rule
00742                                   SR_MASK_LIBRARY   = 0x80,  // bit  7
00743 
00744                                   SR_MASK_CROSSLINK = 0x0100, // bit  8 (lowest bit of the upper byte)
00745                                   SR_MASK_ERRTOL    = 0x0200, // bit  9 (second-lowest bit of the upper byte)
00746 
00747                                   SR_SHIFT_RANK     = 0x00,  // shift for SR_MASK_RANK: field starts at bit 0, so no shift is needed
00748                                   SR_SHIFT_SEC      = 0x04,  // shift for SR_MASK_SEC (field starts at bit 4)
00749                                   SR_SHIFT_REJECT   = 0x06,  // shift for SR_MASK_REJECT (bit 6)
00750                                   SR_SHIFT_LIBRARY  = 0x07,  // shift for SR_MASK_LIBRARY (bit 7)
00751 
00752                                   SR_SHIFT_CROSSLINK = 0x08, // shift for SR_MASK_CROSSLINK (bit 8)
00753                                   SR_SHIFT_ERRTOL    = 0x09  // shift for SR_MASK_ERRTOL (bit 9)
00754                                   };
00755             unsigned char secLookupFwd_[ms_mascotresfile::SEC_NUMSECTIONS];  // One byte per results-file section; NOTE(review): presumably maps a section to its PEP_SECTIONS code - confirm.
00756             int secLookupRev_[PEP_SEC_LAST];  // PEP_SEC_LAST (4) entries, one per PEP_SECTIONS code; NOTE(review): presumably the reverse mapping - confirm.
00757 
00758             void getUnassignedListAsString(std::string & str);  // Non-const member, declaration only; str is a non-const reference, presumably the output string - confirm.
00759             bool findCompareProtein(const std::string & accRequired,   // Const member returning bool; declaration only. Takes a required/to-test pair for both accession and database index.
00760                                     const std::string & accToTest, 
00761                                     const int dbIdxRequired,
00762                                     const int dbIdxToTest,
00763                                     FIND_FLAGS item,
00764                                     FIND_COMPARE_FLAGS compareFlags,
00765                                     const ms_protein * prot,  // Pointer-to-const; NOTE(review): whether null is allowed is not visible here.
00766                                     const std::vector<int> & q,   // q and p are read-only vectors; NOTE(review): presumably parallel lists of query/rank values - confirm.
00767                                     const std::vector<int> & p) const;
00768             bool findCompare(const std::string & find,   // String overload: compares 'find' against 'findIn' under compareFlags. Const member, declaration only.
00769                              const std::string & findIn, 
00770                              FIND_COMPARE_FLAGS compareFlags) const;
00771             bool findCompare(const double find_value,   // Numeric overload: takes a value to find, a range and the value to test. NOTE(review): how find_range is applied is not visible here.
00772                              const double find_range,
00773                              const double value,
00774                              FIND_COMPARE_FLAGS compareFlags) const;
00775 
00776             struct distanceInfo_t {  // Plain record used as the mapped value of distanceInfoMap_t.
00777                 double scoreExcess;
00778                 std::string peptideStr;
00779                 bool isFromLibrary;
00780                 distanceInfo_t():  // Default: zero score excess, empty peptide string, not from a library.
00781                     scoreExcess(0.0), peptideStr(), isFromLibrary(false)
00782                 {}
00783                 distanceInfo_t(double se, const std::string & pep, bool fromLib):  // pep is only copied, so it is taken by const reference (also accepts const strings and temporaries).
00784                     scoreExcess(se), peptideStr(pep), isFromLibrary(fromLib)
00785                 {}
00786             };
00787             typedef std::map<std::pair<int, int>, distanceInfo_t> distanceInfoMap_t;  // Keyed by an (int, int) pair; NOTE(review): presumably (q, p) - confirm.
00788             double getDistance(const distanceInfoMap_t & a, const distanceInfoMap_t & b) const;  // Const member taking two read-only maps; declaration only.
00789             bool readPercolatorOutputFile(bool decoyResults,  // Non-const member returning bool; declaration only.
00790                                           ms_percolator_scores_vector * pVector = 0,  // Optional: defaults to a null pointer.
00791                                           long * pNumAboveIdentity = 0,   // Optional: defaults to a null pointer.
00792                                           double threshold = 0.05);  // Defaults to 0.05.
00793 
00794             typedef std::set<std::pair<int,int> > hitAndFamily_t;  // Ordered set of (int, int) pairs; NOTE(review): presumably (hit, family member) going by the name - confirm.
00795             bool getHitAndFamilyMember(const ms_protein * prot, hitAndFamily_t & hitAndFamily, const UNIQUE_PEP_RULES rules) const;  // Const member, declaration only; hitAndFamily is a non-const reference the callee can fill.
00796 
00797             ms_percolator_scores_vector targetPercolatorScores_;
00798             ms_percolator_scores_vector decoyPercolatorScores_;
00799 
00800             // percolatorScores_ references targetPercolatorScores_ unless 
00801             // MSRES_DECOY is specified, in which case it references 
00802             // decoyPercolatorScores_
00803             ms_percolator_scores_vector & percolatorScores_;   // Reference member: must be bound in each constructor's initialiser list and cannot be re-bound later. NOTE(review): a compiler-generated copy would alias the source object's vector - check how copying is handled.
00804 
00805             //BUG#13406 - NOTE(review): by its name, selects the Percolator q-value as significance threshold; confirm against the bug record.
00806             bool usePercolatorQValSigThres_;
00807 
00808             msparser_internal::ms_peptidesumcdb * pCacheFile_;  // Raw pointers; ownership and lifetime are not visible here.
00809             ms_tinycdb  * pTmpCache_;
00810 
00811             mutable std::vector< std::vector<double> > cachedScores_[2]; // Use cachedScores_[decoy][p][q]. Order important for performance with FDR. mutable, so const member functions can update it.
00812             mutable std::vector<bool> attemptedLoadScoresFromCache_[2];  // Use attemptedLoadScoresFromCache_[decoy][p]. mutable for the same reason as cachedScores_.
00813 
00814             /* emPAI related values and helper methods */
00815 
00816             bool emPAIminmaxDone_;  // NOTE(review): the two *Done_ flags presumably record that the corresponding init_* routine below has already run - confirm.
00817             int emPAIminCleavageN_;
00818             int emPAImaxCleavageN_;
00819             bool emPAInetCleavageDone_;
00820             double netCleavageAbundance_;
00821 
00822             void init_emPAImzRange();  // Non-const initialisers (may change this object); declarations only.
00823             void init_netCleavageAbundance();
00824             double calculate_AA_abundance(std::string) const;  // Parameter is unnamed in this declaration and taken by value.
00825             double calculate_emPAI_num_observable(int) const;  // Parameter is unnamed in this declaration.
00826 
00827             mutable std::map<std::string, char> modfileLookup_;  // All members in this group are mutable, so const member functions can update them.
00828             mutable bool quantMethodCached_;  // NOTE(review): presumably true once quantMethod_ has been resolved - confirm.
00829             mutable ms_quant_method * quantMethod_;  // Raw pointer; ownership is not visible here.
00830             mutable bool crosslinkingMethodCached_;  // NOTE(review): presumably true once crosslinkingMethod_ has been resolved - confirm.
00831             mutable ms_crosslinking_method * crosslinkingMethod_;  // Raw pointer; ownership is not visible here.
00832 
00833             mutable std::map< std::pair<int, int>, bool > uniquePepLookup_; // only used when no cache file
00834 
00835             std::vector<int> parseDBstr(const std::string &strDB, bool &has_fasta_acc, bool &has_sl_ref_acc) const;  // Const member returning a vector<int> by value; the two bool references can be set by the callee. Declaration only.
00836             void parseMatchAccessionDBTermsLines(const int q,  // Const member, declaration only.
00837                                                  const int p,
00838                                                  const int srcRank,
00839                                                  const ms_mascotresfile::section sec,
00840                                                  const ms_peptide::PSM thisPsmComponent,
00841                                                  std::string str,  // Passed by value: the callee works on its own copy.
00842                                                  std::vector<std::string> & acc,  // From here on, every parameter is a non-const vector reference the callee can fill. NOTE(review): presumably parallel arrays with one entry per protein match - confirm.
00843                                                  std::vector<int> & start, 
00844                                                  std::vector<int> & end,
00845                                                  std::vector<std::string> &pre,
00846                                                  std::vector<std::string> &post,
00847                                                  std::vector<int> & frame,
00848                                                  std::vector<int> & multiplicity,
00849                                                  std::vector<int> & db,
00850                                                  std::vector<int> & psmComponent) const;
00851             
00852             int addExpectationValue(bool homology, DECOY_STATS_COUNT_TYPE countType, bool decoy, int q,  // Non-const member returning int; declaration only.
00853                                      std::map<std::string, size_t> & mapSequenceToIndex, std::vector<structExpectTargetDecoy> & vecExpectTargetDecoy);  // Both containers are non-const references the callee can update.
00854             double truncateExpectationValue(double dValue, int sigfig);  // Non-const member; NOTE(review): presumably limits dValue to sigfig significant figures - confirm.
00855 
00856             bool getQPsFromMapValueToQP(const std::string & strSearch, const map_mod_value_to_qp & mapValueToQP, std::set<std::pair<int, int> > & setQP, bool bCaseSensitive = true) const;  // Const member, declaration only; setQP is a non-const reference the callee can fill, and bCaseSensitive defaults to true.
00857 
00858             void countDBTypes(const std::vector<int> &db, std::vector<int> &counts) const;  // Reads db (const reference); counts is a non-const reference the callee can fill.
00859 
00860             std::string getDecoyPeptideString(int q, int p) const;  // Const member returning a string by value; declaration only.
00861             
00862             bool getPeptideFromCache(const int q, const int p, ms_peptide *  & pep) const;  // pep is a reference to a pointer, so the callee can replace the caller's pointer. NOTE(review): ownership of the pointee is not visible here.
00863             
00864             void dumpFlags(std::string fileName);  // Non-const member taking the file name by value; declaration only.
00865     }; // end of resfile_group
00867 }   // matrix_science namespace
00868 
00869 #endif // MS_MASCOTRESPEPTIDESUM_HPP
00870 
00871 /*------------------------------- End of File -------------------------------*/

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29