00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MS_MASCOTRESPEPTIDESUM_HPP
00020 #define MS_MASCOTRESPEPTIDESUM_HPP
00021
00022
00023
00024 #include <list>
00025 #include <map>
00026 #include <set>
00027 #include <string>
00028 #include <vector>
00029
// Forward declarations of implementation-side types. Only the names are
// needed in this header: they appear below in friend declarations, pointer
// members, pointer parameters and a typedef.
namespace msparser_internal
{
    class ms_peptidesumcdb;
    class ms_peptide_impl;
    class ms_peptide_impl_reloadable;
    class ms_SEQINFO;
    class ms_peptidesummary_fdr_query_data_extractor;
    class ms_peptidesummary_fdr_peptides_vector_extractor;
}
00038
00039 namespace matrix_science {
// Forward declarations of matrix_science types, so that pointers and
// references to them can be named without their full definitions.
class ms_tinycdb;
class ms_mascotresults_params;
class ms_protein;
class ms_datfile;
class ms_distiller_data;
class ms_distiller_data_search;
class ms_quant_component;
class ms_quant_modgroup;
class ms_linker_site;
00049
//! One record of Percolator-related scores.
/*!
 * A default-constructed record has both scores at 0.0, qValue and
 * percolatorPEP at 1.0, and foundInPopFile set to false.
 */
struct ms_percolator_score
{
    double percolatorScore;  //!< Initialised to 0.0.
    double qValue;           //!< Initialised to 1.0.
    double percolatorPEP;    //!< Initialised to 1.0.
    double mascotScore;      //!< Initialised to 0.0.
    bool   foundInPopFile;   //!< Initialised to false.

    //! Set every member to the defaults listed above.
    ms_percolator_score() :
        percolatorScore(0.0),
        qValue(1.0),
        percolatorPEP(1.0),
        mascotScore(0.0),
        foundInPopFile(false)
    {
    }
};

//! Sequence of ms_percolator_score records.
typedef std::vector<ms_percolator_score> ms_percolator_scores_vector;
00070
//! An expectation value together with a target count and a decoy count.
/*!
 * There is no constructor, so members of a default-initialised automatic
 * object are indeterminate until assigned.
 */
typedef struct structExpectTargetDecoy
{
    double expect;
    int    target;
    int    decoy;

#ifndef SWIG
    //! Reversed ordering on \c expect: \c a < b holds when \c a.expect is the
    //! larger value, so an ascending sort puts the largest \c expect first.
    bool operator<(const structExpectTargetDecoy &other) const
    {
        return other.expect < expect;
    }
#endif
} structExpectTargetDecoy;
00084
00086
00092 class MS_MASCOTRESFILE_API ms_peptidesummary : public matrix_science::ms_mascotresults
00093 {
00094 friend class msparser_internal::ms_peptide_impl;
00095 friend class ms_protein;
00096 friend class msparser_internal::ms_peptidesumcdb;
00097 friend class ms_ms2quantitation;
00098 friend class ms_customquantitation;
00099 friend class ms_quant_helper;
00100 friend class msparser_internal::ms_peptidesummary_fdr_query_data_extractor;
00101 friend class msparser_internal::ms_peptidesummary_fdr_peptides_vector_extractor;
00102 public:
00104
00107 enum QL_FLAG
00108 {
00109 QL_FIRST = 0x0000,
00110 QL_ALL = 0x0000,
00111 QL_UNASSIGNED = 0x0001,
00112 QL_BELOW_IDENTITY = 0x0002,
00113 QL_BELOW_HOMOLOGY = 0x0003,
00114 QL_IGNORE_IONS_SCORE_BELOW = 0x0004,
00115
00116 QL_LAST = 0x0004
00117 };
00118
00120
00123 enum MSPEPSUM
00124 {
00125 MSPEPSUM_NONE = 0x0000,
00126 MSPEPSUM_PERCOLATOR = 0x0001,
00127 MSPEPSUM_USE_CACHE = 0x0002,
00128 MSPEPSUM_SINGLE_HIT_DBIDX = 0x0004,
00129 MSPEPSUM_USE_HOMOLOGY_THRESH= 0x0008,
00130 MSPEPSUM_NO_PROTEIN_GROUPING= 0x0010,
00131 MSPEPSUM_DISCARD_RELOADABLE = 0x0020,
00132 MSPEPSUM_DEFERRED_CREATE = 0x0040,
00133 MSPEPSUM_CACHE_IGNORE_DATE_CHANGE = 0x0080,
00134 MSPEPSUM_REMOVE_CHIMERIC_DUPES = 0x0100,
00135 MSPEPSUM_SL_INTEGRATED = 0x0200,
00136 MSPEPSUM_SL_ONLY = 0x0400,
00137 MSPEPSUM_CROSSLINK_INTEGRATED = 0x0800,
00138 MSPEPSUM_CROSSLINK_ONLY = 0x1000,
00139 };
00140
00142
00156 enum BUGFIX_NUM
00157 {
00158 BUGFIX_10780 = 10780,
00159 BUGFIX_10995 = 10995,
00160 BUGFIX_11002 = 11002,
00161 BUGFIX_11018 = 11018,
00162 BUGFIX_11235 = 11235,
00163 BUGFIX_11254 = 11254,
00164 BUGFIX_11344 = 11344,
00165 BUGFIX_11411 = 11411,
00166 BUGFIX_11425 = 11425,
00167 BUGFIX_11438 = 11438,
00168 BUGFIX_11483 = 11483,
00169 BUGFIX_11499 = 11499,
00170 BUGFIX_11856 = 11856,
00171 BUGFIX_12123 = 12123,
00172 BUGFIX_12317 = 12317,
00173 BUGFIX_11481 = 11481,
00174 BUGFIX_12538 = 12538,
00175 BUGFIX_12729 = 12729,
00176 BUGFIX_12740 = 12740,
00177 BUGFIX_13324 = 13324,
00178 BUGFIX_12447 = 12447,
00179 BUGFIX_13759 = 13759
00180 };
00181
00183
00188 enum CACHE_STATUS {
00189 CACHE_MISSING_RESFILE = 0x10000,
00190
00191 RESFILE_CACHE_FILE_NOT_PRESENT = 0x00001,
00192 RESFILE_CACHE_BEING_CREATED = 0x00002,
00193 RESFILE_CACHE_DISABLED_IN_OPTIONS = 0x00004,
00194 RESFILE_CACHE_VALID = 0x00008,
00195 RESFILE_CACHE_CAN_CREATE = 0x00010,
00196
00197 PEPSUMMARY_CACHE_FILE_NOT_PRESENT = 0x00100,
00198 PEPSUMMARY_CACHE_BEING_CREATED = 0x00200,
00199 PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS = 0x00400,
00200 PEPSUMMARY_CACHE_VALID = 0x00800,
00201 PEPSUMMARY_CACHE_CAN_CREATE = 0x01000,
00202 PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE= 0x02000,
00203 PEPSUMMARY_CACHE_NOT_FOR_PMF = 0x04000
00204 };
00205
00207 static std::string getCacheFilename(
00208 const ms_mascotresfile & resfile,
00209 const unsigned int flags,
00210 double minProbability = 0.0,
00211 int maxHitsToReport = 50,
00212 const char * unigeneIndexFile = 0,
00213 double ignoreIonsScoreBelow = 0.0,
00214 int minPepLenInPepSummary = 0,
00215 const char * singleHit = 0,
00216 const unsigned int flags2 = MSPEPSUM_NONE);
00217
00219 static std::string getCacheFilename(
00220 const ms_mascotresfile & resfile,
00221 const ms_mascotresults_params & parameters);
00222
00224 ms_peptidesummary(const ms_mascotresfile &resfile,
00225 const unsigned int flags = MSRES_GROUP_PROTEINS,
00226 double minProbability = 0.0,
00227 int maxHits = 50,
00228 const char * unigeneIndexFile = 0,
00229 double ignoreIonsScoreBelow = 0.0,
00230 int minPepLenInPepSummary = 0,
00231 const char * singleHit = 0,
00232 const unsigned int flags2 = MSPEPSUM_NONE);
00233
00235 ms_peptidesummary(
00236 const ms_mascotresfile & resfile,
00237 const ms_mascotresults_params & parameters);
00238
00239 virtual ~ms_peptidesummary();
00240
00242 const ms_mascotresfile & mascotresfile() const;
00243
00245 virtual bool createSummary();
00246
00247 virtual bool anyEmPAI() const;
00248
00250 virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;
00251
00253 virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;
00254
00256 virtual void freeHit(const int hit);
00257
00258
00259 enum { PEPS_PER_QUERY = 10 };
00260
00262 virtual ms_peptide getPeptide(const int q, const int p) const;
00263
00265 virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const;
00266
00268 virtual double getIonsScore(const int q, const int p, const bool decoy) const;
00269
00271 virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const;
00272
00274 virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false);
00275
00277 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
00278 std::vector<int> & start,
00279 std::vector<int> & end,
00280 std::vector<std::string> &pre,
00281 std::vector<std::string> &post,
00282 std::vector<int> & frame,
00283 std::vector<int> & multiplicity,
00284 std::vector<int> & db) const;
00285
00287 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
00288 std::vector<int> & start,
00289 std::vector<int> & end,
00290 std::vector<std::string> &pre,
00291 std::vector<std::string> &post,
00292 std::vector<int> & frame,
00293 std::vector<int> & multiplicity,
00294 std::vector<int> & db,
00295 std::vector<int> & psmComponent) const;
00296
00298 std::vector<int> getNumberOfAccessionDBTypes(int q, int p) const;
00299
00301 virtual int getAllFamilyMembersWithThisPepMatch(const int hit,
00302 const int q,
00303 const int p,
00304 std::vector< int >& db,
00305 std::vector< std::string >& acc,
00306 std::vector< int >& dupe_status) const;
00307
00309 std::vector<int> getNumberOfFamilyAccessionDBTypes(int hit, int q, int p, bool includeSamesets = true) const;
00310
00312 virtual std::string getErrTolModString(const int q, const int p) const;
00313
00315 virtual std::string getLibraryModString(const int q, const int p) const;
00316
00318 virtual std::string getErrTolModMasterString(const int q, const int p) const;
00319
00321 virtual std::string getErrTolModSlaveString(const int q, const int p) const;
00322
00324 virtual std::string getErrTolModPepString(const int q, const int p) const;
00325
00327 virtual std::string getErrTolModReqPepString(const int q, const int p) const;
00328
00330 virtual std::string getTagString(const int q, const int p) const;
00331
00333 virtual int getTagDeltaRangeStart(const int q, const int p) const;
00334
00336 virtual int getTagDeltaRangeEnd(const int q, const int p) const;
00337
00339 virtual std::string getTerminalResiduesString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00340
00342 virtual std::string getComponentString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00343
00345 virtual int getProteinScoreCutoff(double OneInXprobRnd) const;
00346
00348 int getSrcRank(int q, int p) const;
00349
00351 ms_mascotresfile::section getSrcSection(int q, int p)const;
00352
00354 std::string getQueryList(QL_FLAG flag, bool outputListOfQueries = true);
00355
00357 virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const;
00358 #ifndef SWIG
00359
00360 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const;
00361 #else // SWIG Multiple return values
00362 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & OUTPUT, double & OUTPUT) const;
00363 #endif
00364
00365 virtual int findProteins(const int startHit,
00366 const std::string & str,
00367 const int dbIdx,
00368 FIND_FLAGS item,
00369 FIND_COMPARE_FLAGS compareFlags,
00370 std::vector<std::string> & accessions,
00371 std::vector<int> & dbIndexes) const;
00372
00374 virtual int findProteinsByAccession(const int startHit,
00375 const std::string & str,
00376 const int dbIdx,
00377 FIND_COMPARE_FLAGS compareFlags,
00378 std::vector<std::string> & accessions,
00379 std::vector<int> & dbIndexes) const;
00380
00382 virtual int findProteinsByDescription(const int startHit,
00383 const std::string & str,
00384 FIND_COMPARE_FLAGS compareFlags,
00385 std::vector<std::string> & accessions,
00386 std::vector<int> & dbIndexes) const;
00387
00389 virtual int findPeptides(const int startHit,
00390 const std::string & str,
00391 FIND_FLAGS item,
00392 FIND_COMPARE_FLAGS compareFlags,
00393 std::vector<int> & q,
00394 std::vector<int> & p) const;
00395
00397 virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide(
00398 const ms_peptide & peptide,
00399 ms_quant_component & component,
00400 const ms_quant_method * method = NULL) const;
00401
00403 virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;
00404
00406 virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;
00407
00409 virtual bool getTreeClusterNodes(const int hit,
00410 std::vector<int> &left,
00411 std::vector<int> &right,
00412 std::vector<double> &distance,
00413 TREE_CLUSTER_METHOD tcm = TCM_PAIRWISE_MAXIMUM,
00414 double *** reserved1 = 0,
00415 unsigned int * reserved2 = 0) const;
00416
00418 static bool willCreateCache(const ms_mascotresfile &resfile,
00419 const unsigned int flags = MSRES_GROUP_PROTEINS,
00420 double minProbability = 0.0,
00421 int maxHits = 50,
00422 const char * unigeneIndexFile = 0,
00423 double ignoreIonsScoreBelow = 0.0,
00424 int minPepLenInPepSummary = 0,
00425 const char * singleHit = 0,
00426 const unsigned int flags2 = MSPEPSUM_NONE);
00427
00429 static bool willCreateCache(const ms_mascotresfile &resfile,
00430 const ms_mascotresults_params & parameters);
00431
00432 #ifndef SWIG
00433
00434 static bool willCreateCache(const char * resultsFileName,
00435 const ms_mascotoptions & opts,
00436 const char * unigeneIndexFile,
00437 const char * singleHit,
00438 const char * applicationName,
00439 std::string & resfileCacheFileName,
00440 std::string & peptideSummaryCacheFileName,
00441 unsigned int & cacheStatus);
00442
00444 static bool willCreateCache(const ms_mascotresfile & resfile,
00445 const ms_mascotresults_params & parameters,
00446 const ms_mascotoptions & opts,
00447 std::string & peptideSummaryCacheFileName,
00448 unsigned int & cacheStatus);
00449 #else // SWIG Multiple return values
00450 static bool willCreateCache(const char * resultsFileName,
00451 const ms_mascotoptions & opts,
00452 const char * unigeneIndexFile,
00453 const char * singleHit,
00454 const char * applicationName,
00455 std::string & OUTPUT,
00456 std::string & OUTPUT,
00457 unsigned int & OUTPUT);
00458
00459 static bool willCreateCache(const ms_mascotresfile & resfile,
00460 const ms_mascotresults_params & parameters,
00461 const ms_mascotoptions & opts,
00462 std::string & OUTPUT,
00463 unsigned int & OUTPUT);
00464 #endif
00465
00467 bool isPeptideModificationMatch(
00468 const ms_quant_modgroup & group,
00469 const ms_peptide & peptide) const;
00470
00472 bool isPeptideComponentMatch(
00473 const ms_quant_component & component,
00474 const ms_peptide & peptide) const;
00475
00477 bool hasQuantMethod() const;
00479 const ms_quant_method * getQuantMethod() const;
00480
00482 bool hasCrosslinkingMethod() const;
00484 const ms_crosslinking_method * getCrosslinkingMethod() const;
00485
00487 std::string getCacheFileName() const;
00488
00490 bool isDataCached(BUGFIX_NUM bugNum) const;
00491
00493 virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated = true);
00494
00496 virtual bool isValidQandP(const int q, const int p) const;
00497
00499 bool dumpCDB(const std::string dumpFileName);
00500
00502 void getLibraryEntryId(const int q, const int p, std::vector<int> &dbIdx, std::vector<int> &offset, std::vector<std::string> &checksum, std::vector<std::string> &mods) const;
00503
00505 static double getMinProbabilityForSLScore(double score);
00506
00508 static double getSLThresholdFromMinProbability(double minProbability);
00509
00511 std::vector<int> getPeptideAmbiguityRanks(const int q, const int p);
00512
00513 protected:
00514
00515 #ifndef SWIG
00516 ms_peptidesummary(const ms_peptidesummary & rhs);
00517 ms_peptidesummary & operator=(const ms_peptidesummary & rhs);
00518 #endif
00519
00520 virtual bool queryHasSignificantRank1Match(ms_mascotresfile::section secSummary,
00521 int query) const;
00522
00523 double getFirstPassRank1Score(int query, bool decoy) const;
00524
00525 virtual bool getThresholdForFDR(bool homology, double targetFDR,
00526 DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType,
00527 double * closestFDR, double * minProbability,
00528 int * pNumTargetMatches, int * pNumDecoyMatches);
00529 void calculateDecoyStats(double dOneInXprobRnd);
00530 virtual void getQmatchValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
00531 virtual void getPlugholeValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const;
00532 virtual ms_mascotresfile::section getSrcSectionProtected(const int query, const int rank) const;
00533
00534 virtual bool cacheModificationCounts();
00535
00536 private:
00537
00538 ms_errs* getLogger() const;
00539
00540 static ms_mascotresults_params getValidatedResultsParameters(const ms_mascotresfile &resfile, const ms_mascotresults_params ¶ms,
00541 double * minProbabilityForCache = 0);
00542
00543 void initialise(const ms_mascotresults_params& parameters, double minProbabilityForCache);
00544
00545 void loadQuery(int q, std::vector<ms_peptide*> &peptides,
00546 int rankStart, int rankEnd,
00547 ms_mascotresfile::section secPeptides,
00548 ms_mascotresfile::section secPeptidesCompanion);
00549
00550 void accumulateSameSequenceScores(int q, const std::vector<ms_peptide*> &peptides,
00551 std::vector< std::pair<double, double> > *observed_rank_1_same_seq,
00552 std::vector<double> *library_rank_1_scores) const;
00553
00554 void determineFirstPassSignificance(const std::vector<ms_peptide*> &peptides,
00555 ms_mascotresfile::section secSummary,
00556 std::vector<bool> &queryHasSignificantFirstPass,
00557 std::vector<double> &memoFirstPassRank1Score) const;
00558
00559 void detectChimericDuplicates(const std::list<int> &query_set,
00560 const std::vector<ms_peptide*> &peptides,
00561 std::set<int> &is_chimeric_duplicate) const;
00562 void massageQuery(int q, std::vector<ms_peptide*> &peptides,
00563 ms_mascotresfile::section secPeptides,
00564 ms_mascotresfile::section secPeptidesCompanion,
00565 std::vector<unsigned short> &srcRank,
00566 const std::set<int> &is_chimeric_duplicate,
00567 bool load_proteins);
00568 void determinePrettyRanksAndAmbiguityGroupIds(const int q, std::vector<ms_peptide*> &peptides);
00569 void loadIonsScoresIntoCache(const bool decoy, const int p=1);
00570
00571 ms_peptide * loadPepRes(const ms_mascotresfile::section sec,
00572 int q, int srcRank, int p,
00573 msparser_internal::ms_peptide_impl_reloadable * * pReloadable = 0,
00574 bool *canLoadIntoProteins = 0);
00575
00576 typedef struct {
00577 std::string primaryNlStr;
00578 std::string substStr;
00579 std::string componentStr;
00580 std::string summedModsStr;
00581 std::string summedModsNlStr;
00582 std::string localModsStr;
00583 std::string parsedLocalModsStr;
00584 std::string localModsNlStr;
00585 std::string monoLinkStr;
00586 ms_linker_site_vector loopLinks;
00587 } qp_attribute_lines_t;
00588
00589 struct psm_seq_attributes_t {
00590 psm_seq_attributes_t()
00591 : mrCalc(0.0)
00592 , missedCleavages(-1)
00593 , peptideStr()
00594 , varModsStr()
00595 {}
00596 double mrCalc;
00597 int missedCleavages;
00598 std::string peptideStr, varModsStr;
00599 };
00600
00601 void loadAdditionalMatchAttributes(const ms_mascotresfile::section sec,
00602 const int q, const int srcRank,
00603 const ms_peptide::PSM psmComponent,
00604 const std::string &varModsStr,
00605 qp_attribute_lines_t &lines) const;
00606
00607 int loadPepResCharge(const ms_mascotresfile::section chargeSection, int q) const;
00608
00609 ms_peptide * loadPepResLinear(const ms_mascotresfile::section sec,
00610 int q, int srcRank, int p,
00611 double observed,
00612 ms_peptide::SEARCH_PHASE searchPhase,
00613 msparser_internal::ms_peptide_impl_reloadable * * pReloadable,
00614 bool *canLoadIntoProteins,
00615 bool &etOK);
00616 ms_peptide * loadPepResCrossLinked(const ms_mascotresfile::section sec,
00617 int q, int srcRank, int p,
00618 double observed,
00619 ms_peptide::SEARCH_PHASE searchPhase,
00620 msparser_internal::ms_peptide_impl_reloadable * * pReloadable,
00621 bool *canLoadIntoProteins,
00622 bool &etOK);
00623
00624 bool loadCrossLinkedSequenceData(const int q,
00625 const int srcRank,
00626 const ms_mascotresfile::section sec,
00627 const std::string &str,
00628 const ms_peptide::PSM psmComponent,
00629 psm_seq_attributes_t &seq,
00630 qp_attribute_lines_t &attributeLines) const;
00631
00632 ms_linker_site parseLinkerSite(const int q,
00633 const int srcRank,
00634 const ms_mascotresfile::section sec,
00635 const int alphaVarModsLen,
00636 const int betaVarModsLen) const;
00637 bool parseLinkedSiteTriplet(const int q,
00638 const int srcRank,
00639 const ms_mascotresfile::section sec,
00640 const std::string &str,
00641 const int alphaVarModsLen,
00642 const int betaVarModsLen,
00643 ms_peptide::PSM &psmComponent,
00644 int &pos,
00645 int &varmodIdx) const;
00646
00647 bool parseLoopLinkedSites(const int q,
00648 const int srcRank,
00649 const ms_mascotresfile::section sec,
00650 const std::string &loopLinkedSitesStr,
00651 const ms_peptide::PSM psmComponent,
00652 const std::string &varModsStr,
00653 ms_linker_site_vector &loopLinks) const;
00654
00655 void determineSortingScore(ms_peptide *pep) const;
00656 void loadIntoProteins(const ms_mascotresfile::section sec,
00657 int q, int srcRank, int p,
00658 double ionsScore,
00659 acc_dbidx_set_t * pAccessions,
00660 bool isIgnored);
00661
00662 typedef std::pair<acc_dbidx_t, msparser_internal::ms_SEQINFO> acc_dbidx_match_tuple_t;
00663 typedef std::list<acc_dbidx_match_tuple_t> acc_dbidx_tuple_list_t;
00664
00665 void gatherProteinMatchData(acc_dbidx_tuple_list_t &proteinInputOrder,
00666 const ms_mascotresfile::section sec,
00667 std::string str,
00668 const int q, const int srcRank, const int p,
00669 const ms_peptide::PSM thisPsmComponent,
00670 const ms_peptide::SEARCH_PHASE searchPhase,
00671 const acc_dbidx_set_t * pAccessions);
00672 void insertIntoProtein(const int q,
00673 const int p,
00674 const std::string &accession,
00675 const int dbIdx,
00676 const msparser_internal::ms_protein_match_data &proteinMatchData,
00677 const ms_peptide::SEARCH_PHASE searchPhase,
00678 const double ionsScore,
00679 const bool isIgnored);
00680
00681 double minIonsScoreForMatch(const int q,
00682 const ms_mascotresfile::section sec) const;
00683 int singleHitDbIdx_;
00684
00685 struct componentIdentifier_t {
00686 int q_;
00687 short p_;
00688 short componentID_;
00689 componentIdentifier_t(int q, short p, short cid) : q_(q), p_(p), componentID_(cid) { };
00690 componentIdentifier_t(const std::string & q, const std::string & p, const std::string & componentID);
00691 std::string asString() const;
00692 bool operator<(const componentIdentifier_t & right) const;
00693 };
00694 typedef std::map<componentIdentifier_t, std::pair<double, double> > componentIntensities_t;
00695 componentIntensities_t cachedComponentIntensities_;
00696 bool cacheComponentIntensities(bool & isUsable);
00697 typedef std::map<std::string, short> componentNames_t;
00698 componentNames_t componentNames_;
00699 bool componentIntensitiesLoaded_;
00700
00701 bool isPeptideModificationMatch(
00702 const ms_quant_modgroup &group,
00703 const std::string &varModStr,
00704 const std::string &summedModStr,
00705 const std::vector<int> &slModifiedPositions,
00706 const std::string &pepStr,
00707 const bool isAnyProteinNterminus,
00708 const bool isAnyProteinCterminus) const;
00709
00710
00711 bool checkErrorTolerantStatus(ms_mascotresfile::section sec,
00712 int q, int p, double ionsScore,
00713 bool fromET,
00714 const std::string & pepStr);
00715 const ms_mascotresfile * errTolSource_;
00716 const ms_peptidesummary * errTolPepSummary_;
00717 unsigned int errTolType_;
00718 bool missingErrTolParent_;
00719 bool acceptAllErrTolMatches_;
00720
00721 std::vector<bool> queryHasSignificant1stPassTarget_, queryHasSignificant1stPassDecoy_;
00722 mutable std::vector<double> memoFirstPassRank1ScoreTarget_, memoFirstPassRank1ScoreDecoy_;
00723
00724 ms_mascotresfile::section secPeptidesInactive_, secPeptidesActiveCompanion_, secPeptidesInactiveCompanion_;
00725 std::vector<unsigned short> srcRankActiveSection_, srcRankInactiveSection_;
00726 bool srcRankInitialised_;
00727
00728 bool rerankingCouldHappen() const;
00729
00730 void setSrcRank(int q, int p, int srcRank, ms_mascotresfile::section peptideSec, bool rejected, std::vector<unsigned short> &srcRankVec) const;
00731 int getSrcRank(int q, int p, ms_mascotresfile::section & peptideSec, bool * pRejected = 0, bool inactiveSection = false) const;
00732 enum PEP_SECTIONS { PEP_SEC_INVALID = 0,
00733 PEP_SEC_PEPTIDES = 1,
00734 PEP_SEC_DECOYPEPTIDES = 2,
00735 PEP_SEC_ERRTOLPEPTIDES = 3,
00736
00737 PEP_SEC_LAST = 4};
00738
00739 enum SRC_RANK_MASKS { SR_MASK_RANK = 0x0F,
00740 SR_MASK_SEC = 0x30,
00741 SR_MASK_REJECT = 0x40,
00742 SR_MASK_LIBRARY = 0x80,
00743
00744 SR_MASK_CROSSLINK = 0x0100,
00745 SR_MASK_ERRTOL = 0x0200,
00746
00747 SR_SHIFT_RANK = 0x00,
00748 SR_SHIFT_SEC = 0x04,
00749 SR_SHIFT_REJECT = 0x06,
00750 SR_SHIFT_LIBRARY = 0x07,
00751
00752 SR_SHIFT_CROSSLINK = 0x08,
00753 SR_SHIFT_ERRTOL = 0x09
00754 };
00755 unsigned char secLookupFwd_[ms_mascotresfile::SEC_NUMSECTIONS];
00756 int secLookupRev_[PEP_SEC_LAST];
00757
00758 void getUnassignedListAsString(std::string & str);
00759 bool findCompareProtein(const std::string & accRequired,
00760 const std::string & accToTest,
00761 const int dbIdxRequired,
00762 const int dbIdxToTest,
00763 FIND_FLAGS item,
00764 FIND_COMPARE_FLAGS compareFlags,
00765 const ms_protein * prot,
00766 const std::vector<int> & q,
00767 const std::vector<int> & p) const;
00768 bool findCompare(const std::string & find,
00769 const std::string & findIn,
00770 FIND_COMPARE_FLAGS compareFlags) const;
00771 bool findCompare(const double find_value,
00772 const double find_range,
00773 const double value,
00774 FIND_COMPARE_FLAGS compareFlags) const;
00775
00776 struct distanceInfo_t {
00777 double scoreExcess;
00778 std::string peptideStr;
00779 bool isFromLibrary;
00780 distanceInfo_t():
00781 scoreExcess(0.0), peptideStr(), isFromLibrary(false)
00782 {}
00783 distanceInfo_t(double se, std::string & pep, bool fromLib):
00784 scoreExcess(se), peptideStr(pep), isFromLibrary(fromLib)
00785 {}
00786 };
00787 typedef std::map<std::pair<int, int>, distanceInfo_t> distanceInfoMap_t;
00788 double getDistance(const distanceInfoMap_t & a, const distanceInfoMap_t & b) const;
00789 bool readPercolatorOutputFile(bool decoyResults,
00790 ms_percolator_scores_vector * pVector = 0,
00791 long * pNumAboveIdentity = 0,
00792 double threshold = 0.05);
00793
00794 typedef std::set<std::pair<int,int> > hitAndFamily_t;
00795 bool getHitAndFamilyMember(const ms_protein * prot, hitAndFamily_t & hitAndFamily, const UNIQUE_PEP_RULES rules) const;
00796
00797 ms_percolator_scores_vector targetPercolatorScores_;
00798 ms_percolator_scores_vector decoyPercolatorScores_;
00799
00800
00801
00802
00803 ms_percolator_scores_vector & percolatorScores_;
00804
00805
00806 bool usePercolatorQValSigThres_;
00807
00808 msparser_internal::ms_peptidesumcdb * pCacheFile_;
00809 ms_tinycdb * pTmpCache_;
00810
00811 mutable std::vector< std::vector<double> > cachedScores_[2];
00812 mutable std::vector<bool> attemptedLoadScoresFromCache_[2];
00813
00814
00815
00816 bool emPAIminmaxDone_;
00817 int emPAIminCleavageN_;
00818 int emPAImaxCleavageN_;
00819 bool emPAInetCleavageDone_;
00820 double netCleavageAbundance_;
00821
00822 void init_emPAImzRange();
00823 void init_netCleavageAbundance();
00824 double calculate_AA_abundance(std::string) const;
00825 double calculate_emPAI_num_observable(int) const;
00826
00827 mutable std::map<std::string, char> modfileLookup_;
00828 mutable bool quantMethodCached_;
00829 mutable ms_quant_method * quantMethod_;
00830 mutable bool crosslinkingMethodCached_;
00831 mutable ms_crosslinking_method * crosslinkingMethod_;
00832
00833 mutable std::map< std::pair<int, int>, bool > uniquePepLookup_;
00834
00835 std::vector<int> parseDBstr(const std::string &strDB, bool &has_fasta_acc, bool &has_sl_ref_acc) const;
00836 void parseMatchAccessionDBTermsLines(const int q,
00837 const int p,
00838 const int srcRank,
00839 const ms_mascotresfile::section sec,
00840 const ms_peptide::PSM thisPsmComponent,
00841 std::string str,
00842 std::vector<std::string> & acc,
00843 std::vector<int> & start,
00844 std::vector<int> & end,
00845 std::vector<std::string> &pre,
00846 std::vector<std::string> &post,
00847 std::vector<int> & frame,
00848 std::vector<int> & multiplicity,
00849 std::vector<int> & db,
00850 std::vector<int> & psmComponent) const;
00851
00852 int addExpectationValue(bool homology, DECOY_STATS_COUNT_TYPE countType, bool decoy, int q,
00853 std::map<std::string, size_t> & mapSequenceToIndex, std::vector<structExpectTargetDecoy> & vecExpectTargetDecoy);
00854 double truncateExpectationValue(double dValue, int sigfig);
00855
00856 bool getQPsFromMapValueToQP(const std::string & strSearch, const map_mod_value_to_qp & mapValueToQP, std::set<std::pair<int, int> > & setQP, bool bCaseSensitive = true) const;
00857
00858 void countDBTypes(const std::vector<int> &db, std::vector<int> &counts) const;
00859
00860 std::string getDecoyPeptideString(int q, int p) const;
00861
00862 bool getPeptideFromCache(const int q, const int p, ms_peptide * & pep) const;
00863
00864 void dumpFlags(std::string fileName);
00865 };
00867 }
00868
00869 #endif // MS_MASCOTRESPEPTIDESUM_HPP
00870
00871