00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MS_MASCOTRESULTS_HPP
00020 #define MS_MASCOTRESULTS_HPP
00021
00022
00023
00024 #include <string>
00025 #include <list>
00026 #include <vector>
00027 #include <set>
00028 #include <map>
00029 #include <algorithm>
00030
// Forward declarations of internal helper classes. This header only names them
// (friend declarations, pointer members and reference parameters further down),
// so their full definitions are not required here.
00031 namespace msparser_internal {
00032 class ms_proteininference;
00033 class ms_unassigned;
00034 class ms_peptidesumcdb;
00035 class ms_protein_match_data;
00036 }
00037
00038 namespace matrix_science {
// Forward declarations of matrix_science classes referenced by ms_mascotresults.
// NOTE(review): later declarations use nested names such as
// ms_mascotresfile::section and ms_peptide::PSM, return ms_peptide by value and
// mention ms_quant_method, none of which a forward declaration alone can satisfy;
// presumably an including header supplies the full definitions first - confirm.
00044 class ms_unigene;
00045 class ms_protein;
00046 class ms_peptide;
00047 class ms_mascotresfile;
00048 class ms_quant_component;
00049 class ms_mascotresults_params;
00050
00051 #ifndef SWIG
00052
// Binary predicate over two strings whose case sensitivity is fixed at
// construction time. operator() is only declared here; going by the name it is
// presumably a lexicographic "less than" - confirm against the definition.
// NOTE(review): the constructor is not explicit, so a bool converts implicitly
// to a LexicoCompare.
00053 struct LexicoCompare
00054 {
// Stored copy of the constructor argument.
00055 bool caseSensitive_;
00056 LexicoCompare(bool caseSensitive) : caseSensitive_(caseSensitive) { }
00057 bool operator()(const std::string& left, const std::string& right ) const;
00058 };
00059 #endif
00060
00062
00079 class MS_MASCOTRESFILE_API ms_mascotresults
00080 {
00081 friend class ms_protein;
00082 friend class msparser_internal::ms_proteininference;
00083 friend class msparser_internal::ms_peptidesumcdb;
00084
00085 public:
00086
00088
// Bit-flag enumerators (MSRES_*). Apart from the two zero-valued entries
// (MSRES_NOFLAG, MSRES_DUPE_INCL_IN_SCORE_NONE) and the combined
// MSRES_DUPE_DEFAULT, each enumerator is a single distinct bit.
// Bit 0x00001000 is not assigned to any enumerator in this list.
00095 enum FLAGS
00096 {
00097
00098 MSRES_NOFLAG = 0x00000000,
00099 MSRES_GROUP_PROTEINS = 0x00000001,
00100 MSRES_SHOW_SUBSETS = 0x00000002,
00101 MSRES_SUBSETS_DIFF_PROT = 0x00000004,
00102 MSRES_REQUIRE_BOLD_RED = 0x00000008,
00103 MSRES_SHOW_ALL_FROM_ERR_TOL = 0x00000010,
00104 MSRES_IGNORE_PMF_MIXTURE = 0x00000020,
00105 MSRES_MUDPIT_PROTEIN_SCORE = 0x00000040,
00106 MSRES_DECOY = 0x00000080,
00107 MSRES_INTEGRATED_ERR_TOL = 0x00000100,
00108 MSRES_ERR_TOL = 0x00000200,
00109 MSRES_MAXHITS_OVERRIDES_MINPROB = 0x00000400,
00110 MSRES_CLUSTER_PROTEINS = 0x00000800,
00111
// "Include in score" group: NONE is 0 (same value as MSRES_NOFLAG), A..I use
// bits 0x00002000 through 0x00200000.
00112 MSRES_DUPE_INCL_IN_SCORE_NONE = 0x00000000,
00113 MSRES_DUPE_INCL_IN_SCORE_A = 0x00002000,
00114 MSRES_DUPE_INCL_IN_SCORE_B = 0x00004000,
00115 MSRES_DUPE_INCL_IN_SCORE_C = 0x00008000,
00116 MSRES_DUPE_INCL_IN_SCORE_D = 0x00010000,
00117 MSRES_DUPE_INCL_IN_SCORE_E = 0x00020000,
00118 MSRES_DUPE_INCL_IN_SCORE_F = 0x00040000,
00119 MSRES_DUPE_INCL_IN_SCORE_G = 0x00080000,
00120 MSRES_DUPE_INCL_IN_SCORE_H = 0x00100000,
00121 MSRES_DUPE_INCL_IN_SCORE_I = 0x00200000,
// "Remove" group: unlike the include group, NONE here has its own bit
// (0x00400000) rather than being zero.
00122 MSRES_DUPE_REMOVE_NONE = 0x00400000,
00123 MSRES_DUPE_REMOVE_A = 0x00800000,
00124 MSRES_DUPE_REMOVE_B = 0x01000000,
00125 MSRES_DUPE_REMOVE_C = 0x02000000,
00126 MSRES_DUPE_REMOVE_D = 0x04000000,
00127 MSRES_DUPE_REMOVE_E = 0x08000000,
00128 MSRES_DUPE_REMOVE_F = 0x10000000,
00129 MSRES_DUPE_REMOVE_G = 0x20000000,
00130 MSRES_DUPE_REMOVE_H = 0x40000000,
// The top bit does not fit in a signed 32-bit int, so the enumerator is spelled
// differently for the C++ compiler and for SWIG.
// NOTE(review): unchecked((int)...) looks like C# cast syntax intended for the
// generated wrapper - confirm.
00131 #ifndef SWIG // 'live' version for C++ compiler
00132 MSRES_DUPE_REMOVE_I = 0x80000000,
00133 #else // SWIG version
00134
00135 MSRES_DUPE_REMOVE_I = unchecked((int)0x80000000),
00136 #endif
// 0x04800000 == MSRES_DUPE_REMOVE_A | MSRES_DUPE_REMOVE_D.
00137 MSRES_DUPE_DEFAULT = 0x04800000
00138 };
00139
00141
// Sort key selector; enumerators take the implicit values 0, 1 and 2.
// Used as the parameter type of createUnassignedList(), whose default is QUERY.
00144 enum sortBy
00145 {
00146 QUERY,
00147 SCORE,
00148 INTENSITY
00149
00150 };
00151
// Single-bit flags (0x0001 .. 0x0010), so several can be combined in one value.
// NOTE(review): presumably these are the bits that make up the unsigned short
// dupeFlags passed to queryRemoveThisPeptide()/queryScoreThisPeptide() - confirm.
00160 enum dupeStatus
00161 {
00162 dupe_query_same = 0x0001,
00163 dupe_seq_same = 0x0002,
00164 dupe_mods_same = 0x0004,
00165 dupe_pos_same = 0x0008,
00166
00167
00168
00169
00170 dupe_protein_same = 0x0010
00171 };
00172
00174
// Clustering method selector with the values 1..3. TCM_FIRST_VALUE and
// TCM_LAST_VALUE are aliases of the lowest and highest method values
// (TCM_PAIRWISE_SINGLE and TCM_PAIRWISE_AVERAGE), giving the valid range.
// getTreeClusterNodes() defaults to TCM_PAIRWISE_MAXIMUM.
00177 enum TREE_CLUSTER_METHOD
00178 {
00179 TCM_FIRST_VALUE = 0x0001,
00180 TCM_PAIRWISE_SINGLE = 0x0001,
00181 TCM_PAIRWISE_MAXIMUM = 0x0002,
00182 TCM_PAIRWISE_AVERAGE = 0x0003,
00183
00184 TCM_LAST_VALUE = 0x0003
00185 };
00186
00188
// Selects which item a find operation matches against; passed as the "item"
// argument of findProteins() and findPeptides(). Each item is a single bit.
00195 enum FIND_FLAGS
00196 {
// Peptide items occupy bits 0x001 .. 0x400.
00197 FT_PEPTIDE_EXP_MZ = 0x00000001,
00198 FT_PEPTIDE_EXP_MR = 0x00000002,
00199 FT_PEPTIDE_CALC_MZ = 0x00000004,
00200 FT_PEPTIDE_CALC_MR = 0x00000008,
00201 FT_PEPTIDE_STRING = 0x00000010,
00202 FT_PEPTIDE_QUERY = 0x00000020,
00203
00204 FT_PEPTIDE_VARMOD = 0x00000040,
00205 FT_PEPTIDE_FIXMOD = 0x00000080,
00206 FT_PEPTIDE_ETMOD = 0x00000100,
00207 FT_PEPTIDE_SLMOD = 0x00000200,
00208 FT_PEPTIDE_VARMOD_BYNAME = 0x00000400,
00209
// Mask covering the low 12 bits, i.e. every FT_PEPTIDE_* item above (bit 0x800
// is inside the mask but currently unassigned).
00210 FT_PEPTIDE_FIND_MASK = 0x00000FFF,
00211
// Protein items sit outside the peptide mask.
00212 FT_PROTEIN_ACCESSION = 0x00001000,
00213 FT_PROTEIN_DESCRIPTION = 0x00002000
00214 };
00215
00217
// Comparison options for the find* functions, packed into one value as several
// independent fields. Each field has an FC_*MASK* enumerator to isolate it.
00238 enum FIND_COMPARE_FLAGS
00239 {
// String-part field (low nibble). These are enumerated values rather than
// independent bits: FC_STARTSTR (3) is numerically FC_COMPLETESTR | FC_SUBSTR,
// so compare (value & FC_MASK_STR_PART) for equality instead of testing bits.
00240 FC_COMPLETESTR = 0x00000001,
00241 FC_SUBSTR = 0x00000002,
00242 FC_STARTSTR = 0x00000003,
00243 FC_STRTOK = 0x00000004,
00244 FC_MASK_STR_PART = 0x0000000F,
00245
// Case field: case-insensitive is the zero value of the field.
00246 FC_CASE_INSENSITIVE = 0x00000000,
00247 FC_CASE_SENSITIVE = 0x00000010,
00248 FC_MASK_CASE = 0x000000F0,
00249
// Direction field: forward is the zero value of the field.
00250 FC_FORWARD = 0x00000000,
00251 FC_REVERSE = 0x00000100,
00252 FC_MASK_DIRECTION = 0x00000F00,
00253
// FC_UNASSIGNED_MASK (0x6000) covers only the two FC_LOOP_* bits;
// FC_RESTRICT_TO_HIT (0x1000) is outside it.
00254 FC_RESTRICT_TO_HIT = 0x00001000,
00255 FC_LOOP_INTO_UNASSIGNED = 0x00002000,
00256 FC_LOOP_FROM_UNASSIGNED = 0x00004000,
00257 FC_UNASSIGNED_MASK = 0x00006000,
00258
// Stand-alone bit, not covered by any of the masks in this enum.
00259 FC_SEARCH_ALL_RANKS = 0x00008000,
00260
// Both spellings have the same value (0). The second one contains a typo
// ("PEPTITDES"); presumably retained so existing callers keep compiling - confirm.
00261 FC_ALL_PEPTIDES = 0x00000000,
00262 FC_ALL_PEPTITDES = 0x00000000,
00263
00264 FC_SIGNIFICANT_PEPTIDES = 0x00010000,
00265 FC_SEARCH_IGNORED_RANKS = 0x00020000,
00266 FC_SCORING_MASK = 0x000F0000,
00267
00268 FC_PROTEIN_IGN_SAMESETS = 0x00100000,
00269 FC_PROTEIN_IGN_SUBSETS = 0x00200000,
00270 FC_PROTEIN_IGN_FAMILY = 0x00400000,
00271 FC_PROTEIN_IGN_MASK = 0x00F00000,
00272
// Stand-alone bit outside FC_PROTEIN_IGN_MASK.
00273 FC_PROTEIN_INC_ALT_ACC = 0x01000000
00274 };
00275
00277
// Selector passed to getIonsScoreHistogram(); that function defaults to
// IH_INCLUDE_TOP_MATCHES (value 0).
00280 enum IONS_HISTOGRAM
00281 {
00282 IH_INCLUDE_TOP_MATCHES = 0x0000,
00283 IH_INCLUDE_TOP_10_MATCHES = 0x0001
00284
00285
00286
00287
00288 };
00289
00291
// Single-bit rule flags used by isPeptideUnique(), which defaults to UPR_DEFAULT.
00296 enum UNIQUE_PEP_RULES
00297 {
00298 UPR_WITHIN_FAMILY = 0x0001,
00299 UPR_WITHIN_FAMILY_MEMBER = 0x0002,
00300 UPR_IGNORE_SUBSET_PROTEINS = 0x0004,
00301
// Evaluates to 0x0006. The two operands share no bits, so '+' gives the same
// result as '|' here.
00302 UPR_DEFAULT = (UPR_WITHIN_FAMILY_MEMBER + UPR_IGNORE_SUBSET_PROTEINS)
00303 };
00304
00306
// Threshold selector with the values 0..2. getPeptideThreshold() defaults its
// thresholdType argument to TT_PEPSUM_DEFAULT.
00309 enum THRESHOLD_TYPE
00310 {
00311 TT_HOMOLOGY = 0x0000,
00312 TT_IDENTITY = 0x0001,
00313 TT_PEPSUM_DEFAULT = 0x0002
00314 };
00315
00317
// HOMTHR_RANKn has the numeric value n for n = 1..20, so a rank number maps
// directly onto its enumerator. The two non-rank sources use negative values
// and therefore cannot collide with any rank.
00323 enum HOMOLOGY_THRESHOLD_SOURCE
00324 {
00325 HOMTHR_RANK1 = 1,
00326 HOMTHR_RANK2 = 2,
00327 HOMTHR_RANK3 = 3,
00328 HOMTHR_RANK4 = 4,
00329 HOMTHR_RANK5 = 5,
00330 HOMTHR_RANK6 = 6,
00331 HOMTHR_RANK7 = 7,
00332 HOMTHR_RANK8 = 8,
00333 HOMTHR_RANK9 = 9,
00334 HOMTHR_RANK10 = 10,
00335 HOMTHR_RANK11 = 11,
00336 HOMTHR_RANK12 = 12,
00337 HOMTHR_RANK13 = 13,
00338 HOMTHR_RANK14 = 14,
00339 HOMTHR_RANK15 = 15,
00340 HOMTHR_RANK16 = 16,
00341 HOMTHR_RANK17 = 17,
00342 HOMTHR_RANK18 = 18,
00343 HOMTHR_RANK19 = 19,
00344 HOMTHR_RANK20 = 20,
00345
// NOTE(review): "SL" presumably stands for spectral library (compare
// DM_SPECTRAL_LIBRARY) - confirm.
00346 HOMTHR_FASTA = -1,
00347 HOMTHR_SL = -2
00348 };
00349
00351
// Return status of getQuantitationComponentForPeptide(). Enumerators take the
// implicit values 0..5 in declaration order.
// NOTE(review): QCS_OK_MULTIPLE_MATCHES follows the error codes rather than
// sitting with the other QCS_OK_* values; presumably appended later to keep the
// existing numeric values stable - confirm before reordering.
00354 enum QUANT_COMPONENT_STATUS
00355 {
00356 QCS_OK,
00357 QCS_OK_NO_MATCH,
00358 QCS_ERROR_NO_METHOD,
00359 QCS_ERROR_NO_COMPONENTS,
00360 QCS_ERROR_BAD_COMPONENT_NAME,
00361 QCS_OK_MULTIPLE_MATCHES
00362 };
00363
// What is counted for decoy statistics. The numeric values matter:
// decoyStatsOf() uses this enum directly as the second index of decoyStats_.
00365 enum DECOY_STATS_COUNT_TYPE
00366 {
00367 DS_COUNT_PSM = 0,
00368 DS_COUNT_SEQUENCE = 1
00369 };
00370
// Database match selector with the values 0..4. The numeric values matter:
// decoyStatsOf() uses this enum directly as the last index of decoyStats_,
// whose extent is DECOY_STATS_DB_TYPE_LEVELS (5).
00372 enum DB_MATCH_TYPE
00373 {
00374 DM_FASTA = 0,
00375 DM_SPECTRAL_LIBRARY = 1,
00376 DM_BOTH = 2,
00377
00378 DM_FASTA_FIRST_PASS = 3,
00379 DM_FASTA_SECOND_PASS = 4,
00380 };
00381
00382 public:
00383
00384
// Container aliases used throughout the class.
// proteinSet       : ordered set of ms_protein objects held by value.
// acc_dbidx_t      : (string, int) pair - going by the name, an accession and
//                    a database index; confirm against callers.
// q_p_set_t/vect_t : (int, int) pairs - going by the name, a query number and
//                    a peptide rank "p"; confirm against callers.
// The *_set_t forms are ordered and unique; the *_vect_t forms are sequences.
00385 typedef std::set<ms_protein> proteinSet;
00386 typedef std::pair<std::string, int> acc_dbidx_t;
00387 typedef std::set<acc_dbidx_t> acc_dbidx_set_t;
00388 typedef std::set<std::pair<int, int> > q_p_set_t;
00389 typedef std::vector<acc_dbidx_t> acc_dbidx_vect_t;
00390 typedef std::vector<std::pair<int, int> > q_p_vect_t;
00391
00392
// Constructs a results object from a result file and a parameters object,
// both taken by const reference.
// The second parameter previously read as a pilcrow followed by "ms": the
// "&para" prefix of "&params" had been decoded as an HTML entity. Restored here.
00394 ms_mascotresults(const ms_mascotresfile &resfile,
00395 const ms_mascotresults_params &params);
00396 virtual ~ms_mascotresults();
00397
00399 const ms_mascotresfile & getResfile() const;
00400
00402 virtual bool createSummary();
00403
00404 #ifndef SWIG // Can't / don't want to allow std::string * as OUTPUT in Java and C#
00405
// Pointer-based overload; only declared here. The SWIG branch that follows
// marks every one of these arguments as OUTPUT, i.e. they are result slots
// rather than inputs.
00406 bool getCreateSummaryProgress(int * cspTotalPercentComplete,
00407 unsigned int * cspCurrTask,
00408 int * cspCurrTaskPercentageComplete,
00409 std::string * cspAccession,
00410 int * cspHit,
00411 int * cspQuery,
00412 std::string * cspKeepAliveText) const;
00413
// Convenience overload taking the two strings by reference. It forwards every
// argument unchanged to the pointer overload above, passing the addresses of
// the two string references, and returns that call's result.
00414 bool getCreateSummaryProgress(int * cspTotalPercentComplete,
00415 unsigned int * cspCurrTask,
00416 int * cspCurrTaskPercentageComplete,
00417 std::string & cspAccession,
00418 int * cspHit,
00419 int * cspQuery,
00420 std::string & cspKeepAliveText) const {
00421 return getCreateSummaryProgress(cspTotalPercentComplete, cspCurrTask, cspCurrTaskPercentageComplete, &cspAccession, cspHit, cspQuery, &cspKeepAliveText);
00422 }
00423 #else // SWIG Multiple return values
00424 bool getCreateSummaryProgress(int * OUTPUT,
00425 unsigned int * OUTPUT,
00426 int * OUTPUT,
00427 std::string & OUTPUT,
00428 int * OUTPUT,
00429 int * OUTPUT,
00430 std::string & OUTPUT) const;
00431 #endif
00432
00433 void cancelCreateSummary(bool newValue = true);
00434
00436 virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;
00437
00439 virtual void freeHit(const int hit);
00440
00442 virtual int getNumberOfHits() const;
00443
00445 virtual int getNumberOfFamilyMembers() const;
00446
00448 std::string getProteinDescription(const char * accession, const int dbIdx = 1) const;
00449
00451 double getProteinMass(const char * accession, const int dbIdx = 1) const;
00452
00454 virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;
00455
00457 bool isEmPAIallowed() const;
00458
00460 virtual bool anyEmPAI() const;
00461
00463 double getSequenceMass(const char * seq) const;
00464
00466 void getProteinTaxonomyIDs(const char * accession, const int dbIdx,
00467 std::vector<int> & gpt_ids, std::vector<std::string> & gpt_accessions) const;
00468
00470 virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;
00471
00473 virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;
00474
00476 virtual ms_protein * getNextSimilarProtein(const int masterHit, const int id) const;
00477
00479 virtual ms_protein * getNextSimilarProteinOf(const char * masterAccession, const int masterDB, const int id) const;
00480
00482 virtual ms_protein * getNextFamilyProtein(const int masterHit, const int id) const;
00483
00484
00486 virtual ms_protein * getNextSubsetProtein(const int masterHit, const int id,
00487 const bool searchWholeFamily = true) const;
00488
00490 virtual ms_protein * getNextSubsetProteinOf(const char * masterAccession, const int masterDB, const int id) const;
00491
00493 virtual ms_peptide getPeptide(const int q, const int p) const = 0;
00494
00496 virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const = 0;
00497
00499 virtual double getIonsScore(const int q, const int p, const bool decoy) const = 0;
00500
00502 virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const = 0;
00503
00505 virtual int getQmatch(const int query) const;
00506
00508 virtual int getQmatch(const int query, const ms_mascotresfile::section sec) const;
00509
00511 virtual int getPeptideIdentityThreshold(const int query, double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;
00512
00514 virtual int getAvePeptideIdentityThreshold(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;
00515
00517 virtual int getMaxPeptideIdentityThreshold(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;
00518
00520 double getPeptideThreshold(const int query, double OneInXprobRnd, const int rank=1, const THRESHOLD_TYPE thresholdType=TT_PEPSUM_DEFAULT) const;
00521
00522 enum ERROR_TOLERANT_PEPTIDE { ETPEP_YES, ETPEP_NO, ETPEP_UNKNOWN };
00523 #ifndef SWIG
00524 double getPeptideThresholdProtected(const int query, double OneInXprobRnd, const int rank,
00525 const ERROR_TOLERANT_PEPTIDE etPep,
00526 const ms_mascotresfile::section secSummary,
00527 const THRESHOLD_TYPE thresholdType,
00528 const DB_MATCH_TYPE dbType) const;
00529 #endif
00530
00532 virtual int getProteinScoreCutoff(double OneInXprobRnd) const = 0;
00533
00535 virtual int getProteinThreshold(double OneInXprobRnd) const;
00536
00538 virtual int getHomologyThreshold(const int query,
00539 double OneInXprobRnd,
00540 const int rank=1) const;
00541
00543 virtual int getHomologyThresholdForHistogram(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;
00544
00546 virtual int getProbFromScore(const double score) const;
00547
00549 virtual double getPeptideExpectationValue(const double score,
00550 const int query,
00551 const int rank = 0) const;
00552
00554 virtual double getProbOfPepBeingRandomMatch(const double score,
00555 const int query) const;
00556
00558 virtual double getProteinExpectationValue(const double score) const;
00559
00561 virtual double getProbOfProteinBeingRandomMatch(const double score) const;
00562
00564 virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false) = 0;
00565
00567 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
00568 std::vector<int> & start,
00569 std::vector<int> & end,
00570 std::vector<std::string> &pre,
00571 std::vector<std::string> &post,
00572 std::vector<int> & frame,
00573 std::vector<int> & multiplicity,
00574 std::vector<int> & db) const = 0;
00575
00577 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
00578 std::vector<int> & start,
00579 std::vector<int> & end,
00580 std::vector<std::string> &pre,
00581 std::vector<std::string> &post,
00582 std::vector<int> & frame,
00583 std::vector<int> & multiplicity,
00584 std::vector<int> & db,
00585 std::vector<int> & psmComponent) const = 0;
00586
00588 virtual int getAllFamilyMembersWithThisPepMatch(const int hit,
00589 const int q,
00590 const int p,
00591 std::vector< int >& db,
00592 std::vector< std::string >& acc,
00593 std::vector< int >& dupe_status) const = 0;
00594
00595
00597 virtual std::string getErrTolModString(const int q, const int p) const = 0;
00598
00600 virtual std::string getLibraryModString(const int q, const int p) const = 0;
00601
00603 virtual std::string getErrTolModMasterString(const int q, const int p) const = 0;
00604
00606 virtual std::string getErrTolModSlaveString(const int q, const int p) const = 0;
00607
00609 virtual std::string getErrTolModPepString(const int q, const int p) const = 0;
00610
00612 virtual std::string getErrTolModReqPepString(const int q, const int p) const = 0;
00613
00615 virtual std::string getErrTolModName(const int q, const int p, std::string * modString = NULL) const;
00616
00618 virtual double getErrTolModDelta(const int q, const int p, std::string * modString = NULL, std::string * deltaAsString = NULL) const;
00619
00621 virtual double getErrTolModNeutralLoss(const int q, const int p) const;
00622
00624 virtual std::vector<double> getErrTolModMasterNeutralLoss(const int q, const int p) const;
00625
00627 virtual std::vector<double> getErrTolModSlaveNeutralLoss(const int q, const int p) const;
00628
00630 virtual std::vector<double> getErrTolModPepNeutralLoss(const int q, const int p) const;
00631
00633 virtual std::vector<double> getErrTolModReqPepNeutralLoss(const int q, const int p) const;
00634
00636 std::string getVarModsForQP(const int q, const int p, const ms_peptide & pep,
00637 const ms_peptide::PSM psmComponent,
00638 int * numETMods = NULL, int * numLibraryMods = NULL,
00639 std::set<std::string> * listMods = NULL) const;
00640
00642 std::string getLocalModsForQP(const int q, const int p, const ms_peptide & pep, const ms_peptide::PSM psmComponent, std::set<std::string> * listMods = NULL) const;
00643
00645 std::string getLibraryModsForQP(const int q, const int p, const ms_peptide & pep, std::set<std::string> * listMods = NULL) const;
00646
00648 virtual std::string getReadableVarMods(const int q, const int p,
00649 const int numDecimalPlaces=2,
00650 const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00651
00653 virtual bool anyNumDiscoveredMods() const;
00654
00656 virtual std::vector<int> getNumDiscoveredFixedMods(const int num,
00657 std::vector<std::string> &positions,
00658 std::vector<std::string> &sites) const;
00659
00661 virtual std::vector<int> getNumDiscoveredVariableMods(const int num,
00662 std::vector<std::string> &positions,
00663 std::vector<std::string> &sites,
00664 std::vector<double> &deltas,
00665 std::vector<std::string> &names) const;
00666
00668 virtual std::vector<int> getNumDiscoveredVariableMods(const int num, std::vector<std::string> &positions, std::vector<std::string> &sites) const;
00669
00671 virtual std::vector<std::string> getDiscoveredLocalModNames() const;
00672
00674 virtual std::vector<double> getDiscoveredLocalModDeltas(std::vector<std::string> * vecDeltaStrings = NULL) const;
00675
00677 virtual std::vector<int> getNumDiscoveredLocalMods(const std::string modName, std::vector<std::string> &positions, std::vector<std::string> &sites) const;
00678
00680 virtual std::vector<std::string> getDiscoveredErrTolModNames() const;
00681
00683 virtual std::vector<double> getDiscoveredErrTolModDeltas(std::vector<std::string> * vecDeltaStrings = NULL) const;
00684
00686 virtual std::vector<int> getNumDiscoveredErrTolMods(const std::string modName, std::vector<std::string> &positions, std::vector<std::string> &sites) const;
00687
00689 std::vector<int> getNumDiscoveredLibraryMods(const int modId, std::vector<std::string> &positions, std::vector<std::string> &sites) const;
00690
00692 std::vector<int> getNumDiscoveredIntactLinks(std::vector<int> &varModNum1,
00693 std::vector<std::string> &position1,
00694 std::vector<std::string> &site1,
00695 std::vector<int> &varModNum2,
00696 std::vector<std::string> &position2,
00697 std::vector<std::string> &site2,
00698 std::vector<int> &numLinkedPeptides) const;
00699
00701 virtual int getNumDiscoveredNonSpecCleavage() const;
00702
00704 virtual std::string getTagString(const int q, const int p) const = 0;
00705
00707 virtual int getTagStart(const int q, const int p, const int tagNumber) const;
00708
00710 virtual int getTagEnd(const int q, const int p, const int tagNumber) const;
00711
00713 virtual int getTagSeries(const int q, const int p, const int tagNumber) const;
00714
00716 virtual int getTagDeltaRangeStart(const int q, const int p) const = 0;
00717
00719 virtual int getTagDeltaRangeEnd(const int q, const int p) const = 0;
00720
00722 virtual std::string getTerminalResiduesString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const = 0;
00723
00725 virtual std::string getComponentString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const = 0;
00726
00728 virtual int getMaxRankValue() const;
00729
00731 virtual std::vector<int> getIonsScoreHistogram(IONS_HISTOGRAM flags = IH_INCLUDE_TOP_MATCHES, DB_MATCH_TYPE dbType = DM_FASTA) const;
00732
00734 double getToleranceInDalton(bool & needMass, const double * const pMass = NULL) const;
00735
00737 bool createUnassignedList(sortBy s = QUERY);
00738
00740 int getNumberOfUnassigned() const;
00741
00743 ms_peptide getUnassigned(const int num) const;
00744
00746 bool getUnassignedIsBold(const int num) const;
00747
00749 bool getUnassignedShowCheckbox(const int num) const;
00750
00752 ms_mascotresults_params getResultsParameters();
00753
00755 virtual bool getTreeClusterNodes(const int hit,
00756 std::vector<int> &left,
00757 std::vector<int> &right,
00758 std::vector<double> &distance,
00759 TREE_CLUSTER_METHOD tcm = TCM_PAIRWISE_MAXIMUM,
00760 double *** reserved1 = 0,
00761 unsigned int * reserved2 = 0) const;
00762
00764 virtual int findProteins(const int startHit,
00765 const std::string & str,
00766 const int dbIdx,
00767 FIND_FLAGS item,
00768 FIND_COMPARE_FLAGS compareFlags,
00769 std::vector<std::string> & accessions,
00770 std::vector<int> & dbIndexes) const = 0;
00771
00773 virtual int findProteinsByAccession(const int startHit,
00774 const std::string & str,
00775 const int dbIdx,
00776 FIND_COMPARE_FLAGS compareFlags,
00777 std::vector<std::string> & accessions,
00778 std::vector<int> & dbIndexes) const = 0;
00779
00781 virtual int findProteinsByDescription(const int startHit,
00782 const std::string & str,
00783 FIND_COMPARE_FLAGS compareFlags,
00784 std::vector<std::string> & accessions,
00785 std::vector<int> & dbIndexes) const = 0;
00786
00788 virtual int findPeptides(const int startHit,
00789 const std::string & str,
00790 FIND_FLAGS item,
00791 FIND_COMPARE_FLAGS compareFlags,
00792 std::vector<int> & q,
00793 std::vector<int> & p) const = 0;
00794
00796 virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide(
00797 const matrix_science::ms_peptide & peptide,
00798 matrix_science::ms_quant_component & component,
00799 const matrix_science::ms_quant_method * method = NULL) const = 0;
00800
00802 virtual double getProteinScoreForHistogram(const int num) const;
00803
00805 bool isNA() const;
00806
00808
// Returns the stored flags_ member (a const unsigned int) unchanged.
00811 unsigned int getFlags() const { return flags_; }
00812
00814
// Returns the stored flags2_ member (a const unsigned int) unchanged.
00818 unsigned int getFlags2() const { return flags2_; }
00819
00821 int getMinPepLenInPepSummary() const;
00822
00824 virtual long getNumHitsAboveIdentity(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00825
00827 virtual long getNumDecoyHitsAboveIdentity(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00828
00830 virtual long getNumHitsAboveHomology(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00831
00833 virtual long getNumDecoyHitsAboveHomology(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00834
00836 virtual void setSubsetsThreshold(const double scoreFraction);
00837
00839 virtual double getProbabilityThreshold() const;
00840
00842 virtual double getErrTolProbabilityThreshold() const;
00843
00844 #ifndef SWIG
00845
00846 bool getThresholdForFDRAboveIdentity(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00847
00849 bool getThresholdForFDRAboveIdentity(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00850
00852 bool getThresholdForFDRAboveHomology(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00853
00855 bool getThresholdForFDRAboveHomology(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00856
00858 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const = 0;
00859 #else // SWIG Multiple return values
00860
00861 bool getThresholdForFDRAboveIdentity(double targetFDR, double *OUTPUT, double *OUTPUT, int * OUTPUT, int * OUTPUT);
00862 bool getThresholdForFDRAboveIdentity(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *OUTPUT, double *OUTPUT, int * OUTPUT, int * OUTPUT);
00863 bool getThresholdForFDRAboveHomology(double targetFDR, double *OUTPUT, double *OUTPUT, int * OUTPUT, int * OUTPUT);
00864 bool getThresholdForFDRAboveHomology(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *OUTPUT, double *OUTPUT, int * OUTPUT, int * OUTPUT);
00865 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & OUTPUT, double & OUTPUT) const;
00866 #endif
00867
00869 virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const = 0;
// Returns true when dupeFlags is present as an element of the dupeRemoveIDs_
// set (exact-value membership test, not a bitwise test).
00887 bool queryRemoveThisPeptide(const unsigned short dupeFlags) const { return dupeRemoveIDs_.find(dupeFlags) != dupeRemoveIDs_.end(); }
00888
// Returns true when dupeFlags is present as an element of the
// dupeIncludeInScoreIDs_ set (exact-value membership test, not a bitwise test).
00905 bool queryScoreThisPeptide(const unsigned short dupeFlags) const { return dupeIncludeInScoreIDs_.find(dupeFlags) != dupeIncludeInScoreIDs_.end(); }
00906
// Base-class implementation: ignores both arguments and always returns false.
// Declared virtual so a derived class can replace it; presumably one that has a
// cache to load from - confirm.
00908 virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated=true) { return false; }
00909
00911 virtual bool isValidQandP(const int q, const int p) const = 0;
00912
00914 std::string lookupParsedLocalModsStr(const int q, const std::string localModsStr) const;
00915
00917 std::string lookupParsedLocalModsStr(const ms_peptide *thisOnePep) const;
00918
00919 protected:
00920
00921 #ifndef SWIG
00922 ms_mascotresults(const ms_mascotresults & rhs);
00923 ms_mascotresults & operator=(const ms_mascotresults & rhs);
00924
00925 void setResfileError(int error, ...) const;
00926
00927 ms_protein* addProtein(const std::string & accession,
00928 const int dbIdx,
00929 const int q, const int p,
00930 const msparser_internal::ms_protein_match_data &proteinMatchData,
00931 const double score,
00932 const double uncorrectedScore,
00933 const ms_peptide::SEARCH_PHASE searchPhase = ms_peptide::SEARCH_PHASE_PRIMARY,
00934 const bool isIgnored = false);
00935
00936 bool canAddPeptideToProtein(const int dbIdx,
00937 const ms_peptide::SEARCH_PHASE searchPhase,
00938 const bool isIgnored) const;
00939
00940 ms_protein* addMixtureComponentProtein(const std::string & mixAccession,
00941 const int q, const int p,
00942 const msparser_internal::ms_protein_match_data &proteinMatchData,
00943 const double score,
00944 const double uncorrectedScore,
00945 const ms_protein * master,
00946 const ms_peptide::SEARCH_PHASE searchPhase);
00947
00948 ms_protein * addUniGeneComponentProtein(const std::string & accession,
00949 const int dbIdx,
00950 const int q, const int p,
00951 const msparser_internal::ms_protein_match_data &proteinMatchData,
00952 const double score,
00953 const double uncorrectedScore,
00954 const ms_peptide::SEARCH_PHASE searchPhase,
00955 const bool isIgnored = false);
00956
00957 ms_protein * addUniGeneProtein(const std::string & unigeneID,
00958 const int q, const int p,
00959 const msparser_internal::ms_protein_match_data &proteinMatchData,
00960 const double score,
00961 const double uncorrectedScore,
00962 const ms_protein * component,
00963 const ms_peptide::SEARCH_PHASE searchPhase,
00964 const bool isIgnored = false);
00965
00966 bool queryHasSecondPassMatches(ms_mascotresfile::section secSummary, int query) const;
00967 double overrideOneInXprobRnd(bool hasSecondPassMatches) const;
00968
00969 double calculateFirstPassThreshold(ms_mascotresfile::section secSummary,
00970 int query,
00971 double OneInXprobRnd,
00972 THRESHOLD_TYPE tt) const;
00973
00974 double calculateHomologyThreshold(int query,
00975 double qplughole,
00976 double OneInXprobRnd,
00977 double identityThreshold,
00978 double qmatch) const;
00979
00980 virtual bool queryHasSignificantRank1Match(ms_mascotresfile::section secSummary,
00981 int query) const = 0;
00982
00983 virtual bool getThresholdForFDR(bool homology, double targetFDR,
00984 DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType,
00985 double * closestFDR, double * minProbability,
00986 int * pNumTargetMatches, int * pNumDecoyMatches) = 0;
00987 virtual void calculateDecoyStats(double dOneInXprobRnd) = 0;
00988
00989 virtual double getPeptideExpectationValueProtected(const double score,
00990 const int query,
00991 const ms_mascotresfile::section summary_section,
00992 const THRESHOLD_TYPE thresholdType,
00993 const DB_MATCH_TYPE dbType) const;
00994 virtual double getQplughole(const int query, const ms_mascotresfile::section sec) const;
00995 virtual ms_mascotresfile::section getSrcSectionProtected(const int query, const int rank) const;
00996
00997
// Index values 0..5, one per summary section named by the enumerator.
// QMATCH_PLUGHOLE_SEC_LAST (6) equals the number of real entries, so it can be
// used as an array extent or loop bound.
00998 enum QMATCH_PLUGHOLE_INDEX_SECTIONS {
00999 QMATCH_PLUGHOLE_SEC_SUMMARY = 0,
01000 QMATCH_PLUGHOLE_SEC_DECOYSUMMARY = 1,
01001 QMATCH_PLUGHOLE_SEC_ERRTOLSUMMARY = 2,
01002 QMATCH_PLUGHOLE_SEC_LIBRARYSUMMARY= 3,
01003 QMATCH_PLUGHOLE_SEC_CROSSLINK_SUMMARY = 4,
01004 QMATCH_PLUGHOLE_SEC_ERRTOLDECOYSUMMARY = 5,
01005 QMATCH_PLUGHOLE_SEC_LAST = 6
01006 };
// Virtual hooks whose base-class bodies are empty (no-ops); derived classes may
// override them. The ';' after the first body is redundant but harmless.
01007 virtual void getQmatchValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const {} ;
01008 virtual void getPlugholeValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const {};
01009
01010 #endif
01011
01012 static double getValidMinProbability(double minProbability);
01013 static double getValidIgnoreIonsScoreBelow(double ignoreIonsScoreBelow);
01014 static int getValidMinNumSigUniqueSeqs(int n);
01015 static int getValidFlags1(const ms_mascotresfile &resfile, int flags);
01016 static double getValidOneInXprobRnd(double OneInXprobRnd);
01017 static double getValidTargetFDR(double fdr);
01018 static DECOY_STATS_COUNT_TYPE getValidTargetFDRType(DECOY_STATS_COUNT_TYPE type);
01019
01020 const ms_mascotresfile &resfile_;
01021 const int numQueries_;
01022 const double tolFactor_;
01023
01024 proteinSet proteins_;
01025
01026
01027 proteinSet componentProteins_;
01028
01029
01030 double getIonsScoreCorrected(const double ionsScore,
01031 const long multiplicity) const;
01032
01033 const ms_mascotresults_params *resultsParams_;
01034
01035 const unsigned int flags_;
01036 const unsigned int flags2_;
01037 const double minProbability_;
01038 const int maxHitsToReport_;
01039 const std::string unigeneIndexFile_;
01040 ms_unigene * unigene_;
01041 const bool tooOld_;
01042 const int minPepLenInPepSummary_;
01043 std::string singleHit_;
01044 const double ignoreIonsScoreBelow_;
01045 const int minNumSigUniqueSeqs_;
01046 const double targetFDR_;
01047 const DECOY_STATS_COUNT_TYPE targetFDRtype_;
01048
01049 const int numDatabases_;
01050
01051 double minErrTolProbability_;
01052
01053
01054 bool isMinProbabilityDifferentFromFirstPassThreshold() const;
01055
01056
01057
01058 std::vector<ms_peptide *> peptides_;
01059
// Four score statistics stored in a fixed array. The accessors below give each
// slot a name: [0] Mascot mean, [1] Mascot stdev, [2] library mean,
// [3] library stdev.
01060 double slScoreStats_[4];
// Non-const accessors return a writable reference into the array.
01061 double& slScoreStatsMascotMean() { return slScoreStats_[0]; }
01062 double& slScoreStatsMascotStdev() { return slScoreStats_[1]; }
01063 double& slScoreStatsLibraryMean() { return slScoreStats_[2]; }
01064 double& slScoreStatsLibraryStdev() { return slScoreStats_[3]; }
// Const overloads return a read-only reference to the same slots.
01065 const double& slScoreStatsMascotMean() const { return slScoreStats_[0]; }
01066 const double& slScoreStatsMascotStdev() const { return slScoreStats_[1]; }
01067 const double& slScoreStatsLibraryMean() const { return slScoreStats_[2]; }
01068 const double& slScoreStatsLibraryStdev() const { return slScoreStats_[3]; }
01069
01071 virtual double getPepIdentThreshProtected(const int query,
01072 double OneInXprobRnd,
01073 ms_mascotresfile::section sec,
01074 DB_MATCH_TYPE dbType,
01075 double * pQmatch = 0) const;
01076 virtual double getHomologyThreshProtected(const int query,
01077 double OneInXprobRnd,
01078 ms_mascotresfile::section sec,
01079 const int rank=1,
01080 const ERROR_TOLERANT_PEPTIDE etPep = ETPEP_UNKNOWN ) const;
01081
// Guard for functions that need a previously completed task.
// funcname : name of the calling function, passed on to the error report.
// t        : bit mask of task bits to test against completedTasks_.
// Returns true if at least one bit of t is set in completedTasks_. Otherwise
// reports ERR_RESULTS_NOT_CREATED through setResfileError() and returns false.
// completedTasks_ is declared outside the portion of the header shown here.
01082 inline bool checkCreated(const char * funcname, unsigned int t) const {
01083 if (!(completedTasks_ & t)) {
01084 setResfileError(ms_mascotresfile::ERR_RESULTS_NOT_CREATED, funcname);
01085 return false;
01086 } else {
01087 return true;
01088 }
01089 }
01090
01091
01092 msparser_internal::ms_unassigned * unassigned_;
01093
01094 double top50Scores_[50];
01095 std::set<unsigned short>dupeRemoveIDs_;
01096 std::set<unsigned short>dupeIncludeInScoreIDs_;
01097
01098 bool bDecoyStatsCalculated_;
01099 double dOneInXprobRndForDecoy_;
01100
// Threshold kind; used by decoyStatsOf() as the third index of decoyStats_.
01101 enum DECOY_STATS_THRESHOLD_TYPE { DS_IDENTITY = 0, DS_HOMOLOGY = 1 };
// Extents of the four decoyStats_ dimensions, in index order.
// NOTE(review): DECOY_STATS_THR_TYPE_LEVELS is 3 although
// DECOY_STATS_THRESHOLD_TYPE only defines the values 0 and 1, so one slot in
// that dimension is not reachable through the enum - confirm this is intended.
01102 enum {
01103 DECOY_STATS_TD_LEVELS = 2,
01104 DECOY_STATS_COUNT_TYPE_LEVELS = 2,
01105 DECOY_STATS_THR_TYPE_LEVELS = 3,
01106 DECOY_STATS_DB_TYPE_LEVELS = 5
01107 };
// Counter table indexed as [target(0)/decoy(1)][count type][threshold type][db type].
01108 long decoyStats_[DECOY_STATS_TD_LEVELS][DECOY_STATS_COUNT_TYPE_LEVELS][DECOY_STATS_THR_TYPE_LEVELS][DECOY_STATS_DB_TYPE_LEVELS];
// Returns a writable reference to one decoyStats_ counter.
// decoy selects the first index (true -> 1, false -> 0); the three enum
// arguments are used directly as the remaining indices. No range checking is
// done, so the enum values must lie within the array extents.
01109 inline long& decoyStatsOf(bool decoy, DECOY_STATS_COUNT_TYPE countType, DECOY_STATS_THRESHOLD_TYPE thrType, DB_MATCH_TYPE dbType)
01110 {
01111 return decoyStats_[decoy ? 1 : 0][countType][thrType][dbType];
01112 }
01113 static void checkDecoyStatsArguments(double &OneInXprobRnd, DECOY_STATS_COUNT_TYPE &countType, DB_MATCH_TYPE &dbType);
// Sets every element of decoyStats_ to c by walking all four dimensions up to
// their DECOY_STATS_*_LEVELS extents.
01114 inline void resetDecoyStats(long c)
01115 {
01116 for (int i = 0; i < DECOY_STATS_TD_LEVELS; i++) {
01117 for (int j = 0; j < DECOY_STATS_COUNT_TYPE_LEVELS; j++) {
01118 for (int k = 0; k < DECOY_STATS_THR_TYPE_LEVELS; k++) {
01119 for (int l = 0; l < DECOY_STATS_DB_TYPE_LEVELS; l++) {
01120 decoyStats_[i][j][k][l] = c;
01121 }
01122 }
01123 }
01124 }
01125 }
01126
01127 ms_mascotresfile::section secSummary_; // Results-file section identifier; by its name the section holding summary data -- confirm where it is assigned.
01128 ms_mascotresfile::section secMixture_; // Results-file section identifier; by its name the section holding mixture data -- confirm.
01129 ms_mascotresfile::section secPeptides_; // Results-file section identifier; by its name the section holding peptide data -- confirm.
01130 ms_mascotresfile::section secProteins_; // Results-file section identifier; by its name the section holding protein data -- confirm.
01131 double subsetsScoreFraction_; // presumably the score fraction used when deciding whether subset proteins are reported -- confirm
01132 msparser_internal::ms_proteininference * pProteinInferencer_; // Pointer to the internal protein-inference helper (a friend of this class); ownership not visible here -- TODO confirm.
01133 bool nucleicAcid_; // presumably true when the search was against a nucleic acid database -- confirm
01134 mutable int cachedAvePepIdentThreshMascot_; // Cached integer value; mutable so const methods can fill it. By its name the average peptide identity threshold for Mascot matches -- confirm.
01135 mutable int cachedAvePepIdentThreshSL_; // Same as above; "SL" presumably means spectral library -- confirm.
01136 std::vector<int> ionsScoreHistogramTopMatchMascot_; // Integer histogram; by its name ions scores of top Mascot matches -- bin layout not visible here.
01137 std::vector<int> ionsScoreHistogramTop10Mascot_; // Integer histogram; by its name ions scores of the top 10 Mascot matches -- confirm.
01138 std::vector<int> ionsScoreHistogramTopMatchSL_; // Counterpart of the top-match histogram for "SL" matches -- confirm.
01139 std::vector<int> ionsScoreHistogramTop10SL_; // Counterpart of the top-10 histogram for "SL" matches -- confirm.
01140 int maxRankValue_; // presumably the highest peptide match rank considered -- confirm
01141 bool isPercolator_; // presumably true when Percolator scores are in use -- confirm
01142 THRESHOLD_TYPE thresholdType_; // Threshold selection of type THRESHOLD_TYPE (declared outside this section).
01143 double minScoreThreshold_; // presumably the minimum score a match needs -- confirm units and usage
01144 bool useFusionETthresholds_; // NOTE(review): meaning not derivable from this header; "ET" presumably means error tolerant -- confirm.
01145
01146 typedef std::map<char, int> mod_residue_count_t; // Maps a single character to an integer; by its name, residue/site character -> count.
01147 typedef std::map<std::pair<char, char>, int> linked_residue_count_t; // Maps a pair of characters to an integer; by its name, pair of linked residues -> count.
01148 static const char mod_residue_count_NTERM; // Declared here, defined elsewhere; presumably the key used in mod_residue_count_t for the N-terminus -- confirm.
01149 static const char mod_residue_count_CTERM; // Declared here, defined elsewhere; presumably the key used in mod_residue_count_t for the C-terminus -- confirm.
01150 static const double probability_lower_bound; // Declared here, defined elsewhere; by its name a floor applied to probabilities -- confirm value and usage.
01151 static std::string site_char_to_string(char); // Static helper converting one site character to a string; presumably expands the NTERM/CTERM keys to readable text -- confirm.
01152 bool countModificationInstances( // Returns bool; every parameter is a non-const reference, so all of them can be written by the implementation. By its name it tallies modification occurrences -- NOTE(review): confirm in the .cpp.
01153 std::vector<mod_residue_count_t> &fixed_mod_count, // per-entry residue counts; presumably one entry per fixed modification -- confirm indexing
01154 std::vector<mod_residue_count_t> &var_mod_count, // per-entry residue counts; presumably one entry per variable modification -- confirm indexing
01155 std::map<std::pair<int, int>, mod_residue_count_t> &monolink_count, // residue counts keyed by an (int, int) pair; key meaning not visible here -- confirm
01156 std::map<std::pair<int, int>, linked_residue_count_t> &looplink_count, // linked-residue-pair counts keyed by an (int, int) pair -- confirm key meaning
01157 std::map<std::pair<int, int>, linked_residue_count_t> &intact_link_count, // linked-residue-pair counts keyed by an (int, int) pair -- confirm key meaning
01158 std::vector<std::string> &var_mod_position_types, // strings; presumably a position type per variable modification -- confirm
01159 std::map< std::string, std::map<std::string, mod_residue_count_t> > &local_mod_count, // two-level string-keyed map of residue counts; key meanings not visible here -- confirm
01160 std::vector<std::string> &local_mod_deltas, // strings; presumably mass deltas of local modifications -- confirm
01161 std::map< std::string, std::map<std::string, mod_residue_count_t> > &et_mod_count, // same shape as local_mod_count; "et" presumably means error tolerant -- confirm
01162 std::vector<std::string> &et_mod_deltas, // strings; presumably mass deltas of error-tolerant modifications -- confirm
01163 int &num_nonspecific_cleavage, // integer counter; by its name the number of non-specific cleavages -- confirm
01164 std::vector<mod_residue_count_t> &library_mod_count); // per-entry residue counts; presumably for modifications from library matches -- confirm
01165 virtual bool cacheModificationCounts() = 0; // Pure virtual: every concrete subclass must implement it. Returns bool; presumably fills the modification-count members of this class -- confirm in the subclasses.
01166 std::vector<mod_residue_count_t> discoveredFixedMods_; // Same type as the fixed_mod_count parameter of countModificationInstances(); presumably its stored result -- confirm.
01167 std::vector<mod_residue_count_t> discoveredVariableMods_; // Same type as var_mod_count.
01168 std::map<std::pair<int, int>, mod_residue_count_t> discoveredMonoLinks_; // Same type as monolink_count.
01169 std::map<std::pair<int, int>, linked_residue_count_t> discoveredLoopLinks_; // Same type as looplink_count.
01170 std::map<std::pair<int, int>, linked_residue_count_t> discoveredIntactLinks_; // Same type as intact_link_count.
01171 std::vector<std::string> discoveredVariableModPositionTypes_; // Same type as var_mod_position_types.
01172 std::map<std::string, std::map<std::string, mod_residue_count_t> > discoveredLocalMods_; // Same type as local_mod_count.
01173 std::vector<std::string> discoveredLocalModDeltas_; // Same type as local_mod_deltas.
01174 std::map<std::string, std::map<std::string, mod_residue_count_t> > discoveredErrorTolerantMods_; // Same type as et_mod_count.
01175 std::vector<std::string> discoveredErrorTolerantModDeltas_; // Same type as et_mod_deltas.
01176 int discoveredNonSpecificCleavageNum_; // Same type as num_nonspecific_cleavage.
01177 std::vector<mod_residue_count_t> discoveredLibraryMods_; // Same type as library_mod_count.
01178 bool modificationCountsLoaded_; // presumably true once loading of the counts has been attempted -- confirm
01179 bool modificationCountsAvailable_; // presumably true when the counts above hold valid data -- confirm how it differs from modificationCountsLoaded_
01180
01181 typedef std::map<std::string, std::set<std::pair<int, int> >, LexicoCompare > map_mod_value_to_qp; // String-keyed map to sets of (int, int) pairs, ordered by LexicoCompare; by its name the pairs are (query, rank p) -- confirm. LexicoCompare only has a constructor taking a bool, so a comparator instance must be supplied when constructing the map.
01182
01183 enum COMPLETED_TASKS { // Bit flags, one bit per task, combined in completedTasks_ and tested by checkCreated().
01184 CT_NONE = 0x0000, // no bits set
01185 CT_LOADQUERIES = 0x0001, // by its name: queries loaded
01186 CT_SRCRANKINITIALISED = 0x0002, // by its name: source rank initialised
01187 CT_PERCOLATORRESULTS = 0x0004, // by its name: Percolator results handled
01188 CT_INFERENCING = 0x0008, // by its name: protein inference done
01189 CT_UNASSIGNEDLIST = 0x0010, // by its name: unassigned list built
01190 CT_COMPONENTINTENSITIES = 0x0020, // by its name: component intensities handled
01191 CT_COUNTMODS = 0x0040, // by its name: modifications counted
01192 CT_CREATECDB = 0x0080, // by its name: CDB created
01193 CT_ALLDONE = 0xFFFF // all 16 low bits set, a superset of every flag above
01194 };
01195 mutable unsigned int completedTasks_; // Bitwise combination of COMPLETED_TASKS values; mutable so it can be updated from const methods.
01196
01197 bool isCancelling() const; // Const query returning bool; by its name reports whether a cancel has been requested -- confirm what state it reads.
01198
01199 private:
01200 ms_protein* // Returns a pointer to an ms_protein; whether it can be null and who owns it is not visible here -- TODO confirm.
01201 addProteinInternal(ms_mascotresults::proteinSet &proteinSet, // Private helper; by its name adds a protein to proteinSet, which is taken by non-const reference and can therefore be modified.
01202 const std::string &accession, // accession string of the protein
01203 const int dbIdx, // presumably the index of the database the accession belongs to -- confirm
01204 const int q, const int p, // presumably query number and rank of the peptide match -- confirm
01205 const msparser_internal::ms_protein_match_data &proteinMatchData, // read-only internal match data for this protein
01206 const double correctedScore, // score value; how it is corrected is not visible here
01207 const double uncorrectedScore, // score value before that correction -- confirm
01208 const ms_protein * component, // pointer to a read-only ms_protein; meaning of "component" not visible here -- confirm, including whether null is allowed
01209 const ms_peptide::SEARCH_PHASE searchPhase, // search phase of the peptide match
01210 const bool isUnigene, // presumably true when the protein is a UniGene entry -- confirm
01211 const bool isIgnored); // presumably true when the protein is flagged as ignored -- confirm
01212
01213 mutable bool cachedHomology_[2]; // Two flags; mutable so const methods can update them. Presumably "cache valid" markers for the two homology caches below -- confirm what the index selects.
01214 mutable double cachedHomologyProb_[2]; // Two doubles; presumably the probability each homology cache was built for -- confirm.
01215 mutable std::vector<double> cachedHomologyValues_[2]; // Two vectors of doubles; presumably the cached homology thresholds -- confirm indexing.
01216 mutable std::vector<int> cachedQMatch_[QMATCH_PLUGHOLE_SEC_LAST]; // One int vector per index below QMATCH_PLUGHOLE_SEC_LAST; by its name cached qmatch values -- confirm.
01217 mutable std::vector<double> cachedQPlughole_[QMATCH_PLUGHOLE_SEC_LAST]; // One double vector per index below QMATCH_PLUGHOLE_SEC_LAST; by its name cached plughole values -- confirm.
01218 typedef std::map<std::pair<int, std::string>, int> dbIdxPlusAccToId_t; // Maps an (int, string) pair to an int; by its name (database index, accession) -> id.
01219 mutable dbIdxPlusAccToId_t summarySectionAccs_; // Lookup of that type; presumably filled by cacheSummarySectionAccs() -- confirm.
01220 mutable bool cancelCreateSummary_; // Mutable flag; by its name requests cancelling summary creation -- confirm who sets and reads it.
01221 ms_progress_info * progress_; // Pointer to an ms_progress_info; ownership not visible here -- TODO confirm.
01222
01223 mutable bool loadedQmatchFromCacheFile_[QMATCH_PLUGHOLE_SEC_LAST]; // One flag per index below QMATCH_PLUGHOLE_SEC_LAST; by its name, whether qmatch values were loaded from a cache file -- confirm.
01224 mutable bool loadedPlugholeFromCacheFile_[QMATCH_PLUGHOLE_SEC_LAST]; // One flag per index below QMATCH_PLUGHOLE_SEC_LAST; by its name, whether plughole values were loaded from a cache file -- confirm.
01225
01226 bool getProteinDescriptionAndMass(const char * accession, const int dbIdx, // Const helper returning bool; by its name looks up description and mass for (accession, dbIdx) -- confirm what false means.
01227 double & mass, std::string & desc) const; // mass and desc are non-const references, i.e. writable by the implementation
01228 void cacheSummarySectionAccs() const; // Const method; presumably fills the mutable summarySectionAccs_ member -- confirm.
01229 void debugCheckReloadablePeps() const; // Const method; by its name a debugging consistency check -- confirm.
01230 bool setQmatch(const int query, const QMATCH_PLUGHOLE_INDEX_SECTIONS index, const int value) const; // Const setter returning bool; presumably writes into the mutable cachedQMatch_ arrays -- confirm.
01231 bool setPlughole(const int query, const QMATCH_PLUGHOLE_INDEX_SECTIONS index, const double value) const; // Const setter returning bool; presumably writes into the mutable cachedQPlughole_ arrays -- confirm.
01232
01233
01234 typedef std::map<int, std::string> queryIT_MODS_t; // Maps an int to a string; by its name, query number -> IT_MODS value.
01235 mutable queryIT_MODS_t queryIT_MODS_; // Mutable lookup of that type, so const methods can fill it.
01236 typedef std::map<std::string, std::vector<std::string> > parsedLocalMods_t; // Maps a string to a list of strings; presumably raw mods string -> parsed modification names -- confirm.
01237 mutable parsedLocalMods_t parsedLocalMods_; // Mutable lookup of that type.
01238 typedef std::map<std::pair<int, std::string>, std::string> pepParsedLocalMods_t; // Maps an (int, string) pair to a string; key meaning not visible here -- confirm.
01239 mutable pepParsedLocalMods_t pepParsedLocalMods_; // Mutable lookup of that type.
01240
01241 static std::string parseLocalModsStr(const std::string &localModsStr, const std::vector<std::string> &local_modnames); // Static helper: takes a mods string and a list of modification names (both read-only) and returns a string; the output format is not visible here -- confirm in the .cpp.
01242
01243 const std::vector<std::string>& lookupParsedIT_MODS(int q) const; // Returns a const reference to a list of strings for q (presumably a query number); presumably backed by the mutable caches of this class -- confirm the lifetime of the referenced vector.
01244
01245 double sequenceMassResidueLookup_[256]; // Table of 256 doubles; presumably residue masses indexed by the residue's unsigned char value -- confirm where it is initialised.
01246 };
01247
01249 }
01250
01251 #endif // MS_MASCOTRESULTS_HPP
01252
01253