00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MS_MASCOTRESPROTEIN_HPP
00020 #define MS_MASCOTRESPROTEIN_HPP
00021
00022
00023
00024 #include <string>
00025 #include <list>
00026 #include <vector>
00027 #include <set>
00028 #include <map>
00029
00030 namespace msparser_internal { // Forward declarations only; the definitions of these types are not part of this listing.
00031 class ms_protein_match_data;
00032 class PEPINFO;
00033 }
00034
00035 namespace matrix_science {
00036
00037 class ms_mascotresults; // Forward declarations for types referenced by ms_protein and the sort functors further down.
00038 class ms_proteinsummary;
00039 class ms_pepinfoSortByScore;
00040 class ms_peptide; // NOTE(review): ms_peptide::PSM and ms_peptide::SEARCH_PHASE are used below, which needs the full definition - presumably supplied by an include outside this listing (as for ms_mascotresfile and ms_errs); confirm.
00041
00047
00048
00054 class MS_MASCOTRESFILE_API ms_protein // Holds one protein's accession, database index, scores, grouping state and peptide-match records.
00055 {
00056 public:
00058 // Grouping state stored in group_ and returned by getGrouping(); the meaning of each value is not shown in this listing.
00071 enum GROUP
00072 {
00073 GROUP_UNKNOWN,
00074 GROUP_NO,
00075 GROUP_SUBSET,
00076 GROUP_COMPLETE,
00077 GROUP_FAMILY
00078 };
00079 // Remove any pre-existing DUPLICATE macro so that it cannot clash with the enum named DUPLICATE below.
00080 #ifdef DUPLICATE
00081 #ifdef _WIN32
00082
00083 #endif
00084 #undef DUPLICATE
00085 #endif
00086
00088 // Duplicate status of a peptide match; this is the return type of getPeptideDuplicate().
00095 enum DUPLICATE
00096 {
00097 DUPE_NotDuplicate,
00098 DUPE_Duplicate,
00099 DUPE_DuplicateSameQuery,
00100 DUPE_HighestScoringDuplicate,
00101 DUPE_Ignored
00102 };
00103
00105 // Bit flags for the 'flags' argument of getMasses(); each flag occupies its own hex digit.
00114 enum MASS_FLAGS
00115 {
00116 MASS_NON_SELECT_NON_MATCH = 0x0001,
00117 MASS_SELECT_NON_MATCH = 0x0010,
00118 MASS_NON_SELECT_MATCH = 0x0100,
00119 MASS_SELECT_MATCH = 0x1000
00120 };
00121
00123 // Option bits accepted by getNumDistinctPeptides(), getNumDistinctPeptideRepeats() and getDistinctPeptide().
00184 enum DISTINCT_PEPTIDE_FLAGS
00185 {
00186 DPF_SEQUENCE = 0x0001,
00187 DPF_CHARGE = 0x0002,
00188 DPF_MODS = 0x0004,
00189 DPF_UNIQUE = 0x0008,
00190 DPF_NODUPSAMEQUERY = 0x0010
00191 };
00192
00193 // Pair of int and string, plus vector/set containers of it. NOTE(review): presumably (database index, accession), judging by the type name - confirm.
00194 typedef std::pair<int, std::string> dbIdxPlusAcc_t;
00195 typedef std::vector<dbIdxPlusAcc_t> dbIdxPlusAccVect_t;
00196 typedef std::set<dbIdxPlusAcc_t> dbIdxPlusAccSet_t;
00197
00198 // Constructor; proteinSummaryHit defaults to 0. NOTE(review): accession is taken by const value, so each call copies the string.
00200 ms_protein(const double score,
00201 const std::string accession,
00202 const bool updateScoreFromPepScores,
00203 const int proteinSummaryHit = 0);
00204 // Copy constructor (user-declared; defined outside this listing).
00206 ms_protein(const ms_protein& src);
00207 // Destructor (non-virtual).
00209 ~ms_protein();
00210
00211 #ifndef SWIG
00212 // Copy assignment, hidden from SWIG; copyFrom() below stays visible to SWIG.
00213 ms_protein& operator=(const ms_protein& right);
00214 #endif
00215 // Presumably copies the state of *src into this object. NOTE(review): handling of a null src is not visible here - check the implementation.
00217 void copyFrom(const ms_protein* src);
00218 // Returns a std::string by value (presumably accession_); getAccessionStr() further down is the inline C-string variant.
00220 std::string getAccession() const;
00221 // Database index getter/setter pair (presumably backed by dbIdx_ - confirm).
00223 int getDB() const;
00224
00226 void setDB(int dbIdx);
00227 // Score getters; presumably backed by score_, nonMudPITScore_ and scoreWithET_ respectively - confirm.
00229 double getScore() const;
00230
00232 double getNonMudpitScore() const;
00233
00235 double getScoreWithET() const;
00236 // Peptide counts; aboveThreshold defaults to false.
00238 int getNumPeptides() const;
00239
00241 int getNumDisplayPeptides(bool aboveThreshold = false) const;
00242
00244 GROUP getGrouping() const;
00245
00246 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00247 // This section is skipped when DOXYGEN_SHOULD_SKIP_THIS is defined. setGrouping() stores g in group_.
00248 void setGrouping(GROUP g) { group_ = g; }
00249 // NOTE(review): presumably serialises this protein to a string and fills the two output vectors - confirm.
00251 std::string getForCache(dbIdxPlusAccVect_t & supersetProteinsUnsorted,
00252 dbIdxPlusAccVect_t & components) const;
00253 // Counterpart of getForCache(); the bool result presumably signals success - confirm.
00255 bool setFromCache(const std::string & str, ms_mascotresults & results,
00256 const dbIdxPlusAccVect_t & supersetProteinsUnsorted,
00257 const dbIdxPlusAccVect_t & components,
00258 const std::string & cdbFeatures);
00259 // Ignored matches as a vector of int pairs. NOTE(review): presumably (q, p) order, matching the overload after the #endif - confirm.
00261 std::vector<std::pair<int, int> > getIgnoredQPs() const;
00262 // Presumably a membership test for a single (q, p) match in the ignored set - confirm.
00264 bool isIgnoredQP(const int q, const int p) const;
00265 #endif
00266 // Overload that reports the ignored matches through the two output vectors q and p.
00268 void getIgnoredQPs(std::vector<int> &q, std::vector<int> &p) const;
00269 // Per-peptide accessors follow. NOTE(review): whether pepNumber is 0- or 1-based is not visible here - confirm.
00271 int getPeptideQuery (const int pepNumber) const;
00272
00274 int getPeptideP (const int pepNumber) const;
00275 // Presumably the reverse lookup of getPeptideQuery()/getPeptideP() - confirm.
00277 int getPepNumber(const int q, const int p) const;
00278 // psmComponent defaults to ms_peptide::PSM_COMPLETE in every accessor that takes it.
00280 int getPeptideFrame (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00281
00283 long getPeptideStart (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00284
00286 long getPeptideEnd (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00287
00289 long getPeptideMultiplicity (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00290 // allowErrTolDuplicate defaults to true.
00292 DUPLICATE getPeptideDuplicate (const int pepNumber, const bool allowErrTolDuplicate = true) const;
00293
00295 double getPeptideIonsScore (const int pepNumber) const;
00296
00298 bool getPeptideIsBold (const int pepNumber) const;
00299 // NOTE(review): the setter takes no bool, so it can presumably only set, not clear, the flag - confirm.
00301 void setPeptideIsBold (const int pepNumber);
00302
00304 bool getPeptideShowCheckbox (const int pepNumber) const;
00305 // Same setter shape as setPeptideIsBold(): no value argument.
00307 void setPeptideShowCheckbox (const int pepNumber);
00308
00310 int getPeptideComponentID (const int pepNumber) const;
00311
00313 char getPeptideResidueBefore (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00314
00316 char getPeptideResidueAfter (const int pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00317 // Similar-protein queries. Note that isASimilarProtein() is non-const, unlike the getters that follow it.
00319 bool isASimilarProtein(const ms_protein * prot,
00320 const ms_mascotresults * results,
00321 const bool groupByQueryNumber = false);
00322
00324 std::string getSimilarProteinName() const;
00325
00327 int getSimilarProteinDB() const;
00328
00330 bool isSimilarProtein(const std::string & acc, const int dbIdx) const;
00331 // Takes two output vectors and returns an int (presumably the number of similar proteins) - confirm.
00333 int getSimilarProteins(std::vector<std::string> & accessions, std::vector<int> & dbIdxs) const;
00334
00336 void setSimilarProtein(const ms_protein * prot);
00337 // Presumably records one peptide match (q, p) for this protein; parameter semantics are defined by the implementation - confirm.
00340 void addOnePeptide( ms_mascotresults & results,
00341 const int q, const int p,
00342 const msparser_internal::ms_protein_match_data &proteinMatchData,
00343 const double correctedScore,
00344 const double uncorrectedScore,
00345 const ms_protein * component,
00346 const ms_peptide::SEARCH_PHASE searchPhase,
00347 const bool isIgnored);
00348
00349
00351 long getCoverage() const;
00352
00354 bool anyMatchToQuery(const int query) const;
00355
00357 bool anyMatchToQueryAndP(const int query, const int P) const;
00358 // numDecimalPlaces defaults to 2 in both mass-list functions below.
00360 std::string getUnmatchedMasses(ms_mascotresfile & resfile,
00361 const int numDecimalPlaces = 2) const;
00362 // flags defaults to MASS_SELECT_MATCH (see MASS_FLAGS).
00364 std::string getMasses(ms_mascotresfile & resfile,
00365 const ms_proteinsummary & summary,
00366 const unsigned int flags = MASS_SELECT_MATCH,
00367 const int numDecimalPlaces = 2) const;
00368
00370 int getFrame() const;
00371
00373 bool anyBoldRedPeptides(const ms_mascotresults & results) const;
00374
00376 bool isUnigene() const;
00377
00379 void setIsUnigeneEntry();
00380
00382 bool isPMFMixture() const;
00383
00385 void setIsPMFMixture();
00386 // All keepAlive* parameters are defaulted (false, 0, "", 0); their purpose is not visible in this listing.
00388 void sortPeptides(const ms_mascotresults & results, bool keepAlive = false, int keepAlivePercent = 0, const char * keepAliveAccession = "", int keepAliveCount = 0);
00389
00391 int getNumComponents() const;
00392
00394 const ms_protein * getComponent(const int componentNumber) const;
00395
00397 int getProteinSummaryHit() const;
00398
00400 double getRMSDeltas(const ms_mascotresults & results) const;
00401
00403 int getHitNumber() const;
00404 // Inline setter: stores hit in hitNum_.
00411 void setHitNumber(const int hit) { hitNum_ = hit;}
00412
00414 int getMemberNumber() const;
00415
00417 int getLongestPeptideLen() const;
00418 // Distinct-peptide queries: aboveThreshold defaults to false and flags to DPF_SEQUENCE.
00420 int getNumDistinctPeptides(bool aboveThreshold = false,
00421 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00422
00424 int getNumDistinctPeptideRepeats(
00425 int distinctIndex,
00426 bool aboveThreshold = false,
00427 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00428 // Returns an ms_peptide by value; repeatIndex defaults to 1.
00430 ms_peptide getDistinctPeptide(
00431 int distinctIndex,
00432 int repeatIndex = 1,
00433 bool aboveThreshold = false,
00434 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00435
00437 int getLongestSigPeptideLen() const;
00438
00440 int getNumObservedForEmPAI() const;
00441 #ifndef SWIG
00442 // Ordering (hidden from SWIG): by dbIdx_ first, then by accession_. Only when lhs.proteinSummaryHit_ is non-zero
00443 // are equal accessions further ordered by getFrame(); otherwise equal accessions compare as equivalent.
00452 friend inline bool operator<(const ms_protein & lhs, const ms_protein & rhs) {
00453 if (lhs.dbIdx_ == rhs.dbIdx_) {
00454 if ( lhs.proteinSummaryHit_ == 0 ) {
00455 return lhs.accession_ < rhs.accession_;
00456 } else {
00457 if ( lhs.accession_ == rhs.accession_) {
00458 return lhs.getFrame() < rhs.getFrame();
00459 } else {
00460 return lhs.accession_ < rhs.accession_;
00461 }
00462 }
00463 } else {
00464 return lhs.dbIdx_ < rhs.dbIdx_;
00465 }
00466 }
00467 #endif
00468 // Inline accessor: returns accession_.c_str(), so the pointer is only valid while this object lives and accession_ is not modified.
00469 const char * getAccessionStr() const { return accession_.c_str(); }
00470
00471 private:
00472 void initialiseDistinctPeptideTree(
00473 bool aboveThreshold,
00474 DISTINCT_PEPTIDE_FLAGS flags) const;
00475
00476 ms_errs* getErrorHandler() const;
00477 // Peptide records. NOTE(review): the two pointer vectors presumably point into allPeptides_ -
00478 // confirm ownership before touching the copy logic (see copyPeptidePointers() below).
00479 mutable std::vector<msparser_internal::PEPINFO *> peptides_;
00480 mutable std::vector<msparser_internal::PEPINFO *> ignoredPeptides_;
00481 mutable std::vector<msparser_internal::PEPINFO> allPeptides_;
00482
00483 ms_mascotresults * results_;
00484
00485
00486
00487 // Bit set that is read and written through isFlagSet()/setFlag() below.
00488 unsigned char flags_;
00489 // Counters and cached values; the mutable ones may be modified from const member functions.
00490 int numPeptides_;
00491 mutable int numDisplayPeptides_;
00492 mutable int numDisplayPeptidesAboveThresh_;
00493 mutable int numDistinctPeptides_;
00494 mutable int numDistinctPeptidesAboveThresh_;
00495 mutable int numDistinctUniquePeptides_;
00496 mutable int numDistinctUniqPepAboveThresh_;
00497 mutable int lenLongestPeptideAboveThresh_;
00498 mutable int numObservedForEmPAI_;
00499 mutable int frame_;
00500 mutable bool distinctPeptideAboveThreshold_;
00501 mutable DISTINCT_PEPTIDE_FLAGS distinctPeptideFlags_;
00502 mutable std::list<std::list<ms_peptide*> > distinctPeptideTree_;
00503 dbIdxPlusAccSet_t supersetProteins_;
00504 dbIdxPlusAccVect_t supersetProteinsUnsorted_;
00505
00506
00507
00508 dbIdxPlusAccVect_t components_;
00509
00510 std::string accession_;
00511 int dbIdx_;
00512 double score_;
00513 double nonMudPITScore_;
00514 double scoreWithET_;
00515 GROUP group_;
00516 int proteinSummaryHit_;
00517 int hitNum_;
00518 mutable int memberNum_;
00519 int longestPeptideLen_;
00520 mutable long coverage_;
00521
00522
00523
00524
00525
00526
00527
00528
00529 // Private helpers.
00530 void copyPeptidePointers(std::vector<msparser_internal::PEPINFO *> &pointersTo, const std::vector<msparser_internal::PEPINFO *> &pointersFrom, const ms_protein *src);
00531 void checkFromCache(const char * calledBy) const;
00532 void checkQPFromCache(const char * calledBy) const;
00533 bool isFlagSet(unsigned char fl) const { return (flags_ & fl)?true:false; } // true if any bit of fl is set in flags_
00534 void setFlag(unsigned char fl, bool val) {
00535 if (val) {
00536 flags_ |= fl;
00537 } else {
00538 // flags_ & ~fl is evaluated as int (integer promotion); the cast narrows it back to unsigned char.
00539 flags_ = static_cast<unsigned char>(flags_ & ~fl);
00540 }
00541 }
00542
00543 static bool isVarModStrEmpty(const std::string &str);
00544 // These classes are granted access to the private members above.
00545 friend class prot_sort;
00546 friend class ms_pepinfoSortByScore;
00547 };
00548 #ifndef SWIG
00549
00550 class ms_proteinPtrSortByAccession
00551 {
00552 public:
00553 bool operator() (const ms_protein * p1, const ms_protein * p2) const {
00554 return (*p1 < *p2);
00555 }
00556 };
00557
00558 class ms_proteinPtrSortByScore
00559 {
00560 public:
00561 bool operator() (const ms_protein * p1, const ms_protein * p2) const {
00562 if (p1->getScore() != p2->getScore()) {
00563 return (p1->getScore() > p2->getScore());
00564 } else {
00565 return (*p1 < *p2);
00566 }
00567 }
00568 };
00569
00570
00571 class ms_pepinfoSortByScore
00572 {
00573 public:
00574 ms_pepinfoSortByScore(std::pair<bool, bool> pairParam): removeDiffPos_(pairParam.first), anyLibraryMatches_(pairParam.second) { }
00575 bool operator() (const msparser_internal::PEPINFO * p1, const msparser_internal::PEPINFO * p2) const;
00576 ms_pepinfoSortByScore(const ms_pepinfoSortByScore& other): removeDiffPos_(other.removeDiffPos_), anyLibraryMatches_(other.anyLibraryMatches_){}
00577 ms_pepinfoSortByScore& operator=(const ms_pepinfoSortByScore& other)
00578 {
00579 if (&other != this) {
00580 removeDiffPos_ = other.removeDiffPos_;
00581 anyLibraryMatches_ = other.anyLibraryMatches_;
00582 }
00583 return *this;
00584 }
00585
00586 private:
00587 bool removeDiffPos_;
00588 bool anyLibraryMatches_;
00589 };
00590
00591 #endif
00592
00593 }
00594
00595 #endif // MS_MASCOTRESPROTEIN_HPP
00596
00597
00598
00599
00600
00601