Matrix Science header

ms_mascotresprotein.hpp

00001 /*
00002 ##############################################################################
00003 # file: ms_mascotresprotein.hpp                                              #
00004 # 'msparser' toolkit                                                         #
00005 # Encapsulates a protein - either for protein summary or peptide summary     #
00006 ##############################################################################
00007 # COPYRIGHT NOTICE                                                           #
00008 # Copyright 1998-2018 Matrix Science Limited  All Rights Reserved.           #
00009 #                                                                            #
00010 ##############################################################################
00011 #    $Archive:: /Mowse/ms_mascotresfile/include/ms_mascotresprotein.hpp    $ #
00012 #     $Author: robertog@matrixscience.com $ #
00013 #       $Date: 2019-09-26 12:13:39 +0100 $ #
00014 #   $Revision: dfb5ce6a52a298b40097fe6b12c45e552d08f950 | MSPARSER_REL_2_8_1-0-gea32989045 $ #
00015 # $NoKeywords::                                                            $ #
00016 ##############################################################################
00017 */
00018 
00019 #ifndef MS_MASCOTRESPROTEIN_HPP
00020 #define MS_MASCOTRESPROTEIN_HPP
00021 
00022 
00023 // Includes from the standard template library
00024 #include <string>
00025 #include <list>
00026 #include <vector>
00027 #include <set>
00028 #include <map>
00029 
00030 namespace msparser_internal {
          // Forward declarations of library-internal types that ms_protein
          // (declared later in this header) refers to in its signatures and
          // private data members. Their definitions are not part of this header.
00031     class ms_protein_match_data;
00032     class PEPINFO;
00033 }
00034 
00035 namespace matrix_science {
00036 
00037     class ms_mascotresults;   // Forward declarations of types named by the declarations below.
00038     class ms_proteinsummary;
00039     class ms_pepinfoSortByScore;
00040     class ms_peptide;
          // NOTE(review): ms_peptide::PSM and ms_peptide::SEARCH_PHASE are named
          // further down in this header. A nested type cannot be named through a
          // forward declaration alone, so the full ms_peptide definition has to be
          // visible before this header is compiled. ms_mascotresfile and ms_errs
          // are also used below without being declared here.
00041 
00047 
00048 
00054     class MS_MASCOTRESFILE_API ms_protein
00055     {
              // Encapsulates one protein, for either a protein summary or a peptide
              // summary. Apart from a few trivial inline members, this header only
              // declares the members; their definitions are not visible here.
00056         public:
00058 
                  // Grouping state of a protein; read with getGrouping() and
                  // written with setGrouping().
00071             enum GROUP 
00072             { 
00073                 GROUP_UNKNOWN,  
00074                 GROUP_NO,       
00075                 GROUP_SUBSET,   
00076                 GROUP_COMPLETE, 
00077                 GROUP_FAMILY    
00078             };
00079 
      // DUPLICATE is used as an enum name just below, so a macro of the same
      // name that is already defined is removed first.
00080 #ifdef DUPLICATE
00081     #ifdef _WIN32
00082 //        #pragma message("WARNING: The identifier 'DUPLICATE' was defined but is incompatible with the definition for ms_protein")
00083     #endif
00084     #undef DUPLICATE
00085 #endif
00086 
00088 
                  // Duplicate status of a peptide match; the return type of
                  // getPeptideDuplicate().
00095             enum DUPLICATE 
00096             { 
00097                 DUPE_NotDuplicate,           
00098                 DUPE_Duplicate,              
00099                 DUPE_DuplicateSameQuery,     
00100                 DUPE_HighestScoringDuplicate, 
00101                 DUPE_Ignored                 
00102             };
00103 
00105 
                  // Values for the 'flags' argument of getMasses(), which defaults
                  // to MASS_SELECT_MATCH. Each value occupies its own hex digit;
                  // presumably they may be OR-ed together - confirm against the
                  // getMasses() implementation.
00114             enum MASS_FLAGS
00115             {
00116                 MASS_NON_SELECT_NON_MATCH   = 0x0001, 
00117                 MASS_SELECT_NON_MATCH       = 0x0010, 
00118                 MASS_NON_SELECT_MATCH       = 0x0100, 
00119                 MASS_SELECT_MATCH           = 0x1000  
00120             };
00121 
00123 
                  // Single-bit values taken by getNumDistinctPeptides(),
                  // getNumDistinctPeptideRepeats() and getDistinctPeptide(), all of
                  // which default to DPF_SEQUENCE.
                  // NOTE(review): those parameters are typed DISTINCT_PEPTIDE_FLAGS,
                  // so passing an OR-ed combination from C++ needs a cast back to
                  // the enum type - confirm the intended usage.
00184             enum DISTINCT_PEPTIDE_FLAGS
00185             {
00186                 DPF_SEQUENCE        = 0x0001,  
00187                 DPF_CHARGE          = 0x0002,  
00188                 DPF_MODS            = 0x0004,  
00189                 DPF_UNIQUE          = 0x0008,  
00190                 DPF_NODUPSAMEQUERY  = 0x0010   
00191             };
00192 
00193             // Types for uniquely identifying a protein
                  // (an int paired with a string; going by the names, a database
                  // index plus an accession), with vector and set containers of it.
00194             typedef std::pair<int, std::string> dbIdxPlusAcc_t;
00195             typedef std::vector<dbIdxPlusAcc_t> dbIdxPlusAccVect_t;
00196             typedef std::set<dbIdxPlusAcc_t>    dbIdxPlusAccSet_t;
00197 
00198 
                  // Constructor. 'accession' is taken by value, and
                  // 'proteinSummaryHit' defaults to 0.
00200             ms_protein(const double score,
00201                        const std::string accession,
00202                        const bool updateScoreFromPepScores,
00203                        const int  proteinSummaryHit = 0);
00204 
                  // Copy constructor and destructor are user-declared; the
                  // definitions are not in this header.
00206             ms_protein(const ms_protein& src);
00207 
00209             ~ms_protein();
00210 
      // The assignment operator is hidden when SWIG is defined.
00211 #ifndef SWIG
00212 
00213             ms_protein& operator=(const ms_protein& right);
00214 #endif
00215 
                  // Copy from a protein supplied by pointer.
00217             void copyFrom(const ms_protein* src);
00218 
                  // Returns the accession by value (a std::string copy); see
                  // getAccessionStr() for a pointer-returning variant.
00220             std::string getAccession() const;
00221 
00223             int getDB() const;
00224 
00226             void setDB(int dbIdx);
00227 
                  // Three score accessors, each returning a double.
00229             double getScore()          const;
00230 
00232             double getNonMudpitScore()     const;
00233 
00235             double getScoreWithET() const;
00236 
00238             int    getNumPeptides()    const;
00239 
00241             int    getNumDisplayPeptides(bool aboveThreshold = false)    const;
00242 
00244             GROUP getGrouping() const;
00245 
      // The members down to the matching #endif are only declared when
      // DOXYGEN_SHOULD_SKIP_THIS is not defined; going by the macro's name, this
      // keeps them out of the generated documentation.
00246 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00247 
                  // Stores g in group_.
00248             void setGrouping(GROUP g)         { group_ = g;       }
00249 
                  // getForCache()/setFromCache() exchange the protein with a
                  // std::string form; the exact format is defined by the
                  // implementation, which is not visible here.
00251             std::string getForCache(dbIdxPlusAccVect_t & supersetProteinsUnsorted,
00252                                     dbIdxPlusAccVect_t & components) const;
00253 
00255             bool setFromCache(const std::string & str, ms_mascotresults & results,
00256                               const dbIdxPlusAccVect_t & supersetProteinsUnsorted,
00257                               const dbIdxPlusAccVect_t & components,
00258                               const std::string & cdbFeatures);
00259 
                  // Overload that returns the ignored (q, p) values as a vector
                  // of pairs.
00261             std::vector<std::pair<int, int> > getIgnoredQPs() const;
00262 
00264             bool isIgnoredQP(const int q, const int p) const;
00265 #endif
00266 
                  // Overload that writes its results into the two caller-supplied
                  // vectors.
00268             void getIgnoredQPs(std::vector<int> &q, std::vector<int> &p) const;
00269 
                  // Per-peptide accessors: 'pepNumber' selects the peptide within
                  // this protein. Where present, 'psmComponent' defaults to
                  // ms_peptide::PSM_COMPLETE.
                  // NOTE(review): the valid range of pepNumber (0- or 1-based) is
                  // not visible in this header - confirm against the implementation.
00271             int    getPeptideQuery         (const int   pepNumber) const;
00272 
00274             int    getPeptideP             (const int   pepNumber) const;
00275 
                  // Takes a (q, p) pair and returns an int; by its name, the
                  // inverse lookup of getPeptideQuery()/getPeptideP().
00277             int getPepNumber(const int q, const int p) const;
00278 
00280             int    getPeptideFrame         (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00281 
00283             long   getPeptideStart         (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00284 
00286             long   getPeptideEnd           (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00287 
00289             long   getPeptideMultiplicity  (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00290 
00292             DUPLICATE getPeptideDuplicate  (const int   pepNumber, const bool allowErrTolDuplicate = true) const;
00293 
00295             double getPeptideIonsScore     (const int   pepNumber) const;
00296             
00298             bool   getPeptideIsBold        (const int   pepNumber) const;
00299             
                  // Takes no value argument; presumably marks the peptide as
                  // bold - confirm.
00301             void   setPeptideIsBold        (const int   pepNumber);
00302 
00304             bool   getPeptideShowCheckbox  (const int   pepNumber) const;
00305             
                  // Takes no value argument; presumably turns the checkbox flag
                  // on - confirm.
00307             void   setPeptideShowCheckbox  (const int   pepNumber);
00308             
00310             int    getPeptideComponentID   (const int   pepNumber) const;
00311             
00313             char   getPeptideResidueBefore (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00314 
00316             char   getPeptideResidueAfter  (const int   pepNumber, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
00317 
                  // "Similar protein" interface. Note that isASimilarProtein() is
                  // not const, unlike the query functions that follow it.
00319             bool isASimilarProtein(const ms_protein       * prot, 
00320                                    const ms_mascotresults * results,
00321                                    const bool groupByQueryNumber = false);
00322 
00324             std::string getSimilarProteinName() const;
00325 
00327             int getSimilarProteinDB() const;
00328 
00330             bool isSimilarProtein(const std::string & acc, const int dbIdx) const;
00331 
                  // Writes into the two caller-supplied vectors and returns an
                  // int (presumably the number of entries - confirm).
00333             int getSimilarProteins(std::vector<std::string> & accessions, std::vector<int> & dbIdxs) const;
00334 
00336             void setSimilarProtein(const ms_protein * prot);
00337 
                  // Adds the peptide match identified by (q, p) to this protein.
                  // 'results' is taken by non-const reference.
00340             void addOnePeptide(      ms_mascotresults & results,
00341                                const int q, const int p,
00342                                const msparser_internal::ms_protein_match_data &proteinMatchData,
00343                                const double correctedScore,
00344                                const double uncorrectedScore,
00345                                const ms_protein * component,
00346                                const ms_peptide::SEARCH_PHASE searchPhase,
00347                                const bool isIgnored);
00348 
00349 
00351             long getCoverage() const;
00352 
00354             bool anyMatchToQuery(const int query) const;
00355 
00357             bool anyMatchToQueryAndP(const int query, const int P) const;
00358 
                  // Both mass-list functions return a std::string;
                  // 'numDecimalPlaces' defaults to 2.
00360             std::string getUnmatchedMasses(ms_mascotresfile & resfile,
00361                                            const int numDecimalPlaces = 2) const;
00362 
                  // 'flags' takes MASS_FLAGS values and defaults to
                  // MASS_SELECT_MATCH.
00364             std::string getMasses(ms_mascotresfile & resfile,
00365                                   const ms_proteinsummary & summary,
00366                                   const unsigned int flags = MASS_SELECT_MATCH,
00367                                   const int numDecimalPlaces = 2) const;
00368 
                  // Also used by operator< below as a tie-break.
00370             int getFrame() const;
00371 
00373             bool anyBoldRedPeptides(const ms_mascotresults & results) const;
00374 
                  // Unigene and PMF-mixture markers. Neither setter takes a
                  // value argument.
00376             bool isUnigene() const;
00377 
00379             void setIsUnigeneEntry();
00380 
00382             bool isPMFMixture() const;
00383 
00385             void setIsPMFMixture();
00386 
                  // Sorts the peptides of this protein. All keepAlive* arguments
                  // are optional; their effect is defined by the implementation,
                  // which is not visible here.
00388             void sortPeptides(const ms_mascotresults & results, bool keepAlive = false, int keepAlivePercent = 0, const char * keepAliveAccession = "", int keepAliveCount = 0);
00389 
                  // Component access; see the comment on components_ in the
                  // private section.
00391             int getNumComponents() const;
00392 
00394             const ms_protein * getComponent(const int componentNumber) const;
00395 
00397             int getProteinSummaryHit() const;
00398 
00400             double getRMSDeltas(const ms_mascotresults & results) const;
00401 
00403             int getHitNumber() const;
00404 
                  // Stores hit in hitNum_.
00411             void setHitNumber(const int hit) { hitNum_ = hit;}
00412 
00414             int getMemberNumber() const;
00415 
00417             int getLongestPeptideLen() const;
00418 
                  // Distinct-peptide interface. The three functions share the
                  // 'aboveThreshold' and 'flags' arguments, and the index
                  // arguments are 1-based as noted inline.
00420             int getNumDistinctPeptides(bool aboveThreshold = false,
00421                                        DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00422 
00424             int getNumDistinctPeptideRepeats(
00425                     int distinctIndex, // 1..getNumDistinctPeptides
00426                     bool aboveThreshold = false,
00427                     DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00428 
                  // Returns the ms_peptide by value.
00430             ms_peptide getDistinctPeptide(
00431                     int distinctIndex, // 1..getNumDistinctPeptides
00432                     int repeatIndex = 1, // 1..getNumDistinctPeptideRepeats
00433                     bool aboveThreshold = false,
00434                     DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE) const;
00435 
00437             int getLongestSigPeptideLen() const;
00438 
00440             int getNumObservedForEmPAI() const;
00441 #ifndef SWIG
00442 
00443 
                  // Ordering used by the pointer comparators declared after this
                  // class. Proteins are ordered by dbIdx_ first and by accession_
                  // second. When lhs.proteinSummaryHit_ is non-zero and the
                  // accessions are equal, getFrame() decides. Only lhs's
                  // proteinSummaryHit_ is examined, never rhs's.
00452             friend inline bool operator<(const ms_protein & lhs, const ms_protein & rhs) { 
00453                 if (lhs.dbIdx_ == rhs.dbIdx_) {
00454                     if ( lhs.proteinSummaryHit_ == 0 ) {
00455                         return lhs.accession_ < rhs.accession_;
00456                     } else { // i.e ms_proteinsummary - see parser bug 493
00457                         if ( lhs.accession_ == rhs.accession_) {
00458                             return lhs.getFrame() < rhs.getFrame();
00459                         } else {
00460                             return lhs.accession_ < rhs.accession_;
00461                         }
00462                     }
00463                 } else {
00464                     return lhs.dbIdx_ < rhs.dbIdx_; 
00465                 }
00466             }
00467 #endif
00468             // Undocumented function for fast access
                  // Returns accession_.c_str(), i.e. a pointer into storage owned
                  // by this object rather than a copy.
00469             const char * getAccessionStr() const { return accession_.c_str(); }
00470 
00471         private:
                  // Declared const although it takes no output argument; the
                  // distinct-peptide members below are mutable. Presumably it
                  // (re)builds distinctPeptideTree_ on demand - confirm.
00472             void initialiseDistinctPeptideTree(
00473                     bool  aboveThreshold,
00474                     DISTINCT_PEPTIDE_FLAGS flags) const;
00475 
00476             ms_errs* getErrorHandler() const;
00477 
00478             // --- Start of uncached variables
00479             mutable std::vector<msparser_internal::PEPINFO *> peptides_; // sort by query
00480             mutable std::vector<msparser_internal::PEPINFO *> ignoredPeptides_;
                  // Holds PEPINFO objects by value, whereas the two vectors above
                  // hold pointers (presumably into this vector - confirm).
00481             mutable std::vector<msparser_internal::PEPINFO>   allPeptides_;
00482 
00483             ms_mascotresults * results_;
00484 //          bool loadedFromCache_;
00485             // --- End of uncached variables
00486 
00487             // Start of all cached variables
                  // Bit set that is read and written through isFlagSet()/setFlag().
00488             unsigned char    flags_; // See FL_... not all bits are cached
00489 
                  // NOTE(review): the comment on the next line names
                  // loadedFromCache_, whose declaration is commented out above.
00490             int numPeptides_;  // Only used if loadedFromCache_ is true - otherwise peptides_.size();
                  // The counters below are mutable, so const member functions can
                  // update them.
00491             mutable int numDisplayPeptides_;
00492             mutable int numDisplayPeptidesAboveThresh_;
00493             mutable int numDistinctPeptides_;
00494             mutable int numDistinctPeptidesAboveThresh_;
00495             mutable int numDistinctUniquePeptides_;
00496             mutable int numDistinctUniqPepAboveThresh_;
00497             mutable int lenLongestPeptideAboveThresh_;
00498             mutable int numObservedForEmPAI_;
00499             mutable int frame_;
00500             mutable bool distinctPeptideAboveThreshold_;
00501             mutable DISTINCT_PEPTIDE_FLAGS distinctPeptideFlags_;
                  // A list of lists of ms_peptide pointers. Ownership of the
                  // pointed-to objects is not visible in this header.
00502             mutable std::list<std::list<ms_peptide*> > distinctPeptideTree_;
00503             dbIdxPlusAccSet_t  supersetProteins_;  // This one is filled when loading from cache
00504             dbIdxPlusAccVect_t supersetProteinsUnsorted_;
00505 
00506             // For unigene and PMF mixture, the protein is really a 'pseudo'
00507             // protein, made up from a number of 'real' proteins
00508             dbIdxPlusAccVect_t components_;
00509 
00510             std::string accession_;
00511             int dbIdx_;
00512             double score_;
00513             double nonMudPITScore_;
00514             double scoreWithET_;
00515             GROUP group_;
00516             int proteinSummaryHit_;
00517             int hitNum_;
00518             mutable int memberNum_;
00519             int longestPeptideLen_;         // Useful with minPepLenInPepSummary
00520             mutable long coverage_;
00521 //          bool pmfMixture_;               // True if protein actually originates from a PMF mixture
00522 //          bool sorted_;                   // Sorting the list of peptides is expensive - don't repeat...
00523 //          bool unigene_;                  // For unigene, we need to get the description line from the unigene file
00524 //          bool updateScoreFromPepScores_; // For protein summary, the protein score is calculated by
00525                                             // nph-mascot.exe, and is in the results file. For the
00526                                             // peptide summary, the score is calculated by adding the ions
00527                                             // scores
00528 
00529             // Functions
00530             void copyPeptidePointers(std::vector<msparser_internal::PEPINFO *> &pointersTo, const std::vector<msparser_internal::PEPINFO *> &pointersFrom, const ms_protein *src);
00531             void checkFromCache(const char * calledBy) const;
00532             void checkQPFromCache(const char * calledBy) const;
                  // True when at least one of the bits in fl is set in flags_.
00533             bool isFlagSet(unsigned char fl) const { return (flags_ & fl)?true:false; }
                  // Sets (val == true) or clears (val == false) the bits of fl in
                  // flags_.
00534             void setFlag(unsigned char fl, bool val) {
00535                 if (val) {
00536                     flags_ |= fl;
00537                 } else {
00538                     //the standard says that complement must promote to an int... there is no performance difference though
00539                     flags_ = static_cast<unsigned char>(flags_ & ~fl); 
00540                 }
00541             }
00542 
00543             static bool isVarModStrEmpty(const std::string &str);
00544 
                  // These two classes are given access to the private members.
00545             friend class prot_sort;
00546             friend class ms_pepinfoSortByScore;
00547     };
00548 #ifndef SWIG
00549     // Helper class - don't use from outside library
00550     class ms_proteinPtrSortByAccession {
              // Comparator over ms_protein pointers: dereferences both operands
              // and defers to ms_protein's operator<. Both pointers must be
              // non-null, since no null check is made.
00551     public:
00552         bool operator() (const ms_protein * p1, const ms_protein * p2) const
00553         {
00554             return *p1 < *p2;
00555         }
00556     };
00557 
00558     class ms_proteinPtrSortByScore {
              // Comparator over ms_protein pointers: the higher getScore() comes
              // first, and equal scores fall back to ms_protein's operator<.
              // Both pointers must be non-null, since no null check is made.
00559     public:
00560         bool operator() (const ms_protein * p1, const ms_protein * p2) const
00561         {
                  // Tie on score: let the protein ordering decide.
00562             if (p1->getScore() == p2->getScore()) {
00563                 return *p1 < *p2;
00564             }
                  // Otherwise the larger score sorts first.
00565             return p1->getScore() > p2->getScore();
00566         }
00567     };
00569 
00570 
00571     class ms_pepinfoSortByScore {
              // Comparator over PEPINFO pointers, configured by two booleans.
              // operator() is only declared here; its definition is elsewhere.
00572     public:
              // pairParam.first -> removeDiffPos_, pairParam.second -> anyLibraryMatches_.
00573         ms_pepinfoSortByScore(std::pair<bool, bool> pairParam)
00574             : removeDiffPos_(pairParam.first), anyLibraryMatches_(pairParam.second) { }
              // Member-wise copy of the two settings.
00575         ms_pepinfoSortByScore(const ms_pepinfoSortByScore& other)
00576             : removeDiffPos_(other.removeDiffPos_), anyLibraryMatches_(other.anyLibraryMatches_) { }
00577         bool operator() (const msparser_internal::PEPINFO * p1, const msparser_internal::PEPINFO * p2) const;
              // Member-wise assignment; self-assignment is a no-op.
00578         ms_pepinfoSortByScore& operator=(const ms_pepinfoSortByScore& other)
00579         {
00580             if (this == &other) { return *this; }
00581             removeDiffPos_ = other.removeDiffPos_;
00582             anyLibraryMatches_ = other.anyLibraryMatches_;
00583             return *this;
00584         }
00585     private:
00586         bool removeDiffPos_;
00587         bool anyLibraryMatches_;
00588     };
00590 
00591 #endif
00592  // end of resfile_group
00593 }   // matrix_science namespace
00594 
00595 #endif // MS_MASCOTRESPROTEIN_HPP
00596 
00597 /*------------------------------- End of File -------------------------------*/
00598 
00599 
00600 
00601 

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29