Matrix Science header

ms_mascotresults.hpp

00001 /*
00002 ##############################################################################
00003 # file: ms_mascotresults.hpp                                                 #
00004 # 'msparser' toolkit                                                         #
00005 # Abstract class for either ms_peptidesummary or ms_proteinsummary           #
00006 ##############################################################################
00007 # COPYRIGHT NOTICE                                                           #
00008 # Copyright 1998-2016 Matrix Science Limited  All Rights Reserved.           #
00009 #                                                                            #
00010 ##############################################################################
00011 #    $Archive:: /MowseBranches/ms_mascotresfile_1.2/include/ms_mascotresul $ #
00012 #     $Author: francoisr $ #
00013 #       $Date: 2017/03/31 10:23:24 $ #
00014 #   $Revision: 1.143 $ #
00015 # $NoKeywords::                                                            $ #
00016 ##############################################################################
00017 */
00018 
00019 #ifndef MS_MASCOTRESULTS_HPP
00020 #define MS_MASCOTRESULTS_HPP
00021 
00022 #ifdef _WIN32  // Windows: pick the linkage decoration that MS_MASCOTRESFILE_API expands to
00023 #pragma warning(disable:4251)   // Don't want all classes to be exported
00024 #pragma warning(disable:4786)   // Debug symbols too long
00025 #   ifndef _MATRIX_USE_STATIC_LIB
00026 #       ifdef MS_MASCOTRESFILE_EXPORTS
00027 #           define MS_MASCOTRESFILE_API __declspec(dllexport)  // MS_MASCOTRESFILE_EXPORTS defined: export
00028 #       else
00029 #           define MS_MASCOTRESFILE_API __declspec(dllimport)  // MS_MASCOTRESFILE_EXPORTS not defined: import
00030 #       endif  // MS_MASCOTRESFILE_EXPORTS
00031 #   else  // _MATRIX_USE_STATIC_LIB is defined
00032 #       define MS_MASCOTRESFILE_API  // expands to nothing
00033 #   endif  // _MATRIX_USE_STATIC_LIB
00034 #else  // not _WIN32
00035 #   define MS_MASCOTRESFILE_API  // expands to nothing off Windows
00036 #endif  // _WIN32
00037 
00038 // On Alpha Unix, <ctype.h> is included ahead of #include <string> (for the sake of <string>)
00039 #ifdef __ALPHA_UNIX__
00040 #include <ctype.h>
00041 #endif
00042 
00043 // Includes from the standard template library
00044 #include <string>
00045 #include <list>
00046 #include <vector>
00047 #include <set>
00048 #include <map>
00049 #include <algorithm>
00050 
00051 //#define DEBUG_LOGGING   // uncomment to make DEBUG_LOG produce output; otherwise it expands to nothing
00052 #ifdef DEBUG_LOGGING  // NOTE(review): the macros below use time()/localtime()/asctime(), std::cerr and std::ofstream, yet <ctime>, <iostream> and <fstream> are not among the includes visible above - confirm the including file supplies them
00053 //#   define DEBUG_LOGGING_USE_STD_ERR   // uncomment to write to std::cerr instead of DEBUG_LOG_FILENAME
00054 #   ifdef  DEBUG_LOGGING_USE_STD_ERR  // NOTE(review): both logging variants expand to a bare { } block, so "if (c) DEBUG_LOG(a,b,c); else ..." will not compile while logging is enabled
00055 #       define DEBUG_LOG(x,y,z) { \
00056                                     const time_t timeNow = time(NULL); \
00057                                     std::cerr << std::string(asctime(localtime(&timeNow)), 24)  << " :" << x << ", " << y << ", " << z << std::endl; \
00058                                 }  // 24 keeps the asctime() text and drops its trailing newline
00059 #   else  // DEBUG_LOGGING_USE_STD_ERR not defined: log to a file
00060 #       define DEBUG_LOG_FILENAME "/tmp/parser.log"  // opened in append mode by DEBUG_LOG below
00061 #       define DEBUG_LOG(x,y,z) { \
00062                                     const time_t timeNow = time(NULL); \
00063                                     std::ofstream LOG_STREAM(DEBUG_LOG_FILENAME, std::ios_base::app); \
00064                                     LOG_STREAM \
00065                                         << std::string(asctime(localtime(&timeNow)), 24) \
00066                                         << " : " << x << ", " << y << ", " << z << std::endl; \
00067                                 }  // the stream is a block-local object, so the file is opened on every expansion
00068 #   endif  // DEBUG_LOGGING_USE_STD_ERR
00069 #else  // DEBUG_LOGGING not defined
00070 #   define DEBUG_LOG(x, y, z)  // logging disabled: the arguments are dropped and never evaluated
00071 #endif  // DEBUG_LOGGING
00072 
00073 #ifdef __AIX__  // SCORE is an enumerator of ms_mascotresults::sortBy below; presumably an AIX system header defines a macro with that name - confirm
00074 #undef SCORE
00075 #endif  // __AIX__
00076 
00077 namespace msparser_internal {  // forward declarations only
00078     class ms_proteininference;  // named as a friend of ms_mascotresults below
00079     class ms_unassigned;
00080     class ms_peptidesumcdb;  // named as a friend of ms_mascotresults below
00081 }
00082 
00083 namespace matrix_science {
00089     class ms_unigene;  // forward declarations; NOTE(review): ms_peptide, ms_quant_method, ms_mascotresults_params and the nested ms_mascotresfile::section are used further down without a declaration visible here - presumably their headers must be included before this one
00090     class ms_protein;
00091     class ms_mascotresfile;
00092     class ms_quant_component;
00093 
00095     struct LexicoCompare  // Binary predicate over two std::string values that carries a case-sensitivity switch
00096     {
00097         bool caseSensitive_;  // set by the constructor; publicly accessible because this is a struct
00098         LexicoCompare(bool caseSensitive) : caseSensitive_(caseSensitive) { }  // not explicit, so a bool converts implicitly to LexicoCompare
00099         bool operator()(const std::string& left, const std::string& right ) const;  // only declared here; presumably true when left orders before right - confirm against the definition
00100     };
00101 
00103 
00120     class MS_MASCOTRESFILE_API ms_mascotresults
00121     {
00122         friend class ms_protein;  // friends get access to the non-public members of ms_mascotresults
00123         friend class msparser_internal::ms_proteininference;
00124         friend class msparser_internal::ms_peptidesumcdb;
00125 
00126         public:
00127 
00129 
00136             enum FLAGS  // Bit flags; presumably OR-ed together for the constructor's 'flags' argument - confirm
00137             { 
00138 
00139                 MSRES_NOFLAG                    = 0x00000000,  // no bits set
00140                 MSRES_GROUP_PROTEINS            = 0x00000001, 
00141                 MSRES_SHOW_SUBSETS              = 0x00000002, 
00142                 MSRES_SUBSETS_DIFF_PROT         = 0x00000004, 
00143                 MSRES_REQUIRE_BOLD_RED          = 0x00000008, 
00144                 MSRES_SHOW_ALL_FROM_ERR_TOL     = 0x00000010, 
00145                 MSRES_IGNORE_PMF_MIXTURE        = 0x00000020, 
00146                 MSRES_MUDPIT_PROTEIN_SCORE      = 0x00000040, 
00147                 MSRES_DECOY                     = 0x00000080, 
00148                 MSRES_INTEGRATED_ERR_TOL        = 0x00000100, 
00149                 MSRES_ERR_TOL                   = 0x00000200, 
00150                 MSRES_MAXHITS_OVERRIDES_MINPROB = 0x00000400, 
00151                 MSRES_CLUSTER_PROTEINS          = 0x00000800,  // note: bit 0x00001000 is not assigned in this enum
00152                 
00153                 MSRES_DUPE_INCL_IN_SCORE_NONE   = 0x00000000,  // same value as MSRES_NOFLAG
00154                 MSRES_DUPE_INCL_IN_SCORE_A      = 0x00002000, 
00155                 MSRES_DUPE_INCL_IN_SCORE_B      = 0x00004000, 
00156                 MSRES_DUPE_INCL_IN_SCORE_C      = 0x00008000, 
00157                 MSRES_DUPE_INCL_IN_SCORE_D      = 0x00010000, 
00158                 MSRES_DUPE_INCL_IN_SCORE_E      = 0x00020000, 
00159                 MSRES_DUPE_INCL_IN_SCORE_F      = 0x00040000, 
00160                 MSRES_DUPE_INCL_IN_SCORE_G      = 0x00080000, 
00161                 MSRES_DUPE_INCL_IN_SCORE_H      = 0x00100000, 
00162                 MSRES_DUPE_INCL_IN_SCORE_I      = 0x00200000, 
00163                 MSRES_DUPE_REMOVE_NONE          = 0x00400000,  // unlike MSRES_DUPE_INCL_IN_SCORE_NONE, this "NONE" has its own bit
00164                 MSRES_DUPE_REMOVE_A             = 0x00800000, 
00165                 MSRES_DUPE_REMOVE_B             = 0x01000000, 
00166                 MSRES_DUPE_REMOVE_C             = 0x02000000, 
00167                 MSRES_DUPE_REMOVE_D             = 0x04000000, 
00168                 MSRES_DUPE_REMOVE_E             = 0x08000000, 
00169                 MSRES_DUPE_REMOVE_F             = 0x10000000, 
00170                 MSRES_DUPE_REMOVE_G             = 0x20000000, 
00171                 MSRES_DUPE_REMOVE_H             = 0x40000000, 
00172     #ifndef SWIG // 'live' version for C++ compiler
00173                 MSRES_DUPE_REMOVE_I             = 0x80000000,  // top bit of a 32-bit word: does not fit in a signed 32-bit int
00174     #else // SWIG version
00175                 MSRES_DUPE_REMOVE_I             = unchecked((int)0x80000000),  // same bit pattern spelled as a cast to int for the wrapper generator
00176     #endif
00177                 MSRES_DUPE_DEFAULT              = 0x04800000  // == MSRES_DUPE_REMOVE_A | MSRES_DUPE_REMOVE_D
00178             };
00179 
00181 
00184             enum sortBy  // Argument type of createUnassignedList(), which defaults to QUERY
00185             {
00186                 QUERY,    // implicit value 0
00187                 SCORE,    // implicit value 1; the name is why SCORE is #undef'd on __AIX__ above
00188                 INTENSITY // implicit value 2
00189 
00190             };
00191 
00200             enum dupeStatus  // Single-bit values, so they can be combined in one int; presumably what fills the 'dupe_status' output of getAllFamilyMembersWithThisPepMatch() - confirm
00201             {
00202                 dupe_query_same = 0x0001, 
00203                 dupe_seq_same   = 0x0002, 
00204                 dupe_mods_same  = 0x0004, 
00205                 dupe_pos_same   = 0x0008  
00206             };
00207 
00209 
00212             enum TREE_CLUSTER_METHOD  // Consecutive values 1..3 (not bit flags); type of the 'tcm' argument of getTreeClusterNodes()
00213             {
00214                 TCM_FIRST_VALUE       = 0x0001, // For looping - undocumented
00215                 TCM_PAIRWISE_SINGLE   = 0x0001,  // same value as TCM_FIRST_VALUE
00216                 TCM_PAIRWISE_MAXIMUM  = 0x0002,  // the default used by getTreeClusterNodes()
00217                 TCM_PAIRWISE_AVERAGE  = 0x0003,  // same value as TCM_LAST_VALUE
00218 
00219                 TCM_LAST_VALUE        = 0x0003  // For looping - undocumented
00220             };
00221 
00223 
00230             enum FIND_FLAGS  // Single-bit selectors; type of the 'item' argument of findProteins() and findPeptides()
00231             {
00232                 FT_PEPTIDE_EXP_MZ        = 0x00000001, 
00233                 FT_PEPTIDE_EXP_MR        = 0x00000002, 
00234                 FT_PEPTIDE_CALC_MZ       = 0x00000004, 
00235                 FT_PEPTIDE_CALC_MR       = 0x00000008, 
00236                 FT_PEPTIDE_STRING        = 0x00000010, 
00237                 FT_PEPTIDE_QUERY         = 0x00000020, 
00238 
00239                 FT_PEPTIDE_VARMOD        = 0x00000040, 
00240                 FT_PEPTIDE_FIXMOD        = 0x00000080, 
00241                 FT_PEPTIDE_ETMOD         = 0x00000100, 
00242                 FT_PEPTIDE_SLMOD         = 0x00000200, 
00243                 FT_PEPTIDE_VARMOD_BYNAME = 0x00000400, 
00244 
00245                 FT_PEPTIDE_FIND_MASK     = 0x00000FFF,  // covers every FT_PEPTIDE_* bit above (0x800 is inside the mask but unassigned)
00246 
00247                 FT_PROTEIN_ACCESSION     = 0x00001000,  // the FT_PROTEIN_* bits lie outside FT_PEPTIDE_FIND_MASK
00248                 FT_PROTEIN_DESCRIPTION   = 0x00002000  
00249             };
00250 
00252 
00273             enum FIND_COMPARE_FLAGS  // Several fields packed into one value; each FC_MASK_* / *_MASK constant isolates one field
00274             {
00275                 FC_COMPLETESTR          = 0x00000001, 
00276                 FC_SUBSTR               = 0x00000002, 
00277                 FC_STARTSTR             = 0x00000003,  // not a separate bit: equals FC_COMPLETESTR | FC_SUBSTR, so compare this field by value under FC_MASK_STR_PART
00278                 FC_STRTOK               = 0x00000004, 
00279                 FC_MASK_STR_PART        = 0x0000000F,  // isolates the four values above
00280 
00281                 FC_CASE_INSENSITIVE     = 0x00000000,  // zero: what you get when FC_CASE_SENSITIVE is not set
00282                 FC_CASE_SENSITIVE       = 0x00000010, 
00283                 FC_MASK_CASE            = 0x000000F0, 
00284 
00285                 FC_FORWARD              = 0x00000000,  // zero: what you get when FC_REVERSE is not set
00286                 FC_REVERSE              = 0x00000100, 
00287                 FC_MASK_DIRECTION       = 0x00000F00, 
00288 
00289                 FC_RESTRICT_TO_HIT      = 0x00001000, 
00290                 FC_LOOP_INTO_UNASSIGNED = 0x00002000, 
00291                 FC_LOOP_FROM_UNASSIGNED = 0x00004000, 
00292                 FC_UNASSIGNED_MASK      = 0x00006000,  // == FC_LOOP_INTO_UNASSIGNED | FC_LOOP_FROM_UNASSIGNED (FC_RESTRICT_TO_HIT is not included)
00293 
00294                 FC_SEARCH_ALL_RANKS     = 0x00008000, 
00295 
00296                 FC_ALL_PEPTIDES         = 0x00000000, 
00297                 FC_ALL_PEPTITDES        = 0x00000000,  // misspelt duplicate of FC_ALL_PEPTIDES with the same value; presumably retained so existing callers keep compiling - confirm
00298 
00299                 FC_SIGNIFICANT_PEPTIDES = 0x00010000, 
00300                 FC_SEARCH_IGNORED_RANKS = 0x00020000, 
00301                 FC_SCORING_MASK         = 0x000F0000,  // covers FC_SIGNIFICANT_PEPTIDES and FC_SEARCH_IGNORED_RANKS
00302 
00303                 FC_PROTEIN_IGN_SAMESETS = 0x00100000, 
00304                 FC_PROTEIN_IGN_SUBSETS  = 0x00200000, 
00305                 FC_PROTEIN_IGN_FAMILY   = 0x00400000, 
00306                 FC_PROTEIN_IGN_MASK     = 0x00F00000,  // covers the three FC_PROTEIN_IGN_* bits
00307 
00308                 FC_PROTEIN_INC_ALT_ACC  = 0x01000000  
00309             };
00310 
00312 
00315             enum IONS_HISTOGRAM  // Type of the 'flags' argument of getIonsScoreHistogram()
00316             {
00317                 IH_INCLUDE_TOP_MATCHES           = 0x0000,  // the default used by getIonsScoreHistogram()
00318                 IH_INCLUDE_TOP_10_MATCHES        = 0x0001  
00319 /*              IH_INCLUDE_TOP_ERRTOL_MATCHES    = 0x0002, //!< Not yet implemented. Include top error tolerant match
00320                 IH_INCLUDE_TOP_10_ERRTOL_MATCHES = 0x0004, //!< Not yet implemented. Include top 10 error tolerant match
00321                 IH_INCLUDE_TOP_DECOY_MATCHES     = 0x0008, //!< Not yet implemented. Include top decoy match
00322                 IH_INCLUDE_TOP_10_DECOY_MATCHES  = 0x0010  //!< Not yet implemented. Include top 10 decoy matches */
00323             };
00324 
00326 
00331             enum UNIQUE_PEP_RULES  // Single-bit rules; type of the 'rules' argument of isPeptideUnique()
00332             {
00333                 UPR_WITHIN_FAMILY                = 0x0001, 
00334                 UPR_WITHIN_FAMILY_MEMBER         = 0x0002, 
00335                 UPR_IGNORE_SUBSET_PROTEINS       = 0x0004, 
00336 
00337                 UPR_DEFAULT                      = (UPR_WITHIN_FAMILY_MEMBER + UPR_IGNORE_SUBSET_PROTEINS)  // == 0x0006; '+' gives the same result as '|' here because the two bits are distinct
00338             };
00339 
00341 
00344             enum THRESHOLD_TYPE  // Type of the 'thresholdType' argument of getPeptideThreshold()
00345             {
00346                 TT_HOMOLOGY       = 0x0000, 
00347                 TT_IDENTITY       = 0x0001, 
00348                 TT_PEPSUM_DEFAULT = 0x0002  // the default used by getPeptideThreshold()
00349             };
00350 
00352 
00358             enum HOMOLOGY_THRESHOLD_SOURCE  // HOMTHR_RANKn has the numeric value n (1..20); the two non-rank sources use negative values
00359             {
00360                 HOMTHR_RANK1    = 1,
00361                 HOMTHR_RANK2    = 2,
00362                 HOMTHR_RANK3    = 3,
00363                 HOMTHR_RANK4    = 4,
00364                 HOMTHR_RANK5    = 5,
00365                 HOMTHR_RANK6    = 6,
00366                 HOMTHR_RANK7    = 7,
00367                 HOMTHR_RANK8    = 8,
00368                 HOMTHR_RANK9    = 9,
00369                 HOMTHR_RANK10   = 10,
00370                 HOMTHR_RANK11   = 11,
00371                 HOMTHR_RANK12   = 12,
00372                 HOMTHR_RANK13   = 13,
00373                 HOMTHR_RANK14   = 14,
00374                 HOMTHR_RANK15   = 15,
00375                 HOMTHR_RANK16   = 16,
00376                 HOMTHR_RANK17   = 17,
00377                 HOMTHR_RANK18   = 18,
00378                 HOMTHR_RANK19   = 19,
00379                 HOMTHR_RANK20   = 20,
00380 
00381                 HOMTHR_FASTA    = -1,  // negative, so it cannot collide with a rank number
00382                 HOMTHR_SL       = -2  // presumably "spectral library" (compare DM_SPECTRAL_LIBRARY) - confirm
00383             };
00384 
00386 
00389             enum QUANT_COMPONENT_STATUS  // Return type of getQuantitationComponentForPeptide(); values are implicit, 0..5 in declaration order
00390             {
00391                 QCS_OK,                         // 0
00392                 QCS_OK_NO_MATCH,                // 1
00393                 QCS_ERROR_NO_METHOD,            // 2
00394                 QCS_ERROR_NO_COMPONENTS,        // 3
00395                 QCS_ERROR_BAD_COMPONENT_NAME,   // 4
00396                 QCS_OK_MULTIPLE_MATCHES         // 5: an "OK" status numbered after the errors, so the OK codes are not a contiguous range
00397             };
00398 
00400             enum DECOY_STATS_COUNT_TYPE  // Type of the 'countType' argument of the getNum*HitsAbove*() functions
00401             {
00402                 DS_COUNT_PSM = 0,  // the default used by those functions
00403                 DS_COUNT_SEQUENCE = 1 
00404             };
00405 
00407             enum DB_MATCH_TYPE  // Type of the 'dbType' argument of the threshold, histogram and hit-count functions; values are consecutive, not bit flags
00408             {
00409                 DM_FASTA = 0,  // the default wherever a dbType argument is defaulted in this header
00410                 DM_SPECTRAL_LIBRARY = 1, 
00411                 DM_BOTH = 2  // no comma after the last enumerator: a trailing comma is ill-formed before C++11, and no other enum in this header has one
00412             };
00413 
00414         public:
00415 
00416             // Some useful types
00417             typedef std::set<ms_protein> proteinSet;  // ms_protein has operator< which compares accession and dbIdx
00418             typedef std::set<std::pair<std::string, int> > acc_dbidx_set_t;  // set of (string, int) pairs; going by the name, (accession, database index)
00419             typedef std::set<std::pair<int, int> > q_p_set_t;  // set of (int, int) pairs; going by the name, (q, p) as used by getPeptide(q, p)
00420             typedef std::vector<std::pair<std::string, int> > acc_dbidx_vect_t;  // vector counterpart of acc_dbidx_set_t
00421             typedef std::vector<std::pair<int, int> > q_p_vect_t;  // vector counterpart of q_p_set_t
00422 
00423 
00425             ms_mascotresults(ms_mascotresfile  &resfile,  // taken by non-const reference
00426                              const unsigned int flags,  // presumably a combination of FLAGS values, later readable through getFlags() - confirm
00427                              double             minProbability,
00428                              int                maxHitsToReport,
00429                              const char *       unigeneIndexFile,  // no default value: callers must always pass this argument
00430                              const char *       singleHit = 0);  // defaults to a null pointer
00431             virtual ~ms_mascotresults();  // virtual: the class has pure virtual members and is used as a base class
00432             
00434             const ms_mascotresfile & getResfile() const;  // read-only access to an ms_mascotresfile; presumably the one given to the constructor - confirm
00435 
00437             virtual bool createSummary();  // non-const and overridable; progress can be polled with getCreateSummaryProgress() and cancellation requested with cancelCreateSummary()
00438 #ifndef SWIG  // Can't / don't want to allow std::string * as OUTPUT in Java and C#
00439 
00440             bool getCreateSummaryProgress(int          * cspTotalPercentComplete,  // pointer-based overload, hidden from SWIG
00441                                           unsigned int * cspCurrTask, 
00442                                           int          * cspCurrTaskPercentageComplete, 
00443                                           std::string  * cspAccession, 
00444                                           int          * cspHit, 
00445                                           int          * cspQuery,
00446                                           std::string  * cspKeepAliveText) const;
00447 #endif
00448             bool getCreateSummaryProgress(int          * cspTotalPercentComplete,  // overload visible to SWIG: identical except that the two strings are passed by reference
00449                                           unsigned int * cspCurrTask, 
00450                                           int          * cspCurrTaskPercentageComplete, 
00451                                           std::string  & cspAccession, 
00452                                           int          * cspHit, 
00453                                           int          * cspQuery,
00454                                           std::string  & cspKeepAliveText) const {
00455                                               return getCreateSummaryProgress(cspTotalPercentComplete, cspCurrTask, cspCurrTaskPercentageComplete, &cspAccession, cspHit, cspQuery, &cspKeepAliveText);  // forwards to the pointer overload, passing the addresses of the two strings
00456             }
00457 
00459             void cancelCreateSummary(bool newValue = true);  // non-virtual; called without an argument it passes true
00460 
00462             virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;  // returns a non-const ms_protein pointer from a const method; ownership is not expressed in the signature
00463 
00465             virtual void freeHit(const int hit);  // non-const
00466 
00468             virtual int getNumberOfHits() const;
00469 
00471             virtual int getNumberOfFamilyMembers() const;
00472 
00474             std::string getProteinDescription(const char * accession, const int dbIdx = 1) const;  // non-virtual; dbIdx defaults to 1 here and in the other accession-based lookups below
00475 
00477             double getProteinMass(const char * accession, const int dbIdx = 1) const;  // non-virtual
00478 
00480             virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;  // length defaults to -1
00481 
00483             bool isEmPAIallowed() const;  // non-virtual
00484 
00486             virtual bool anyEmPAI() const;
00487 
00489             double getSequenceMass(const char * seq) const;  // non-virtual
00490 
00492             void getProteinTaxonomyIDs(const char * accession, const int dbIdx,  // dbIdx has no default here, unlike its neighbours
00493                                        std::vector<int> & gpt_ids, std::vector<std::string> & gpt_accessions) const;  // results come back through the two non-const vector references
00494 
00496             virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;  // returns a pointer to const ms_protein
00497 
00499             virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;  // same signature shape as getProtein()
00500 
00502             virtual ms_protein * getNextSimilarProtein(const int masterHit, const int id) const;  // master identified by hit number
00503 
00505             virtual ms_protein * getNextSimilarProteinOf(const char * masterAccession, const int masterDB, const int id) const;  // "Of" variant: master identified by accession plus database number
00506 
00508             virtual ms_protein * getNextFamilyProtein(const int masterHit, const int id) const;  // has no accession-based "Of" counterpart in this part of the header
00509 
00510 
00512             virtual ms_protein * getNextSubsetProtein(const int masterHit, const int id,
00513                                                       const bool searchWholeFamily = true) const;  // the only function in this group with an extra defaulted switch
00514 
00516             virtual ms_protein * getNextSubsetProteinOf(const char * masterAccession, const int masterDB, const int id) const;  // "Of" variant; takes no searchWholeFamily argument
00517 
00519             virtual ms_peptide getPeptide(const int q, const int p) const = 0;  // pure virtual; returns the ms_peptide by value
00520 
00522             virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const = 0;  // pure virtual overload; 'pep' is a reference to a pointer, so the callee can repoint the caller's pointer
00523 
00525             virtual double getIonsScore(const int q, const int p, const bool decoy) const = 0;  // pure virtual
00526 
00528             virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const = 0;  // pure virtual; the default argument is bound by the static type used at the call site
00529 
00531             virtual int getQmatch(const int query) const;
00532 
00534             virtual int getQmatch(const int query, const ms_mascotresfile::section sec) const;  // overload taking a results-file section; needs the full ms_mascotresfile definition for the nested type
00535 
00537             virtual int getPeptideIdentityThreshold(const int query, double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;  // per-query; returns an int
00538 
00540             virtual int getAvePeptideIdentityThreshold(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;  // no query argument
00541 
00543             virtual int getMaxPeptideIdentityThreshold(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;  // no query argument
00544 
00546             double getPeptideThreshold(const int query, double OneInXprobRnd, const int rank=1, const THRESHOLD_TYPE thresholdType=TT_PEPSUM_DEFAULT) const;  // non-virtual and returns double, unlike the int-returning threshold functions above
00547 
00548             enum ERROR_TOLERANT_PEPTIDE { ETPEP_YES, ETPEP_NO, ETPEP_UNKNOWN };  // implicit values 0, 1, 2; type of the 'etPep' argument below
00549 #ifndef SWIG
00550             double getPeptideThresholdProtected(const int query, double OneInXprobRnd, const int rank,  // hidden from SWIG; despite the name it is declared in the public section, and none of its arguments has a default
00551                                                 const ERROR_TOLERANT_PEPTIDE etPep,
00552                                                 const ms_mascotresfile::section secSummary,
00553                                                 const THRESHOLD_TYPE thresholdType,
00554                                                 const DB_MATCH_TYPE dbType) const;
00555 #endif
00556 
00558             virtual int getProteinScoreCutoff(double OneInXprobRnd) const = 0;  // pure virtual
00559 
00561             virtual int getProteinThreshold(double OneInXprobRnd) const;
00562 
00564             virtual int getHomologyThreshold(const int query,
00565                                              double OneInXprobRnd,
00566                                              const int rank=1) const;  // rank defaults to 1
00567 
00569             virtual int getHomologyThresholdForHistogram(double OneInXprobRnd, DB_MATCH_TYPE dbType = DM_FASTA) const;
00570 
00572             virtual int getProbFromScore(const double score) const;  // takes a double score, returns an int
00573 
00575             virtual double getPeptideExpectationValue(const double score, 
00576                                                       const int query,
00577                                                       const int rank = 0) const;  // rank defaults to 0 here, whereas getHomologyThreshold() defaults it to 1
00578 
00580             virtual double getProbOfPepBeingRandomMatch(const double score, 
00581                                                         const int query) const;
00582 
00584             virtual double getProteinExpectationValue(const double score) const;
00585 
00587             virtual double getProbOfProteinBeingRandomMatch(const double score) const;
00588 
00590             virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false) = 0;  // pure virtual and, unlike its neighbours, NOT const
00591 
00593             virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,  // pure virtual; the returned vector comes with seven output vectors passed by non-const reference
00594                                                                             std::vector<int> & start, 
00595                                                                             std::vector<int> & end,
00596                                                                             std::vector<std::string> &pre,
00597                                                                             std::vector<std::string> &post,
00598                                                                             std::vector<int> & frame,
00599                                                                             std::vector<int> & multiplicity,
00600                                                                             std::vector<int> & db) const = 0;  // presumably all vectors are parallel, one entry per protein - confirm
00601 
00603             virtual int getAllFamilyMembersWithThisPepMatch(const int hit,  // pure virtual; returns an int and fills three output vectors
00604                                                             const int q,
00605                                                             const int p,
00606                                                             std::vector< int >& db,
00607                                                             std::vector< std::string >& acc,
00608                                                             std::vector< int >& dupe_status) const = 0;  // presumably dupe_status holds combinations of dupeStatus bits - confirm
00609 
00610 
00612             virtual std::string getErrTolModString(const int q, const int p) const = 0;  // the six string getters from here to getErrTolModReqPepString() are all pure virtual
00613 
00615             virtual std::string getLibraryModString(const int q, const int p) const = 0;
00616 
00618             virtual std::string getErrTolModMasterString(const int q, const int p) const = 0;
00619 
00621             virtual std::string getErrTolModSlaveString(const int q, const int p) const = 0;
00622 
00624             virtual std::string getErrTolModPepString(const int q, const int p) const = 0;
00625 
00627             virtual std::string getErrTolModReqPepString(const int q, const int p) const = 0;
00628 
00630             virtual std::string getErrTolModName(const int q, const int p, std::string * modString = NULL) const;  // virtual but not pure; the pointer argument is optional (defaults to NULL)
00631 
00633             virtual double getErrTolModDelta(const int q, const int p, std::string * modString = NULL, std::string * deltaAsString = NULL) const;  // both pointer arguments are optional (default NULL)
00634 
00636             virtual double getErrTolModNeutralLoss(const int q, const int p) const;  // returns a single double
00637 
00639             virtual std::vector<double> getErrTolModMasterNeutralLoss(const int q, const int p) const;  // the Master/Slave/Pep/ReqPep variants return a vector of doubles
00640 
00642             virtual std::vector<double> getErrTolModSlaveNeutralLoss(const int q, const int p) const;
00643 
00645             virtual std::vector<double> getErrTolModPepNeutralLoss(const int q, const int p) const;
00646 
00648             virtual std::vector<double> getErrTolModReqPepNeutralLoss(const int q, const int p) const;
00649 
00651             std::string getVarModsForQP(const int q, const int p, const ms_peptide & pep,  // non-virtual; takes the peptide in addition to (q, p)
00652                                         int * numETMods = NULL, int * numLibraryMods = NULL,  // optional outputs (default NULL)
00653                                         std::set<std::string> * listMods = NULL) const;  // optional output (default NULL)
00654 
00656             std::string getLocalModsForQP(const int q, const int p, const ms_peptide & pep, std::set<std::string> * listMods = NULL) const;  // non-virtual
00657 
00659             std::string getLibraryModsForQP(const int q, const int p, const ms_peptide & pep, std::set<std::string> * listMods = NULL) const;  // non-virtual
00660 
00662             virtual std::string getReadableVarMods(const int q, const int p,
00663                                                    const int numDecimalPlaces=2) const;  // numDecimalPlaces defaults to 2
00664 
00666             virtual bool anyNumDiscoveredMods() const;
00667 
00669             virtual std::vector<int> getNumDiscoveredFixedMods(const int num, std::vector<std::string> &positions, std::vector<std::string> &sites) const;  // modification selected by number; 'positions' and 'sites' are output vectors
00670 
00672             virtual std::vector<int> getNumDiscoveredVariableMods(const int num, std::vector<std::string> &positions, std::vector<std::string> &sites) const;  // same shape as the fixed-mod variant
00673 
00675             virtual std::vector<std::string> getDiscoveredLocalModNames() const;
00676 
00678             virtual std::vector<int> getNumDiscoveredLocalMods(const std::string modName, std::vector<std::string> &positions, std::vector<std::string> &sites) const;  // modification selected by name; modName is passed by value, so each call copies the string
00679 
00681             virtual std::vector<std::string> getDiscoveredErrTolModNames() const;
00682 
00684             virtual std::vector<double> getDiscoveredErrTolModDeltas(std::vector<std::string> * vecDeltaStrings = NULL) const;  // optional output vector (default NULL)
00685 
00687             virtual std::vector<int> getNumDiscoveredErrTolMods(const std::string modName, std::vector<std::string> &positions, std::vector<std::string> &sites) const;  // selected by name; modName passed by value
00688 
00690             std::vector<int> getNumDiscoveredLibraryMods(const int modId, std::vector<std::string> &positions, std::vector<std::string> &sites) const;  // the only non-virtual member of this group
00691 
00693             virtual int getNumDiscoveredNonSpecCleavage() const;
00694 
00696             virtual std::string getTagString(const int q, const int p) const = 0;  // pure virtual
00697 
00699             virtual int getTagStart(const int q, const int p, const int tagNumber) const;  // virtual with a base declaration (not pure); takes a tag number in addition to (q, p)
00700 
00702             virtual int getTagEnd(const int q, const int p, const int tagNumber) const;
00703 
00705             virtual int getTagSeries(const int q, const int p, const int tagNumber) const;
00706 
00708             virtual int getTagDeltaRangeStart(const int q, const int p) const = 0;  // pure virtual
00709 
00711             virtual int getTagDeltaRangeEnd(const int q, const int p) const = 0;  // pure virtual
00712 
00714             virtual std::string getTerminalResiduesString(const int q, const int p) const = 0;  // pure virtual
00715 
00717             virtual std::string getComponentString(const int q, const int p) const = 0;  // pure virtual
00718 
00720             virtual int getMaxRankValue() const;
00721 
00723             virtual std::vector<int> getIonsScoreHistogram(IONS_HISTOGRAM flags = IH_INCLUDE_TOP_MATCHES, DB_MATCH_TYPE dbType = DM_FASTA) const;  // both arguments are defaulted
00724 
00726             double getToleranceInDalton(bool & needMass, const double * const pMass = NULL) const;  // 'needMass' is an output reference; 'pMass' is an optional pointer to a mass (default NULL)
00727 
00729             bool createUnassignedList(sortBy s  = QUERY);  // non-const; sort order defaults to QUERY
00730 
00732             int getNumberOfUnassigned() const;
00733 
00735             ms_peptide getUnassigned(const int num) const;  // returns the ms_peptide by value
00736 
00738             bool getUnassignedIsBold(const int num) const;
00739 
00741             bool getUnassignedShowCheckbox(const int num) const;
00742 
00744             ms_mascotresults_params getResultsParameters();  // non-const; returns the parameters object by value
00745 
00747             virtual bool getTreeClusterNodes(const int hit,
00748                                              std::vector<int>    &left,  // output
00749                                              std::vector<int>    &right,  // output
00750                                              std::vector<double> &distance,  // output
00751                                              TREE_CLUSTER_METHOD  tcm = TCM_PAIRWISE_MAXIMUM,
00752                                              double           *** reserved1 = 0,  // reserved; defaults to a null pointer
00753                                              unsigned int       * reserved2 = 0) const;  // reserved; defaults to a null pointer
00754 
00756             virtual int findProteins(const int startHit,  // pure virtual; matches come back in the two output vectors and an int is returned
00757                                      const std::string & str, 
00758                                      const int dbIdx,
00759                                      FIND_FLAGS item,
00760                                      FIND_COMPARE_FLAGS compareFlags,
00761                                      std::vector<std::string> & accessions,
00762                                      std::vector<int> & dbIndexes) const = 0;
00763 
00765             virtual int findProteinsByAccession(const int startHit,  // pure virtual; like findProteins() but without the 'item' selector
00766                                                 const std::string & str, 
00767                                                 const int dbIdx,
00768                                                 FIND_COMPARE_FLAGS compareFlags,
00769                                                 std::vector<std::string> & accessions,
00770                                                 std::vector<int> & dbIndexes) const = 0;
00771 
00773             virtual int findProteinsByDescription(const int startHit,  // pure virtual; takes neither 'item' nor 'dbIdx'
00774                                                   const std::string & str, 
00775                                                   FIND_COMPARE_FLAGS compareFlags,
00776                                                   std::vector<std::string> & accessions,
00777                                                   std::vector<int> & dbIndexes) const = 0;
00778 
00780             virtual int findPeptides(const int startHit,  // pure virtual; matches come back as the output vectors q and p
00781                                      const std::string & str, 
00782                                      FIND_FLAGS item,
00783                                      FIND_COMPARE_FLAGS compareFlags,
00784                                      std::vector<int> & q,
00785                                      std::vector<int> & p) const = 0;
00786 
00788             virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide(  // pure virtual; the status is the return value
00789                     const matrix_science::ms_peptide & peptide,
00790                     matrix_science::ms_quant_component & component,  // non-const reference: the output
00791                     const matrix_science::ms_quant_method * method = NULL) const = 0;  // optional quantitation method (default NULL)
00792 
00793 #ifndef SWIG
00794             void addProtein(const std::string & accession,  // hidden from SWIG; non-const
00795                             const int dbIdx,
00796                             const int frame,
00797                             const long start, const long end,  // long here, while getAllProteinsWithThisPepMatch() reports start/end as int
00798                             const long multiplicity,
00799                             const int q, const int p,
00800                             const double score,
00801                             const double uncorrectedScore,
00802                             const char residueBefore,
00803                             const char residueAfter,
00804                             const ms_protein * component = 0,  // optional (null by default)
00805                             const ms_peptide::SEARCH_PHASE searchPhase = ms_peptide::SEARCH_PHASE_PRIMARY,  // default argument requires the full ms_peptide definition
00806                             const bool isUnigene = false,
00807                             const bool isIgnored = false);
00808 
00809             const ms_protein * addComponentProtein(const std::string & accession,  // same parameter list as addProtein() minus isUnigene; returns a pointer to const ms_protein
00810                                                    const int dbIdx,
00811                                                    const int frame,
00812                                                    const long start, 
00813                                                    const long end, 
00814                                                    const long multiplicity,
00815                                                    const int q, const int p,
00816                                                    const double score,
00817                                                    const double uncorrectedScore,
00818                                                    const char residueBefore,
00819                                                    const char residueAfter,
00820                                                    const ms_protein * component = 0,
00821                                                    const ms_peptide::SEARCH_PHASE searchPhase = ms_peptide::SEARCH_PHASE_PRIMARY,
00822                                                    const bool isIgnored = false);
00823 #endif
00824 
00825             virtual double getProteinScoreForHistogram(const int num) const;
                  //!< Virtual but not pure: a base implementation exists outside this header and
                  //!< derived classes may override it. NOTE(review): num is presumably an index
                  //!< into the stored top scores (see top50Scores_) - confirm.
00826 
00828             bool isNA() const;
                  //!< NOTE(review): presumably reports whether the search was against a nucleic
                  //!< acid database (cf. the nucleicAcid_ member) - confirm in the .cpp.
00829 
00831 
00834             unsigned int getFlags() const { return flags_; }
                  //!< Returns the stored flags_ value unchanged.
00835 
00837 
00841             unsigned int getFlags2() const { return flags2_; }
                  //!< Returns the stored flags2_ value unchanged.
00842 
00844             int getMinPepLenInPepSummary() const;
                  //!< Defined outside this header; presumably returns minPepLenInPepSummary_.
00845 
00847             virtual long getNumHitsAboveIdentity(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
                  // The four counters below share one signature: a OneInXprobRnd value, an
                  // optional count type (default DS_COUNT_PSM) and an optional database match
                  // type (default DM_FASTA). By name they cover target vs. decoy hits, above
                  // the identity vs. the homology threshold.
                  // NOTE(review): they are non-const, presumably because the decoy statistics
                  // are computed on demand (see calculateDecoyStats) - confirm in the .cpp.
00848 
00850             virtual long getNumDecoyHitsAboveIdentity(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00851 
00853             virtual long getNumHitsAboveHomology(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00854 
00856             virtual long getNumDecoyHitsAboveHomology(double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType = DS_COUNT_PSM, DB_MATCH_TYPE dbType = DM_FASTA);
00857 
00859             virtual void setSubsetsThreshold(const double scoreFraction);
                  //!< Virtual setter taking a score fraction; presumably stored in
                  //!< subsetsScoreFraction_ - TODO confirm in the .cpp.
00860 
00862             virtual double getProbabilityThreshold() const;
                  //!< Virtual, defined outside this header. NOTE(review): presumably reports
                  //!< minProbability_ - confirm.
00863 
00864 #if defined(SWIGPERL) || defined(SWIGPYTHON)
                  // Two overloads: a short form, and one that also takes the count type and the
                  // database match type. closestFDR and minProbability are pointer arguments
                  // (presumably outputs). In the non-SWIG-Perl/Python branch the two match-count
                  // pointers default to 0, i.e. callers may omit them.
00865             // Don't supply default parameters for perl or python because these are defined as output parameters in msparser.i
00866             bool getThresholdForFDRAboveIdentity(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches, int * numDecoyMatches);
00867             bool getThresholdForFDRAboveIdentity(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches, int * numDecoyMatches);
00868 #else
00869 
00870             bool getThresholdForFDRAboveIdentity(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00872             bool getThresholdForFDRAboveIdentity(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00873 #endif
00874 
00875 #if defined(SWIGPERL) || defined(SWIGPYTHON)
                  // Homology-threshold counterpart of getThresholdForFDRAboveIdentity(), with the
                  // same two overloads and the same parameter lists. Default values for the two
                  // match-count pointers (0) exist only in the non-SWIG-Perl/Python branch.
00876             // Don't supply default parameters for perl or python because these are defined as output parameters in msparser.i
00877             bool getThresholdForFDRAboveHomology(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches, int * numDecoyMatches);
00878             bool getThresholdForFDRAboveHomology(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches, int * numDecoyMatches);
00879 #else
00880 
00881             bool getThresholdForFDRAboveHomology(double targetFDR, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00883             bool getThresholdForFDRAboveHomology(double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int * numTargetMatches = 0, int * numDecoyMatches = 0);
00884 #endif
00885 
00886             virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const = 0;
                  //!< Pure virtual. Given a query q and rank p, returns a vector of ints by value;
                  //!< NOTE(review): presumably the ranks within q that share the score - confirm.
00887 
00889             virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const = 0;
                  //!< Pure virtual. Looks up the named component for the peptide at (q, p);
                  //!< value and rawValue are non-const references, presumably outputs, and the
                  //!< bool presumably signals whether they were set - TODO confirm.
00890 
00909             bool queryRemoveThisPeptide(const unsigned short dupeFlags) const { return dupeRemoveIDs_.count(dupeFlags) != 0; }
                  //!< True when dupeFlags is a member of the dupeRemoveIDs_ set.
00910 
00929             bool queryScoreThisPeptide(const unsigned short dupeFlags) const { return dupeIncludeInScoreIDs_.count(dupeFlags) != 0; }
                  //!< True when dupeFlags is a member of the dupeIncludeInScoreIDs_ set.
00930 
00932             virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated=true) { return false; }
                  //!< Base implementation ignores both arguments and always returns false;
                  //!< it is virtual so that a derived class can supply a real implementation.
00933 
00935             virtual bool isValidQandP(const int q, const int p) const = 0;
                  //!< Pure virtual validity check for a (query, rank) pair; the exact rules are
                  //!< defined by the implementing class.
00936 
00938             std::string lookupParsedLocalModsStr(const int q, const int p, const std::string localModsStr) const;
                  //!< Overload keyed by query q and rank p. localModsStr is taken by value.
                  //!< NOTE(review): presumably served from the private parsed-local-mods caches
                  //!< (pepParsedLocalMods_ / parsedLocalMods_) - confirm in the .cpp.
00939 
00941             std::string lookupParsedLocalModsStr(const ms_peptide *thisOnePep) const;
                  //!< Overload taking the peptide object itself instead of (q, p, string).
00942             
00943         protected:
00944             // Not safe to copy or assign this object.
00945 #ifndef SWIG
00946             ms_mascotresults(const ms_mascotresults & rhs);
00947             ms_mascotresults & operator=(const ms_mascotresults & rhs);
                  // Copy construction and copy assignment are declared in the protected section,
                  // so code outside the class hierarchy cannot copy or assign these objects.
                  // Whether definitions exist is not visible from this header.
00948 
00949             virtual bool getThresholdForFDR(bool homology, double targetFDR, 
00950                     DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType,
00951                     double * closestFDR, double * minProbability,
00952                     int * pNumTargetMatches, int * pNumDecoyMatches) = 0;
                  //!< Pure virtual worker with the union of the public getThresholdForFDRAbove*
                  //!< parameters plus a homology switch. NOTE(review): presumably the public
                  //!< Identity/Homology functions forward here - confirm in the .cpp.
00953             virtual void calculateDecoyStats(double dOneInXprobRnd) = 0;
                  //!< Pure virtual; derived classes compute the decoy statistics for the given
                  //!< dOneInXprobRnd. NOTE(review): presumably fills decoyStats_ and sets
                  //!< bDecoyStatsCalculated_ - confirm in the implementations.
00954 
00955             virtual double getPeptideExpectationValueProtected(const double score, 
00956                                                                const int query,
00957                                                                const ms_mascotresfile::section summary_section,
00958                                                                const THRESHOLD_TYPE thresholdType,
00959                                                                const DB_MATCH_TYPE dbType) const;
                  //!< Protected, overridable helper: maps a score for one query to a double,
                  //!< parameterised by results-file section, threshold type and database match
                  //!< type. By name, the result is an expectation value.
00960             virtual double getQplughole(const int query, const ms_mascotresfile::section sec) const;
                  //!< Protected, overridable lookup of the "qplughole" value for a query in the
                  //!< given section. NOTE(review): presumably backed by cachedQPlughole_.
00961             virtual ms_mascotresfile::section getSrcSectionProtected(const int query, const int rank) const;
                  //!< Protected, overridable: returns the results-file section associated with
                  //!< the (query, rank) pair.
00962 
00963 
00964             enum QMATCH_PLUGHOLE_INDEX_SECTIONS {
                  // Dense 0-based indices, one per summary section kind. QMATCH_PLUGHOLE_SEC_LAST
                  // is the count (4) and is used as the array size of the private qmatch /
                  // plughole caches declared later in this class.
00965                 QMATCH_PLUGHOLE_SEC_SUMMARY       = 0,
00966                 QMATCH_PLUGHOLE_SEC_DECOYSUMMARY  = 1,
00967                 QMATCH_PLUGHOLE_SEC_ERRTOLSUMMARY = 2,
00968                 QMATCH_PLUGHOLE_SEC_LIBRARYSUMMARY= 3,
00969                 QMATCH_PLUGHOLE_SEC_LAST          = 4
00970             };
00971             virtual void getQmatchValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const {} ;
                  //!< Base implementation is an empty body (does nothing); overridable hook.
00972             virtual void getPlugholeValuesFromCacheFile(const QMATCH_PLUGHOLE_INDEX_SECTIONS index) const {};
                  //!< Base implementation is an empty body (does nothing); overridable hook.
00973 
00974 #endif
00975 
00976             ms_mascotresfile &resfile_;  //!< Results file this object is bound to; checkCreated() records errors on it.
00977             int   numQueries_;           //!< Query count; peptides_ is indexed with q + (p * num queries).
00978             double tolFactor_;           //!< NOTE(review): meaning not evident from this header.
00979 
00980             proteinSet proteins_;        //!< Protein collection; presumably filled through addProtein() - confirm.
00981 
00982             // For unigene, the original proteins are not saved in proteins_
00983             proteinSet componentProteins_;
00984 
00985             // - Not documented - Get the 'corrected' ions score given multiplicity
                  // Const helper: takes the raw ions score and the multiplicity and returns a
                  // double. The correction formula lives in the .cpp and is not visible here.
00986             double getIonsScoreCorrected(const double ionsScore, 
00987                                          const long   multiplicity) const;
00988 
00989             unsigned int      flags_;                  //!< Returned by getFlags().
00990             unsigned int      flags2_;                 //!< Returned by getFlags2().
00991             double            minProbability_;
00992             int               maxHitsToReport_;
00993             std::string       unigeneIndexFile_;
00994             ms_unigene      * unigene_;                //!< NOTE(review): ownership not visible in this header.
00995             bool              tooOld_;                 //!< NOTE(review): meaning not evident from this header.
00996             int               minPepLenInPepSummary_;
00997             std::string       singleHit_;
00998             double            ignoreIonsScoreBelow_;
                  // The settings above share their names with the ms_mascotresults_params
                  // members; presumably they hold the same options - TODO confirm.
00999 
01000             int               numDatabases_;
01001 
01002             // The elements in the vector of peptides are accessed by
01003             // q + (p * num queries)
01004             std::vector<ms_peptide *> peptides_;
01005 
01006             double slScoreStats_[4];
                  // Named views onto the four slots of slScoreStats_:
                  //   [0] Mascot mean, [1] Mascot stdev, [2] Library mean, [3] Library stdev.
                  // The non-const overloads return a writable reference to the slot; the const
                  // overloads return a read-only reference to the same slot.
01007             double& slScoreStatsMascotMean() { return slScoreStats_[0]; }
01008             double& slScoreStatsMascotStdev() { return slScoreStats_[1]; }
01009             double& slScoreStatsLibraryMean() { return slScoreStats_[2]; }
01010             double& slScoreStatsLibraryStdev() { return slScoreStats_[3]; }
01011             const double& slScoreStatsMascotMean() const { return slScoreStats_[0]; }
01012             const double& slScoreStatsMascotStdev() const { return slScoreStats_[1]; }
01013             const double& slScoreStatsLibraryMean() const { return slScoreStats_[2]; }
01014             const double& slScoreStatsLibraryStdev() const { return slScoreStats_[3]; }
01015 
01017             virtual double getPepIdentThreshProtected(const int query, 
01018                                                       double OneInXprobRnd,
01019                                                       ms_mascotresfile::section sec,
01020                                                       DB_MATCH_TYPE dbType,
01021                                                       double * pQmatch = 0) const;
                  //!< Protected, overridable. By name, yields the peptide identity threshold for a
                  //!< query in the given section. pQmatch is an optional pointer (default 0),
                  //!< presumably receiving the qmatch value used - TODO confirm.
01022             virtual double getHomologyThreshProtected(const int query,
01023                                                       double OneInXprobRnd,
01024                                                       ms_mascotresfile::section sec,
01025                                                       const int rank=1,
01026                                                       const ERROR_TOLERANT_PEPTIDE etPep = ETPEP_UNKNOWN ) const;
                  //!< Protected, overridable homology-threshold counterpart. rank defaults to 1
                  //!< and etPep to ETPEP_UNKNOWN.
01027 
01028             inline bool checkCreated(const char * funcname, unsigned int t) const {
01029                 // t is a mask of COMPLETED_TASKS bits; any overlap with completedTasks_ counts as done.
01030                 const bool taskCompleted = (completedTasks_ & t) != 0;
01031                 if (!taskCompleted) {
01032                     resfile_.setError(ms_mascotresfile::ERR_RESULTS_NOT_CREATED, funcname);
01033                 }
01034                 return taskCompleted;
01035             }
01036 
01037 
01038             msparser_internal::ms_unassigned * unassigned_;  //!< NOTE(review): ownership not visible in this header.
01039 
01040             double top50Scores_[50];                         //!< Fixed array of 50 scores; presumably the top protein scores - confirm.
01041             std::set<unsigned short>dupeRemoveIDs_;          //!< Membership is tested by queryRemoveThisPeptide().
01042             std::set<unsigned short>dupeIncludeInScoreIDs_;  //!< Membership is tested by queryScoreThisPeptide().
01043 
01044             bool bDecoyStatsCalculated_;
01045             double dOneInXprobRndForDecoy_;
01046             // Array size comes from one plus a sum of the maximum level
01047             // of each factor.
                  // Worked out: 1 + (1 + 2 + 8 + 16) = 28 slots. The terms correspond to the
                  // weights used in decoyStatsOf() with maximum levels countType = 1,
                  // thrType = 1, dbType = 2 and decoy = 1.
01048             enum { DECOY_STATS_FACTOR_LEVELS = 1 + (1*1 + 1*2 + 2*4 + 1*16) };
01049             long decoyStats_[DECOY_STATS_FACTOR_LEVELS];
01050             enum DECOY_STATS_THRESHOLD_TYPE { DS_IDENTITY = 0, DS_HOMOLOGY = 1 };
01051             inline long& decoyStatsOf(bool decoy, DECOY_STATS_COUNT_TYPE countType, DECOY_STATS_THRESHOLD_TYPE thrType, DB_MATCH_TYPE dbType)
01052             {
                      // Returns a writable reference to the counter for one combination of
                      // factors. Weights are 1 (countType), 2 (thrType), 4 (dbType), 16 (decoy).
                      // No range check is done: an out-of-range enum value indexes outside
                      // decoyStats_.
01053                 return decoyStats_[countType * 1 + thrType * 2 + dbType * 4 + decoy * 16];
01054             }
01055             static void checkDecoyStatsArguments(double &OneInXprobRnd, DECOY_STATS_COUNT_TYPE &countType, DB_MATCH_TYPE &dbType);
                  //!< Static helper taking all three arguments by non-const reference, so it can
                  //!< adjust them in place. NOTE(review): presumably clamps/normalises invalid
                  //!< values before decoyStatsOf() is used - confirm in the .cpp.
01056             inline void resetDecoyStats(long c)
                  // Overwrites every slot of decoyStats_ with the value c.
01057             {
01058                 long * const pastLast = decoyStats_ + DECOY_STATS_FACTOR_LEVELS;
01059                 for (long * slot = decoyStats_; slot != pastLast; ++slot) { *slot = c; }
01060             }
01061 
01062             ms_mascotresfile::section secSummary_;
01063             ms_mascotresfile::section secMixture_;
01064             ms_mascotresfile::section secPeptides_;
01065             ms_mascotresfile::section secProteins_;
                  // The four members above name results-file sections; presumably they select
                  // which sections this object reads (e.g. target vs. decoy) - TODO confirm.
01066             double subsetsScoreFraction_;                                //!< Presumably set by setSubsetsThreshold() - confirm.
01067             msparser_internal::ms_proteininference * pProteinInferencer_; //!< NOTE(review): ownership not visible here.
01068             bool nucleicAcid_;
01069             mutable int cachedAvePepIdentThreshMascot_;                  //!< mutable: may be updated from const member functions.
01070             mutable int cachedAvePepIdentThreshSL_;                      //!< mutable: may be updated from const member functions.
01071             std::vector<int> ionsScoreHistogramTopMatchMascot_;
01072             std::vector<int> ionsScoreHistogramTop10Mascot_;
01073             std::vector<int> ionsScoreHistogramTopMatchSL_;
01074             std::vector<int> ionsScoreHistogramTop10SL_;
01075             int maxRankValue_;
01076             bool isPercolator_;
01077             THRESHOLD_TYPE thresholdType_;
01078             bool cancelCreateSummary_;
01079             double minScoreThreshold_;
01080 
01081             typedef std::map<char, int> mod_residue_count_t;     //!< char -> int map; presumably residue letter -> occurrence count.
01082             static const char mod_residue_count_NTERM = '[';     //!< Pseudo-residue key; by name, stands for the N-terminus.
01083             static const char mod_residue_count_CTERM = ']';     //!< Pseudo-residue key; by name, stands for the C-terminus.
01084             bool countModificationInstances(
01085                     std::vector<mod_residue_count_t> &fixed_mod_count, 
01086                     std::vector<mod_residue_count_t> &var_mod_count, 
01087                     std::vector<std::string> &var_mod_position_types, 
01088                     std::map< std::string, std::map<std::string, mod_residue_count_t> > &local_mod_count, 
01089                     std::map< std::string, std::map<std::string, mod_residue_count_t> > &et_mod_count,
01090                     std::vector<std::string> &et_mod_deltas,
01091                     int &num_nonspecific_cleavage, 
01092                     std::vector<mod_residue_count_t> &library_mod_count);
                  //!< All eight parameters are non-const references, i.e. the function reports its
                  //!< results through them. Their types match the discovered*Mods_ members of
                  //!< this class one for one. The meaning of the bool result is not visible here.
01093             virtual bool cacheModificationCounts() = 0;
                  //!< Pure virtual; implemented by the concrete summary classes.
                  //!< NOTE(review): presumably populates the discovered* members below and the
                  //!< two modificationCounts* flags - confirm in the implementations.
01094             std::vector<mod_residue_count_t> discoveredFixedMods_;
01095             std::vector<mod_residue_count_t> discoveredVariableMods_;
01096             std::vector<std::string> discoveredVariableModPositionTypes_;
01097             std::map<std::string, std::map<std::string, mod_residue_count_t> > discoveredLocalMods_;
01098             std::map<std::string, std::map<std::string, mod_residue_count_t> > discoveredErrorTolerantMods_;
01099             std::vector<std::string> discoveredErrorTolerantModDeltas_;
01100             int discoveredNonSpecificCleavageNum_;
01101             std::vector<mod_residue_count_t> discoveredLibraryMods_;
01102             bool modificationCountsLoaded_;
01103             bool modificationCountsAvailable_;
01104 
01105             typedef std::map<std::string, std::set<std::pair<int, int> >, LexicoCompare > map_mod_value_to_qp;
                  //!< String-keyed map (ordered by LexicoCompare) to sets of int pairs; by name
                  //!< the pairs are (query, rank).
01106 
01107             static std::vector<int> parseLibraryModString(const int peplen, const std::string &libmods_str);
                  //!< Static parser: turns a library modification string into a vector of ints,
                  //!< given the peptide length. NOTE(review): the string format and the layout of
                  //!< the result are defined in the .cpp.
01108 
01109             enum COMPLETED_TASKS {
                  // Bit mask values: each task has its own bit, so several can be combined in
                  // completedTasks_. checkCreated() tests them with a bitwise AND.
                  // CT_ALLDONE has the low 16 bits set.
01110                 CT_NONE                 = 0x0000,
01111                 CT_LOADQUERIES          = 0x0001,
01112                 CT_SRCRANKINITIALISED   = 0x0002,
01113                 CT_PERCOLATORRESULTS    = 0x0004,
01114                 CT_INFERENCING          = 0x0008,
01115                 CT_UNASSIGNEDLIST       = 0x0010,
01116                 CT_COMPONENTINTENSITIES = 0x0020,
01117                 CT_COUNTMODS            = 0x0040,
01118                 CT_CREATECDB            = 0x0080,
01119                 CT_ALLDONE              = 0xFFFF
01120             };
01121             mutable unsigned int completedTasks_;  //!< mutable: can be updated from const member functions.
01122             
01123             static double getValidMinProbability(const double minProbability);
                  //!< Static helper mapping a requested minimum probability to the value actually
                  //!< used. NOTE(review): the validation rule is in the .cpp - confirm.
01124 
01125         private:
01126             mutable bool   cachedHomology_[2];
                  // All members in this group are mutable, so const member functions can fill
                  // them lazily. NOTE(review): what the two slots of the [2] arrays stand for is
                  // not visible in this header.
01127             mutable double cachedHomologyProb_[2];
01128             mutable std::vector<double> cachedHomologyValues_[2];
01129             mutable std::vector<int> cachedQMatch_[QMATCH_PLUGHOLE_SEC_LAST];        // one vector per QMATCH_PLUGHOLE_INDEX_SECTIONS value
01130             mutable std::vector<double> cachedQPlughole_[QMATCH_PLUGHOLE_SEC_LAST];  // one vector per QMATCH_PLUGHOLE_INDEX_SECTIONS value
01131             typedef std::map<std::pair<int, std::string>, int> dbIdxPlusAccToId_t;   // (int, string) -> int; by name (db index, accession) -> id
01132             mutable dbIdxPlusAccToId_t summarySectionAccs_;
01133 
01134             mutable bool loadedQmatchFromCacheFile_[QMATCH_PLUGHOLE_SEC_LAST];       // one flag per QMATCH_PLUGHOLE_INDEX_SECTIONS value
01135             mutable bool loadedPlugholeFromCacheFile_[QMATCH_PLUGHOLE_SEC_LAST];     // one flag per QMATCH_PLUGHOLE_INDEX_SECTIONS value
01136 
01137             bool getProteinDescriptionAndMass(const char * accession, const int dbIdx,
01138                                               double & mass, std::string & desc) const;
                  //!< Looks up a protein by accession and database index; mass and desc are
                  //!< non-const references, presumably outputs - TODO confirm.
01139             void cacheSummarySectionAccs() const;
                  //!< Const; presumably fills the mutable summarySectionAccs_ map - confirm.
01140             void debugCheckReloadablePeps() const;
01141             bool setQmatch(const int query, const QMATCH_PLUGHOLE_INDEX_SECTIONS index, const int value) const;
                  //!< Const setter for an int value per (query, section index); presumably
                  //!< writes to the mutable cachedQMatch_ - confirm.
01142             bool setPlughole(const int query, const QMATCH_PLUGHOLE_INDEX_SECTIONS index, const double value) const;
                  //!< Const setter for a double value per (query, section index); presumably
                  //!< writes to the mutable cachedQPlughole_ - confirm.
01143 
01144             // Reduce IT_MODS parsing.
                  // Three mutable caches follow, each a typedef plus a member of that type:
                  //   queryIT_MODS_       int -> string (by name, keyed by query)
                  //   parsedLocalMods_    string -> vector of strings
                  //   pepParsedLocalMods_ (int, int) -> string (by name, keyed by peptide)
01145             typedef std::map<int, std::string> queryIT_MODS_t;
01146             mutable queryIT_MODS_t queryIT_MODS_;
01147             typedef std::map<std::string, std::vector<std::string> > parsedLocalMods_t;
01148             mutable parsedLocalMods_t parsedLocalMods_;
01149             typedef std::map<std::pair<int, int>, std::string> pepParsedLocalMods_t;
01150             mutable pepParsedLocalMods_t pepParsedLocalMods_;
01151 
01152             static std::string parseLocalModsStr(const std::string &localModsStr, const std::vector<std::string> &local_modnames);
                  //!< Static: combines a local-mods string with a list of modification names and
                  //!< returns a string; the format is defined in the .cpp.
01153 
01154             const std::vector<std::string>& lookupParsedIT_MODS(int q) const;
                  //!< Returns a reference (not a copy) to a vector of strings for query q.
                  //!< NOTE(review): presumably refers into parsedLocalMods_, in which case the
                  //!< reference is only valid while that cache entry exists - confirm.
01155 
01156             double sequenceMassResidueLookup_[256];  //!< 256-entry table of doubles; presumably residue mass indexed by character code - confirm.
01157     };
01158 
01160     class MS_MASCOTRESFILE_API ms_mascotresults_params
01161     {
              // Parameter holder for creating results objects: bundles the two flag words and
              // the numeric/string options, with a getter and setter for each. All member
              // functions are defined outside this header.
01162         public:
01164             ms_mascotresults_params(const unsigned int flags = ms_mascotresults::MSRES_GROUP_PROTEINS,
01165                                   double             minProbability = 0.05,
01166                                   int                maxHitsToReport = 0,
01167                                   const char *       unigeneIndexFile = 0,
01168                                   double             ignoreIonsScoreBelow = 0.0,
01169                                   int                minPepLenInPepSummary = 0,
01170                                   const char *       singleHit = 0,
01171                                   const unsigned int flags2 = 0);
                  //!< Every argument has a default, so this also serves as the default
                  //!< constructor: flags = MSRES_GROUP_PROTEINS, minProbability = 0.05, null
                  //!< pointers for both strings, and 0 for everything else.
                  //!< There is no argument for the "use peptide summary" option; see
                  //!< setUsePeptideSummary().
01172 
01173             virtual ~ms_mascotresults_params();
01174 
01176             void setFlags(const unsigned int flags);
01177 
01179             unsigned int getFlags() const;
01180 
01182             void setFlags2(const unsigned int flags2);
01183 
01185             unsigned int getFlags2() const;
01186 
01188             double getMinProbability() const;
01189 
01191             void setMinProbability(const double minProbability);
01192 
01194             double getIgnoreIonsScoreBelow() const;
01195 
01197             void setIgnoreIonsScoreBelow(const double ignoreIonsScoreBelow);
01198 
01200             int getMaxHitsToReport() const;
01201 
01203             void setMaxHitsToReport(const int maxHitsToReport);
01204 
01206             int getMinPepLenInPeptideSummary() const;
                  //!< Note the spelling: "PeptideSummary" here, while the private member is
                  //!< minPepLenInPepSummary_.
01207 
01209             void setMinPepLenInPeptideSummary(const int minPepLenInPeptideSummary);
01210 
01212             std::string getUnigeneIndexFile() const;
                  //!< Returns a std::string by value, although the setter takes a const char*.
01213 
01215             void setUnigeneIndexFile(const char* unigeneIndexFile);
                  //!< NOTE(review): whether a null pointer is accepted is decided in the .cpp;
                  //!< the constructor default for this option is 0.
01216 
01218             std::string getSingleHit() const;
                  //!< Returns a std::string by value, although the setter takes a const char*.
01219 
01221             void setSingleHit(const char* singleHit);
                  //!< NOTE(review): whether a null pointer is accepted is decided in the .cpp;
                  //!< the constructor default for this option is 0.
01222 
01224             bool isUsePeptideSummary() const;
01225 
01227             void setUsePeptideSummary(const bool usePeptideSummary);
                  //!< The only way to set this option from outside: the constructor has no
                  //!< matching argument. Its initial value is not visible in this header.
01228 
01229         private:
                  // One stored value per option exposed by the accessors above.
01230             unsigned int      flags_;
01231             double            minProbability_;
01232             int               maxHitsToReport_;
01233             double            ignoreIonsScoreBelow_;
01234             std::string       unigeneIndexFile_;
01235             int               minPepLenInPepSummary_;
01236             std::string       singleHit_;
01237             unsigned int      flags2_;
01238             bool              usePeptideSummary_;   
01239     };
01240  // end of resfile_group
01242 }   // matrix_science namespace
01243 
01244 #endif // MS_MASCOTRESULTS_HPP
01245 
01246 /*------------------------------- End of File -------------------------------*/
Copyright © 2016 Matrix Science Ltd.  All Rights Reserved. Generated on Fri Jun 2 2017 01:44:51