Matrix Science header

ms_spectral_lib_file.hpp

00001 /*
00002 ##############################################################################
00003 # File: ms_spectral_lib_file.hpp                                             #
00004 # Mascot Parser toolkit                                                      #
00005 # Encapsulates spectral library files (e.g. msp, sptext)                     #
00006 #                                                                            #
00007 ##############################################################################
00008 # COPYRIGHT NOTICE                                                           #
00009 # Copyright 2015 Matrix Science Limited  All Rights Reserved.                #
00010 #                                                                            #
00011 ##############################################################################
00012 #    $Source: parser/inc/ms_spectral_lib_file.hpp $
00013 #    $Author: dcreasy@matrixscience.com $ 
00014 #      $Date: 2019-10-23 11:32:40 +0100 $ 
00015 #  $Revision: e1e932b0e014aa84a197578948d47baeca33fd07 | MSPARSER_REL_2_8_1-0-gea32989045 $
00016 ##############################################################################
00017 */
00018 
00019 #ifndef MS_SPECTRAL_LIB_FILE
00020 #define MS_SPECTRAL_LIB_FILE
00021 
00022 
00023 // Includes from the standard template library
00024 
00025 #include <map>
00026 #include <set>
00027 #include <string>
00028 
00029 
00030 namespace matrix_science {
00031   
00037 
00038 
          // Encapsulates a spectral library file (e.g. msp, sptext - see the file
          // header). Individual entries are addressed by an integer entry number
          // through the get...FromNumber() accessors.
          // Copying is disabled: the copy constructor and assignment operator are
          // declared private.
          // NOTE(review): std::vector and FILE are used below, but this header only
          // includes <map>, <set> and <string>; presumably they (and ms_errors,
          // ms_parserule, ms_tinycdb, OFFSET64_T, ms_spectral_lib_entry, ...) are
          // supplied by whatever includes this header - confirm.
00065     class MS_MASCOTRESFILE_API ms_spectral_lib_file : public ms_errors
00066     {
00067     public:
              // Construct from the library file name and a regular expression for
              // the accession. cdbFileName is optional and defaults to 0.
              // NOTE(review): presumably the regex is used to pull the accession out
              // of each entry, and a 0 cdbFileName means "no cdb index file" - confirm.
00069         ms_spectral_lib_file(const char * fileName, const char * regexForAccession, const char * cdbFileName = 0);
00070 
00071 #ifndef SWIG
00072 
              // As the constructor above, with an additional map of modification
              // aliases. Not visible to SWIG (guarded by #ifndef SWIG).
              // NOTE(review): key/value direction is presumably the same as modMap_t
              // below (nameInMsp => nameInUnimod) - confirm.
00073         ms_spectral_lib_file(const char * fileName, const char * regexForAccession, const char * cdbFileName, const std::map<std::string, std::string> & modificationAliases);
00074 #endif
00075 
              // Destructor.
00077         ~ms_spectral_lib_file();
00078         
              // Number of entries. See numEntries_ below: that count may be less than
              // the total in the file if the file is being read on demand
              // (presumably this returns that member - TODO confirm).
00080         int getNumEntries() const;
00081         
              // Number of residues. See numResidues_ below for the same on-demand
              // caveat (presumably this returns that member - TODO confirm).
00083         int getNumResidues() const;
00084 
              // Find entries by sequence and, optionally, checksum, accession and
              // mods (each optional argument defaults to 0).
              // Returns a vector of ints - presumably entry numbers usable with the
              // get...FromNumber() functions; TODO confirm.
00086         std::vector<int> findEntries(const char * sequence, const char * checksum = 0, const char * accession = 0, const char * mods=0) const;
00087         
              // Return the entry with the given number, by value.
00089         ms_spectral_lib_entry getEntryFromNumber(const int number) const;
00090 
              // Return the entry with the given number as a vector of strings
              // (presumably one string per line of the entry's text, cf. the 'lines'
              // parameter of getEntryAsText() below - confirm).
00092         std::vector<std::string> getEntryFromNumberAsText(const int number) const;
00093 
00094 
              // Accession string for the entry with the given number.
00096         std::string getAccessionFromNumber(const int number) const;
00097 
              // Checksum string for the entry with the given number.
00099         std::string getChecksumFromNumber(const int number) const;
00100 
              // Sequence string for the entry with the given number.
00102         std::string getSequenceFromNumber(const int number) const;
00103 
              // Modifications string for the entry with the given number.
00105         std::string getModsFromNumber(const int number) const;
00106 
              // Precursor m/z for the entry with the given number.
00108         double getPrecursorMZFromNumber(const int number) const;
00109 
              // Precursor charge for the entry with the given number.
00111         int getPrecursorChargeFromNumber(const int number) const;
00112 
              // Return all modifications as a vector of strings
              // (presumably the names held in modsMap_ - TODO confirm).
00114         std::vector<std::string> getAllMods() const;
00115 
              // Return the file format as an ms_spectral_lib::FILE_FORMAT value.
00117         ms_spectral_lib::FILE_FORMAT getFormat() const;
00118 
              // Save to fileName; returns a bool (presumably true on success).
              // All arguments after fileName are optional, with these defaults:
              //   replaceProteinName = true
              //   fileFormat         = ms_spectral_lib::FORMAT_NIST_MSP
              //   startNumber        = 1
              //   endNumber          = -1  (presumably "up to the last entry" - confirm)
              //   whatToAnnotate     = ms_spectral_lib_entry::ANNOTATE_REPLACE_QUESTION_MARKS
              //   annotateTol        = 0.6, in units of annotateTolu = "Da"
              //   unimod             = 0
              // NOTE(review): the startNumber default of 1 suggests entry numbers are
              // 1-based - confirm.
00120         bool saveAs(const char * fileName, 
00121                     const bool   replaceProteinName  = true, 
00122                     ms_spectral_lib::FILE_FORMAT fileFormat = ms_spectral_lib::FORMAT_NIST_MSP,
00123                     const int    startNumber = 1,
00124                     const int    endNumber = -1,
00125                     const ms_spectral_lib_entry::WHAT_TO_ANNOTATE whatToAnnotate = ms_spectral_lib_entry::ANNOTATE_REPLACE_QUESTION_MARKS,
00126                     const double annotateTol = 0.6,
00127                     const char * annotateTolu = "Da",
00128                     const ms_umod_configfile * unimod = 0) const;
00129 
              // Return statistics information as a string (content not visible here).
00131         std::string getStatsInformation() const;
00132 
              // Return the file name (presumably fileName_ - TODO confirm).
00134         std::string getFileName() const;
00135 
              // Return a long for the m/z range [minMz, maxMz]
              // (presumably a count of precursors_ values in that range - TODO confirm).
00137         long getQmatch(double minMz, double maxMz) const;
00138 
              // Check the modifications against the supplied Unimod configuration.
              // Note: unlike the other public member functions, this one is not const.
00140         bool verifyThatModsAreInUnimod(const ms_umod_configfile & unimod);
00141 
00142     private:
00143         //Do not copy this object
00144         ms_spectral_lib_file(const ms_spectral_lib_file & src);
00145         ms_spectral_lib_file& operator=(const ms_spectral_lib_file & right);
00146 
              // One record per entry; the fields correspond to the public
              // get...FromNumber() accessors, plus the entry's offset in the file.
00147         struct sequentialIndex_t {
00148             std::string accession;
00149             std::string checksum;
00150             std::string sequence;
00151             std::string mods;
00152             double      precursorMZ;
00153             int         precursorCharge;
00154             OFFSET64_T  fileOffset;
00155         };
              // Takes the same arguments as the four-argument constructor
              // (presumably shared set-up called by both constructors - confirm).
00156         void initialise(const char * fileName, const char * regexForAccession, const char * cdbFileName, const std::map<std::string, std::string> & aliases);
              // File reading helpers; const, so they rely on the mutable members below.
00157         bool readFile(int recordNumber) const;
00158         void getline(std::string & str) const;
              // Fetch the text lines of entry 'number' into 'lines'; returns an error
              // code and fills 'errorMsg' (presumably only on failure - confirm).
00159         ms_errs::err getEntryAsText(const int number, std::vector<std::string> & lines, std::string & errorMsg) const;
00160 
              // Record index information for an entry located at fileOffset.
00161         void saveIndexes(const  ms_spectral_lib_entry & msp, 
00162                          const  OFFSET64_T          fileOffset);
00163         OFFSET64_T getFileOffsetFromId(int number) const;
00164         const char * getSpectrumDelimiter() const;
00165 
              // Modification alias handling (see modMap_t below for the mapping).
00166         int applyModAliases(const std::map<std::string, std::string> & aliases);
00167         void replaceModAliases(std::string & comment, const ms_spectral_lib_entry & spect) const;
00168 
00169         // the following are always valid - with or without a cdb file
00170                 std::string  fileName_;
00171         mutable int          numEntries_;  // May be less than the total in the file if it's being read on demand
00172         mutable int          numResidues_; // May be less than the total in the file if it's being read on demand
00173                 ms_parserule parseRule_;
00174         mutable bool         wholeFileRead_;
00175                 FILE       * ifs_;
00176         mutable ms_spectral_lib::FILE_FORMAT  fileFormat_;
00177 
00178         // the following are valid when creating or using the cdb file
00179         ms_tinycdb * cdb_;
00180 
00181         // the following are only valid when no cdb specified *or* when *first creating* the cdb file
00182         mutable OFFSET64_T offsetToNextRecord_;
00183         typedef std::map<std::string, std::vector<int> > strLookup_t;  // e.g. sequence => vector of spectra
00184         std::vector<strLookup_t> lookup_;
00185         std::vector<sequentialIndex_t> sequentialIndex_;
00186         typedef std::map<std::string, std::string> modMap_t;  // nameInMsp => nameInUnimod (or empty if no alias)
00187         mutable modMap_t modsMap_;
00188 
              // Multiset of doubles (duplicates allowed); presumably the precursor
              // m/z values consulted by getQmatch() - TODO confirm.
00189         typedef std::multiset<double> precursors_t;
00190         mutable precursors_t precursors_;
00191 
00192     }; // class ms_spectral_lib_file
00193  // end of tools_group
00195 } // namespace matrix_science
00196 
00197 #endif // MS_SPECTRAL_LIB_FILE
00198 
00199 /*------------------------------- End of File -------------------------------*/

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:30