00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MS_TAXONOMYRULES_HPP
00020 #define MS_TAXONOMYRULES_HPP
00021
00022
00023 #include <string>
00024 #include <vector>
00025
00026
00027 namespace matrix_science {
// Identifies a kind of taxonomy species file / lookup source.
// Within this header it is used as the format of an ms_taxspeciesfiles entry,
// as the file type to search of an ms_parserule_plus, and as the key passed
// to ms_taxonomyrules::getSpeciesFormatRegex() / setSpeciesFormatRegex().
// No explicit values are assigned, so the enumerators count upward from 0.
// NOTE(review): the meaning of each enumerator is not visible in this header.
// The names suggest NCBI, Swiss-Prot, PDB, GI-to-taxid, accession-to-taxid and
// explicitly specified sources respectively -- confirm against the product
// documentation before relying on that reading.
00038 enum TAX_SPECIES_FORMAT
00039 {
00040 TAX_SPECIES_NCBI,
00041 TAX_SPECIES_SWISSPROT,
00042 TAX_SPECIES_PDB,
00043 TAX_SPECIES_GI2TAXID,
00044 TAX_SPECIES_ACC2TAXID,
00045 TAX_SPECIES_EXPLICIT,
// Last enumerator: its value (6) equals the number of enumerators above it,
// so it presumably serves as an element count rather than as a format.
00046 TAX_SPECIES_FORMAT_COUNT
00047 };
00048
// Pairs a file name with a TAX_SPECIES_FORMAT value, describing one taxonomy
// species file. ms_taxonomyrules keeps lists of pointers to objects of this
// class (its species-file and str-file lists).
// Only declarations are visible in this header: the member notes below are
// inferred from the names and the two private fields and should be confirmed
// against the implementation file.
00050 class MS_MASCOTRESFILE_API ms_taxspeciesfiles
00051 {
// Friends get access to the private fields and to getStringValue().
00052 friend class ms_datfile;
00053 friend class ms_taxonomyrules;
00054
00055 public:
// Default constructor.
00057 ms_taxspeciesfiles();
00058
// Copy constructor.
00060 ms_taxspeciesfiles(const ms_taxspeciesfiles& src);
00061
// Destructor (non-virtual).
00063 ~ms_taxspeciesfiles();
00064
// Presumably resets both fields to their defaults; the default values
// themselves are not visible here.
00066 void defaultValues();
00067
// Copy-from-pointer counterpart of the assignment operator.
// NOTE(review): handling of a null pointer is not visible here.
00069 void copyFrom(const ms_taxspeciesfiles* right);
00070
// The assignment operator is not declared when the SWIG macro is defined;
// copyFrom() stays available in that configuration.
00071 #ifndef SWIG
00072
00073 ms_taxspeciesfiles& operator=(const ms_taxspeciesfiles& right);
00074 #endif
00075
// Format accessors -- presumably read / write format_.
00076 TAX_SPECIES_FORMAT getFormat() const;
00077
00079 void setFormat(const TAX_SPECIES_FORMAT value);
00080
// File name accessors -- presumably read / write filename_.
// The getter returns the string by value (a copy).
00082 std::string getFileName() const;
00083
// NOTE(review): behaviour for a null name is not visible here.
00085 void setFileName(const char* name);
00086
00087 private:
00088 TAX_SPECIES_FORMAT format_;
00089 std::string filename_;
00090
// Private helper reachable only from this class and its friends.
// Presumably renders the entry as a single text value -- the exact output
// format is defined in the implementation, not here.
00091 std::string getStringValue() const;
00092 };
00093
// Identifies the kind of a taxonomy nodes file; used as the format of an
// ms_taxnodesfiles entry. Values are implicit: TAX_NODE_NCBI is 0 and
// TAX_NODE_GENCODE is 1.
// NOTE(review): the names suggest an NCBI nodes file and a genetic-code file
// respectively; the exact file layouts are not visible in this header.
00099 enum TAX_NODE_FORMAT
00100 {
00101 TAX_NODE_NCBI,
00102 TAX_NODE_GENCODE
00103 };
00104
00106
// Pairs a file name with a TAX_NODE_FORMAT value, describing one taxonomy
// nodes file. ms_taxonomyrules keeps lists of pointers to objects of this
// class (nodes files and gencode files), and ms_taxonomytree stores objects
// of this class by value in its files_ member.
// The interface mirrors ms_taxspeciesfiles, differing only in the enum type.
// Only declarations are visible in this header: the member notes below are
// inferred from the names and the two private fields and should be confirmed
// against the implementation file.
00108 class MS_MASCOTRESFILE_API ms_taxnodesfiles
00109 {
// Friends get access to the private fields and to getStringValue().
00110 friend class ms_datfile;
00111 friend class ms_taxonomyrules;
00112 public:
00113
// Default constructor.
00115 ms_taxnodesfiles();
00116
// Copy constructor.
00118 ms_taxnodesfiles(const ms_taxnodesfiles& src);
00119
// Destructor (non-virtual).
00121 ~ms_taxnodesfiles();
00122
// Presumably resets both fields to their defaults; the default values
// themselves are not visible here.
00124 void defaultValues();
00125
// Copy-from-pointer counterpart of the assignment operator.
// NOTE(review): handling of a null pointer is not visible here.
00127 void copyFrom(const ms_taxnodesfiles* right);
00128
// The assignment operator is not declared when the SWIG macro is defined;
// copyFrom() stays available in that configuration.
00129 #ifndef SWIG
00130
00131 ms_taxnodesfiles& operator=(const ms_taxnodesfiles& right);
00132 #endif
00133
// Format accessors -- presumably read / write format_.
00134 TAX_NODE_FORMAT getFormat() const;
00135
00137 void setFormat(const TAX_NODE_FORMAT value);
00138
// File name accessors -- presumably read / write filename_.
// The getter returns the string by value (a copy).
00140 std::string getFileName() const;
00141
// NOTE(review): behaviour for a null name is not visible here.
00143 void setFileName(const char* name);
00144
00145 private:
00146 TAX_NODE_FORMAT format_;
00147 std::string filename_;
// Private helper reachable only from this class and its friends.
// Presumably renders the entry as a single text value -- the exact output
// format is defined in the implementation, not here.
00148 std::string getStringValue() const;
00149 };
00150
// Bundles an ms_parserule with three extra settings: the species file type
// to search, a chop-source value and a database name.
// ms_taxonomyrules uses it for its default rule, its str rule and its list of
// per-database source rules.
// ms_parserule is declared elsewhere and must be a complete type here, since
// rule_ is held by value.
// Only declarations are visible in this header: the member notes below are
// inferred from the names and the private fields and should be confirmed
// against the implementation file.
00152 class MS_MASCOTRESFILE_API ms_parserule_plus
00153 {
// Friends get access to the private fields and to getStringValue().
00154 friend class ms_datfile;
00155 friend class ms_taxonomyrules;
00156
00157 public:
00159
// Chop options. Each enumerator is a distinct single bit (0x1, 0x2, 0x4),
// so values can be OR-ed together without colliding.
// NOTE(review): the names suggest removal of prefixes, suffixes and words;
// what is chopped, and from which text, is not visible in this header.
00162 enum TAX_CHOP_TYPES
00163 {
00164 TAX_CHOP_PREFIX = 0x0001,
00165 TAX_CHOP_SUFFIX = 0x0002,
00166 TAX_CHOP_WORDS = 0x0004
00167 };
00168
// Plain unsigned int used by getChopSource() / setChopSource().
// Presumably carries an OR-ed combination of TAX_CHOP_TYPES bits, which a
// value of the enum type itself could not represent -- confirm.
00170 typedef unsigned int TAX_CHOP_SRC;
00171
// Default constructor.
00173 ms_parserule_plus();
00174
// Copy constructor.
00176 ms_parserule_plus(const ms_parserule_plus& src);
00177
// Destructor (non-virtual).
00179 ~ms_parserule_plus();
00180
// Presumably resets all fields to their defaults; the default values
// themselves are not visible here.
00182 void defaultValues();
00183
// Copy-from-pointer counterpart of the assignment operator.
// NOTE(review): handling of a null pointer is not visible here.
00185 void copyFrom(const ms_parserule_plus* right);
00186
// The assignment operator is not declared when the SWIG macro is defined;
// copyFrom() stays available in that configuration.
00187 #ifndef SWIG
00188
00189 ms_parserule_plus& operator=(const ms_parserule_plus& right);
00190 #endif
00191
// File type accessors -- presumably read / write fileTypeToSearch_.
00192 TAX_SPECIES_FORMAT getFileTypeToSearch() const;
00193
00195 void setFileTypeToSearch(const TAX_SPECIES_FORMAT value);
00196
// Rule accessors. The getter hands out a pointer to const; since rule_ is
// stored by value it presumably points at that member and is then valid only
// while this object lives -- confirm.
00198 const ms_parserule* getRule() const;
00199
// Takes a pointer to const while rule_ is a value member, so the rule is
// presumably copied. NOTE(review): null handling is not visible here.
00201 void setRule(const ms_parserule* src);
00202
// Chop source accessors -- presumably read / write chopSrc_.
00204 TAX_CHOP_SRC getChopSource() const;
00205
00207 void setChopSource(const TAX_CHOP_SRC value);
00208
// Database name accessors -- presumably read / write nameOfDb_.
// The getter returns the string by value (a copy).
00210 std::string getNameOfDB() const;
00211
// NOTE(review): behaviour for a null name is not visible here.
00213 void setNameOfDB(const char* name);
00214
00215 private:
00216 TAX_SPECIES_FORMAT fileTypeToSearch_;
00217 ms_parserule rule_;
00218 TAX_CHOP_SRC chopSrc_;
00219 std::string nameOfDb_;
// Private helper reachable only from this class and its friends.
// Presumably renders the rule as a single text value -- the exact output
// format is defined in the implementation, not here.
00220 std::string getStringValue() const;
00221 };
00222
00224
// Holds the settings of one taxonomy rule set: flags, an identifier, file
// lists, parse rules, prefix / suffix removal strings and per-format regular
// expressions. Publicly derives from ms_customproperty (declared elsewhere).
// ms_datfile is a friend and so can reach the private members directly.
// Only declarations are visible in this header. The notes below are inferred
// from the member names and the private fields they appear to pair with;
// confirm against the implementation file before relying on details such as
// defaults, index validation or ownership.
// General conventions visible in the signatures:
//  - list-like data is exposed as getNumberOf...() / get...(index) /
//    clear...() / append...() groups; behaviour for an out-of-range index is
//    not visible here;
//  - strings are returned by value and set from const char pointers; null
//    handling is not visible here;
//  - object getters return pointers to const; append / set functions take
//    pointers to const while the private vectors hold non-const pointers,
//    which suggests the arguments are copied rather than stored -- confirm,
//    together with who deletes the stored objects.
00246 class MS_MASCOTRESFILE_API ms_taxonomyrules: public ms_customproperty
00247 {
00248 friend class ms_datfile;
00249
00250 public:
// Default constructor.
00252 ms_taxonomyrules();
00253
// Copy constructor. NOTE(review): the class holds vectors of raw pointers, so
// whether this is a deep copy depends on the implementation.
00255 ms_taxonomyrules(const ms_taxonomyrules& src);
00256
// Destructor (non-virtual in this declaration).
00258 ~ms_taxonomyrules();
00259
// Presumably resets every setting to its default; the default values
// themselves are not visible here.
00261 void defaultValues();
00262
// Copy-from-pointer counterpart of the assignment operator.
00264 void copyFrom(const ms_taxonomyrules* right);
00265
// The assignment operator is not declared when the SWIG macro is defined;
// copyFrom() stays available in that configuration.
00266 #ifndef SWIG
00267
00268 ms_taxonomyrules& operator=(const ms_taxonomyrules& right);
00269 #endif
00270
// Section-available flag -- presumably backed by sectionAvailable_.
00271 bool isSectionAvailable() const;
00272
00274 void setSectionAvailable(const bool value);
00275
// Enabled flag -- presumably backed by enabled_.
00277 bool isEnabled() const;
00278
00280 void setEnabled(const bool flag);
00281
// Identifier string -- presumably backed by identifier_.
00283 std::string getIdentifier() const;
00284
00286 void setIdentifier(const char* str);
00287
// Error level -- presumably backed by errorLevel_. The meaning and valid
// range of the integer are not visible here.
00289 int getErrorLevel() const;
00290
00292 void setErrorLevel(const int value);
00293
// From-reference-file flag -- presumably backed by fromRefFile_.
00295 bool isFromRefFile() const;
00296
00298 void setFromRefFile(const bool flag);
00299
// Concatenate-reference-file-lines flag -- presumably backed by
// concatRefFileLines_.
00301 bool isConcatRefFileLines() const;
00302
00304 void setConcatRefFileLines(const bool flag);
00305
// Description line separator, a single char -- presumably backed by
// descriptionLineSep_.
00307 char getDescriptionLineSep() const;
00308
00310 void setDescriptionLineSep(const char value);
00311
// List of strings -- presumably backed by noBreakDescLineIf_.
00313 int getNumberOfNoBreakDescLineIf() const;
00314
00316 std::string getNoBreakDescLineIf(const int index) const;
00317
00319 void clearNoBreakDescLineIf();
00320
00322 void appendNoBreakDescLineIf(const char* str);
00323
// List of species files -- presumably backed by speciesFiles_.
00325 int getNumberOfSpeciesFiles() const;
00326
00328 const ms_taxspeciesfiles * getSpeciesFile(const int index) const;
00329
00331 void clearSpeciesFiles();
00332
00334 void appendSpeciesFile(const ms_taxspeciesfiles * item);
00335
00336
// List of str files. NOTE(review): the only private member that fits is
// strStrFiles_ (note the doubled Str); the public names use a single Str.
00337 int getNumberOfStrFiles() const;
00338
00339 const ms_taxspeciesfiles * getStrFile(const int index) const;
00340
00341 void clearStrFiles();
00342
00343 void appendStrFile(const ms_taxspeciesfiles * item);
00344
00345
// Str rule -- presumably backed by strStrRule_, which is held by value, so
// the getter presumably points at that member and the setter copies.
00346 const ms_parserule_plus* getStrRule() const;
00347
00348 void setStrRule(const ms_parserule_plus* src);
00349
// List of nodes files -- presumably backed by nodesFiles_.
00351 int getNumberOfNodesFiles() const;
00352
00354 const ms_taxnodesfiles * getNodesFile(const int index) const;
00355
00357 void clearNodesFiles();
00358
00360 void appendNodesFile(const ms_taxnodesfiles * item);
00361
// List of gencode files -- presumably backed by gencodeFiles_. These share
// the ms_taxnodesfiles element type with the nodes-file list.
00363 int getNumberOfGencodeFiles() const;
00364
00366 const ms_taxnodesfiles * getGencodeFile(const int index) const;
00367
00369 void clearGencodeFiles();
00370
00372 void appendGencodeFile(const ms_taxnodesfiles * item);
00373
// Default rule -- presumably backed by defaultRule_ (held by value).
00375 const ms_parserule_plus* getDefaultRule() const;
00376
00378 void setDefaultRule(const ms_parserule_plus* src);
00379
// List of prefix-removal strings -- presumably backed by prefixRemoves_.
00381 int getNumberOfPrefixRemoves() const;
00382
00384 std::string getPrefixRemove(const int index) const;
00385
00387 void clearPrefixRemoves();
00388
00390 void appendPrefixRemove(const char * item);
00391
// List of suffix-removal strings -- presumably backed by suffixRemoves_.
00393 int getNumberOfSuffixRemoves() const;
00394
00396 std::string getSuffixRemove(const int index) const;
00397
00399 void clearSuffixRemoves();
00400
00402 void appendSuffixRemove(const char * item);
00403
// Source database rule -- presumably backed by srcDatabaseRule_ (by value).
00405 const ms_parserule* getSrcDatabaseRule() const;
00406
00408 void setSrcDatabaseRule(const ms_parserule* src);
00409
// List of per-database source rules -- presumably backed by perDbSrcRules_.
00411 int getNumberOfPerDbSrcRules() const;
00412
00414 const ms_parserule_plus * getPerDbSrcRule(const int index) const;
00415
00417 void clearPerDbSrcRules();
00418
00420 void appendPerDbSrcRule(const ms_parserule_plus * item);
00421
// Do-this-rule-first rule -- presumably backed by doThisRuleFirst_.
00423 const ms_parserule* getDoThisRuleFirst() const;
00424
00426 void setDoThisRuleFirst(const ms_parserule* src);
00427
// Accession-from-species-line rule -- presumably backed by
// accFromSpeciesLine_.
00429 const ms_parserule* getAccFromSpeciesLine() const;
00430
00432 void setAccFromSpeciesLine(const ms_parserule* src);
00433
// Quick reference search string -- presumably backed by quickRefSearch_.
00435 std::string getQuickRefSearch() const;
00436
00438 void setQuickRefSearch(const char* str);
00439
// Database-level taxonomy id: get / set / is-set / clear.
// NOTE(review): only one int member (dbLevelTaxId_) is visible and no separate
// flag, so the is / clear pair presumably relies on a reserved sentinel value;
// which value that is cannot be told from this header.
00441 int getDBLevelTaxId() const;
00442
00444 void setDBLevelTaxId(const int value);
00445
00447 bool isDBLevelTaxId() const;
00448
00450 void clearDBLevelTaxId();
00451
// Mitochondrial translation flag -- presumably backed by
// mitochondrialTranslation_.
00453 bool isMitochondrialTranslation() const;
00454
00456 void setMitochondrialTranslation(const bool flag);
00457
// Regular expression per species format -- presumably stored in
// speciesFormatRegex_ and indexed by the enum value; behaviour for
// TAX_SPECIES_FORMAT_COUNT or other out-of-range values is not visible here.
00459 std::string getSpeciesFormatRegex(const TAX_SPECIES_FORMAT format) const;
00460
// NOTE(review): regex is taken by value, so each call copies the string. A
// const reference would avoid that, but the out-of-line definition would have
// to change in step, so the declaration is left as it is.
00462 void setSpeciesFormatRegex(const TAX_SPECIES_FORMAT format, const std::string regex);
00463
// When SUPPRESS_MS_CUSTOMPROPERTY_INHERITANCE is defined, the contents of
// suppress_ms_customproperty.hpp are pasted into the class body at this
// point, inside the public section. That file is not visible here.
00464 #ifdef SUPPRESS_MS_CUSTOMPROPERTY_INHERITANCE
00465 #include "suppress_ms_customproperty.hpp"
00466 #endif
00467
00468 private:
// Scalar settings.
00469 bool sectionAvailable_;
00470 bool enabled_;
00471 std::string identifier_;
00472 int errorLevel_;
00473 bool fromRefFile_;
00474 bool concatRefFileLines_;
00475 char descriptionLineSep_;
00476
// List settings. Five of these vectors hold raw pointers.
// NOTE(review): ownership and cleanup of the pointed-to objects is decided in
// the implementation (constructors, destructor, clear functions) -- confirm
// before changing any of them.
00477 std::vector< std::string > noBreakDescLineIf_;
00478 std::vector< ms_taxspeciesfiles* > speciesFiles_;
00479 std::vector< ms_taxspeciesfiles* > strStrFiles_;
00480 std::vector< ms_taxnodesfiles* > nodesFiles_;
00481 std::vector< ms_taxnodesfiles* > gencodeFiles_;
00482 std::vector< ms_parserule_plus* > perDbSrcRules_;
00483 std::vector< std::string > prefixRemoves_;
00484 std::vector< std::string > suffixRemoves_;
00485 std::vector< std::string > speciesFormatRegex_;
00486
// Single rules, all held by value.
00487 ms_parserule srcDatabaseRule_;
00488 ms_parserule doThisRuleFirst_;
00489 ms_parserule accFromSpeciesLine_;
00490 ms_parserule_plus defaultRule_;
00491 ms_parserule_plus strStrRule_;
00492
00493 std::string quickRefSearch_;
00494 int dbLevelTaxId_;
00495 bool mitochondrialTranslation_;
00496 };
00497
// Forward declarations: ms_taxonomytree below uses these types only through
// pointers (ms_tinycdb in its cdbFiles_ member, ms_taxonomychoice as the
// argument of isIncludedIn), so the full definitions are not needed here.
00498 class ms_tinycdb;
00499 class ms_taxonomychoice;
00500
00502
// Taxonomy tree built from an ms_taxonomyrules object; answers parent and
// descendant queries on integer taxonomy ids. Publicly derives from ms_errors
// (declared elsewhere).
// Only declarations are visible in this header. The notes below are inferred
// from names, signatures and the private fields; confirm against the
// implementation file before relying on details.
00540 class MS_MASCOTRESFILE_API ms_taxonomytree : public ms_errors
00541 {
00542 public:
// Constructor. Parameters and their declared defaults:
//   taxonomyRules      - rule set to build from (no default).
//   taxonomyDirectory  - defaults to ../taxonomy
//   useIndex           - defaults to true
//   createList         - defaults to false
//   preparingDirectory - defaults to ../taxonomy/preparing
//   oldDirectory       - defaults to ../taxonomy/old
// The default paths are relative. NOTE(review): what they are resolved
// against, what useIndex and createList switch on, and how failures are
// reported (presumably through the ms_errors base) are not visible here.
00544 ms_taxonomytree(const ms_taxonomyrules * taxonomyRules,
00545 const char * taxonomyDirectory = "../taxonomy",
00546 const bool useIndex = true,
00547 const bool createList = false,
00548 const char * preparingDirectory = "../taxonomy/preparing",
00549 const char * oldDirectory = "../taxonomy/old");
00550
// Copy constructor. NOTE(review): cdbFiles_ holds raw ms_tinycdb pointers;
// whether they are shared or duplicated on copy is not visible here.
00552 ms_taxonomytree(const ms_taxonomytree & src);
00553
// Destructor (non-virtual in this declaration).
00555 ~ms_taxonomytree();
00556
// Copy-from-pointer counterpart of the assignment operator.
00558 void copyFrom(const ms_taxonomytree * right);
00559
// Everything up to the matching endif is not declared when the SWIG macro is
// defined: the assignment operator, the node struct, the vector typedef and
// getTaxIDArray().
// NOTE(review): the private member nodes_ further down uses TAX_TREE_NODES
// outside this guard, so with SWIG defined the typedef is not declared at
// that point. Presumably harmless for SWIG interface parsing, but a regular
// compiler given -DSWIG would reject it -- confirm this is intended.
00560 #ifndef SWIG
00561
00562 ms_taxonomytree& operator=(const ms_taxonomytree & right);
00563
// One tree entry: two ints. Presumably the parent taxonomy id and a
// genetic-code table id (compare the ttParent / ttGenTable outputs of
// getParent) -- confirm.
00564 struct TAX_TREE_NODE {
00565 int parentId;
00566 int tableId;
00567 };
00568 typedef std::vector<TAX_TREE_NODE> TAX_TREE_NODES;
00569
// Non-const access to a node vector, presumably the nodes_ member. How the
// vector is indexed (for example by taxonomy id) is not visible here.
00571 TAX_TREE_NODES * getTaxIDArray();
00572 #endif
00573
// Presumably reports the usingCDB_ member, i.e. whether index files are
// actually in use -- confirm.
00575 bool usingIndex() const;
00576
// Returns a string by value; from the name, a listing of the taxonomy and
// index files in use. The format is not visible here.
00578 std::string getTaxonomyAndIndexFiles() const;
00579
// Note the argument order: the candidate ancestor comes first, the id being
// tested second. Whether an id counts as a descendant of itself is not
// visible here.
00581 bool isSpeciesDescendantOf(const int parentID, const int id) const;
00582
// Looks up the entry for id and reports two values through reference
// parameters; the bool result presumably signals whether id was found.
// Two declarations of the same function: the regular one with named output
// parameters, and one for SWIG in which both are named OUTPUT so that they
// become extra return values in wrapped languages.
00583 #ifndef SWIG
00584
00585 bool getParent(const int id, int & ttParent, int & ttGenTable) const;
00586 #else // SWIG Multiple return values
00587 bool getParent(const int id, int & OUTPUT, int & OUTPUT) const;
00588 #endif
00589
// Tests id against an ms_taxonomychoice (forward-declared above this class).
// The inclusion / exclusion semantics live in the implementation.
00591 bool isIncludedIn(const int id, const ms_taxonomychoice * choice) const;
00592
00593 private:
00594 bool usingCDB_;
00595 std::vector<ms_taxnodesfiles> files_;
00596 TAX_TREE_NODES nodes_;
00597 std::vector<std::string> fileNames_;
// Raw pointers to a forward-declared type; ownership is not visible here.
00598 std::vector<ms_tinycdb *> cdbFiles_;
00599
// Private loader for a single file; the bool result presumably signals
// success. The second argument carries a mitochondrial-translation flag,
// presumably taken from the rule set -- confirm.
00600 bool readFile(const std::string & filename,
00601 const bool isMitochondrialTranslation);
00602
00603 };
00604
00605
00607 }
00608
00609 #endif // MS_TAXONOMYRULES_HPP
00610
00611