This class represents a single Taxonomy_XXX section in mascot.dat.
More...
#include <ms_taxonomyrules.hpp>
Public Member Functions | |
ms_taxonomyrules () | |
Default constructor. | |
ms_taxonomyrules (const ms_taxonomyrules &src) | |
Copy constructor. | |
~ms_taxonomyrules () | |
Destructor. | |
void | appendGencodeFile (const ms_taxnodesfiles *item) |
Adds an entry into the GencodeFiles list. | |
void | appendNoBreakDescLineIf (const char *str) |
Adds an entry into the NoBreakDescLineIf list. | |
void | appendNodesFile (const ms_taxnodesfiles *item) |
Adds an entry into the NodesFiles list. | |
void | appendPerDbSrcRule (const ms_parserule_plus *item) |
Adds a new database source string into the list. | |
void | appendPrefixRemove (const char *item) |
Adds an entry into the PrefixRemoves list. | |
void | appendSpeciesFile (const ms_taxspeciesfiles *item) |
Adds an entry into the SpeciesFiles list. | |
void | appendStrFile (const ms_taxspeciesfiles *item) |
void | appendSuffixRemove (const char *item) |
Adds an entry into the SuffixRemoves list. | |
void | clearDBLevelTaxId () |
Deletes DBLevelTaxId; it will not be saved in the file. | |
void | clearGencodeFiles () |
Deletes all GencodeFiles entries. | |
void | clearNoBreakDescLineIf () |
Deletes all NoBreakDescLineIf entries. | |
void | clearNodesFiles () |
Deletes all NodesFiles entries. | |
void | clearPerDbSrcRules () |
Deletes all database source strings. | |
void | clearPrefixRemoves () |
Deletes all PrefixRemoves entries. | |
void | clearSpeciesFiles () |
Deletes all entries for SpeciesFiles. | |
void | clearSuffixRemoves () |
Deletes all SuffixRemoves entries. | |
void | copyFrom (const ms_taxonomyrules *right) |
Can be used to create a copy of another object. | |
void | defaultValues () |
Initialises the instance with default values. | |
const ms_parserule * | getAccFromSpeciesLine () const |
Returns the value of AccFromSpeciesLine. | |
int | getDBLevelTaxId () const |
Returns the value of DBLevelTaxId. | |
const ms_parserule_plus * | getDefaultRule () const |
Returns the value of DefaultRule. | |
char | getDescriptionLineSep () const |
Returns the value of DescriptionLineSep. | |
const ms_parserule * | getDoThisRuleFirst () const |
Returns the value of DoThisRuleFirst. | |
int | getErrorLevel () const |
Returns the value of ErrorLevel. | |
const ms_taxnodesfiles * | getGencodeFile (const int index) const |
Returns an instance of ms_taxnodesfiles describing an entry in GencodeFiles. | |
std::string | getIdentifier () const |
Returns the value of Identifier. | |
std::string | getNoBreakDescLineIf (const int index) const |
Returns a NoBreakDescLineIf entry by its number. | |
const ms_taxnodesfiles * | getNodesFile (const int index) const |
Returns an instance of ms_taxnodesfiles describing an entry in NodesFiles. | |
int | getNumberOfGencodeFiles () const |
Returns the number of file names specified in GencodeFiles. | |
int | getNumberOfNoBreakDescLineIf () const |
Returns the number of NoBreakDescLineIf entries. | |
int | getNumberOfNodesFiles () const |
Returns the number of file names specified in NodesFiles. | |
int | getNumberOfPerDbSrcRules () const |
Returns the number of database source strings. | |
int | getNumberOfPrefixRemoves () const |
Returns the number of PrefixRemoves entries. | |
int | getNumberOfSpeciesFiles () const |
Returns the number of file names specified in SpeciesFiles. | |
int | getNumberOfStrFiles () const |
int | getNumberOfSuffixRemoves () const |
Returns the number of SuffixRemoves entries. | |
const ms_parserule_plus * | getPerDbSrcRule (const int index) const |
Returns a database source string by its number. | |
std::string | getPreceedingComments () const |
Returns any comments preceding the section. | |
std::string | getPrefixRemove (const int index) const |
Returns the PrefixRemoves string by number. | |
std::string | getQuickRefSearch () const |
Returns the value of QuickRefSearch. | |
const ms_taxspeciesfiles * | getSpeciesFile (const int index) const |
Returns an instance of ms_taxspeciesfiles describing an entry in SpeciesFiles. | |
std::string | getSpeciesFormatRegex (const TAX_SPECIES_FORMAT format) const |
Returns a regular expression, if any, for the species format. | |
const ms_parserule * | getSrcDatabaseRule () const |
Returns the value of SrcDatabaseRule. | |
const ms_taxspeciesfiles * | getStrFile (const int index) const |
const ms_parserule_plus * | getStrRule () const |
std::string | getSuffixRemove (const int index) const |
Returns a SuffixRemoves string by number. | |
bool | isConcatRefFileLines () const |
Returns TRUE if ConcatRefFileLines parameter is set to 1 and FALSE otherwise. | |
bool | isDBLevelTaxId () const |
Returns TRUE if there is a database level taxonomy ID in the file. | |
bool | isEnabled () const |
Returns TRUE if Enabled parameter is set to 1 and FALSE otherwise. | |
bool | isFromRefFile () const |
Returns TRUE if FromRefFile parameter is set to 1 and FALSE otherwise. | |
bool | isMitochondrialTranslation () const |
Returns TRUE if MitochondrialTranslation parameter is set to 1 and FALSE otherwise. | |
bool | isSectionAvailable () const |
Checks whether the section has been actually read from the file. | |
ms_taxonomyrules & | operator= (const ms_taxonomyrules &right) |
Assignment operator for C++ client applications. | |
void | setAccFromSpeciesLine (const ms_parserule *src) |
Change the value of AccFromSpeciesLine. | |
void | setConcatRefFileLines (const bool flag) |
Change the value of ConcatRefFileLines. | |
void | setDBLevelTaxId (const int value) |
Change the value of DBLevelTaxId. | |
void | setDefaultRule (const ms_parserule_plus *src) |
Change the value of DefaultRule. | |
void | setDescriptionLineSep (const char value) |
Change the value of DescriptionLineSep. | |
void | setDoThisRuleFirst (const ms_parserule *src) |
Change the value of DoThisRuleFirst. | |
void | setEnabled (const bool flag) |
Change the value of Enabled. | |
void | setErrorLevel (const int value) |
Change the value of ErrorLevel. | |
void | setFromRefFile (const bool flag) |
Change the value of FromRefFile. | |
void | setIdentifier (const char *str) |
Change the value of Identifier. | |
void | setMitochondrialTranslation (const bool flag) |
Set the value of MitochondrialTranslation. | |
void | setPreceedingComments (const std::string &comments) |
Sets any comments preceding the section. | |
void | setQuickRefSearch (const char *str) |
Change the value of QuickRefSearch. | |
void | setSectionAvailable (const bool value) |
Changes availability of the section, i.e. whether it should be saved in a file. | |
void | setSpeciesFormatRegex (const TAX_SPECIES_FORMAT format, const std::string regex) |
Set the regular expression, if any, for the species format. | |
void | setSrcDatabaseRule (const ms_parserule *src) |
Change the value of SrcDatabaseRule. | |
void | setStrRule (const ms_parserule_plus *src) |
This class represents a single Taxonomy_XXX section in mascot.dat.
The Taxonomy section defines a set of taxonomy rules that can be selected for a database. Usage of taxonomy rules can be turned off by setting the Enabled property to 0. See isEnabled() for more information.
Instances of this class are created in ms_datfile.
Also get yourselves acquainted with the base class ms_customproperty. It facilitates the following tasks:
More functionality is described in the documentation for ms_customproperty.
void appendGencodeFile | ( | const ms_taxnodesfiles * | item ) |
Adds an entry into the GencodeFiles list.
See getGencodeFile() for more information.
item | an item to add a copy of into the list. |
void appendNoBreakDescLineIf | ( | const char * | str ) |
Adds an entry into the NoBreakDescLineIf list.
See getNoBreakDescLineIf() for more information.
str | an item to add a copy of into the list. |
void appendNodesFile | ( | const ms_taxnodesfiles * | item ) |
Adds an entry into the NodesFiles list.
See getNodesFile() for more information.
item | an item to add a copy of into the list. |
void appendPerDbSrcRule | ( | const ms_parserule_plus * | item ) |
Adds a new database source string into the list.
See getPerDbSrcRule() for more information.
item | an item to add a copy of into the list |
void appendPrefixRemove | ( | const char * | item ) |
Adds an entry into the PrefixRemoves list.
See getPrefixRemove() for more information.
item | an item to add a copy of into the list. |
void appendSpeciesFile | ( | const ms_taxspeciesfiles * | item ) |
Adds an entry into the SpeciesFiles list.
See getSpeciesFile() for more information.
item | an item to add a copy of into the list. |
void appendStrFile | ( | const ms_taxspeciesfiles * | item ) |
item | pointer to a taxonomy species file |
void appendSuffixRemove | ( | const char * | item ) |
Adds an entry into the SuffixRemoves list.
See getSuffixRemove() for more information.
item | an item to add a copy of into the list. |
void clearGencodeFiles | ( | ) |
Deletes all GencodeFiles entries.
See getGencodeFile() for more information.
void clearNoBreakDescLineIf | ( | ) |
Deletes all NoBreakDescLineIf entries.
See getNoBreakDescLineIf() for more information.
void clearNodesFiles | ( | ) |
Deletes all NodesFiles entries.
See getNodesFile() for more information.
void clearPerDbSrcRules | ( | ) |
Deletes all database source strings.
See getPerDbSrcRule() for more information.
void clearPrefixRemoves | ( | ) |
Deletes all PrefixRemoves entries.
See getPrefixRemove() for more information.
void clearSpeciesFiles | ( | ) |
Deletes all entries for SpeciesFiles.
See getSpeciesFile() for more information.
void clearSuffixRemoves | ( | ) |
Deletes all SuffixRemoves entries.
See getSuffixRemove() for more information.
const ms_parserule * getAccFromSpeciesLine | ( | ) | const |
Returns the value of AccFromSpeciesLine.
MSDB database explicitly associates each species line with the accession string of the primary database entry. A further rule, AccFromSpeciesLine, is used to extract this accession string.
int getDBLevelTaxId | ( | ) | const |
Returns the value of DBLevelTaxId.
A database-level taxonomy ID can be specified in order to find the proper NA translation table without using the names.dmp and nodes.dmp files.
Default is -1, which is not a valid taxonomy ID.
const ms_parserule_plus * getDefaultRule | ( | ) | const |
Returns the value of DefaultRule.
The DefaultRule describes how to find the species name in the line of text in the reference file. The string in quotes is a regular expression. All words in the PrefixRemoves and SuffixRemoves keywords should be removed before trying to do a match. For more detailed information on how to specify this parameter, consult the Mascot manual.
If specified, DoThisRuleFirst is applied first, and the default rule would only be used if this failed.
char getDescriptionLineSep | ( | ) | const |
Returns the value of DescriptionLineSep.
The line that contains the species IDs has multiple IDs, separated by a character, whose ASCII code is specified by DescriptionLineSep.
There is no default value for this parameter.
const ms_parserule * getDoThisRuleFirst | ( | ) | const |
Returns the value of DoThisRuleFirst.
See getDefaultRule().
int getErrorLevel | ( | ) | const |
Returns the value of ErrorLevel.
ErrorLevel indicates the type of warnings or errors that are found when creating the taxonomy information. Possible values and their meanings:
0 - an entry is written to the NoTaxonomyMatch.txt file for every sequence where no taxonomy information is found.
1 - an entry is written to the NoTaxonomyMatch.txt file for every sequence that had any accession string without a match. Since some sequences in NCBI will have up to 200 gi numbers (sources), there is a reasonable chance that some of these entries will not have species information, and this would cause the error files to become very large.
Default is 1.
const ms_taxnodesfiles * getGencodeFile | ( | const int | index ) | const |
Returns an instance of ms_taxnodesfiles describing an entry in GencodeFiles.
The GencodeFiles file is used to find the proper NA translation table for a given taxonomy. For more detailed information on how to configure this parameter, consult the Mascot manual and the documentation for ms_taxnodesfiles.
By default the list of files is empty.
index | file number from 0 to (getNumberOfGencodeFiles()-1). |
std::string getIdentifier | ( | ) | const |
Returns the value of Identifier.
This parameter contains a symbolic name for the taxonomy specification as seen, for instance, in the database GUI utility.
By default this is empty.
std::string getNoBreakDescLineIf | ( | const int | index ) | const |
Returns a NoBreakDescLineIf entry by its number.
NoBreakDescLineIf specifies keywords that prevent description line from being broken into separate words.
By default this is an empty list.
index | number of entry from 0 to (getNumberOfNoBreakDescLineIf()-1). |
const ms_taxnodesfiles * getNodesFile | ( | const int | index ) | const |
Returns an instance of ms_taxnodesfiles describing an entry in NodesFiles.
The NodesFiles file is used to traverse the taxonomy hierarchy. It contains taxonomy ID nodes together with their parent IDs. For more detailed information on how to configure this parameter, consult the Mascot manual and the documentation for ms_taxnodesfiles.
By default the list of files is empty.
index | file number from 0 to (getNumberOfNodesFiles()-1). |
int getNumberOfGencodeFiles | ( | ) | const |
Returns the number of file names specified in GencodeFiles.
See getGencodeFile() and documentation for ms_taxnodesfiles.
int getNumberOfNoBreakDescLineIf | ( | ) | const |
Returns the number of NoBreakDescLineIf entries.
See getNoBreakDescLineIf() for explanations.
int getNumberOfNodesFiles | ( | ) | const |
Returns the number of file names specified in NodesFiles.
See getNodesFile() and documentation for ms_taxnodesfiles.
int getNumberOfPerDbSrcRules | ( | ) | const |
Returns the number of database source strings.
See getPerDbSrcRule() and documentation for ms_parserule_plus.
int getNumberOfPrefixRemoves | ( | ) | const |
Returns the number of PrefixRemoves entries.
See getPrefixRemove().
int getNumberOfSpeciesFiles | ( | ) | const |
Returns the number of file names specified in SpeciesFiles.
See getSpeciesFile() and documentation for ms_taxspeciesfiles.
int getNumberOfStrFiles | ( | ) | const |
int getNumberOfSuffixRemoves | ( | ) | const |
Returns the number of SuffixRemoves entries.
See getSuffixRemove().
const ms_parserule_plus * getPerDbSrcRule | ( | const int | index ) | const |
Returns a database source string by its number.
Database source strings contained in taxonomy section all look like RULE_XXX or OTHERRULE.
By default the list of rules is empty.
index | database source string number from 0 to (getNumberOfPerDbSrcRules()-1). |
std::string getPreceedingComments | ( | ) | const [inherited] |
Returns any comments preceding the section.
Sections in the mascot.dat file may have comments preceding them. In most cases, these comments need to stay 'attached' to the section.
Multiple-line comments are supported by having a single string with newline characters.
std::string getPrefixRemove | ( | const int | index ) | const |
Returns the PrefixRemoves string by number.
See getDefaultRule() for information on this parameter.
By default the list of prefixes is empty.
index | number of a string specified in the parameter from 0 to (getNumberOfPrefixRemoves()-1). |
std::string getQuickRefSearch | ( | ) | const |
Returns the value of QuickRefSearch.
The QuickRefSearch string is used to speed up the compressing of the database. Rather than using the regular expression for each line in the .ref file, this text is used for a fast comparison against a string such as C;Species. Other lines are ignored.
By default this is empty.
const ms_taxspeciesfiles * getSpeciesFile | ( | const int | index ) | const |
Returns an instance of ms_taxspeciesfiles describing an entry in SpeciesFiles.
The SpeciesFiles file is used to convert a species name to a taxonomy ID. For more detailed information on how to configure this parameter, consult the Mascot manual and the documentation for ms_taxspeciesfiles.
By default the list is empty.
index | file number from 0 to (getNumberOfSpeciesFiles()-1). |
std::string getSpeciesFormatRegex | ( | const TAX_SPECIES_FORMAT | format ) | const |
Returns a regular expression, if any, for the species format.
Introduced in Mascot 2.4, this allows the use of a regular expression for a species file. The regular expression needs to return two values: the 'accession' or 'id' and the taxonomy_id.
For example, the Taxonomy_X section of mascot.dat may contain an entry:
* SWISSPROTRegex "^\\([A-Z0-9]*\\) *[ABEV] *\\([0-9]*\\):"
which has two bracketed expressions intended to extract AADNV and 648330 from
AADNV V 648330: N=Aedes albopictus densovirus (isolate Boublik/1994)
If there is no value for SWISSPROTRegex in the mascot.dat file, then the value returned is the default as shown above.
format | can be any TAX_SPECIES_FORMAT value, although only TAX_SPECIES_SWISSPROT is currently supported in Mascot Server. |
const ms_parserule * getSrcDatabaseRule | ( | ) | const |
Returns the value of SrcDatabaseRule.
This parameter is used to find the database source string by means of a regular expression.
const ms_taxspeciesfiles * getStrFile | ( | const int | index ) | const |
index | file index to look for |
const ms_parserule_plus * getStrRule | ( | ) | const |
std::string getSuffixRemove | ( | const int | index ) | const |
Returns a SuffixRemoves string by number.
See getDefaultRule() for information on this parameter.
By default the list of suffixes is empty.
index | number of a string specified in the parameter from 0 to (getNumberOfSuffixRemoves()-1). |
bool isConcatRefFileLines | ( | ) | const |
Returns TRUE if ConcatRefFileLines parameter is set to 1 and FALSE otherwise.
A value of 1 (default) means that there might be multiple lines in reference file describing the same entry of the database.
bool isEnabled | ( | ) | const |
Returns TRUE if Enabled parameter is set to 1 and FALSE otherwise.
Set Enabled parameter to 0 to disable the taxonomy. Most of the other parameters will be ignored although the taxonomy will be available for a database to select in database maintenance utility.
Default is 0.
bool isFromRefFile | ( | ) | const |
Returns TRUE if FromRefFile parameter is set to 1 and FALSE otherwise.
FromRefFile is set to 0 to indicate that the taxonomy should be found in the .fasta file rather than in a reference file.
Default is 0.
bool isMitochondrialTranslation | ( | ) | const |
Returns TRUE if MitochondrialTranslation parameter is set to 1 and FALSE otherwise.
Two types of NA translation tables can be used: nuclear and mitochondrial. Setting this parameter to 0 indicates the first type whereas 1 indicates the second one.
Default is 0.
bool isSectionAvailable | ( | ) | const |
Checks whether the section has been actually read from the file.
By default, a Taxonomy section is unavailable until it has been set to a different state.
void setAccFromSpeciesLine | ( | const ms_parserule * | src ) |
Change the value of AccFromSpeciesLine.
See getAccFromSpeciesLine() for more information.
src | pointer to a parser rule |
void setConcatRefFileLines | ( | const bool | flag ) |
Change the value of ConcatRefFileLines.
See isConcatRefFileLines() for more information.
flag | boolean value to set whether there might be multiple lines for the same entry |
void setDefaultRule | ( | const ms_parserule_plus * | src ) |
void setDescriptionLineSep | ( | const char | value ) |
Change the value of DescriptionLineSep.
See getDescriptionLineSep() for more information.
value | description line separator |
void setDoThisRuleFirst | ( | const ms_parserule * | src ) |
Change the value of DoThisRuleFirst.
See getDoThisRuleFirst().
src | pointer to the parser rule that should be applied first, before the default rule |
void setEnabled | ( | const bool | flag ) |
Change the value of Enabled.
See isEnabled() for more information.
flag | whether the taxonomy rules are enabled or not |
void setErrorLevel | ( | const int | value ) |
Change the value of ErrorLevel.
See getErrorLevel() for more information.
value | error level |
void setFromRefFile | ( | const bool | flag ) |
Change the value of FromRefFile.
See isFromRefFile() for more information.
flag | true if the taxonomy should be found in a reference file, false if it should be found in the .fasta file |
void setIdentifier | ( | const char * | str ) |
Change the value of Identifier.
For more information see getIdentifier().
str | identifier for the taxonomy |
void setMitochondrialTranslation | ( | const bool | flag ) |
Set the value of MitochondrialTranslation.
See isMitochondrialTranslation().
flag | true if NA translation tables used are mitochondrial, false if they are nuclear |
void setPreceedingComments | ( | const std::string & | comments ) | [inherited] |
Sets any comments preceding the section.
Sections in the mascot.dat file may have comments preceding them. In most cases, these comments need to stay 'attached' to the section.
Multiple-line comments are supported by having a single string with newline characters.
comments | any comments preceding the section |
void setQuickRefSearch | ( | const char * | str ) |
Change the value of QuickRefSearch.
See getQuickRefSearch().
str | string used to speed up database compression |
void setSpeciesFormatRegex | ( | const TAX_SPECIES_FORMAT | format, |
const std::string | regex | ||
) |
Set the regular expression, if any, for the species format.
Introduced in Mascot 2.4, this allows the use of a regular expression for a species file. The regular expression needs to return two values: the 'accession' or 'id' and the taxonomy_id.
For example, the Taxonomy_X section of mascot.dat may contain an entry:
* SWISSPROTRegex "^\\([A-Z0-9]*\\) *[ABEV] *\\([0-9]*\\):"
which has two bracketed expressions intended to extract AADNV and 648330 from
AADNV V 648330: N=Aedes albopictus densovirus (isolate Boublik/1994)
format | can be any TAX_SPECIES_FORMAT value, although only TAX_SPECIES_SWISSPROT is currently supported in Mascot Server. |
regex | is the new regular expression or an empty string. |
void setSrcDatabaseRule | ( | const ms_parserule * | src ) |
Change the value of SrcDatabaseRule.
See getSrcDatabaseRule().
src | pointer to a parser rule |
void setStrRule | ( | const ms_parserule_plus * | src ) |
src | pointer to a parser str rule |
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:38 |