Read in the mascot.dat file.
#!/usr/bin/python
##############################################################################
# file: config_mascotdat.py                                                  #
# 'msparser' toolkit example code                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#    $Source: parser/examples/test_python/config_mascotdat.py $
#    $Author: villek@matrixscience.com $
#      $Date: 2018-07-30 16:23:53 +0100 $
#  $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $
# $NoKeywords:: $
##############################################################################
"""Print a summary of every configuration section found in a mascot.dat file.

Usage:
    python config_mascotdat.py <mascot.dat path or Mascot server URL> [sessionID]

The sections reported, in order, are: Databases, Parse, WWW, Taxonomy,
Cluster, Processor, Options and Cron.  A section that is not present in the
file is reported as missing rather than treated as an error.
"""

import sys

import msparser


# Shown (followed by exit status 1) when no location argument is supplied.
USAGE = """
Location of mascot.dat has to be specified as a parameter.
The location should either be the full path to the mascot.dat file
or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi
"""


def _open_datfile(argv):
    """Return an ms_datfile for argv[1], using argv[2] as session ID if given."""
    # A sessionID can optionally be passed as the second parameter.
    # This will only be required if the 'file' is a URL.
    if len(argv) > 2:
        settings = msparser.ms_connection_settings()
        settings.setSessionID(argv[2])
        return msparser.ms_datfile(argv[1], 0, settings)
    return msparser.ms_datfile(argv[1])


def _print_databases(datfile):
    """Print each configured database name with its active/inactive state."""
    dbs = datfile.getDatabases()
    # Check if there is actually a 'Databases' section in the file before
    # continuing.
    if dbs.isSectionAvailable():
        count = dbs.getNumberOfDatabases()
        print("There are %d databases configured:" % count)
        for i in range(count):
            db = dbs.getDatabase(i)
            state = "active" if db.isActive() else "inactive"
            # Name is left-justified in a 20 character column; the state
            # follows immediately after it.
            print("{:<20}{:<}".format(db.getName(), state))
    else:
        print("Section 'Databases' is missing")
    print("")


def _print_parse_rules(datfile):
    """Print every parse rule that is actually specified in the file."""
    parse_options = datfile.getParseOptions()
    if parse_options.isSectionAvailable():
        count = parse_options.getNumberOfParseRules()
        print("There are %d parse rules in the file; the following are specified:" % count)
        for i in range(count):
            rule = parse_options.getParseRule(i)
            # Not all of them need be specified in the file.
            if rule.isAvailable():
                print("Rule_%d : %s" % (i, rule.getRuleStr()))
    else:
        print("Section 'Parse' is missing")
    print("")


def _print_www(datfile):
    """Print each sequence report source as <name>_SEQ or <name>_REP."""
    www_options = datfile.getWWWOptions()
    if www_options.isSectionAvailable():
        count = www_options.getNumberOfEntries()
        print("There are %d sequence report sources configured:" % count)
        for i in range(count):
            entry = www_options.getEntry(i)
            # Any entry whose type is not WWW_SEQ is labelled "REP".
            kind = "SEQ" if entry.getType() == msparser.WWW_SEQ else "REP"
            print("%s_%s" % (entry.getName(), kind))
    else:
        print("Section 'WWW' is missing")
    print("")


def _print_taxonomy(datfile):
    """Print the identifier of every taxonomy source, or "(none)" if none exist."""
    print("Available taxonomy sources:")
    max_rules = datfile.getMaxTaxonomyRules()
    found = 0
    # Taxonomy sections are looked up by index, starting at 1.
    for index in range(1, 1 + max_rules):
        rules = datfile.getTaxonomyRules(index)
        # A falsy result means this taxonomy section does not exist.
        if rules:
            found += 1
            print("TAXONOMY_%d" % index)
            print(rules.getIdentifier())
    if found == 0:
        print("(none)")
    print("")


def _print_cluster(datfile):
    """Print whether cluster mode is enabled."""
    cluster_params = datfile.getClusterParams()
    if cluster_params.isSectionAvailable():
        if cluster_params.isEnabled():
            print("Cluster mode: enabled")
        else:
            print("Cluster mode: disabled")
    else:
        print("Section 'Cluster' is missing")
    print("")


def _print_processors(datfile):
    """Print the number of configured CPUs."""
    proc_options = datfile.getProcessors()
    if proc_options.isSectionAvailable():
        print("%d CPU(s) configured" % proc_options.getNumberOfProcessors())
    else:
        print("Section 'Processor' is missing")
    print("")


def _print_options(datfile):
    """Print the MascotCmdLine value from the 'Options' section."""
    mascot_options = datfile.getMascotOptions()
    if mascot_options.isSectionAvailable():
        print("MascotCmdLine: %s" % mascot_options.getMascotCmdLine())
    else:
        print("Section 'Options' is missing")
    print("")


def _print_cron(datfile):
    """Print the command string of every configured cron job."""
    cron_options = datfile.getCronOptions()
    if cron_options.isSectionAvailable():
        if cron_options.isCronEnabled():
            count = cron_options.getNumberOfCronJobs()
            # The heading only ends with a colon when a job list follows it.
            if count == 0:
                print("There are %d cron jobs configured" % count)
            else:
                print("There are %d cron jobs configured:" % count)
            for i in range(count):
                print(cron_options.getCronJob(i).getCommandStr())
        else:
            print("Cron functionality is disabled")
    else:
        print("Section 'Cron' is missing")
    print("")


def main(argv=None):
    """Report on the mascot.dat named on the command line.

    argv defaults to sys.argv.  argv[1] is the path or URL of mascot.dat and
    the optional argv[2] is a session ID.

    Returns the process exit status: 1 when the location argument is missing
    or the file is reported as not valid, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(USAGE)
        return 1

    datfile = _open_datfile(argv)
    if not datfile.isValid():
        print("There are errors. Cannot continue. The last error description:")
        print(datfile.getLastErrorString())
        return 1

    _print_databases(datfile)
    _print_parse_rules(datfile)
    _print_www(datfile)
    _print_taxonomy(datfile)
    _print_cluster(datfile)
    _print_processors(datfile)
    _print_options(datfile)
    _print_cron(datfile)
    return 0


if __name__ == "__main__":
    sys.exit(main())


# Sample output, kept as a raw string because the parse rules contain
# backslashes that are not valid Python escape sequences.
# NOTE(review): the sample differs from the current print formats in places
# (for example it shows " : " separators that this script does not print);
# regenerate it when convenient.
r"""

Running the program as

python config_mascotdat.py /usr/local/mascot/config/mascot.dat

will give the following output under Mascot Server 2.3 (exact details depend
on how the server has been configured):


There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive

There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID \([^ ]*\)
Rule_16 : \*.*\(ID [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC \([^ ;]*\)
Rule_27 : \*.*\(AC \S.*\)
Rule_28 : ^ID \([^ .]*\)
Rule_29 : \*.*\(ID IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);

There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP

Available taxonomy sources:
TAXONOMY_1
Obsolete
TAXONOMY_2
OWL REF
TAXONOMY_3
SwissProt FASTA
TAXONOMY_4
Obsolete
TAXONOMY_5
Swiss-prot DAT
TAXONOMY_6
MSDB REF (pre 20000621)
TAXONOMY_7
MSDB REF
TAXONOMY_8
NCBI nr FASTA using GI2TAXID
TAXONOMY_9
dbEST FASTA using GI2TAXID
TAXONOMY_10
EST_human FASTA with TaxID
TAXONOMY_11
EST_mouse FASTA with TaxID
TAXONOMY_12
UniRef Fasta
TAXONOMY_13
EMBL EST Fasta

Cluster mode : disabled

Section 'Processor' is missing

MascotCmdLine : ../cgi/nph-mascot.exe

Cron functionality is disabled

"""
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29