Read in the mascot.dat file.
#!/usr/local/bin/perl
##############################################################################
# file: config_mascotdat.pl                                                  #
# 'msparser' toolkit example code                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved.            #
#                                                                            #
##############################################################################
# $Source: parser/examples/test_perl/config_mascotdat.pl $ #
# $Author: villek@matrixscience.com $ #
# $Date: 2018-07-30 16:23:53 +0100 $ #
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ #
# $NoKeywords:: $ #
##############################################################################
use strict;
use warnings;
##############################################################################

use msparser;

# The first command-line argument (path or URL of mascot.dat) is mandatory;
# without it, print the usage text and stop with a non-zero exit status.
if (!defined($ARGV[0])) {
    print <<'EOF';
Location of mascot.dat has to be specified as a parameter.
The location should either be the full path to the mascot.dat file
or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi
EOF
    exit 1;
}

# Note: $cs must not be scoped to the 'if' block below; it has to stay in
# scope for as long as $file is used, which is why both are declared here.
# See "Using the toolkit from Perl, Java and Python" in Mascot Parser manual.
my ($file, $cs);

# A sessionID can optionally be passed as the second parameter.
# This will only be required if the 'file' is a URL.
if (defined($ARGV[1])) {
    # Direct method-call syntax is used instead of the ambiguous indirect
    # object form ("new Class(...)"); the constructor calls are the same.
    $cs = msparser::ms_connection_settings->new;
    $cs->setSessionID($ARGV[1]);
    $file = msparser::ms_datfile->new($ARGV[0], 0, $cs);
} else {
    $file = msparser::ms_datfile->new($ARGV[0]);
}

# Stop early, reporting the last error, if the file could not be loaded.
if (!$file->isValid) {
    print "There are errors. Cannot continue. The last error description:\n";
    print $file->getLastErrorString(), "\n";
    exit 1;
}

# Retrieving section 'Databases' content.
my $dbs = $file->getDatabases;

# Check if there is actually a 'Databases' section in the file before
# continuing.
if ($dbs->isSectionAvailable) {
    # One line per configured database: "<name> : active|inactive".
    my $db_count = $dbs->getNumberOfDatabases;
    print "There are ", $db_count, " databases configured:\n";

    for my $db_idx (0 .. $db_count - 1) {
        my $db = $dbs->getDatabase($db_idx);
        print $db->getName, " : ";
        print $db->isActive() ? "active\n" : "inactive\n";
    }
} else {
    print "Section 'Databases' is missing\n";
}
print "\n";

# Retrieving section 'Parse' content.
my $parseOptions = $file->getParseOptions();

# Report the parse rules only when the file has a 'Parse' section.
if ($parseOptions->isSectionAvailable) {
    my $rule_count = $parseOptions->getNumberOfParseRules();
    print "There are ", $rule_count, " parse rules in the file; the following are specified:\n";

    for my $rule_idx (0 .. $rule_count - 1) {
        my $rule = $parseOptions->getParseRule($rule_idx);

        # Not every rule slot has to be filled in by the file; skip the
        # ones that are not available.
        next unless $rule->isAvailable();

        print "Rule_", $rule_idx, " : ";
        print $rule->getRuleStr(), "\n";
    }
} else {
    print "Section 'Parse' is missing\n";
}
print "\n";

# Retrieving section 'WWW' content.
my $wwwOptions = $file->getWWWOptions();

# Report the sequence report sources only when there is a 'WWW' section.
if ($wwwOptions->isSectionAvailable) {
    my $entry_count = $wwwOptions->getNumberOfEntries();
    print "There are ", $entry_count, " sequence report sources configured:\n";

    for my $entry_idx (0 .. $entry_count - 1) {
        my $entry = $wwwOptions->getEntry($entry_idx);
        print $entry->getName(), "_";

        # Entries whose type is not WWW_SEQ are labelled as REP.
        print $entry->getType() == $msparser::WWW_SEQ ? "SEQ\n" : "REP\n";
    }
} else {
    print "Section 'WWW' is missing\n";
}
print "\n";

# Retrieving section 'Taxonomy' content.
print "Available taxonomy sources:\n";
my $maxtax    = $file->getMaxTaxonomyRules();
my $activetax = 0;

for my $tax_no (1 .. $maxtax) {
    # A false return value means this taxonomy section does not exist.
    my $tax_rules = $file->getTaxonomyRules($tax_no);
    next unless $tax_rules;

    $activetax++;
    print "TAXONOMY_", $tax_no, " ";
    print $tax_rules->getIdentifier(), "\n";
}

print "(none)\n" if $activetax == 0;
print "\n";

# Retrieving section 'Cluster' content.
my $clusterParams = $file->getClusterParams();

# Report the cluster mode only when the file has a 'Cluster' section.
if ($clusterParams->isSectionAvailable) {
    print "Cluster mode : ";
    print $clusterParams->isEnabled() ? "enabled\n" : "disabled\n";
} else {
    print "Section 'Cluster' is missing\n";
}
print "\n";

# Retrieving section 'Processor' content.
my $procOptions = $file->getProcessors();

# Report the CPU count only when the file has a 'Processor' section.
if (!$procOptions->isSectionAvailable) {
    print "Section 'Processor' is missing\n";
} else {
    print $procOptions->getNumberOfProcessors, " CPU(s) configured\n";
}
print "\n";

# Retrieving section 'Options' content.
my $mascotOptions = $file->getMascotOptions();

# Report the command line only when the file has an 'Options' section.
if (!$mascotOptions->isSectionAvailable) {
    print "Section 'Options' is missing\n";
} else {
    print "MascotCmdLine : ", $mascotOptions->getMascotCmdLine(), "\n";
}
print "\n";

# Retrieving section 'Cron' content.
my $cronOptions = $file->getCronOptions();

# Three outcomes: no 'Cron' section, cron switched off, or a job listing.
if (!$cronOptions->isSectionAvailable) {
    print "Section 'Cron' is missing\n";
} elsif (!$cronOptions->isCronEnabled) {
    print "Cron functionality is disabled\n";
} else {
    my $job_count = $cronOptions->getNumberOfCronJobs;

    # The heading ends with a colon only when job lines follow it.
    my $heading_end = $job_count == 0 ? "\n" : ":\n";
    print "There are ", $job_count, " cron jobs configured", $heading_end;

    for my $job_idx (0 .. $job_count - 1) {
        print $cronOptions->getCronJob($job_idx)->getCommandStr(), "\n";
    }
}
print "\n";

=pod

Running the program as

perl -I../bin config_mascotdat.pl ../config/mascot.dat

will give the following output under Mascot Server 2.3 (exact details depend
on how the server has been configured):

There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive

There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID \([^ ]*\)
Rule_16 : \*.*\(ID [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC \([^ ;]*\)
Rule_27 : \*.*\(AC \S.*\)
Rule_28 : ^ID \([^ .]*\)
Rule_29 : \*.*\(ID IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);

There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP

Available taxonomy sources:
TAXONOMY_1 Obsolete
TAXONOMY_2 OWL REF
TAXONOMY_3 SwissProt FASTA
TAXONOMY_4 Obsolete
TAXONOMY_5 Swiss-prot DAT
TAXONOMY_6 MSDB REF (pre 20000621)
TAXONOMY_7 MSDB REF
TAXONOMY_8 NCBI nr FASTA using GI2TAXID
TAXONOMY_9 dbEST FASTA using GI2TAXID
TAXONOMY_10 EST_human FASTA with TaxID
TAXONOMY_11 EST_mouse FASTA with TaxID
TAXONOMY_12 UniRef Fasta
TAXONOMY_13 EMBL EST Fasta

Cluster mode : disabled

Section 'Processor' is missing

MascotCmdLine : ../cgi/nph-mascot.exe

Cron functionality is disabled

=cut
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29