Matrix Science header

config_mascotdat.pl

Read the mascot.dat configuration file and print a summary of its sections.

#!/usr/local/bin/perl
##############################################################################
# file: config_mascotdat.pl                                                  #
# 'msparser' toolkit example code                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: parser/examples/test_perl/config_mascotdat.pl $ #
#     $Author: villek@matrixscience.com $                                                      #
#       $Date: 2018-07-30 16:23:53 +0100 $                                         #
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $                                                         #
# $NoKeywords::                                                            $ #
##############################################################################
use strict;
##############################################################################

use msparser;

# The location of mascot.dat is a mandatory first argument: when it is
# absent, print the usage text and leave with a non-zero exit status.
unless (defined $ARGV[0]) {
    print "Location of mascot.dat has to be specified as a parameter.\n",
          "The location should either be the full path to the mascot.dat file\n",
          "or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi\n";
    exit 1;
}

# Note: $cs is declared here, next to $file, and not inside the 'if' block
# below: the connection settings object has to stay in scope for as long as
# $file is in use. See "Using the toolkit from Perl, Java and Python" in the
# Mascot Parser manual.
my ($file, $cs);

# A sessionID can optionally be passed as the second parameter.
# This will only be required if the 'file' is a URL.
#
# Constructors are invoked as Class->new(...) instead of the indirect-object
# form 'new Class(...)', which Perl can parse ambiguously.
if (defined $ARGV[1]) {
    $cs = msparser::ms_connection_settings->new;
    $cs->setSessionID($ARGV[1]);
    $file = msparser::ms_datfile->new($ARGV[0], 0, $cs);
} else {
    $file = msparser::ms_datfile->new($ARGV[0]);
}

# Give up straight away when the file object reports itself as invalid,
# printing the last error string it holds.
unless ($file->isValid()) {
    print "There are errors. Cannot continue. The last error description:\n",
          $file->getLastErrorString(), "\n";
    exit 1;
}

# Section 'Databases': one "name : active|inactive" line per database.
my $dbs = $file->getDatabases;

# The section may be absent from the file, so that case is reported first.
if (!$dbs->isSectionAvailable) {
    print "Section 'Databases' is missing\n";
} else {
    my $dbCount = $dbs->getNumberOfDatabases;
    print "There are ", $dbCount, " databases configured:\n";

    foreach my $idx (0 .. $dbCount - 1) {
        my $db    = $dbs->getDatabase($idx);
        my $state = $db->isActive() ? "active" : "inactive";
        print $db->getName(), " : ", $state, "\n";
    }
}

print "\n";

# Section 'Parse': print each parse rule that is available.
my $parseOptions = $file->getParseOptions();

# Only walk the rules when the file really has a 'Parse' section.
if ($parseOptions->isSectionAvailable) {
    my $ruleCount = $parseOptions->getNumberOfParseRules();
    print "There are ", $ruleCount, " parse rules in the file; the following are specified:\n";

    foreach my $ruleNo (0 .. $ruleCount - 1) {
        my $rule = $parseOptions->getParseRule($ruleNo);

        # Rule slots that are not specified in the file are skipped.
        next unless $rule->isAvailable();

        print "Rule_", $ruleNo, " : ", $rule->getRuleStr(), "\n";
    }
} else {
    print "Section 'Parse' is missing\n";
}

print "\n";

# Section 'WWW': list each entry as "<name>_SEQ" or "<name>_REP".
my $wwwOptions = $file->getWWWOptions();

# The listing is only produced when the 'WWW' section is present.
if ($wwwOptions->isSectionAvailable) {
    my $entryCount = $wwwOptions->getNumberOfEntries();
    print "There are ", $entryCount, " sequence report sources configured:\n";

    foreach my $pos (0 .. $entryCount - 1) {
        my $entry = $wwwOptions->getEntry($pos);

        # Any type other than WWW_SEQ is printed as REP.
        my $suffix = $entry->getType() == $msparser::WWW_SEQ ? "SEQ" : "REP";

        print $entry->getName(), "_", $suffix, "\n";
    }
} else {
    print "Section 'WWW' is missing\n";
}

print "\n";

# Section 'Taxonomy': probe indices 1 .. getMaxTaxonomyRules() and print
# the identifier of every index for which a rules object is returned.
print "Available taxonomy sources:\n";

my $maxtax = $file->getMaxTaxonomyRules();
my $activetax = 0;

foreach my $taxind (1 .. $maxtax) {
    my $rules = $file->getTaxonomyRules($taxind);

    # A false value means there is no taxonomy section for this index.
    next if !$rules;

    $activetax++;
    print "TAXONOMY_", $taxind, " ", $rules->getIdentifier(), "\n";
}

# Make an empty listing explicit.
print "(none)\n" if $activetax == 0;

print "\n";

# Section 'Cluster': report whether cluster mode is switched on.
my $clusterParams = $file->getClusterParams();

# Report the state only when the 'Cluster' section exists in the file.
if ($clusterParams->isSectionAvailable) {
    my $mode = $clusterParams->isEnabled ? "enabled" : "disabled";
    print "Cluster mode : ", $mode, "\n";
} else {
    print "Section 'Cluster' is missing\n";
}

print "\n";

# Section 'Processor': print the number of configured processors.
my $procOptions = $file->getProcessors();

# A missing 'Processor' section is reported instead of a count.
if (!$procOptions->isSectionAvailable) {
    print "Section 'Processor' is missing\n";
} else {
    my $cpuCount = $procOptions->getNumberOfProcessors;
    print $cpuCount, " CPU(s) configured\n";
}

print "\n";

# Section 'Options': print the configured MascotCmdLine value.
my $mascotOptions = $file->getMascotOptions();

# A missing 'Options' section is reported instead of the value.
if (!$mascotOptions->isSectionAvailable) {
    print "Section 'Options' is missing\n";
} else {
    print "MascotCmdLine : ", $mascotOptions->getMascotCmdLine(), "\n";
}

print "\n";

# Section 'Cron': print the command string of every configured cron job.
my $cronOptions = $file->getCronOptions();

# Three outcomes: section missing, cron disabled, or a job listing.
if (!$cronOptions->isSectionAvailable) {
    print "Section 'Cron' is missing\n";
} elsif (!$cronOptions->isCronEnabled) {
    print "Cron functionality is disabled\n";
} else {
    my $jobCount = $cronOptions->getNumberOfCronJobs;

    # The heading ends with a colon only when a job listing follows it.
    my $ending = $jobCount == 0 ? "\n" : ":\n";
    print "There are ", $jobCount, " cron jobs configured", $ending;

    foreach my $jobNo (0 .. $jobCount - 1) {
        print $cronOptions->getCronJob($jobNo)->getCommandStr(), "\n";
    }
}

print "\n";


=pod

Running the program as 

perl -I../bin config_mascotdat.pl ../config/mascot.dat

will give the following output under Mascot Server 2.3 (exact details depend 
on how the server has been configured):

There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive

There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN 
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID   \([^ ]*\)
Rule_16 : \*.*\(ID   [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID   IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID   [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC   \([^ ;]*\)
Rule_27 : \*.*\(AC   \S.*\)
Rule_28 : ^ID   \([^ .]*\)
Rule_29 : \*.*\(ID   IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);

There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP

Available taxonomy sources:
TAXONOMY_1 Obsolete
TAXONOMY_2 OWL REF
TAXONOMY_3 SwissProt FASTA
TAXONOMY_4 Obsolete
TAXONOMY_5 Swiss-prot DAT
TAXONOMY_6 MSDB REF (pre 20000621)
TAXONOMY_7 MSDB REF
TAXONOMY_8 NCBI nr FASTA using GI2TAXID
TAXONOMY_9 dbEST FASTA using GI2TAXID
TAXONOMY_10 EST_human FASTA with TaxID
TAXONOMY_11 EST_mouse FASTA with TaxID
TAXONOMY_12 UniRef Fasta
TAXONOMY_13 EMBL EST Fasta

Cluster mode : disabled

Section 'Processor' is missing

MascotCmdLine : ../cgi/nph-mascot.exe

Cron functionality is disabled


=cut


Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29