# msparser/help/config_mascotdat_8pl-example.html

#!/usr/local/bin/perl

##############################################################################

# file: config_mascotdat.pl                                                  #

# 'msparser' toolkit example code                                            #

##############################################################################

# COPYRIGHT NOTICE                                                           #

# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #

#                                                                            #

##############################################################################

#     $Source: parser/examples/test_perl/config_mascotdat.pl $ #

#     $Author: villek@matrixscience.com $                                                      #

#       $Date: 2018-07-30 16:23:53 +0100 $                                         #

#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_1_0-2025-07-27-0-gea47708fac $                                                         #

# $NoKeywords::                                                            $ #

##############################################################################

use strict;

##############################################################################


use msparser;


# The first command-line argument is mandatory. When it is absent, print the
# usage text and terminate with exit status 1.
unless (defined $ARGV[0]) {
    print <<"EOF";

Location of mascot.dat has to be specified as a parameter.

The location should either be the full path to the mascot.dat file

or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi

EOF
    exit 1;
}


# Note: $cs must not be declared inside the 'if' block below. It has to stay
# in scope for as long as $file is in use, so both variables are declared
# together here, outside the conditional. See "Using the toolkit from Perl,
# Java and Python" in the Mascot Parser manual.
my ($file, $cs);

# A sessionID can optionally be passed as the second parameter.
# This will only be required if the 'file' is a URL.
#
# Constructors are invoked as Class->new(...) instead of the indirect object
# form "new Class(...)", which perl can parse ambiguously.
if (defined $ARGV[1]) {
    $cs = msparser::ms_connection_settings->new;
    $cs->setSessionID($ARGV[1]);
    $file = msparser::ms_datfile->new($ARGV[0], 0, $cs);
} else {
    $file = msparser::ms_datfile->new($ARGV[0]);
}


# Give up when $file reports itself as invalid: show the last error string it
# recorded and terminate with exit status 1.
unless ($file->isValid) {
    print "There are errors. Cannot continue. The last error description:\n",
          $file->getLastErrorString(), "\n";
    exit 1;
}


# 'Databases' section: print the number of configured databases, then one
# "name : active|inactive" line per database.
my $dbs = $file->getDatabases;

# The entries are only enumerated when the section is present in the file.
if ($dbs->isSectionAvailable) {
    my $db_count = $dbs->getNumberOfDatabases;
    print "There are ", $db_count, " databases configured:\n";

    for my $idx (0 .. $db_count - 1) {
        # Look the entry up once and reuse it for both the name and the flag.
        my $db = $dbs->getDatabase($idx);
        print $db->getName, " : ",
              ($db->isActive() ? "active" : "inactive"), "\n";
    }
} else {
    print "Section 'Databases' is missing\n";
}

print "\n";


# 'Parse' section: print the total number of parse rules, then one
# "Rule_<index> : <rule string>" line for each rule that is available.
my $parseOptions = $file->getParseOptions();

# The rules are only enumerated when the section is present in the file.
if ($parseOptions->isSectionAvailable) {
    my $rule_count = $parseOptions->getNumberOfParseRules();
    print "There are ", $rule_count, " parse rules in the file; the following are specified:\n";

    for my $idx (0 .. $rule_count - 1) {
        my $rule = $parseOptions->getParseRule($idx);

        # Not every rule needs to be specified in the file; skip the rest.
        next unless $rule->isAvailable();

        print "Rule_", $idx, " : ", $rule->getRuleStr(), "\n";
    }
} else {
    print "Section 'Parse' is missing\n";
}

print "\n";


# 'WWW' section: print the number of entries, then one "<name>_SEQ" or
# "<name>_REP" line per entry.
my $wwwOptions = $file->getWWWOptions();

# The entries are only enumerated when the section is present in the file.
if ($wwwOptions->isSectionAvailable) {
    my $entry_count = $wwwOptions->getNumberOfEntries();
    print "There are ", $entry_count, " sequence report sources configured:\n";

    for my $idx (0 .. $entry_count - 1) {
        my $entry = $wwwOptions->getEntry($idx);

        # Entries whose type equals WWW_SEQ get the "SEQ" suffix; every
        # other type is printed as "REP".
        print $entry->getName(), "_",
              ($entry->getType() == $msparser::WWW_SEQ ? "SEQ" : "REP"), "\n";
    }
} else {
    print "Section 'WWW' is missing\n";
}

print "\n";


# 'Taxonomy' sections: walk the slots 1 .. getMaxTaxonomyRules() and print
# "TAXONOMY_<index> <identifier>" for every slot that yields a rules object.
print "Available taxonomy sources:\n";

my $maxtax = $file->getMaxTaxonomyRules();
my $activetax = 0;

for my $taxind (1 .. $maxtax) {
    # A false return value means this taxonomy slot does not exist.
    my $rules = $file->getTaxonomyRules($taxind);
    next unless $rules;

    ++$activetax;
    print "TAXONOMY_", $taxind, " ", $rules->getIdentifier(), "\n";
}

# Make it explicit when no slot produced any output.
print "(none)\n" if $activetax == 0;

print "\n";


# 'Cluster' section: report whether cluster mode is enabled or disabled.
my $clusterParams = $file->getClusterParams();

# The mode is only reported when the section is present in the file.
if ($clusterParams->isSectionAvailable) {
    print "Cluster mode : ",
          ($clusterParams->isEnabled ? "enabled" : "disabled"), "\n";
} else {
    print "Section 'Cluster' is missing\n";
}

print "\n";


# 'Processor' section: report the configured number of CPUs, or note that
# the section is missing from the file.
my $procOptions = $file->getProcessors();

my @processor_report = $procOptions->isSectionAvailable
    ? ($procOptions->getNumberOfProcessors, " CPU(s) configured\n")
    : ("Section 'Processor' is missing\n");
print @processor_report;

print "\n";


# 'Options' section: print the MascotCmdLine value, or note that the section
# is missing from the file.
my $mascotOptions = $file->getMascotOptions();

my @options_report = $mascotOptions->isSectionAvailable
    ? ("MascotCmdLine : ", $mascotOptions->getMascotCmdLine(), "\n")
    : ("Section 'Options' is missing\n");
print @options_report;

print "\n";


# 'Cron' section: print the number of cron jobs followed by each job's
# command string, or explain why nothing can be listed.
my $cronOptions = $file->getCronOptions();

if (!$cronOptions->isSectionAvailable) {
    print "Section 'Cron' is missing\n";
} elsif (!$cronOptions->isCronEnabled) {
    print "Cron functionality is disabled\n";
} else {
    my $job_count = $cronOptions->getNumberOfCronJobs;

    # The heading ends with a colon only when a list of jobs follows it.
    print "There are ", $job_count, " cron jobs configured",
          ($job_count == 0 ? "" : ":"), "\n";

    print $cronOptions->getCronJob($_)->getCommandStr(), "\n"
        for 0 .. $job_count - 1;
}

print "\n";


=pod


Running the program as


perl -I../bin config_mascotdat.pl ../config/mascot.dat


will give the following output under Mascot Server 2.3 (exact details depend

on how the server has been configured):


There are 29 databases configured:

contaminants : active

cRAP : active

EST_human : inactive

EST_mouse : inactive

EST_others : inactive

IPI_arabidopsis : inactive

IPI_bovine : inactive

IPI_chicken : inactive

IPI_human : inactive

IPI_mouse : inactive

IPI_rat : inactive

IPI_zebrafish : inactive

MSIPI_human : inactive

MSIPI_mouse : inactive

NCBInr : active

SwissProt : active

Trembl : inactive

UniRef100 : inactive

Environmental_EST : inactive

Fungi_EST : inactive

Human_EST : inactive

Invertebrates_EST : inactive

Mammals_EST : inactive

Mus_EST : inactive

Plants_EST : inactive

Prokaryotes_EST : inactive

Rodents_EST : inactive

Unclassified_EST : inactive

Vertebrates_EST : inactive


There are 256 parse rules in the file; the following are specified:

Rule_1 : >owl[^ ]*|\([^ ]*\)

Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)

Rule_3 : >[A-Z][0-9];\([^ ]*\)

Rule_4 : >\([^ ]*\)

Rule_5 : >[^ ]* \(.*\)

Rule_6 : >\(gi|[0-9]*\)

Rule_7 : >[^ ]* \(.*\)

Rule_8 : \*\(.*\)>

Rule_9 : \*.*\(>[A-Z][0-9];.*\)

Rule_10 : \(LOCUS .*\)ORIGIN

Rule_11 : \(LOCUS .*\)

Rule_12 : >\([^ ]*\)

Rule_13 : >[^ ]* \(.*\)

Rule_14 : <pre>\(.*\)</pre>

Rule_15 : ^ID   \([^ ]*\)

Rule_16 : \*.*\(ID   [A-Z0-9]*_[A-Z0-9]* .*\)

Rule_17 : >\([^ ]*\)

Rule_18 : >[^ ]* \(.*\)

Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*

Rule_20 : >\(.*\)

Rule_21 : >IPI:\([^| .]*\)

Rule_22 : \*.*\(ID   IPI[0-9]* .*\)

Rule_23 : \(.*\)

Rule_24 : \*.*\(ID   [-A-Z0-9_].*\)

Rule_25 : >[^(]*.\([^)]*\)

Rule_26 : ^AC   \([^ ;]*\)

Rule_27 : \*.*\(AC   \S.*\)

Rule_28 : ^ID   \([^ .]*\)

Rule_29 : \*.*\(ID   IPI[0-9.]* .*\)

Rule_30 : >UniRef100_\([^ ]*\)

Rule_31 : >[^|]*|\([^ ]*\)

Rule_32 : >\([^|]*\)

Rule_33 : >..|[^|]*|\([^ ]*\)

Rule_34 : >..|\([^|]*\)

Rule_35 : >sp|\([^|]*\)

Rule_36 : >IPI:CON_\([^|]*\)

Rule_37 : >MSIPI:s*p*|*\([^| .]*\)

Rule_38 : >EM_EST:\([A-Z0-9]*\);

Rule_39 : >EM_ENV:\([A-Z0-9]*\);


There are 56 sequence report sources configured:

contaminants_SEQ

cRAP_SEQ

EST_human_REP

EST_human_SEQ

EST_mouse_REP

EST_mouse_SEQ

EST_others_REP

EST_others_SEQ

IPI_arabidopsis_REP

IPI_arabidopsis_SEQ

IPI_bovine_REP

IPI_bovine_SEQ

IPI_chicken_REP

IPI_chicken_SEQ

IPI_human_REP

IPI_human_SEQ

IPI_mouse_REP

IPI_mouse_SEQ

IPI_rat_REP

IPI_rat_SEQ

IPI_zebrafish_REP

IPI_zebrafish_SEQ

MSIPI_human_REP

MSIPI_human_SEQ

MSIPI_mouse_REP

MSIPI_mouse_SEQ

NCBInr_REP

NCBInr_SEQ

SwissProt_REP

SwissProt_SEQ

Trembl_REP

Trembl_SEQ

UniRef100_REP

UniRef100_SEQ

Environmental_EST_SEQ

Environmental_EST_REP

Fungi_EST_SEQ

Fungi_EST_REP

Human_EST_SEQ

Human_EST_REP

Invertebrates_EST_SEQ

Invertebrates_EST_REP

Mammals_EST_SEQ

Mammals_EST_REP

Mus_EST_SEQ

Mus_EST_REP

Plants_EST_SEQ

Plants_EST_REP

Prokaryotes_EST_SEQ

Prokaryotes_EST_REP

Rodents_EST_SEQ

Rodents_EST_REP

Unclassified_EST_SEQ

Unclassified_EST_REP

Vertebrates_EST_SEQ

Vertebrates_EST_REP


Available taxonomy sources:

TAXONOMY_1 Obsolete

TAXONOMY_2 OWL REF

TAXONOMY_3 SwissProt FASTA

TAXONOMY_4 Obsolete

TAXONOMY_5 Swiss-prot DAT

TAXONOMY_6 MSDB REF (pre 20000621)

TAXONOMY_7 MSDB REF

TAXONOMY_8 NCBI nr FASTA using GI2TAXID

TAXONOMY_9 dbEST FASTA using GI2TAXID

TAXONOMY_10 EST_human FASTA with TaxID

TAXONOMY_11 EST_mouse FASTA with TaxID

TAXONOMY_12 UniRef Fasta

TAXONOMY_13 EMBL EST Fasta


Cluster mode : disabled


Section 'Processor' is missing


MascotCmdLine : ../cgi/nph-mascot.exe


Cron functionality is disabled


=cut