Matrix Science header

config_mascotdat.py

Read in the mascot.dat file.

#!/usr/bin/python
##############################################################################
# file: config_mascotdat.py                                                  #
# 'msparser' toolkit example code                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: parser/examples/test_python/config_mascotdat.py $ #
#     $Author: villek@matrixscience.com $                                                      #
#       $Date: 2018-07-30 16:23:53 +0100 $                                         #
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $                                                         #
# $NoKeywords::                                                            $ #
##############################################################################

import msparser
import sys

# The first command-line argument (the mascot.dat location) is mandatory.
cmdline = sys.argv[1:]
if not cmdline:
    print("""
Location of mascot.dat has to be specified as a parameter.
The location should either be the full path to the mascot.dat file
or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi
""")
    sys.exit(1)

location = cmdline[0]

# An optional second argument supplies a session ID; it is only needed
# when the location is a URL rather than a local path.
if len(cmdline) >= 2:
    settings = msparser.ms_connection_settings()
    settings.setSessionID(cmdline[1])
    file = msparser.ms_datfile(location, 0, settings)
else:
    file = msparser.ms_datfile(location)

# Stop straight away if the configuration could not be loaded.
if not file.isValid():
    print("There are errors. Cannot continue. The last error description:")
    print(file.getLastErrorString())
    sys.exit(1)

# Fetch the 'Databases' section and list every configured database
# together with its active/inactive state.
dbs = file.getDatabases()

if not dbs.isSectionAvailable():
    # Nothing to list when the section is absent from the file.
    print("Section 'Databases' is missing")
else:
    db_count = dbs.getNumberOfDatabases()
    print("There are %d databases configured:" % db_count)

    for db_index in range(db_count):
        entry = dbs.getDatabase(db_index)
        state = "active" if entry.isActive() else "inactive"
        # Name is left-aligned in a 20-character column, then the state.
        print('{:<20}'.format(entry.getName()) + '{:<}'.format(state))

print()

# Fetch the 'Parse' section and print every parse rule that is defined.
parseOptions = file.getParseOptions()

if not parseOptions.isSectionAvailable():
    print("Section 'Parse' is missing")
else:
    rule_count = parseOptions.getNumberOfParseRules()
    print("There are %d parse rules in the file; the following are specified:" % rule_count)

    for rule_index in range(rule_count):
        rule = parseOptions.getParseRule(rule_index)
        # Rule slots that are not specified in the file are skipped.
        if not rule.isAvailable():
            continue
        print("Rule_%d : %s" % (rule_index, rule.getRuleStr()))

print()

# Retrieving section 'WWW' content: list each sequence/report source as
# "<name>_SEQ" or "<name>_REP".
wwwOptions = file.getWWWOptions()

# Check if there is a 'WWW' section in the file before continuing.
if wwwOptions.isSectionAvailable():
    n = wwwOptions.getNumberOfEntries()
    print("There are %d sequence report sources configured:" % n)

    for i in range(n):
        # Look the entry up once and reuse it for both type and name.
        entry = wwwOptions.getEntry(i)

        # Any entry that is not of type WWW_SEQ is labelled "REP".
        # (The label is kept in 'suffix' so the builtin 'type' is not shadowed.)
        if entry.getType() == msparser.WWW_SEQ:
            suffix = "SEQ"
        else:
            suffix = "REP"

        print("%s_%s" % (entry.getName(), suffix))
else:
    print("Section 'WWW' is missing")

print("")

# Retrieving section 'Taxonomy' content: print one line per taxonomy rule
# block that is present, or "(none)" when there are no such blocks.
print("Available taxonomy sources:")

maxtax = file.getMaxTaxonomyRules()
activetax = 0

# Rule blocks are probed by index, from 1 up to and including maxtax.
for taxind in range(1, 1 + maxtax):
    # Fetch the rule block once; a false value means it does not exist.
    taxrules = file.getTaxonomyRules(taxind)
    if taxrules:
        activetax += 1
        # Index and identifier go on a single line, e.g. "TAXONOMY_1 Obsolete",
        # matching the sample output documented at the end of this file.
        print("TAXONOMY_%d %s" % (taxind, taxrules.getIdentifier()))

if activetax == 0:
    print("(none)")

print("")

# Fetch the 'Cluster' section and report whether cluster mode is on.
clusterParams = file.getClusterParams()

if not clusterParams.isSectionAvailable():
    print("Section 'Cluster' is missing")
elif clusterParams.isEnabled():
    print("Cluster mode: enabled")
else:
    print("Cluster mode: disabled")

print()

# Fetch the 'Processor' section and report the configured CPU count.
procOptions = file.getProcessors()

if not procOptions.isSectionAvailable():
    print("Section 'Processor' is missing")
else:
    cpu_count = procOptions.getNumberOfProcessors()
    print("%d CPU(s) configured" % cpu_count)

print()

# Fetch the 'Options' section and show the configured Mascot command line.
mascotOptions = file.getMascotOptions()

if not mascotOptions.isSectionAvailable():
    print("Section 'Options' is missing")
else:
    cmd_line = mascotOptions.getMascotCmdLine()
    print("MascotCmdLine: %s" % cmd_line)

print()

# Fetch the 'Cron' section and list the command of every configured job.
cronOptions = file.getCronOptions()

if not cronOptions.isSectionAvailable():
    print("Section 'Cron' is missing")
elif not cronOptions.isCronEnabled():
    print("Cron functionality is disabled")
else:
    job_count = cronOptions.getNumberOfCronJobs()

    # The heading ends with a colon only when a job list follows it.
    if job_count != 0:
        print("There are %d cron jobs configured:" % job_count)
    else:
        print("There are %d cron jobs configured" % job_count)

    for job_index in range(job_count):
        job = cronOptions.getCronJob(job_index)
        print(job.getCommandStr())

print()


"""

Running the program as 

python config_mascotdat.py /usr/local/mascot/config/mascot.dat

will give output similar to the following under Mascot Server 2.3 (exact
details depend on how the server has been configured, and the column layout
of some lines may differ slightly):


There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive

There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN 
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID   \([^ ]*\)
Rule_16 : \*.*\(ID   [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID   IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID   [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC   \([^ ;]*\)
Rule_27 : \*.*\(AC   \S.*\)
Rule_28 : ^ID   \([^ .]*\)
Rule_29 : \*.*\(ID   IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);

There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP

Available taxonomy sources:
TAXONOMY_1 Obsolete
TAXONOMY_2 OWL REF
TAXONOMY_3 SwissProt FASTA
TAXONOMY_4 Obsolete
TAXONOMY_5 Swiss-prot DAT
TAXONOMY_6 MSDB REF (pre 20000621)
TAXONOMY_7 MSDB REF
TAXONOMY_8 NCBI nr FASTA using GI2TAXID
TAXONOMY_9 dbEST FASTA using GI2TAXID
TAXONOMY_10 EST_human FASTA with TaxID
TAXONOMY_11 EST_mouse FASTA with TaxID
TAXONOMY_12 UniRef Fasta
TAXONOMY_13 EMBL EST Fasta

Cluster mode: disabled

Section 'Processor' is missing

MascotCmdLine: ../cgi/nph-mascot.exe

Cron functionality is disabled

"""


Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29