# Matrix Science header
#
# resfile_summary.py
#
# Example program for extracting the peptide or protein summary.

#!/usr/bin/python
##############################################################################
# file: resfile_summary.py                                                   #
# 'msparser' toolkit example code                                            #
#                                                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2016 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#   $Source: /vol/cvsroot/parser/examples/test_python/resfile_summary.py,v $ #
#   $Author: francoisr $                                                     #
#       $Date: 2016/07/19 14:22:55 $                                         #
#   $Revision: 1.2 $                                                         #
# $NoKeywords::                                                            $ #
##############################################################################

import msparser
import sys
import re

def main() :
    """
    Command-line entry point: print the summary report(s) for a Mascot results file.

    The results filename is taken from sys.argv[1].  The default summary
    parameters are obtained from ms_mascotresfile.get_ms_mascotresults_params();
    depending on the returned usePeptideSummary value either a peptide summary
    is printed, or a full protein summary followed by a concise (grouped)
    protein summary.

    Returns 0 when every report was produced and the results file is still
    valid afterwards, 1 otherwise (missing argument, unreadable results file,
    or an error while building a report).  The value is always an integer so
    that it can be handed straight to sys.exit().
    """
    returnValue = 1

    if len(sys.argv) < 2 :
        print("Must specify results filename as parameter" )
        return returnValue

    resfile = msparser.ms_mascotresfile(sys.argv[1])

    # Bail out explicitly with the failure status.  Falling off the end of the
    # function would return None, which sys.exit() reports as success.
    if not checkErrors(resfile) :
        return returnValue

    # The parameters passed to ms_peptidesummary or ms_proteinsummary determine
    # the type of grouping and the number of proteins and peptides displayed.
    # Default parameters can be returned using ms_mascotresfile::get_ms_mascotresults_params().
    # The return values from this function depend on the type of search,
    # and also on values in the mascot.dat configuration file if that is available.

    # You may need to change this path
    datfile = msparser.ms_datfile("../config/mascot.dat")

    # if the mascot.dat isn't available, use defaults
    mascotOptions = msparser.ms_mascotoptions()

    if datfile.isValid() :
        mascotOptions = datfile.getMascotOptions()

    # scriptName is part of the returned tuple but is not needed here.
    (scriptName,
     flags,
     minProbability,
     maxHitsToReport,
     ignoreIonsScoreBelow,
     minPepLenInPepSummary,
     usePeptideSummary,
     flags2) = resfile.get_ms_mascotresults_params(mascotOptions)

    bResult = False
    if usePeptideSummary :

        # For peptide summary
        #
        # Flags defined for hierarchical clustering algorithm:
        #     flags: ms_mascotresults::MSRES_CLUSTER_PROTEINS
        #            | ms_mascotresults::MSRES_SHOW_SUBSETS
        #            | ms_mascotresults::MSRES_MUDPIT_PROTEIN_SCORE;
        #     flags2: ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH;
        #
        # Flags defined for original simple parsimony algorithm:
        #     flags: ms_mascotresults::MSRES_GROUP_PROTEINS | ms_mascotresults::MSRES_SHOW_SUBSETS;

        print("-------------------------------------------------------------")
        print("---   Peptide summary report                              ---")
        print("-------------------------------------------------------------")

        bResult = show_results(resfile,
                               usePeptideSummary,
                               flags,
                               minProbability,
                               maxHitsToReport,
                               ignoreIonsScoreBelow,
                               minPepLenInPepSummary,
                               flags2)
    else :
        # Show results from full protein summary, remove grouping
        flags &= ~msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
        flags &= ~msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

        print("-------------------------------------------------------------")
        print("---   Full Protein summary report                         ---")
        print("-------------------------------------------------------------")
        bResult = show_results(resfile,
                               usePeptideSummary,
                               flags,
                               minProbability,
                               maxHitsToReport,
                               ignoreIonsScoreBelow,
                               minPepLenInPepSummary,
                               flags2)

        # Only attempt the second report when the first one succeeded.
        if bResult and checkErrors(resfile) :
            print("")

            # Show results from concise protein summary, add grouping
            flags |= msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
            flags |= msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

            print("-------------------------------------------------------------")
            print("---   Concise Protein summary report                      ---")
            print("-------------------------------------------------------------")
            bResult = show_results(resfile,
                                   usePeptideSummary,
                                   flags,
                                   minProbability,
                                   maxHitsToReport,
                                   ignoreIonsScoreBelow,
                                   minPepLenInPepSummary,
                                   flags2)

    if bResult and checkErrors(resfile) :
        returnValue = 0

    return returnValue


def _iterRelatedProteins(getNextProtein, accession, dbIdx) :
    """
    Yield proteins from a 1-based accessor such as
    results.getNextSimilarProteinOf / results.getNextSubsetProteinOf,
    stopping at the first false return value.
    """
    index = 1
    protein = getNextProtein(accession, dbIdx, index)
    while protein :
        yield protein
        index += 1
        protein = getNextProtein(accession, dbIdx, index)


def _relatedProteinLine(protein, showDbPrefix, peptidesLabel, indent = "") :
    """
    Build the one-line description of a same-set / subset protein.

    The pieces are joined with single spaces, which is the text the Python 2
    'print(...),' idiom produced, but assembled so that it is emitted as one
    line on Python 3 as well.
    """
    pieces = []
    if indent :
        pieces.append(indent)

    dbIdx = protein.getDB()
    if showDbPrefix and dbIdx > 1 :
        pieces.append(str(dbIdx) + "::")

    pieces.append(protein.getAccession() + " - Total score:" + str(protein.getScore()))
    pieces.append(peptidesLabel + str(protein.getNumDisplayPeptides()))
    return " ".join(pieces)


def show_results(resfile, usePeptideSummary, flags, minProteinProb, maxHits, minIonsScore, minPepLenInPepSummary, flags2) :
    """
    Build a peptide or protein summary for resfile and print it.

    - resfile is the ms_mascotresfile object
    - usePeptideSummary selects ms_peptidesummary (true) or ms_proteinsummary (false)
    - flags, minProteinProb, maxHits are passed to either summary constructor
    - minIonsScore, minPepLenInPepSummary, flags2 are only passed to ms_peptidesummary

    For every protein hit the protein details and its peptides are printed,
    followed (when MSRES_GROUP_PROTEINS or MSRES_CLUSTER_PROTEINS is set in
    flags) by the same-set proteins and, with MSRES_SHOW_SUBSETS, the subset
    proteins.  Afterwards the unassigned list is printed and, for a peptide
    summary, the yellow-popup information for query 1.

    Returns False if checkErrors() reports the results file invalid after the
    summary object was created, True otherwise.
    """

    if usePeptideSummary :
        results = msparser.ms_peptidesummary(
            resfile, flags, minProteinProb, maxHits, "", minIonsScore, minPepLenInPepSummary, "", flags2
            )
    else :
        results = msparser.ms_proteinsummary(
            resfile, flags, minProteinProb, maxHits
            )

    if not checkErrors(resfile) :
        return False

    # flags does not change inside this function, so test the bits once.
    clustered   = flags & msparser.ms_mascotresults.MSRES_CLUSTER_PROTEINS
    grouped     = flags & msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
    showSubsets = flags & msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

    family = 1
    hit  = 1
    prot = results.getHit(hit)

    while prot :
        accession   = prot.getAccession()
        description = results.getProteinDescription(accession)
        mass        = results.getProteinMass(accession)
        dbIdx       = prot.getDB()

        protein_hit = "Protein Hit %d" % hit
        if clustered :
            protein_hit = protein_hit + "." + str(family)

        # print() with a single argument behaves the same on Python 2 and 3;
        # the bare 'print x' statement is a syntax error on Python 3.
        print(protein_hit)
        print("===================")
        print("Accession   : %s" % accession)
        print("Description : %s" % description)
        print("Score       : %s" % prot.getScore())
        print("Mass        : %s" % mass)
        print("Frame       : %s" % prot.getFrame())
        print("Coverage    : %s" % prot.getCoverage())
        print("RMS error   : %s" % prot.getRMSDeltas(results))
        print("Peptides    : %s" % prot.getNumDisplayPeptides())

        # Each protein has a number of peptides that matched - list them:
        num_peps = prot.getNumPeptides()

        for i in range(1, 1 + num_peps) :
            query     = prot.getPeptideQuery(i)
            p         = prot.getPeptideP(i)
            duplicate = prot.getPeptideDuplicate(i)

            # -1 for the query or rank is skipped; presumably it marks an
            # entry without a usable peptide match - confirm against the
            # Mascot Parser documentation.
            if p == -1 or query == -1 :
                continue
            if duplicate == msparser.ms_protein.DUPE_DuplicateSameQuery :
                continue

            pep = results.getPeptide(query, p)
            if not pep :
                continue

            displayPeptideInfo(
                0, pep, results,
                duplicate == msparser.ms_protein.DUPE_Duplicate,
                prot.getPeptideIsBold(i),
                prot.getPeptideShowCheckbox(i)
                )

        # Now display list of all proteins that contain a subset or a same set of the matching peptides

        if grouped or clustered :
            print("Proteins matching the same set of peptides:")

            for similar_prot in _iterRelatedProteins(results.getNextSimilarProteinOf, accession, dbIdx) :
                print(_relatedProteinLine(similar_prot, clustered, " - Peptides matched:"))

            if showSubsets :
                print("Proteins matching a subset of these peptides:")

                for subset_prot in _iterRelatedProteins(results.getNextSubsetProteinOf, accession, dbIdx) :
                    print(_relatedProteinLine(subset_prot, clustered, " - Peptides matched:"))

                    if clustered :
                        # The sub-heading is only printed when the subset
                        # protein has at least one same-set protein.
                        headingShown = False
                        for similar_subset_prot in _iterRelatedProteins(
                                results.getNextSimilarProteinOf,
                                subset_prot.getAccession(),
                                subset_prot.getDB()) :
                            if not headingShown :
                                print("  Proteins matching the same set of peptides for this subset:")
                                headingShown = True
                            print(_relatedProteinLine(similar_subset_prot, True, " Peptides matched:", "  "))

        if clustered :
            # Walk the members of the current family first; only move on to
            # the next hit once the family is exhausted.
            prot = results.getNextFamilyProtein(hit, family)
            family += 1
            if not prot :
                hit += 1
                prot = results.getHit(hit)
                family = 1
        else :
            hit += 1
            prot = results.getHit(hit)

        print(" ")

    results.createUnassignedList(msparser.ms_mascotresults.SCORE)

    num_unassigned = results.getNumberOfUnassigned()
    if num_unassigned :
        print("Unassigned list")
        print("---------------")

        for u in range(1, 1 + num_unassigned) :
            pep = results.getUnassigned(u)
            displayPeptideInfo(0, pep, results, 0, 1, 1)

    if usePeptideSummary :
        print(" ")
        displayYellowPopupInfo(results, 1)

    return True


def displayYellowPopupInfo(results, q) :
    """
    Shows the equivalent of the yellow popup box for given query
    - results is the results object
    - q is the query number

    Prints a header row, then one row (ions score, delta, hit, protein,
    peptide sequence) for each of ranks 1 to 10 that has a peptide with a
    non-empty sequence, and finally the raw accession string returned by
    results.getProteinsWithThisPepMatch() for rank 1.
    """

    fmt = "%5s %5s %9s %7s %7s"
    print(fmt % ("Score", "Delta", "Hit", "Protein", "Peptide"))

    for p in range(1, 11) :
        pep = results.getPeptide(q, p)
        if not pep :
            continue

        seq = pep.getPeptideStr()
        if not seq :
            continue

        hit, protein = '', ''

        tmp = results.getProteinsWithThisPepMatch(q, p)
        if tmp :
            # Raw string so that \d reaches the regex engine untouched.
            # The pattern picks "<hit>:<accession>" from the start of the
            # first such token; anything after it means more proteins matched.
            found = re.search(r'(\d+):([^ ]*)[ ]*(.*)', tmp)

            # A string that does not have the expected shape leaves the hit
            # and protein columns blank instead of raising AttributeError.
            if found :
                hit, protein, morethan = found.groups()
                if morethan :
                    hit += "+"

        print(fmt % (pep.getIonsScore(), pep.getDelta(), hit, protein, seq))

    topRank = 1
    print("Accessions that matched query %s rank %s :- %s" % (q, topRank, results.getProteinsWithThisPepMatch(q, topRank)))


def displayPeptideInfo(showFullDetails, p, results, isDuplicate, isBold, showCheckBox) :
    """
    Prints the details of one peptide match
    - showFullDetails: false prints a single compact line, true prints one
      labelled line per attribute
    - p is the peptide object
    - results is the results object (used for the readable variable
      modifications and, in the full listing, the thresholds)
    - isDuplicate: compact line only, wraps the ions score in parentheses
    - isBold: compact line only, prints "BOLD" instead of "dim"
    - showCheckBox: compact line only, prints "CB" instead of "--"
    """
    q = p.getQuery()

    if not showFullDetails :
        fmt = "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s"

        cb, bold = "--", "dim"
        if showCheckBox :
            cb = "CB"
        if isBold :
            bold = "BOLD"

        paren1, paren2 = "", ""
        if isDuplicate :
            paren1, paren2 = "(", ")"

        print(fmt % (
            cb,
            bold,
            q,
            p.getObserved(),
            p.getRank(),
            p.getPrettyRank(),
            p.getPeptideStr(),
            paren1,
            p.getIonsScore(),
            paren2,
            results.getReadableVarMods(q, p.getRank())
            ))

        return


    print("Peptide hit")

    if p.getAnyMatch() :
        fmt = "    %-12s: %s"
        print(fmt % ('Query'       , q))
        print(fmt % ('Rank'        , p.getRank()))
        print(fmt % ('Matched'     , p.getAnyMatch()))
        print(fmt % ('missedCleave', p.getMissedCleavages()))
        print(fmt % ('mrCalc'      , p.getMrCalc()))
        print(fmt % ('delta'       , p.getDelta()))
        print(fmt % ('observed'    , p.getObserved()))
        print(fmt % ('charge'      , p.getCharge()))
        print(fmt % ('mrExp'       , p.getMrExperimental()))
        print(fmt % ('ionsMatched' , p.getNumIonsMatched()))
        print(fmt % ('peptideStr'  , p.getPeptideStr()))
        print(fmt % ('peaksUsed1'  , p.getPeaksUsedFromIons1()))
        print(fmt % ('varModsStr'  , p.getVarModsStr()))
        # The rank has to be the value returned by getRank(), not the method
        # object itself, exactly as in the compact listing above.
        print(fmt % ('readable mod', results.getReadableVarMods(q, p.getRank())))
        print(fmt % ('ionsScore'   , p.getIonsScore()))
        print(fmt % ('seriesUsedS' , p.getSeriesUsedStr()))
        print(fmt % ('peaksUsed2'  , p.getPeaksUsedFromIons2()))
        print(fmt % ('peaksUsed3'  , p.getPeaksUsedFromIons3()))

        # str.join() takes a single iterable of strings, so the three
        # values are collected and converted first.
        # NOTE(review): 20 is presumably the 1-in-N significance level
        # expected by the threshold accessors - confirm in the Parser docs.
        thresholds = (
            results.getPeptideIdentityThreshold(q, 20),
            results.getHomologyThreshold(q, 20),
            results.getProbOfPepBeingRandomMatch(p.getIonsScore(), q)
            )
        print(fmt % ('idth, hth, p', ', '.join([str(value) for value in thresholds])))
        print(" ")
    else :
        print("    No match")

def checkErrors(resfile) :
    """
    Prints any errors recorded on resfile, clears them, and reports validity.

    When resfile.getLastError() is true, every recorded error (numbered from
    1 to getNumberOfErrors()) is printed with its number and message.
    Returns the value of resfile.isValid() as it was before the errors were
    cleared.
    """
    if resfile.getLastError() :
        error_count = resfile.getNumberOfErrors()
        for index in range(1, error_count + 1) :
            number  = resfile.getErrorNumber(index)
            message = resfile.getErrorString(index)
            print("Error number: %s : %s" % (number, message))

    # Validity must be sampled first: once the errors have been cleared,
    # isValid() always reports true.
    still_valid = resfile.isValid()
    resfile.clearAllErrors()
    return still_valid

# Run the example only when executed as a script, and hand the status
# returned by main() to the shell as the process exit code.
if __name__ == "__main__" :
    exit_status = main()
    sys.exit(exit_status)


"""

Running the program as

python resfile_summary.py F981123.dat

will give the following output under Mascot Server 2.3:


-------------------------------------------------------------
---   Peptide summary report                              ---
-------------------------------------------------------------
Protein Hit 1
===================
Accession   : CH60_HUMAN
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock 
Score       : 1225.18623377
Mass        : 61016.38
Frame       : 0
Coverage    : 283
RMS error   : 30.4200726378
Peptides    : 31
--  dim   52 1065.039917    2(   2) ALMLQGVDLLADAVAVTMGPK 57.90 Oxidation (M)
--  dim   53 1065.062256    2(   2) ALMLQGVDLLADAVAVTMGPK 7.49 Oxidation (M)
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
CH60_PONPY Total score: 1007.90623377  Peptides matched: 25 
CH60_CRIGR Total score: 951.166233769  Peptides matched: 23 
CH60_MOUSE Total score: 951.166233769  Peptides matched: 23 
CH60_RAT Total score: 951.166233769  Peptides matched: 23 
CH60_BOVIN Total score: 917.682467539  Peptides matched: 22 
CH60_CHICK Total score: 875.976233769  Peptides matched: 19 
CH60C_DROME Total score: 120.5  Peptides matched: 2 
CH60C_ARATH Total score: 90.68  Peptides matched: 2 
HSP60_CANAL Total score: 45.35  Peptides matched: 1 
HSP60_PARBR Total score: 45.35  Peptides matched: 1 
HSP60_YEAST Total score: 45.35  Peptides matched: 1 
CH602_VIBPA Total score: 45.35  Peptides matched: 1 
CH602_VIBVU Total score: 45.35  Peptides matched: 1 
CH602_VIBVY Total score: 45.35  Peptides matched: 1 
CH60_EUGGR Total score: 45.33  Peptides matched: 1 

Protein Hit 2
===================
Accession   : CH60_DROME
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock 
Score       : 174.39
Mass        : 60770.89
Frame       : 0
Coverage    : 67
RMS error   : 29.5905072791
Peptides    : 4
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
HSP60_SCHPO Total score: 87.04  Peptides matched: 2 

Protein Hit 3
===================
Accession   : CH60_CAEEL
Description : Chaperonin homolog Hsp-60, mitochondrial precursor (Heat shock protein 60) (HSP-60) - Caenorhabditi
Score       : 134.91
Mass        : 60063.75
Frame       : 0
Coverage    : 21
RMS error   : 36.5383063194
Peptides    : 3
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Protein Hit 4
===================
Accession   : CH60_XANAC
Description : 60 kDa chaperonin (Protein Cpn60) (groEL protein) - Xanthomonas axonopodis pv. citri
Score       : 42.2
Mass        : 57130.83
Frame       : 0
Coverage    : 9
RMS error   : 76.9237696062
Peptides    : 1
Proteins matching the same set of peptides:
CH60_XANAC Total score: 42.2  Peptides matched: 1
CH60_XANAC Total score: 42.2  Peptides matched: 1
CH60_XANAC Total score: 42.2  Peptides matched: 1
CH60_XANAC Total score: 42.2  Peptides matched: 1
CH60_XANAC Total score: 42.2  Peptides matched: 1
CH60_XANAC Total score: 42.2  Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 5
===================
Accession   : NMDE4_HUMAN
Description : 
Score       : 37.24
Mass        : 0.0
Frame       : 0
Coverage    : 10
RMS error   : 9.41906700791
Peptides    : 1
Proteins matching the same set of peptides:
NMDE4_HUMAN Total score: 37.24  Peptides matched: 1
NMDE4_HUMAN Total score: 36.2762337693  Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 6
===================
Accession   : YF81_THET2
Description : 
Score       : 34.76
Mass        : 0.0
Frame       : 0
Coverage    : 9
RMS error   : 37.0214184966
Peptides    : 1
Proteins matching the same set of peptides:
YF81_THET2 Total score: 34.76  Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 7
===================
Accession   : F4ST_FLACH
Description : 
Score       : 33.85
Mass        : 0.0
Frame       : 0
Coverage    : 9
RMS error   : 87.8815544839
Peptides    : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Protein Hit 8
===================
Accession   : ZN711_HUMAN
Description : Zinc finger protein 711 (Zinc finger protein 6) - Homo sapiens (Human)
Score       : 30.84
Mass        : 87153.77
Frame       : 0
Coverage    : 13
RMS error   : 69.4028633218
Peptides    : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Unassigned list
---------------
CB BOLD   14  442.228302    1(   1) LIAQTPLK             25.09    
CB BOLD    9  747.396179    1(   1) EGETRR               15.03    
CB BOLD    4  662.275574    1(   1) KNAMAK               14.09    
CB BOLD   23 1101.621704    1(   1) QLLMVAGVDR           12.04    
CB BOLD    5  662.417175    1(   1) AIACER               11.79    
CB BOLD    8  714.372498    1(   1) LAPAQSK              10.69    
CB BOLD    6  673.349487    1(   1) AVNDVR               10.63    
CB BOLD   22 1101.536621    1(   1) ENVIPADSEK           8.65    
CB BOLD   55 1099.094727    1(   1) LNAEAVRTLLSANGQKPSEAK 8.05    
CB BOLD   29  642.353577    1(   1) VVGVAGQGASALVR       7.91    
CB BOLD   28  642.352600    1(   1) KNVSVSQGPDPR         7.22    
CB BOLD   30  663.837891    1(   1) TPLLVGVAKGESR        7.20    
CB BOLD   50 1048.561523    1(   1) ALDEILEYQNYPVVCAKK   5.70    
CB BOLD   57  747.036072    1(   1) VMGSAFTALLDANEDAQKAMR 4.83    
CB BOLD   49 1020.987915    1(   1) HQRLSGLMQTALEEQQR    4.11 Oxidation (M)
CB BOLD   19  932.364380    1(   1) TGMTRNPR             4.09    
CB BOLD    2  500.256012    1(   1) LAVPT                3.87    
CB BOLD   38  749.383972    1(   1) IDLLADMMWDDK         3.43 2 Oxidation (M)
CB BOLD   20  933.499023    1(   1) SRDPGMVR             3.21 Oxidation (M)
CB BOLD   41  886.405884    1(   1) DRVALNQEVMAPEATK     1.85    
CB BOLD   10  747.412476    1(   1) MAPSTPK              1.68 Oxidation (M)
CB BOLD   18  930.703003    1(   1) LGSGIKAER            1.60    
CB BOLD    7  711.364685    1(   1) GGAHEIK              1.34    
CB BOLD   17  930.683105    1(   1) KIQAEITK             1.00    
CB BOLD   44  949.550720    1(   1) LLSWDSVFFIKNITSK     0.30    
CB BOLD    1  498.272888    1(   1)                      0.00    
CB BOLD    3  575.558411    1(   1)                      0.00    
CB BOLD   32  711.370728    1(   1)                      0.00    
CB BOLD   42  932.460815    1(   1)                      0.00    
CB BOLD   43  933.003784    1(   1)                      0.00    
CB BOLD   47  665.009583    1(   1)                      0.00    
CB BOLD   56 1119.045166    1(   1)                      0.00    
CB BOLD   63  832.798584    1(   1)                      0.00    
CB BOLD   66 1113.894653    1(   1)                      0.00    
CB BOLD   67 1116.177490    1(   1)                      0.00    

Score Delta       Hit Protein Peptide

Accessions that matched query 97 rank 1 :- 

"""

# Copyright (c) 2016 Matrix Science Ltd.  All Rights Reserved. Generated on Fri Jun 2 2017 01:44:50