Matrix Science header

resfile_ms2quantitation.py

Example program extracting peptide ratios from a Reporter or Multiplex file.

#!/usr/bin/python
##############################################################################
# file: resfile_ms2quantitaion.py                                            #
# 'msparser' toolkit example code                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2015 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#    $Source: parser/examples/test_python/resfile_ms2quantitation.py $                                #
#     $Author: villek@matrixscience.com $                                                    #
#       $Date: 2018-07-30 16:23:53 +0100 $                                         #
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $                                                         #
# $NoKeywords::                                                            $ #
##############################################################################

import msparser
import sys

# If no arguments supplied, print help and exit

if len(sys.argv) != 5:
    print("Usage: %s <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>" % sys.argv[0])
    print("The location of schema files should be e.g.")
    print("  ../html/xmlns/schema/quantitation_1/quantitation_1.xsd")
    print("  ../html/xmlns/schema/quantitation_2/quantitation_2.xsd")
    print("  ../html/xmlns/schema/unimod_2/unimod_2.xsd")
    print("if running from the Mascot cgi directory.")
    sys.exit(1)
    
quant_schema1_filepath = sys.argv[1]
quant_schema2_filepath = sys.argv[2]
unimod_schema_filepath = sys.argv[3]

resfile = msparser.ms_mascotresfile(sys.argv[4], 1)

if not resfile.isValid():
    print(resfile.getLastErrorString())
    sys.exit(1)

def dump_warnings(err):
    
    for i in ms_range(1, err.getNumberOfErrors()):
        print('Warning: %s' %(err.getErrorString(i)))

# ms_range function has been added to make the range supplied consistent with other languages such as Perl and C#
def ms_range(start, stop, step=1):
    i = start
    while i <= stop:
        yield i
        i += step
    
# And print out related warnings, if any (we know these are only warnings,
# because isValid() returned true earlier):

dump_warnings(resfile.getErrorHandler())
resfile.clearAllErrors()

resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_QUANTITATION, "http://www.matrixscience.com/xmlns/schema/quantitation_2 " + quant_schema2_filepath + " http://www.matrixscience.com/xmlns/schema/quantitation_1 " + quant_schema1_filepath)
resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_UNIMOD, "http://www.unimod.org/xmlns/schema/unimod_2 " + unimod_schema_filepath)

qmethod = msparser.ms_quant_method()
   
if not resfile.getQuantitationMethod(qmethod):
    print(resfile.getLastErrorString())
    sys.exit(1)

    
datfile = msparser.ms_datfile()
undef, flags, minprob, maxhits, iisb, minpeplen, use_pepsum, flags2 = resfile.get_ms_mascotresults_params(datfile.getMascotOptions())

if not use_pepsum:
    print("Results File cannot be opened as a peptide summary")
    sys.exit(1)
    
pepsum = msparser.ms_peptidesummary(resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2)

if not resfile.isValid():
    print(resfile.getLastErrorString)
    sys.exit(1)

dump_warnings(resfile.getErrorHandler())
resfile.clearAllErrors()

# Quantitate the file using ms_ms2quantitation:

ms2quant = msparser.ms_ms2quantitation(pepsum, qmethod)   

if not ms2quant.isValid():
    sys.exit(1)
    
if ms2quant.hasIntensityNormalisation():
    ms2quant.normaliseIntensities()
elif ms2quant.hasPeptideRatioNormalisation():
    ms2quant.normalisePeptideRatios()
    
# Update the quantitation method in case ms_ms2quantitation changed any of
# the parameters:
qmethod = ms2quant.getQuantitationMethod()

dump_warnings(ms2quant.getErrorHandler())
ms2quant.clearAllErrors()

# Now we can inspect peptide and protein data in ms2quant.
# But first, dump quantitation method parameters to see what we have:

comps = []
for i in ms_range(0, qmethod.getNumberOfComponents()-1):
    comp = qmethod.getComponentByNumber(i)
    comps.append(comp.getName())
    
print("Components: %s" %(comps))

rationames = []
for i in ms_range(0, qmethod.getNumberOfReportRatios()-1):
    reportratio = qmethod.getReportRatioByNumber(i)
    reportrationame = reportratio.getName()
    rationames.append(reportrationame)
print("Report ratios: %s" %(rationames))

print("Protein ratio type = %s" %(qmethod.getProteinRatioType()))
print("Min. number of peptides = %d" %(qmethod.getMinNumPeptides()))

if qmethod.haveQuality():
    q = qmethod.getQuality()
    print("Quality: min. precursor charge = %s" %(q.getMinPrecursorCharge()))
    print("Quality: pep. threshold type = %s" %(q.getPepThresholdType()))
    print("Quality: pep. threshold value = %s" %(q.getPepThresholdValue()))
else:
    print("Quality: no restrictions")

# Check for Normalisation 

if qmethod.haveNormalisation():
    q = qmethod.getNormalisation()
    print("Normalisation = %s" %(q.getMethod()))

    if q.haveProteins():
        p = q.getProteins()
        for i in ms_range(0, p.getNumberOfProteins()-1):
            accessions = []
            accessions.append(p.getProtein().getAccession())
        print(" - Accessions = %s" %(accessions)) 
        
    if q.havePeptides():
        p = q.getPeptides()
        for i in ms_range(0, p.getNumberOfPeptides()-1):
            peptides = []
            peptides.append(p.getPeptide().getSequence())
        print(" - Sequences = %s" %(peptides)) 
        
    component_bases = []
    
    for i in ms_range(0, qmethod.getNumberOfComponents()-1):
        component = qmethod.getComponentByNumber(i)
        compname = component.getName()
        baseNormalisation = ms2quant.getIntensityNormalisationBase(compname)
        component_bases.append(baseNormalisation)
        for rationame in rationames:
            ms2gprn = ms2quant.getPeptideRatioNormalisationBase(rationame)
    print("Intensity normalisation constants: %s" %(component_bases))
    print("Ratio normalisation constants: %s" %(ms2gprn))
        
else:
    print("Normalisation: none")
    
if qmethod.haveOutliers():
    q = qmethod.getOutliers()
    print("Outliers = %s" %(q.getMethod()))
else:
    print("Outliers: none")
    
print("\n")

# Collect proteins we're interested in:
proteins = []

for i in ms_range(1, pepsum.getNumberOfHits()):
    hit = pepsum.getHit(i)
    proteins.append(hit)
    j = 0
    protein = pepsum.getNextFamilyProtein(i, j)
    while protein !=None:
        proteins.append(protein)
        j += 1

protein = ''                
for protein in proteins:
    print("%d::%s:" %(protein.getDB(), protein.getAccession()))

    for rationame in rationames:
        ratio = ms2quant.getProteinRatio(protein.getAccession(), protein.getDB(), rationame)
        print(rationame)
        if ratio.isMissing():
            print("%10s: ---" %(rationame))
        else:
            print("%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g" %(rationame, ratio.getValue(), ratio.getStandardError_only(), ratio.getSampleSize(), ratio.getNormalityPvalue_only(), ratio.getHypothesisPvalue_only()))
    print(rationame)        
    print("  %10s %s" %('Peptide:', rationame))
    for i in ms_range(1, protein.getNumPeptides()):
        if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery:
            continue
        
        q = protein.getPeptideQuery(i)
        p = protein.getPeptideP(i)
        
        peptide = pepsum.getPeptide(q, p)
        
        if not peptide:
            continue
            
        values = ''
        for rationame in rationames:
            # Note that the q,p arguments to ms_peptide_quant_key must be
            # integers. Otherwise the wrong constructor is chosen. Here they
            # are integers, as they are return values from Parser functions
            # that return ints.
            quantKey = msparser.ms_peptide_quant_key(q, p)
            ratio = ms2quant.getPeptideRatio(quantKey, rationame)
            
            if ratio.isMissing():
                values = "---"
                continue
                
            if ratio.isInfinite():
                values = "###"
            else:
                values = ratio.getValue()
                
        print("    %-9s  %-10s" %("q%d_p%d" %(q, p), str(values)))
        
    print("\n")
    
    

           

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29