Example program extracting peptide ratios from a Reporter or Multiplex file.
#!/usr/bin/python ############################################################################## # file: resfile_ms2quantitaion.py # # 'msparser' toolkit example code # ############################################################################## # COPYRIGHT NOTICE # # Copyright 1998-2015 Matrix Science Limited All Rights Reserved. # # # ############################################################################## # $Source: parser/examples/test_python/resfile_ms2quantitation.py $ # # $Author: villek@matrixscience.com $ # # $Date: 2018-07-30 16:23:53 +0100 $ # # $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ # # $NoKeywords:: $ # ############################################################################## import msparser import sys # If no arguments supplied, print help and exit if len(sys.argv) != 5: print("Usage: %s <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>" % sys.argv[0]) print("The location of schema files should be e.g.") print(" ../html/xmlns/schema/quantitation_1/quantitation_1.xsd") print(" ../html/xmlns/schema/quantitation_2/quantitation_2.xsd") print(" ../html/xmlns/schema/unimod_2/unimod_2.xsd") print("if running from the Mascot cgi directory.") sys.exit(1) quant_schema1_filepath = sys.argv[1] quant_schema2_filepath = sys.argv[2] unimod_schema_filepath = sys.argv[3] resfile = msparser.ms_mascotresfile(sys.argv[4], 1) if not resfile.isValid(): print(resfile.getLastErrorString()) sys.exit(1) def dump_warnings(err): for i in ms_range(1, err.getNumberOfErrors()): print('Warning: %s' %(err.getErrorString(i))) # ms_range function has been added to make the range supplied consistent with other languages such as Perl and C# def ms_range(start, stop, step=1): i = start while i <= stop: yield i i += step # And print out related warnings, if any (we know these are only warnings, # because isValid() returned true earlier): dump_warnings(resfile.getErrorHandler()) resfile.clearAllErrors() resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_QUANTITATION, "http://www.matrixscience.com/xmlns/schema/quantitation_2 " + quant_schema2_filepath + " http://www.matrixscience.com/xmlns/schema/quantitation_1 " + quant_schema1_filepath) resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_UNIMOD, "http://www.unimod.org/xmlns/schema/unimod_2 " + unimod_schema_filepath) qmethod = msparser.ms_quant_method() if not resfile.getQuantitationMethod(qmethod): print(resfile.getLastErrorString()) sys.exit(1) datfile = msparser.ms_datfile() undef, flags, minprob, maxhits, iisb, minpeplen, use_pepsum, flags2 = resfile.get_ms_mascotresults_params(datfile.getMascotOptions()) if not use_pepsum: print("Results File cannot be opened as a peptide summary") sys.exit(1) pepsum = msparser.ms_peptidesummary(resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2) if not resfile.isValid(): print(resfile.getLastErrorString) sys.exit(1) dump_warnings(resfile.getErrorHandler()) resfile.clearAllErrors() # Quantitate the file using ms_ms2quantitation: ms2quant = msparser.ms_ms2quantitation(pepsum, qmethod) if not ms2quant.isValid(): sys.exit(1) if ms2quant.hasIntensityNormalisation(): ms2quant.normaliseIntensities() elif ms2quant.hasPeptideRatioNormalisation(): ms2quant.normalisePeptideRatios() # Update the quantitation method in case ms_ms2quantitation changed any of # the parameters: qmethod = ms2quant.getQuantitationMethod() dump_warnings(ms2quant.getErrorHandler()) ms2quant.clearAllErrors() # Now we can inspect peptide and protein data in ms2quant. # But first, dump quantitation method parameters to see what we have: comps = [] for i in ms_range(0, qmethod.getNumberOfComponents()-1): comp = qmethod.getComponentByNumber(i) comps.append(comp.getName()) print("Components: %s" %(comps)) rationames = [] for i in ms_range(0, qmethod.getNumberOfReportRatios()-1): reportratio = qmethod.getReportRatioByNumber(i) reportrationame = reportratio.getName() rationames.append(reportrationame) print("Report ratios: %s" %(rationames)) print("Protein ratio type = %s" %(qmethod.getProteinRatioType())) print("Min. number of peptides = %d" %(qmethod.getMinNumPeptides())) if qmethod.haveQuality(): q = qmethod.getQuality() print("Quality: min. precursor charge = %s" %(q.getMinPrecursorCharge())) print("Quality: pep. threshold type = %s" %(q.getPepThresholdType())) print("Quality: pep. threshold value = %s" %(q.getPepThresholdValue())) else: print("Quality: no restrictions") # Check for Normalisation if qmethod.haveNormalisation(): q = qmethod.getNormalisation() print("Normalisation = %s" %(q.getMethod())) if q.haveProteins(): p = q.getProteins() for i in ms_range(0, p.getNumberOfProteins()-1): accessions = [] accessions.append(p.getProtein().getAccession()) print(" - Accessions = %s" %(accessions)) if q.havePeptides(): p = q.getPeptides() for i in ms_range(0, p.getNumberOfPeptides()-1): peptides = [] peptides.append(p.getPeptide().getSequence()) print(" - Sequences = %s" %(peptides)) component_bases = [] for i in ms_range(0, qmethod.getNumberOfComponents()-1): component = qmethod.getComponentByNumber(i) compname = component.getName() baseNormalisation = ms2quant.getIntensityNormalisationBase(compname) component_bases.append(baseNormalisation) for rationame in rationames: ms2gprn = ms2quant.getPeptideRatioNormalisationBase(rationame) print("Intensity normalisation constants: %s" %(component_bases)) print("Ratio normalisation constants: %s" %(ms2gprn)) else: print("Normalisation: none") if qmethod.haveOutliers(): q = qmethod.getOutliers() print("Outliers = %s" %(q.getMethod())) else: print("Outliers: none") print("\n") # Collect proteins we're interested in: proteins = [] for i in ms_range(1, pepsum.getNumberOfHits()): hit = pepsum.getHit(i) proteins.append(hit) j = 0 protein = pepsum.getNextFamilyProtein(i, j) while protein !=None: proteins.append(protein) j += 1 protein = '' for protein in proteins: print("%d::%s:" %(protein.getDB(), protein.getAccession())) for rationame in rationames: ratio = ms2quant.getProteinRatio(protein.getAccession(), protein.getDB(), rationame) print(rationame) if ratio.isMissing(): print("%10s: ---" %(rationame)) else: print("%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g" %(rationame, ratio.getValue(), ratio.getStandardError_only(), ratio.getSampleSize(), ratio.getNormalityPvalue_only(), ratio.getHypothesisPvalue_only())) print(rationame) print(" %10s %s" %('Peptide:', rationame)) for i in ms_range(1, protein.getNumPeptides()): if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery: continue q = protein.getPeptideQuery(i) p = protein.getPeptideP(i) peptide = pepsum.getPeptide(q, p) if not peptide: continue values = '' for rationame in rationames: # Note that the q,p arguments to ms_peptide_quant_key must be # integers. Otherwise the wrong constructor is chosen. Here they # are integers, as they are return values from Parser functions # that return ints. quantKey = msparser.ms_peptide_quant_key(q, p) ratio = ms2quant.getPeptideRatio(quantKey, rationame) if ratio.isMissing(): values = "---" continue if ratio.isInfinite(): values = "###" else: values = ratio.getValue() print(" %-9s %-10s" %("q%d_p%d" %(q, p), str(values))) print("\n")
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29 |