Example program for extracting the peptide or protein summary.
#!/usr/bin/python
##############################################################################
# file: resfile_summary.py                                                   #
# 'msparser' toolkit example code                                            #
#                                                                            #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2016 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
# $Source: parser/examples/test_python/resfile_summary.py $
# $Author: villek@matrixscience.com $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $
# $NoKeywords:: $
##############################################################################
"""Example program for extracting the peptide or protein summary.

Usage: python resfile_summary.py <results file>
"""

import re
import sys

import msparser


def main():
    """Print the summary report(s) for the results file named in sys.argv[1].

    Returns 0 when every report was produced and the results file is still
    valid afterwards, 1 otherwise (including a missing filename argument).
    """
    returnValue = 1

    if len(sys.argv) < 2:
        print("Must specify results filename as parameter")
        return returnValue

    resfile = msparser.ms_mascotresfile(sys.argv[1])

    if checkErrors(resfile):
        # The parameters passed to ms_peptidesummary or ms_proteinsummary determine
        # the type of grouping and the number of proteins and peptides displayed.
        # Default parameters can be returned using ms_mascotresfile::get_ms_mascotresults_params().
        # The return values from this function depend on the type of search,
        # and also on values in the mascot.dat configuration file if that is available.

        # You may need to change this path
        datfile = msparser.ms_datfile("../config/mascot.dat")

        # if the mascot.dat isn't available, use defaults
        mascotOptions = msparser.ms_mascotoptions()

        if datfile.isValid():
            mascotOptions = datfile.getMascotOptions()

        (scriptName,
         flags,
         minProbability,
         maxHitsToReport,
         ignoreIonsScoreBelow,
         minPepLenInPepSummary,
         usePeptideSummary,
         flags2) = resfile.get_ms_mascotresults_params(mascotOptions)

        bResult = False

        if usePeptideSummary:
            # For peptide summary
            #
            # Flags defined for hierarchical clustering algorithm:
            #     flags: ms_mascotresults::MSRES_CLUSTER_PROTEINS
            #            | ms_mascotresults::MSRES_SHOW_SUBSETS
            #            | ms_mascotresults::MSRES_MUDPIT_PROTEIN_SCORE;
            #     flags2: ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH;
            #
            # Flags defined for original simple parsimony algorithm:
            #     flags: ms_mascotresults::MSRES_GROUP_PROTEINS | ms_mascotresults::MSRES_SHOW_SUBSETS;
            print("-------------------------------------------------------------")
            print("--- Peptide summary report ---")
            print("-------------------------------------------------------------")

            bResult = show_results(
                resfile, usePeptideSummary, flags, minProbability,
                maxHitsToReport, ignoreIonsScoreBelow,
                minPepLenInPepSummary, flags2
            )
        else:
            # Show results from full protein summary, remove grouping
            flags &= ~msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
            flags &= ~msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

            print("-------------------------------------------------------------")
            print("--- Full Protein summary report ---")
            print("-------------------------------------------------------------")

            bResult = show_results(
                resfile, usePeptideSummary, flags, minProbability,
                maxHitsToReport, ignoreIonsScoreBelow,
                minPepLenInPepSummary, flags2
            )

            if bResult and checkErrors(resfile):
                print("")

                # Show results from concise protein summary, add grouping
                flags |= msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
                flags |= msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

                print("-------------------------------------------------------------")
                print("--- Concise Protein summary report ---")
                print("-------------------------------------------------------------")

                bResult = show_results(
                    resfile, usePeptideSummary, flags, minProbability,
                    maxHitsToReport, ignoreIonsScoreBelow,
                    minPepLenInPepSummary, flags2
                )

        if bResult and checkErrors(resfile):
            returnValue = 0

    return returnValue


def show_results(resfile, usePeptideSummary, flags, minProteinProb, maxHits,
                 minIonsScore, minPepLenInPepSummary, flags2):
    """Build a peptide or protein summary from resfile and print it.

    - resfile is the ms_mascotresfile object
    - usePeptideSummary selects ms_peptidesummary (true) or
      ms_proteinsummary (false)
    - flags, minProteinProb, maxHits, minIonsScore, minPepLenInPepSummary
      and flags2 are passed through to the summary constructor; flags also
      decides which related-protein listings are printed

    Prints every protein hit with its peptides, then the unassigned list,
    and (peptide summary only) the popup-style table for query 1.

    Returns False if checkErrors() reports the results file as invalid after
    the summary has been created, True otherwise.
    """
    if usePeptideSummary:
        results = msparser.ms_peptidesummary(
            resfile, flags, minProteinProb, maxHits, "",
            minIonsScore, minPepLenInPepSummary, "", flags2
        )
    else:
        results = msparser.ms_proteinsummary(
            resfile, flags, minProteinProb, maxHits
        )

    if not checkErrors(resfile):
        return False

    clustering = flags & msparser.ms_mascotresults.MSRES_CLUSTER_PROTEINS

    family = 1
    hit = 1
    prot = results.getHit(hit)

    while prot:
        accession = prot.getAccession()
        description = results.getProteinDescription(accession)
        mass = results.getProteinMass(accession)
        dbIdx = prot.getDB()

        protein_hit = "Protein Hit %d" % hit
        if clustering:
            protein_hit = protein_hit + "." + str(family)

        print(protein_hit)
        print("===================")
        print("Accession : %s" % accession)
        print("Description : %s" % description)
        print("Score : %s" % prot.getScore())
        print("Mass : %s" % mass)
        print("Frame : %s" % prot.getFrame())
        print("Coverage : %s" % prot.getCoverage())
        print("RMS error : %s" % prot.getRMSDeltas(results))
        print("Peptides : %s" % prot.getNumDisplayPeptides())

        # Each protein has a number of peptides that matched - list them:
        num_peps = prot.getNumPeptides()

        for i in range(1, 1 + num_peps):
            query = prot.getPeptideQuery(i)
            p = prot.getPeptideP(i)

            isDupSameQuery = prot.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery
            if p != -1 and query != -1 and not isDupSameQuery:
                pep = results.getPeptide(query, p)
                if not pep:
                    continue

                displayPeptideInfo(
                    0, pep, results,
                    prot.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_Duplicate,
                    prot.getPeptideIsBold(i),
                    prot.getPeptideShowCheckbox(i)
                )

        # Now display list of all proteins that contain a subset or a same set of the matching peptides
        if flags & msparser.ms_mascotresults.MSRES_GROUP_PROTEINS or clustering:
            print("Proteins matching the same set of peptides:")

            i = 1
            similar_prot = results.getNextSimilarProteinOf(accession, dbIdx, 1)
            while similar_prot:
                similar_accession = similar_prot.getAccession()
                similar_dbIdx = similar_prot.getDB()

                # The pieces of one output line are collected and printed with
                # a single call; joining with a space gives the same line the
                # old Python 2 "print x," idiom produced, on Python 2 and 3.
                parts = []
                if clustering and similar_dbIdx > 1:
                    parts.append(str(similar_dbIdx) + "::")
                parts.append(similar_accession + " - Total score:" + str(similar_prot.getScore()))
                parts.append(" - Peptides matched:" + str(similar_prot.getNumDisplayPeptides()))
                print(" ".join(parts))

                i += 1
                similar_prot = results.getNextSimilarProteinOf(accession, dbIdx, i)

            if flags & msparser.ms_mascotresults.MSRES_SHOW_SUBSETS:
                print("Proteins matching a subset of these peptides:")

                i = 1
                subset_prot = results.getNextSubsetProteinOf(accession, dbIdx, 1)
                while subset_prot:
                    subset_accession = subset_prot.getAccession()
                    subset_dbIdx = subset_prot.getDB()

                    parts = []
                    if clustering and subset_dbIdx > 1:
                        parts.append(str(subset_dbIdx) + "::")
                    parts.append(subset_accession + " - Total score:" + str(subset_prot.getScore()))
                    parts.append(" - Peptides matched:" + str(subset_prot.getNumDisplayPeptides()))
                    print(" ".join(parts))

                    if clustering:
                        j = 1
                        similar_subset_prot = results.getNextSimilarProteinOf(subset_accession, subset_dbIdx, j)
                        if similar_subset_prot:
                            print(" Proteins matching the same set of peptides for this subset:")

                        while similar_subset_prot:
                            similar_subset_accession = similar_subset_prot.getAccession()
                            similar_subset_dbIdx = similar_subset_prot.getDB()

                            parts = [" "]
                            if similar_subset_dbIdx > 1:
                                parts.append(str(similar_subset_dbIdx) + "::")
                            parts.append(similar_subset_accession + " - Total score:" + str(similar_subset_prot.getScore()))
                            parts.append(" Peptides matched:" + str(similar_subset_prot.getNumDisplayPeptides()))
                            print(" ".join(parts))

                            j += 1
                            similar_subset_prot = results.getNextSimilarProteinOf(subset_accession, subset_dbIdx, j)

                    i += 1
                    subset_prot = results.getNextSubsetProteinOf(accession, dbIdx, i)

        if clustering:
            # Walk the members of the current family first; when there are no
            # more, move on to the next hit and restart the family counter.
            prot = results.getNextFamilyProtein(hit, family)
            family += 1
            if not prot:
                hit += 1
                prot = results.getHit(hit)
                family = 1
        else:
            hit += 1
            prot = results.getHit(hit)

        print(" ")

    results.createUnassignedList(msparser.ms_mascotresults.SCORE)

    if results.getNumberOfUnassigned():
        print("Unassigned list")
        print("---------------")

        for u in range(1, 1 + results.getNumberOfUnassigned()):
            pep = results.getUnassigned(u)
            displayPeptideInfo(0, pep, results, 0, 1, 1)

    if usePeptideSummary:
        print(" ")
        displayYellowPopupInfo(results, 1)

    return True


def displayYellowPopupInfo(results, q):
    """
    Shows the equivalent of the yellow popup box for given query
    - results is the results object
    - q is the query number

    Ranks 1 to 10 are examined; ranks with no peptide object or an empty
    peptide string are skipped.
    """
    fmt = "%5s %5s %9s %7s %7s"
    print(fmt % ("Score", "Delta", "Hit", "Protein", "Peptide"))

    # Splits the string into "<digits>:<first token>" and whatever follows.
    first_protein = re.compile(r'(\d+):([^ ]*)[ ]*(.*)')

    for p in range(1, 11):
        pep = results.getPeptide(q, p)
        if not pep:
            continue

        seq = pep.getPeptideStr()
        if not seq:
            continue

        tmp = results.getProteinsWithThisPepMatch(q, p)

        (hit, protein) = ('', '')

        if tmp:
            match = first_protein.search(tmp)
            # A string that does not have the expected shape leaves the
            # Hit and Protein columns blank instead of raising.
            if match:
                hit, protein, morethan = match.groups()
                if morethan:
                    hit += "+"

        print(fmt % (pep.getIonsScore(), pep.getDelta(), hit, protein, seq))

    p = 1
    print("Accessions that matched query %s rank %s :- %s" % (q, p, results.getProteinsWithThisPepMatch(q, p)))


def displayPeptideInfo(showFullDetails, p, results, isDuplicate, isBold, showCheckBox):
    """Print one peptide match, either as a single line or in full.

    - showFullDetails: false prints the one-line form, true the field list
    - p is the peptide object
    - results is the results object (used for the readable modifications
      and, in the full form, the thresholds)
    - isDuplicate: when true the ions score is wrapped in parentheses
    - isBold: prints "BOLD" instead of "dim"
    - showCheckBox: prints "CB" instead of "--"
    """
    q = p.getQuery()

    if not showFullDetails:
        fmt = "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s"

        cb, bold = "--", "dim"
        if showCheckBox:
            cb = "CB"
        if isBold:
            bold = "BOLD"

        paren1, paren2 = "", ""
        if isDuplicate:
            paren1 = "("
            paren2 = ")"

        print(fmt % (
            cb,
            bold,
            q,
            p.getObserved(),
            p.getRank(),
            p.getPrettyRank(),
            p.getPeptideStr(),
            paren1,
            p.getIonsScore(),
            paren2,
            results.getReadableVarMods(q, p.getRank())
        ))
        return

    print("Peptide hit")

    if p.getAnyMatch():
        fmt = " %-12s: %s"
        print(fmt % ('Query', q))
        print(fmt % ('Rank', p.getRank()))
        print(fmt % ('Matched', p.getAnyMatch()))
        print(fmt % ('missedCleave', p.getMissedCleavages()))
        print(fmt % ('mrCalc', p.getMrCalc()))
        print(fmt % ('delta', p.getDelta()))
        print(fmt % ('observed', p.getObserved()))
        print(fmt % ('charge', p.getCharge()))
        print(fmt % ('mrExp', p.getMrExperimental()))
        print(fmt % ('ionsMatched', p.getNumIonsMatched()))
        print(fmt % ('peptideStr', p.getPeptideStr()))
        print(fmt % ('peaksUsed1', p.getPeaksUsedFromIons1()))
        print(fmt % ('varModsStr', p.getVarModsStr()))
        # getRank must be called here: the rank value is what the one-line
        # form above passes, not the bound method.
        print(fmt % ('readable mod', results.getReadableVarMods(q, p.getRank())))
        print(fmt % ('ionsScore', p.getIonsScore()))
        print(fmt % ('seriesUsedS', p.getSeriesUsedStr()))
        print(fmt % ('peaksUsed2', p.getPeaksUsedFromIons2()))
        print(fmt % ('peaksUsed3', p.getPeaksUsedFromIons3()))
        # str.join takes a single iterable of strings, so the three values
        # are converted and passed as one sequence.
        thresholds = (
            results.getPeptideIdentityThreshold(q, 20),
            results.getHomologyThreshold(q, 20),
            results.getProbOfPepBeingRandomMatch(p.getIonsScore(), q)
        )
        print(fmt % ('idth, hth, p', ', '.join(str(value) for value in thresholds)))
        print(" ")
    else:
        print(" No match")


def checkErrors(resfile):
    """Print any accumulated errors on resfile, then clear them.

    Returns the value of resfile.isValid() as it was before the errors were
    cleared.
    """
    if resfile.getLastError():
        for i in range(1, 1 + resfile.getNumberOfErrors()):
            print("Error number: %s : %s" % (resfile.getErrorNumber(i), resfile.getErrorString(i)))

    # Call isValid before clearAllErrors, otherwise this method always returns true
    bIsValid = resfile.isValid()
    resfile.clearAllErrors()
    return bIsValid


if __name__ == "__main__":
    sys.exit(main())


"""
Running the program as

python resfile_summary.py F981123.dat

will give the following output under Mascot Server 2.3:

-------------------------------------------------------------
--- Peptide summary report ---
-------------------------------------------------------------
Protein Hit 1
===================
Accession : CH60_HUMAN
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
Score : 1225.18623377
Mass : 61016.38
Frame : 0
Coverage : 283
RMS error : 30.4200726378
Peptides : 31
-- dim 52 1065.039917 2( 2) ALMLQGVDLLADAVAVTMGPK 57.90 Oxidation (M)
-- dim 53 1065.062256 2( 2) ALMLQGVDLLADAVAVTMGPK 7.49 Oxidation (M)
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
CH60_PONPY Total score: 1007.90623377 Peptides matched: 25
CH60_CRIGR Total score: 951.166233769 Peptides matched: 23
CH60_MOUSE Total score: 951.166233769 Peptides matched: 23
CH60_RAT Total score: 951.166233769 Peptides matched: 23
CH60_BOVIN Total score: 917.682467539 Peptides matched: 22
CH60_CHICK Total score: 875.976233769 Peptides matched: 19
CH60C_DROME Total score: 120.5 Peptides matched: 2
CH60C_ARATH Total score: 90.68 Peptides matched: 2
HSP60_CANAL Total score: 45.35 Peptides matched: 1
HSP60_PARBR Total score: 45.35 Peptides matched: 1
HSP60_YEAST Total score: 45.35 Peptides matched: 1
CH602_VIBPA Total score: 45.35 Peptides matched: 1
CH602_VIBVU Total score: 45.35 Peptides matched: 1
CH602_VIBVY Total score: 45.35 Peptides matched: 1
CH60_EUGGR Total score: 45.33 Peptides matched: 1

Protein Hit 2
===================
Accession : CH60_DROME
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
Score : 174.39
Mass : 60770.89
Frame : 0
Coverage : 67
RMS error : 29.5905072791
Peptides : 4
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
HSP60_SCHPO Total score: 87.04 Peptides matched: 2

Protein Hit 3
===================
Accession : CH60_CAEEL
Description : Chaperonin homolog Hsp-60, mitochondrial precursor (Heat shock protein 60) (HSP-60) - Caenorhabditi
Score : 134.91
Mass : 60063.75
Frame : 0
Coverage : 21
RMS error : 36.5383063194
Peptides : 3
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Protein Hit 4
===================
Accession : CH60_XANAC
Description : 60 kDa chaperonin (Protein Cpn60) (groEL protein) - Xanthomonas axonopodis pv. citri
Score : 42.2
Mass : 57130.83
Frame : 0
Coverage : 9
RMS error : 76.9237696062
Peptides : 1
Proteins matching the same set of peptides:
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 5
===================
Accession : NMDE4_HUMAN
Description :
Score : 37.24
Mass : 0.0
Frame : 0
Coverage : 10
RMS error : 9.41906700791
Peptides : 1
Proteins matching the same set of peptides:
NMDE4_HUMAN Total score: 37.24 Peptides matched: 1
NMDE4_HUMAN Total score: 36.2762337693 Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 6
===================
Accession : YF81_THET2
Description :
Score : 34.76
Mass : 0.0
Frame : 0
Coverage : 9
RMS error : 37.0214184966
Peptides : 1
Proteins matching the same set of peptides:
YF81_THET2 Total score: 34.76 Peptides matched: 1
Proteins matching a subset of these peptides:

Protein Hit 7
===================
Accession : F4ST_FLACH
Description :
Score : 33.85
Mass : 0.0
Frame : 0
Coverage : 9
RMS error : 87.8815544839
Peptides : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Protein Hit 8
===================
Accession : ZN711_HUMAN
Description : Zinc finger protein 711 (Zinc finger protein 6) - Homo sapiens (Human)
Score : 30.84
Mass : 87153.77
Frame : 0
Coverage : 13
RMS error : 69.4028633218
Peptides : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:

Unassigned list
---------------
CB BOLD 14 442.228302 1( 1) LIAQTPLK 25.09
CB BOLD 9 747.396179 1( 1) EGETRR 15.03
CB BOLD 4 662.275574 1( 1) KNAMAK 14.09
CB BOLD 23 1101.621704 1( 1) QLLMVAGVDR 12.04
CB BOLD 5 662.417175 1( 1) AIACER 11.79
CB BOLD 8 714.372498 1( 1) LAPAQSK 10.69
CB BOLD 6 673.349487 1( 1) AVNDVR 10.63
CB BOLD 22 1101.536621 1( 1) ENVIPADSEK 8.65
CB BOLD 55 1099.094727 1( 1) LNAEAVRTLLSANGQKPSEAK 8.05
CB BOLD 29 642.353577 1( 1) VVGVAGQGASALVR 7.91
CB BOLD 28 642.352600 1( 1) KNVSVSQGPDPR 7.22
CB BOLD 30 663.837891 1( 1) TPLLVGVAKGESR 7.20
CB BOLD 50 1048.561523 1( 1) ALDEILEYQNYPVVCAKK 5.70
CB BOLD 57 747.036072 1( 1) VMGSAFTALLDANEDAQKAMR 4.83
CB BOLD 49 1020.987915 1( 1) HQRLSGLMQTALEEQQR 4.11 Oxidation (M)
CB BOLD 19 932.364380 1( 1) TGMTRNPR 4.09
CB BOLD 2 500.256012 1( 1) LAVPT 3.87
CB BOLD 38 749.383972 1( 1) IDLLADMMWDDK 3.43 2 Oxidation (M)
CB BOLD 20 933.499023 1( 1) SRDPGMVR 3.21 Oxidation (M)
CB BOLD 41 886.405884 1( 1) DRVALNQEVMAPEATK 1.85
CB BOLD 10 747.412476 1( 1) MAPSTPK 1.68 Oxidation (M)
CB BOLD 18 930.703003 1( 1) LGSGIKAER 1.60
CB BOLD 7 711.364685 1( 1) GGAHEIK 1.34
CB BOLD 17 930.683105 1( 1) KIQAEITK 1.00
CB BOLD 44 949.550720 1( 1) LLSWDSVFFIKNITSK 0.30
CB BOLD 1 498.272888 1( 1) 0.00
CB BOLD 3 575.558411 1( 1) 0.00
CB BOLD 32 711.370728 1( 1) 0.00
CB BOLD 42 932.460815 1( 1) 0.00
CB BOLD 43 933.003784 1( 1) 0.00
CB BOLD 47 665.009583 1( 1) 0.00
CB BOLD 56 1119.045166 1( 1) 0.00
CB BOLD 63 832.798584 1( 1) 0.00
CB BOLD 66 1113.894653 1( 1) 0.00
CB BOLD 67 1116.177490 1( 1) 0.00

Score Delta Hit Protein Peptide
Accessions that matched query 97 rank 1 :-
"""
Copyright © 2022 Matrix Science Ltd. All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29