Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
msparser_full.py

Large example / test program that illustrates most functionality.

1#!/usr/bin/python
2
21
22import msparser
23import sys
24import re
25import time
26
def main():
    """
    Run every demonstration in this example against one results file.

    The results filename is taken from sys.argv[1].

    Returns 1 when no filename was supplied or the file is not valid.
    Otherwise nothing is returned explicitly (None).
    """
    if len(sys.argv) < 2:
        print("Must specify results filename as parameter")
        return 1

    resfile = msparser.ms_mascotresfilebase.createResfile(sys.argv[1])

    if not resfile.isValid():
        print("Cannot process file '%s':" % sys.argv[1])
        print("Error number: %s" % resfile.getLastError())
        print("Error string: %s" % resfile.getLastErrorString())
        return 1

    # General sections that only need the results file itself.
    for section in (checkErrorHandler, searchInformation, searchParameters, inputData):
        section(resfile)

    rule = "-------------------------------------------------------------"

    def banner(title):
        # Title line framed above and below by a dashed rule.
        print(rule)
        print(title)
        print(rule)

    banner("--- Full Protein summary report ---")

    # Protein summary, no flags, minProbability 1/20, 5 hits.
    show_results(resfile, 0, msparser.ms_mascotresults.MSRES_NOFLAG, 1.0 / 20.0, 5)

    banner("--- Concise Protein summary report ---")

    # Protein summary with grouping and subsets, minProbability 0, 3 hits.
    show_results(
        resfile,
        0,
        msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
        | msparser.ms_mascotresults.MSRES_SHOW_SUBSETS,
        0,
        3,
    )

    if not resfile.isMSMS():
        print("Not an MS-MS results file - cannot show peptide summary report")
        return

    banner("--- Peptide summary report ---")

    # Peptide summary: minProbability 0, 8 hits, ignoreIonsScoreBelow 0,
    # minimum peptide length 0.
    show_results(
        resfile,
        1,
        msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
        | msparser.ms_mascotresults.MSRES_SHOW_SUBSETS,
        0,
        8,
        0,
        0,
    )
87
88
def show_results(resfile, usePeptideSummary, flags, minProteinProb, maxHits, minIonsScore=0, minPepLenInPepSummary=0):
    """
    Print a protein or peptide summary report for a results file.

    - resfile is the ms_mascotresfilebase object
    - usePeptideSummary is true for peptide summary, false for protein
    - flags is the flags for display
    - minProteinProb is the minimum protein probability to display
    - maxHits is the maximum number of hits to display
    - minIonsScore is the minimum ions score to use
    - minPepLenInPepSummary is the minPepLenInPepSummary to use

    For every protein hit the header fields and the matching peptides are
    printed, followed (depending on flags) by the proteins that matched the
    same set or a subset of the peptides. The unassigned list is printed
    last and, for a peptide summary, the "yellow popup" table for query 97.

    Returns None. If resfile reports an error after the summary object has
    been created, the error string is printed and nothing else is shown.
    """
    if usePeptideSummary:
        results = msparser.ms_peptidesummary(
            resfile, flags, minProteinProb, maxHits, "", minIonsScore, minPepLenInPepSummary
        )
    else:
        results = msparser.ms_proteinsummary(
            resfile, flags, minProteinProb, maxHits
        )

    if resfile.getLastError():
        print("Error : %s" % resfile.getLastErrorString())
        return

    # Hits are numbered from 1; getHit() yields a false value once the
    # hit number runs past the last protein.
    hit = 1
    prot = results.getHit(hit)

    while prot:
        accession = prot.getAccession()
        description = results.getProteinDescription(accession)
        mass = results.getProteinMass(accession)

        print("Protein Hit %d" % hit)
        print("===================")
        print("Accession : %s" % accession)
        print("Description : %s" % description)
        print("Score : %s" % prot.getScore())
        print("Mass : %s" % mass)
        print("Frame : %s" % prot.getFrame())
        print("Coverage : %s" % prot.getCoverage())
        print("RMS error : %s" % prot.getRMSDeltas(results))
        print("Peptides : %s" % prot.getNumDisplayPeptides())

        # Each protein has a number of peptides that matched - list them:
        num_peps = prot.getNumPeptides()

        for i in range(1, 1 + num_peps):
            query = prot.getPeptideQuery(i)
            p = prot.getPeptideP(i)
            duplicate = prot.getPeptideDuplicate(i)

            # Entries without a valid query/rank are skipped, and so are
            # duplicates that come from the same query.
            if p == -1 or query == -1:
                continue
            if duplicate == msparser.ms_protein.DUPE_DuplicateSameQuery:
                continue

            pep = results.getPeptide(query, p)
            if not pep:
                continue

            displayPeptideInfo(
                0, pep, results,
                duplicate == msparser.ms_protein.DUPE_Duplicate,
                prot.getPeptideIsBold(i),
                prot.getPeptideShowCheckbox(i)
            )

        # Now display the list of all proteins that contained a subset of,
        # or an identical list of, the matching peptides.

        if flags & msparser.ms_mascotresults.MSRES_GROUP_PROTEINS:
            print("Proteins matching the same set of peptides:")

            i = 1
            similar_prot = results.getNextSimilarProtein(hit, i)
            while similar_prot:
                # Report the similar protein's own accession, not the hit's.
                accession = similar_prot.getAccession()
                print(accession, "Total score:", similar_prot.getScore())
                print(" Peptides matched:", similar_prot.getNumDisplayPeptides())
                i += 1
                similar_prot = results.getNextSimilarProtein(hit, i)

        if flags & msparser.ms_mascotresults.MSRES_SHOW_SUBSETS:
            print("Proteins matching a subset of these peptides:")

            i = 1
            subset_prot = results.getNextSubsetProtein(hit, i)
            while subset_prot:
                accession = subset_prot.getAccession()
                print(accession, "Total score:", subset_prot.getScore())
                print(" Peptides matched:", subset_prot.getNumDisplayPeptides(), "")
                i += 1
                subset_prot = results.getNextSubsetProtein(hit, i)

        print(" ")

        hit += 1
        prot = results.getHit(hit)

    results.createUnassignedList(msparser.ms_mascotresults.SCORE)

    num_unassigned = results.getNumberOfUnassigned()
    if num_unassigned:
        print("Unassigned list")
        print("---------------")

        for u in range(1, 1 + num_unassigned):
            pep = results.getUnassigned(u)
            displayPeptideInfo(0, pep, results, 0, 1, 1)

    if usePeptideSummary:
        print(" ")
        # The query number is hard-coded in this example.
        displayYellowPopupInfo(results, 97)
199
200
def displayYellowPopupInfo(results, q):
    """
    Shows the equivalent of the yellow popup box for given query
    - results is the results object
    - q is the query number

    One row is printed for each of ranks 1 to 10 that has a peptide with a
    non-empty sequence. The hit number and protein are parsed from the
    start of the string returned by getProteinsWithThisPepMatch(); a "+"
    is appended to the hit when more text follows the first entry. If that
    string is empty or cannot be parsed, the two columns are left blank.
    """
    fmt = "%5s %5s %9s %7s %7s"
    print(fmt % ("Score", "Delta", "Hit", "Protein", "Peptide"))

    # digits, ':', a run of non-space characters, optional spaces, the rest.
    # Raw string so that \d is a regex class, not a string escape.
    first_entry = re.compile(r'(\d+):([^ ]*)[ ]*(.*)')

    for p in range(1, 11):
        pep = results.getPeptide(q, p)
        if not pep:
            continue

        seq = pep.getPeptideStr()
        if not seq:
            continue

        tmp = results.getProteinsWithThisPepMatch(q, p)

        hit, protein = '', ''

        match = first_entry.search(tmp) if tmp else None
        if match:
            hit, protein, morethan = match.groups()

            if morethan:
                hit += "+"

        print(fmt % (pep.getIonsScore(), pep.getDelta(), hit, protein, seq))

    p = 1
    print("Accessions that matched query %s rank %s :- %s" % (q, p, results.getProteinsWithThisPepMatch(q, p)))
233
234
def displayPeptideInfo(showFullDetails, p, results, isDuplicate, isBold, showCheckBox):
    """
    Print one peptide match, either as a single summary line or in full.

    - showFullDetails is false for the one-line form, true for the full list
    - p is the peptide object to display
    - results is the results object the peptide came from
    - isDuplicate puts the ions score in parentheses (one-line form only)
    - isBold selects "BOLD" instead of "dim" (one-line form only)
    - showCheckBox selects "CB" instead of "--" (one-line form only)

    Returns None.
    """
    q = p.getQuery()

    if not showFullDetails:
        fmt = "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s"

        cb = "CB" if showCheckBox else "--"
        bold = "BOLD" if isBold else "dim"
        paren1, paren2 = ("(", ")") if isDuplicate else ("", "")

        print(fmt % (
            cb,
            bold,
            q,
            p.getObserved(),
            p.getRank(),
            p.getPrettyRank(),
            p.getPeptideStr(),
            paren1,
            p.getIonsScore(),
            paren2,
            results.getReadableVarMods(q, p.getRank())
        ))

        return

    print("Peptide hit")

    any_match = p.getAnyMatch()
    if not any_match:
        print(" No match")
        return

    fmt = " %-12s: %s"

    # The three values are not strings, so convert each one before joining.
    thresholds = ', '.join(str(value) for value in (
        results.getPeptideIdentityThreshold(q, 20),
        results.getHomologyThreshold(q, 20),
        results.getProbOfPepBeingRandomMatch(p.getIonsScore(), q)
    ))

    rows = [
        ('Query', q),
        ('Rank', p.getRank()),
        ('Matched', any_match),
        ('missedCleave', p.getMissedCleavages()),
        ('mrCalc', p.getMrCalc()),
        ('delta', p.getDelta()),
        ('observed', p.getObserved()),
        ('charge', p.getCharge()),
        ('mrExp', p.getMrExperimental()),
        ('ionsMatched', p.getNumIonsMatched()),
        ('peptideStr', p.getPeptideStr()),
        ('peaksUsed1', p.getPeaksUsedFromIons1()),
        ('varModsStr', p.getVarModsStr()),
        # getRank must be called; the rank value is what the lookup needs.
        ('readable mod', results.getReadableVarMods(q, p.getRank())),
        ('ionsScore', p.getIonsScore()),
        ('seriesUsedS', p.getSeriesUsedStr()),
        ('peaksUsed2', p.getPeaksUsedFromIons2()),
        ('peaksUsed3', p.getPeaksUsedFromIons3()),
        ('idth, hth, p', thresholds),
    ]

    for row in rows:
        print(fmt % row)

    print(" ")
295
296
def checkErrorHandler(resfile):
    """
    Exercise the error reporting of a results file object.

    Functions are called with invalid arguments on purpose, and the error
    information collected by resfile is then printed and cleared again.
    """
    print("Testing the error handling... ")
    print("=========================================")

    query_count = resfile.getNumQueries()

    # A query number beyond the last query - this call should fail.
    resfile.getObservedCharge(query_count + 40)

    print("Error number: %s" % resfile.getLastError())
    print("Error string: %s" % resfile.getLastErrorString())

    resfile.clearAllErrors()
    print("Cleared all errors - should have no errors left: %s errors left" % resfile.getNumberOfErrors())

    # Twenty more calls that should fail: query_count + 1 ... query_count + 20.
    for offset in range(20):
        resfile.getObservedCharge(query_count + offset + 1)

    # Now, the best way, print out all errors.
    print("More errors added - there are now %s errors" % resfile.getNumberOfErrors())

    # Error entries are addressed from 1 up to the reported count.
    error_total = resfile.getNumberOfErrors()
    index = 1
    while index <= error_total:
        print("Error number: %s : %s" % (resfile.getErrorNumber(index), resfile.getErrorString(index)))
        index += 1

    print(" ")
    resfile.clearAllErrors()
327
328
def searchInformation(resfile):
    """
    Print general search information read from the ms_mascotresfilebase
    object.

    The anyPMF, anyMSMS and anySQ functions should normally be preferred
    over isPMF etc, because some people submit MSMS through the sequence
    query window etc.
    """
    row = "%-20s: %s"

    def show(pairs):
        # Each getter is called only when its own line is printed.
        for label, getter in pairs:
            print(row % (label, getter()))

    print("Search information from ms_mascotresfilebase")
    print("========================================")

    show((
        ("Number of queries", resfile.getNumQueries),
        ("Number of sequences", resfile.getNumSeqs),
        ("Sequences after tax", resfile.getNumSeqsAfterTax),
        ("Number of residues", resfile.getNumResidues),
        ("Execution time", resfile.getExecTime),
        ("Date (seconds)", resfile.getDate),
    ))

    # Same date again, broken down by time.localtime() and formatted by hand.
    stamp = time.localtime(resfile.getDate())
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    readable = "%s %s %d %02d:%02d:%02d %d" % (
        day_names[stamp.tm_wday],
        month_names[stamp.tm_mon - 1],  # tm_mon starts at 1
        stamp.tm_mday,
        stamp.tm_hour,
        stamp.tm_min,
        stamp.tm_sec,
        stamp.tm_year,
    )
    print(row % ("Date", readable))

    show((
        ("Mascot version", resfile.getMascotVer),
        ("Fasta version", resfile.getFastaVer),
        ("Is PMF?", resfile.isPMF),
        ("Is MSMS?", resfile.isMSMS),
        ("Is SQ?", resfile.isSQ),
        ("Is Error tolerant", resfile.isErrorTolerant),
        ("Any PMF?", resfile.anyPMF),
        ("Any MSMS?", resfile.anyMSMS),
        ("Any SQ?", resfile.anySQ),
        ("Any peptide matches", resfile.anyFastaMatches),
    ))

    print(" ")
373
374
375
def searchParameters(resfile):
    """
    Print the search parameters held by the ms_searchparams object.

    The values come from the parameters and the masses sections of the file.
    """
    params = resfile.params()
    row = "%-20s: %s"

    def show(pairs):
        # Each getter is called only when its own line is printed.
        for label, getter in pairs:
            print(row % (label, getter()))

    print("Search parameters from ms_searchparams")
    print("=========================================")

    show((
        ("License", params.getLICENSE),
        ("Search title", params.getCOM),
        ("SEG mass", params.getSEG),
        ("Peptide tol", params.getTOL),
        ("Peptide tol units", params.getTOLU),
        ("Fragment tol", params.getITOL),
        ("Fragment tol units", params.getITOLU),
        ("Missed cleavages", params.getPFA),
        ("Database", params.getDB),
        ("Static mods", params.getMODS),
        ("Average/monoisotopic", params.getMASS),
        ("Enzyme", params.getCLE),
        ("Raw data file name", params.getFILENAME),
        ("Input data", params.getQUE),
        ("Type of search", params.getSEARCH),
        ("User name", params.getUSERNAME),
        ("User email", params.getUSEREMAIL),
        ("Charge state", params.getCHARGE),
        ("Repeat search file", params.getINTERMEDIATE),
        ("Num hits to display", params.getREPORT),
        ("Show overview", params.getOVERVIEW),
        ("Data file format", params.getFORMAT),
        ("Form version", params.getFORMVER),
        ("Variable mods", params.getIT_MODS),
    ))

    # NOTE(review): only User00..User11 are shown - confirm whether a
    # USER12 field exists and should be listed as well.
    for field in range(12):
        print(row % ("User%02d" % field, params.getUSERField(field)))

    show((
        ("Precursor mass", params.getPRECURSOR),
        ("Taxonomy filter", params.getTAXONOMY),
        ("Type of report", params.getREPTYPE),
        ("Accessions to search", params.getACCESSION),
        ("Subcluster used", params.getSUBCLUSTER),
        ("ICAT search?", params.getICAT),
        ("Instrument type", params.getINSTRUMENT),
        ("Error tolerant?", params.getERRORTOLERANT),
        ("Rules (ions series)", params.getRULES),
    ))

    # One line per residue letter, A through Z.
    for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        print(row % ("Residue " + letter, params.getResidueMass(letter)))

    show((
        ("C terminus mass", params.getCTermMass),
        ("N terminus mass", params.getNTermMass),
        ("Mass of hydrogen", params.getHydrogenMass),
        ("Mass of oxygen", params.getOxygenMass),
        ("Mass of carbon", params.getCarbonMass),
        ("Mass of nitrogen", params.getNitrogenMass),
        ("Mass of electron", params.getElectronMass),
    ))

    # Variable mods are numbered from 1; stop at the first one whose name
    # is empty.
    mod = 1
    while True:
        if not params.getVarModsName(mod):
            break
        print(row % ("Variable mod name", params.getVarModsName(mod)))
        print(row % ("Variable mod delta", params.getVarModsDelta(mod)))
        print(row % ("Variable mod neutral", params.getVarModsNeutralLoss(mod)))
        mod += 1

    print(" ")
444
445
def inputData(resfile):
    """
    Print the input data of a query using the ms_inputquery get functions.

    Only query number 1 is shown. Change the range below to
    (1, 1 + resfile.getNumQueries()) to get all input data.
    """
    row = " %-16s: %s"

    for query_number in range(1, 2):
        print("Input data for query number %d" % query_number)
        print("=========================================")

        query = msparser.ms_inputquery(resfile, query_number)

        # Label / getter pairs; each getter runs when its line is printed.
        fields = (
            ("title", lambda: query.getStringTitle(True)),
            ("mass_min", query.getMassMin),
            ("mass_max", query.getMassMax),
            ("int_min", query.getIntMin),
            ("int_max", query.getIntMax),
            ("num_vals", query.getNumVals),
            ("num_used1", query.getNumUsed),
            ("ions1", query.getStringIons1),
            ("ions2", query.getStringIons2),
            ("ions3", query.getStringIons3),
            ("peptol", query.getPepTol),
            ("peptol units", query.getPepTolUnits),
            ("peptol str", query.getPepTolString),
            ("repeat srch", lambda: resfile.getRepeatSearchString(query_number)),
        )
        for label, getter in fields:
            print(row % (label, getter()))

        # Mass / intensity pairs of ions series 1, peaks numbered from 1.
        peak_total = query.getNumberOfPeaks(1)
        peak = 1
        while peak <= peak_total:
            print("%f, %f" % (query.getPeakMass(1, peak), query.getPeakIntensity(1, peak)))
            peak += 1

        print(" ")
478
479
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
482