comparison tools/ncbi_blast_plus/blastxml_to_tabular.py @ 22:6f386c5dc4fb draft

v0.2.01 add -max_hsps, -use_sw_tback; lists args; internal updates
author peterjc
date Mon, 18 Sep 2017 06:21:27 -0400
parents 7538e2bfcd41
children 31e517610e1f
comparison
equal deleted inserted replaced
21:7538e2bfcd41 22:6f386c5dc4fb
59 This script attempts to produce identical output to what BLAST+ would have done. 59 This script attempts to produce identical output to what BLAST+ would have done.
60 However, check this with "diff -b ..." since BLAST+ sometimes includes an extra 60 However, check this with "diff -b ..." since BLAST+ sometimes includes an extra
61 space character (probably a bug). 61 space character (probably a bug).
62 """ 62 """
63 63
64 from __future__ import print_function
64 65
65 import os 66 import os
66 import re 67 import re
67 import sys 68 import sys
68 69
69 from optparse import OptionParser 70 from optparse import OptionParser
70 71
71 if "-v" in sys.argv or "--version" in sys.argv: 72 if "-v" in sys.argv or "--version" in sys.argv:
72 print "v0.2.00" 73 print("v0.2.01")
73 sys.exit(0) 74 sys.exit(0)
74 75
75 if sys.version_info[:2] >= (2, 5): 76 if sys.version_info[:2] >= (2, 5):
76 try: 77 try:
77 from xml.etree import cElementTree as ElementTree 78 from xml.etree import cElementTree as ElementTree
293 try: 294 try:
294 sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >")) 295 sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >"))
295 salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >")) 296 salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >"))
296 except IndexError as e: 297 except IndexError as e:
297 sys.exit("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e)) 298 sys.exit("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e))
298 # print hit_def, "-->", sallseqid 299 # print(hit_def, "-->", sallseqid)
299 positive = hsp.findtext("Hsp_positive") 300 positive = hsp.findtext("Hsp_positive")
300 ppos = "%0.2f" % (100 * float(positive) / float(length)) 301 ppos = "%0.2f" % (100 * float(positive) / float(length))
301 qframe = hsp.findtext("Hsp_query-frame") 302 qframe = hsp.findtext("Hsp_query-frame")
302 sframe = hsp.findtext("Hsp_hit-frame") 303 sframe = hsp.findtext("Hsp_hit-frame")
303 if blast_program == "blastp": 304 if blast_program == "blastp":
323 salltitles, 324 salltitles,
324 ]) 325 ])
325 if cols: 326 if cols:
326 # Only a subset of the columns are needed 327 # Only a subset of the columns are needed
327 values = [values[colnames.index(c)] for c in cols] 328 values = [values[colnames.index(c)] for c in cols]
328 # print "\t".join(values) 329 # print("\t".join(values))
329 output_handle.write("\t".join(values) + "\n") 330 output_handle.write("\t".join(values) + "\n")
330 # prevents ElementTree from growing large datastructure 331 # prevents ElementTree from growing large datastructure
331 root.clear() 332 root.clear()
332 elem.clear() 333 elem.clear()
333 334