comparison tools/regVariation/qv_to_bqv.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #!/usr/bin/env python
2
3 """
4 Adapted from bx/scripts/qv_to_bqv.py
5
6 Convert a qual (qv) file to several BinnedArray files for fast seek.
7 This script takes approximately 4 seconds per 1 million base pairs.
8
9 The input format is fasta style quality -- fasta headers followed by
10 whitespace separated integers.
11
12 usage: %prog qual_file output_file
13 """
14
15 import pkg_resources
16 pkg_resources.require( "bx-python" )
17 pkg_resources.require( "numpy" )
18 import string
19 import psyco_full
20 import sys, re, os, tempfile
21 from bx.binned_array import BinnedArrayWriter
22 from bx.cookbook import *
23 import fileinput
24
25 def load_scores_ba_dir( dir ):
26 """
27 Return a dict-like object (keyed by chromosome) that returns
28 FileBinnedArray objects created from "key.ba" files in `dir`
29 """
30 return FileBinnedArrayDir( dir )
31
32 def main():
33 args = sys.argv[1:]
34 try:
35 qual_file_dir = args[0]
36 #mydir="/home/gua110/Desktop/chimp_quality_scores/chr22.qa"
37 mydir="/home/gua110/Desktop/rhesus_quality_scores/rheMac2.qual.qv"
38 qual_file_dir = mydir.replace(mydir.split("/")[-1], "")
39 output_file = args[ 1 ]
40 fo = open(output_file,"w")
41 except:
42 print "usage: qual_file output_file"
43 sys.exit()
44
45 tmpfile = tempfile.NamedTemporaryFile()
46 cmdline = "ls " + qual_file_dir + "*.qa | cat >> " + tmpfile.name
47 os.system (cmdline)
48 for qual_file in tmpfile.readlines():
49 qual = fileinput.FileInput( qual_file.strip() )
50 outfile = None
51 outbin = None
52 base_count = 0
53 mega_count = 0
54
55 for line in qual:
56 line = line.rstrip("\r\n")
57 if line.startswith(">"):
58 # close old
59 if outbin and outfile:
60 print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
61 outbin.finish()
62 outfile.close()
63 # start new file
64 region = line.lstrip(">")
65 #outfname = output_file + "." + region + ".bqv" #CHANGED
66 outfname = qual_file.strip() + ".bqv"
67 print >>fo, "Writing region " + region + " to file " + outfname
68 outfile = open( outfname , "wb")
69 outbin = BinnedArrayWriter(outfile, typecode='b', default=0)
70 base_count = 0
71 mega_count = 0
72 else:
73 if outfile and outbin:
74 nums = line.split()
75 for val in nums:
76 outval = int(val)
77 assert outval <= 255 and outval >= 0
78 outbin.write(outval)
79 base_count += 1
80 if (mega_count * 1000000) <= base_count:
81 sys.stdout.write(str(mega_count)+" ")
82 sys.stdout.flush()
83 mega_count = base_count // 1000000 + 1
84 if outbin and outfile:
85 print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
86 outbin.finish()
87 outfile.close()
88
89 if __name__ == "__main__":
90 main()