annotate tools/filters/lav_to_bed.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Reads a LAV file and writes two BED files.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 import pkg_resources
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 import bx.align.lav
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    msg is written verbatim to stderr (no newline is appended), then the
    interpreter exits by raising SystemExit.
    """
    sys.stderr.write( msg )
    # Exit with a non-zero status so callers that inspect the return code
    # (and not just stderr) also see the failure.
    sys.exit( 1 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Read a LAV file and write its regions to two BED files, one per species.

    Command-line arguments (taken from sys.argv):
        1 -- path of the LAV file to read
        2 -- path of the BED file for the first species seen
        3 -- path of the BED file for the second species seen

    The species seen in the components of the first LAV block are bound to
    the two output files; components of any other species are skipped.  Each
    BED line holds chrom, start, end, a name of the form "<species>_<block
    index>", a score of 0 and the strand.  A "#FILE" line per output file and
    a final summary line are printed to stdout.

    If any file cannot be opened (or an argument is missing) the error text
    is passed to stop_err(), which terminates the script.
    """
    lav_file = bed_file1 = bed_file2 = None
    try:
        lav_file = open( sys.argv[1], 'r' )
        bed_file1 = open( sys.argv[2], 'w' )
        bed_file2 = open( sys.argv[3], 'w' )
    except Exception:
        # sys.exc_info() is used instead of "except ... as e" so this parses
        # on every interpreter accepted by the module-level version assert
        # as well as on Python 3.
        message = str( sys.exc_info()[1] )
        # Release whatever was opened before the failure.
        for handle in ( lav_file, bed_file1, bed_file2 ):
            if handle is not None:
                handle.close()
        stop_err( message )

    lavs_read = 0
    beds_written = 0
    species = {}  # species name -> output file object
    try:
        # TODO: this is really bad since everything is read into memory. Can we eliminate this tool?
        for lav_block in bx.align.lav.Reader( lav_file ):
            lavs_read += 1
            for c in lav_block.components:
                spec, chrom = bx.align.lav.src_split( c.src )
                if beds_written < 1:
                    # Only the first block decides which species goes to
                    # which output file.
                    if len( species ) == 0:
                        species[spec] = bed_file1
                    elif len( species ) == 1:
                        species[spec] = bed_file2
                    else:
                        continue  # this is a pairwise alignment...
                if spec in species:
                    species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( beds_written ), 0, c.strand ) )
            # Advanced once per block, so every component of a block gets
            # the same numeric suffix in its BED name.
            beds_written += 1
    finally:
        # Close all handles even when reading or writing raises.
        lav_file.close()
        bed_file1.close()
        bed_file2.close()

    # print( "..." % ... ) with a single parenthesised argument produces the
    # same output under the Python 2 print statement and Python 3 print().
    for spec, bed_file in species.items():
        print( "#FILE\t%s\t%s" % ( bed_file.name, spec ) )

    print( "%d lav blocks read, %d regions written\n" % ( lavs_read, beds_written ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()