annotate tools/maf/maf_to_bed.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Read a maf and output intervals for specified list of species.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 import sys, os, tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 from bx.align import maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def _open_species_file(tmp_dir):
    """
    Open a new, uniquely named '.maf_to_bed' file for writing inside tmp_dir.

    NamedTemporaryFile is used only to pick a unique name: closing it removes
    the placeholder, and the same path is then reopened as an ordinary file
    that is not deleted on close.
    """
    placeholder = tempfile.NamedTemporaryFile(mode='w', dir=tmp_dir, suffix='.maf_to_bed')
    filename = placeholder.name
    placeholder.close()
    return open(filename, 'w')


def _species_in_maf(input_filename):
    """
    Return the species named by the components of a MAF file.

    Species are listed once each, in the order they are first seen. A
    component whose src does not split into both a species and a chromosome
    is recorded under its whole src string. Any error raised while opening or
    parsing the file propagates to the caller; the file handle is closed
    either way.
    """
    found = []
    seen = {}
    handle = open(input_filename, 'r')
    try:
        for block in maf.Reader(handle):
            for comp in block.components:
                spec, chrom = maf.src_split(comp.src)
                if not spec or not chrom:
                    spec = comp.src
                if spec not in seen:
                    seen[spec] = True
                    found.append(spec)
    finally:
        handle.close()
    return found


def __main__():
    """
    Convert a MAF file into one BED-style interval file per species.

    Command line arguments (read from sys.argv):
        1: input MAF filename
        2: output filename, used for the first species in the list
        3: comma separated list of species; if it contains "None" the
           species are discovered from the MAF file itself
        4: when equal to "partial_disallowed", blocks having fewer components
           than the number of species are skipped
        5: directory where files that become additional output are created

    Every written line has six tab separated fields: chrom, start, end,
    "<species>_<block number>", "0" and strand. After conversion one
    "#FILE1" line (first species) or "#FILE" line (any other species) is
    printed per output file so the caller can locate it.

    On an unreadable MAF file or a "?" species a message is written to
    stderr and the function returns without producing output.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    if "None" in species:
        try:
            species = _species_in_maf(input_filename)
        except Exception:
            # Exception rather than a bare except, so that Ctrl-C and
            # sys.exit() are not reported as a bad MAF file.
            sys.stderr.write("Invalid MAF file specified\n")
            return

    if "?" in species:
        sys.stderr.write("Invalid dbkey specified\n")
        return

    num_species = len(species)
    # Loop invariant: blocks are trimmed to the requested species unless the
    # list (still) contains the literal name "None".
    restrict = "None" not in species

    out_files = {}
    primary_spec = None
    file_in = None
    try:
        for index, spec in enumerate(species):
            if spec in out_files:
                # A repeated name must not replace (and leak) an open handle.
                continue
            if index == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = _open_species_file(database_tmp_dir)

        # sys.stdout.write instead of the print statement keeps this module
        # parseable by both Python 2 and Python 3.
        sys.stdout.write("Restricted to species: " + ",".join(species) + "\n")

        file_in = open(input_filename, 'r')
        for block_num, block in enumerate(maf.Reader(file_in)):
            if restrict:
                block = block.limit_to_species(species)
            components = block.components
            if len(components) < num_species and partial == "partial_disallowed":
                continue
            for comp in components:
                spec, chrom = maf.src_split(comp.src)
                if not spec or not chrom:
                    spec = chrom = comp.src
                if spec not in out_files:
                    out_files[spec] = _open_species_file(database_tmp_dir)

                if comp.strand == "-":
                    # Minus-strand positions are mirrored through src_size;
                    # presumably this yields forward-strand coordinates as
                    # BED expects -- confirm against the MAF specification.
                    start = comp.src_size - comp.end
                    end = comp.src_size - comp.start
                else:
                    start = comp.start
                    end = comp.end
                fields = [chrom, str(start), str(end), spec + "_" + str(block_num), "0", comp.strand]
                out_files[spec].write("\t".join(fields) + "\n")
    finally:
        # Release every handle even when reading or writing fails part way.
        if file_in is not None:
            file_in.close()
        for handle in out_files.values():
            handle.close()

    for spec in out_files:
        if spec != primary_spec:
            sys.stdout.write("#FILE\t" + spec + "\t" + os.path.join(database_tmp_dir, os.path.split(out_files[spec].name)[1]) + "\n")
        else:
            sys.stdout.write("#FILE1\t" + spec + "\t" + out_files[spec].name + "\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()