annotate tools/maf/maf_to_interval.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Read a MAF file and output intervals for a specified list of species.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 import sys, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 from bx.align import maf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 from galaxy.tools.util import maf_utilities
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Refuse to run on interpreters older than Python 2.4. An explicit raise is
# used instead of `assert` so the check is not stripped under `python -O`.
if sys.version_info[:2] < ( 2, 4 ):
    raise AssertionError( 'Python 2.4 or later is required' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def __main__():
    """
    Read a MAF file and write one tab-separated interval file per species.

    Positional command-line arguments (taken from sys.argv):
        1  input_filename    MAF file to read
        2  output_filename   interval file written for the primary species
        3  output_id         id embedded in the names of the additional output files
        4  database_tmp_dir  directory where files that become additional output are stored
        5  primary_spec      name of the primary species
        6  species           comma-separated species to write intervals for;
                             if any entry is "None" the list is treated as empty
        7  all_species       comma-separated species whose sequences are written as columns
        8  partial           "partial_disallowed" skips blocks that have fewer
                             components than there are entries in all_species
        9  keep_gaps         "remove_gaps" strips '-' characters from the sequences

    The primary species is always added to both species lists. Each output
    file starts with a '#chrom ...' header line and then receives one line per
    block in which that species has a component.

    Raises AssertionError if the component returned for a species reports a
    different species name in its src.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    output_id = sys.argv[3]
    #where to store files that become additional output
    database_tmp_dir = sys.argv[4]
    primary_spec = sys.argv[5]
    species = sys.argv[6].split( ',' )
    all_species = sys.argv[7].split( ',' )
    partial = sys.argv[8]
    keep_gaps = sys.argv[9]
    out_files = {}

    if "None" in species:
        species = []

    if primary_spec not in species:
        species.append( primary_spec )
    if primary_spec not in all_species:
        all_species.append( primary_spec )

    all_species.sort()
    num_species = len( all_species )

    # Loop-invariant values, computed once up front.
    header = '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % ( '\t'.join( all_species ) )
    skip_partial = partial == "partial_disallowed"
    remove_gaps = keep_gaps == 'remove_gaps'

    file_in = None
    # try/finally (rather than `with`) keeps the Python 2.4 compatibility the
    # script declares, while guaranteeing every opened file is closed even if
    # reading or writing fails part-way through.
    try:
        for spec in species:
            if spec in out_files:
                # A species listed twice must not reopen (and truncate) its
                # file, which would also orphan the first handle.
                continue
            if spec == primary_spec:
                out_path = output_filename
            else:
                out_path = os.path.join( database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % ( output_id, spec, spec ) )
            out_files[ spec ] = open( out_path, 'wb+' )
            out_files[ spec ].write( header )

        file_in = open( input_filename, 'r' )
        maf_reader = maf.Reader( file_in )

        for i, m in enumerate( maf_reader ):
            for j, block in enumerate( maf_utilities.iter_blocks_split_by_species( m ) ):
                if skip_partial and len( block.components ) < num_species:
                    continue
                # Map species name -> aligned text for this block.
                sequences = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split( c.src )
                    if remove_gaps:
                        sequences[ spec ] = c.text.replace( '-', '' )
                    else:
                        sequences[ spec ] = c.text
                # One column per entry of all_species; empty when the species is absent from the block.
                sequences = '\t'.join( [ sequences.get( name, '' ) for name in all_species ] )
                for spec in species:
                    c = block.get_component_by_src_start( spec )
                    if c is not None:
                        spec2, chrom = maf_utilities.src_split( c.src )
                        # Explicit raise so the check is not stripped under `python -O`.
                        if spec2 != spec:
                            raise AssertionError( 'Species name inconsistancy found in component: %s != %s' % ( spec, spec2 ) )
                        out_files[ spec ].write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % (spec, i, j), sequences ) )
    finally:
        if file_in is not None:
            file_in.close()
        for file_out in out_files.values():
            file_out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()