Mercurial > repos > xuebing > sharplabtool
comparison tools/maf/maf_to_interval.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Read a MAF file and output intervals for a specified list of species. | |
| 5 """ | |
| 6 import sys, os | |
| 7 from galaxy import eggs | |
| 8 import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 9 from bx.align import maf | |
| 10 from galaxy.tools.util import maf_utilities | |
| 11 | |
| 12 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 13 | |
def __main__():
    """
    Convert a MAF alignment file into per-species, tab-separated interval files.

    Positional command-line arguments (read from sys.argv):
        1  input_filename   -- MAF file to read.
        2  output_filename  -- interval file written for the primary species.
        3  output_id        -- id embedded in the names of the additional
                               output files.
        4  database_tmp_dir -- directory that receives the additional output
                               files (one per non-primary species).
        5  primary_spec     -- primary species; always added to both species
                               lists if missing.
        6  species          -- comma-separated species to write files for; if
                               any entry is "None" only the primary is used.
        7  all_species      -- comma-separated species whose sequences become
                               the trailing columns (sorted by name).
        8  partial          -- "partial_disallowed" skips blocks that have
                               fewer components than there are entries in
                               all_species.
        9  keep_gaps        -- "remove_gaps" strips '-' from sequence text.

    Every output file starts with a '#chrom ...' header line followed by one
    row per (block, species) pair for which the block has a component.

    Raises AssertionError when a component returned for a species reports a
    different species name in its src.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    output_id = sys.argv[3]
    #where to store files that become additional output
    database_tmp_dir = sys.argv[4]
    primary_spec = sys.argv[5]
    species = sys.argv[6].split( ',' )
    all_species = sys.argv[7].split( ',' )
    partial = sys.argv[8]
    keep_gaps = sys.argv[9]

    if "None" in species:
        species = []

    if primary_spec not in species:
        species.append( primary_spec )
    if primary_spec not in all_species:
        all_species.append( primary_spec )

    all_species.sort()

    # Loop-invariant values, computed once.
    num_species = len( all_species )
    header = '#chrom\tstart\tend\tstrand\tscore\tname\t%s\n' % ( '\t'.join( all_species ) )
    remove_gaps = ( keep_gaps == 'remove_gaps' )
    partial_disallowed = ( partial == "partial_disallowed" )

    out_files = {}
    file_in = None
    # try/finally (rather than 'with') keeps the script usable on the
    # Python 2.4 floor declared at the top of the file, while still
    # guaranteeing every handle is closed if reading or writing fails.
    try:
        for spec in species:
            if spec in out_files:
                # A species listed twice would re-open the same path and
                # drop the first handle; one handle per species is enough.
                continue
            if spec == primary_spec:
                out_path = output_filename
            else:
                out_path = os.path.join( database_tmp_dir, 'primary_%s_%s_visible_interval_%s' % ( output_id, spec, spec ) )
            # NOTE(review): binary mode combined with str writes only works
            # on Python 2 -- revisit the mode if this script is ever ported.
            out_files[ spec ] = open( out_path, 'wb+' )
            out_files[ spec ].write( header )

        file_in = open( input_filename, 'r' )
        maf_reader = maf.Reader( file_in )

        for i, m in enumerate( maf_reader ):
            for j, block in enumerate( maf_utilities.iter_blocks_split_by_species( m ) ):
                if partial_disallowed and len( block.components ) < num_species:
                    continue

                # Sequence text per species for this block; species absent
                # from the block produce an empty column.
                text_by_spec = {}
                for c in block.components:
                    spec, chrom = maf_utilities.src_split( c.src )
                    if remove_gaps:
                        text_by_spec[ spec ] = c.text.replace( '-', '' )
                    else:
                        text_by_spec[ spec ] = c.text
                sequences = '\t'.join( [ text_by_spec.get( spec, '' ) for spec in all_species ] )

                for spec in species:
                    c = block.get_component_by_src_start( spec )
                    if c is None:
                        continue
                    spec2, chrom = maf_utilities.src_split( c.src )
                    if spec2 != spec:
                        # Raised explicitly so the check is not stripped
                        # under 'python -O'; the exception type and message
                        # match the assert statement this replaces.
                        raise AssertionError( 'Species name inconsistancy found in component: %s != %s' % ( spec, spec2 ) )
                    out_files[ spec ].write( "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( chrom, c.forward_strand_start, c.forward_strand_end, c.strand, m.score, "%s_%s_%s" % (spec, i, j), sequences ) )
    finally:
        if file_in is not None:
            file_in.close()
        for file_out in out_files.values():
            file_out.close()
| 67 | |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
