diff tools/maf/maf_to_bed.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/maf/maf_to_bed.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+"""
+Read a maf and output intervals for specified list of species.
+"""
+import sys, os, tempfile
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.align import maf
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def __main__():
+        
+    input_filename = sys.argv[1]
+    output_filename = sys.argv[2]
+    #where to store files that become additional output
+    database_tmp_dir = sys.argv[5]
+    
+    species = sys.argv[3].split(',')
+    partial = sys.argv[4]
+    out_files = {}
+    primary_spec = None
+    
+    if "None" in species:
+        species = {}
+        try:
+            for i, m in enumerate( maf.Reader( open( input_filename, 'r' ) ) ):
+                for c in m.components:
+                    spec,chrom = maf.src_split( c.src )
+                    if not spec or not chrom:
+                        spec = chrom = c.src
+                    species[spec] = ""
+            species = species.keys()
+        except:
+            print >>sys.stderr, "Invalid MAF file specified"
+            return
+        
+    if "?" in species:
+        print >>sys.stderr, "Invalid dbkey specified"
+        return
+        
+    
+    for i in range( 0, len( species ) ):
+        spec = species[i]
+        if i == 0:
+            out_files[spec] = open( output_filename, 'w' )
+            primary_spec = spec
+        else:
+            out_files[spec] = tempfile.NamedTemporaryFile( mode = 'w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
+            filename = out_files[spec].name
+            out_files[spec].close()
+            out_files[spec] = open( filename, 'w' )
+    num_species = len( species )
+    
+    print "Restricted to species:", ",".join( species )
+    
+    file_in = open( input_filename, 'r' )
+    maf_reader = maf.Reader( file_in )
+    
+    block_num = -1
+    
+    for i, m in enumerate( maf_reader ):
+        block_num += 1
+        if "None" not in species:
+            m = m.limit_to_species( species )
+        l = m.components
+        if len(l) < num_species and partial == "partial_disallowed": continue
+        for c in l:
+            spec,chrom = maf.src_split( c.src )
+            if not spec or not chrom:
+                    spec = chrom = c.src
+            if spec not in out_files.keys():
+                out_files[spec] = tempfile.NamedTemporaryFile( mode='w', dir = database_tmp_dir, suffix = '.maf_to_bed' )
+                filename = out_files[spec].name
+                out_files[spec].close()
+                out_files[spec] = open( filename, 'w' )
+            
+            if c.strand == "-":
+                out_files[spec].write( chrom + "\t" + str( c.src_size - c.end ) + "\t" + str( c.src_size - c.start ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
+            else:
+                out_files[spec].write( chrom + "\t" + str( c.start ) + "\t" + str( c.end ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" )
+            
+    file_in.close()
+    for file_out in out_files.keys():
+        out_files[file_out].close()
+
+    for spec in out_files.keys():
+        if spec != primary_spec:
+            print "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] )
+        else:
+            print "#FILE1\t" + spec + "\t" + out_files[spec].name
+
+if __name__ == "__main__": __main__()