view tools/filters/lav_to_bed.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

#!/usr/bin/env python
#Reads a LAV file and writes two BED files.
import sys
from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
import bx.align.lav

# Fail fast when run on an interpreter older than Python 2.4.
# NOTE: assert statements are skipped when Python runs with -O.
assert sys.version_info[:2] >= ( 2, 4 )

def stop_err( msg ):
    """Report a fatal error on stderr and terminate the process.

    msg -- text written verbatim to sys.stderr (no newline is appended).

    Raises SystemExit with exit code 1 so that callers inspecting the
    process status see a failure.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report status 0 (success) even though an
    # error message has just been emitted.
    sys.exit( 1 )

def main():
    """Convert a pairwise LAV alignment file into two BED files.

    Command-line arguments (read from sys.argv):
        1 -- path of the LAV file to read
        2 -- path of the BED file receiving the first species encountered
        3 -- path of the BED file receiving the second species encountered

    Every component of every alignment block whose species was registered
    while processing the first block is written as one BED line:
    chrom, start, end, "<species>_<block index>", a score of 0 and strand.
    The components of one block share the same block index in their names.

    On stdout, one "#FILE" line is printed per registered species, followed
    by a summary line.  A missing argument or a file that cannot be opened is
    reported through stop_err().
    """
    opened = []
    try:
        # Index sys.argv inside the loop so a missing argument is only
        # noticed after the preceding files were opened, as before.
        for arg_index, mode in ( ( 1, 'r' ), ( 2, 'w' ), ( 3, 'w' ) ):
            opened.append( open( sys.argv[arg_index], mode ) )
    except Exception:
        # sys.exc_info() is used instead of "except E, e" / "except E as e"
        # so that this block parses on both old and new Python versions.
        error = sys.exc_info()[1]
        for handle in opened:
            handle.close()
        stop_err( str( error ) )
    lav_file, bed_file1, bed_file2 = opened

    lavs_read = 0
    # Counts processed blocks; it doubles as the index embedded in BED names.
    beds_written = 0
    species = {}
    try:
        # TODO: this is really bad since everything is read into memory.  Can we eliminate this tool?
        for lav_block in bx.align.lav.Reader( lav_file ):
            lavs_read += 1
            for c in lav_block.components:
                spec, chrom = bx.align.lav.src_split( c.src )
                if beds_written < 1:
                    # Only the first block decides which species goes to
                    # which output file.
                    # NOTE(review): if both components carry the same
                    # species name, the second assignment overwrites the
                    # first and later blocks all go to the second file.
                    if len( species ) == 0:
                        species[spec] = bed_file1
                    elif len( species ) == 1:
                        species[spec] = bed_file2
                    else:
                        continue #this is a pairwise alignment...
                if spec in species:
                    species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( beds_written ), 0, c.strand ) )
            beds_written += 1

        for spec, bed_file in species.items():
            sys.stdout.write( "#FILE\t%s\t%s\n" % ( bed_file.name, spec ) )
    finally:
        # Close all three files even when reading or writing fails.
        for handle in opened:
            handle.close()

    # The trailing blank line matches the original output.  The "regions"
    # figure is the number of blocks processed, not of BED lines written.
    sys.stdout.write( "%d lav blocks read, %d regions written\n\n" % ( lavs_read, beds_written ) )



# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()