view mytools/fimo2bed.py @ 5:84e2c77a6abd

Uploaded
author xuebing
date Fri, 16 Mar 2012 14:01:16 -0400
parents 39217fa39ff2
children
line wrap: on
line source

'''
Example of the tab-separated FIMO output that this script converts to BED:
#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
constitutive-donor	mm9_chr1_39533592_39535592_-	1815	1823	12.032	4.26e-06	0.397	CAGGTAAGT
constitutive-donor	mm9_chr1_59313750_59315750_+	1889	1897	12.032	4.26e-06	0.397	CAGGTAAGT

#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
constitutive-donor	mm9_chr1_172019075_172021075_-	1947	1955	12.032	4.26e-06	0.843	CAGGTAAGT
constitutive-donor	mm9_chr1_15300532_15302532_+	156	164	12.032	4.26e-06	0.843	CAGGTAAGT
'''

import sys

def fimo2bed(filename,rc):
    '''
    Parse FIMO text output and write one BED line per match to stdout.

    Each data row must hold eight tab-separated columns: pattern name,
    sequence name, start, stop, score, p-value, q-value, matched sequence.
    The sequence name is split on '_': its last three pieces are read as
    the region start, region end and strand, and everything between the
    first piece and those three is joined back into the chromosome name
    (so 'mm9_chrX_random_100_300_+' yields 'chrX_random').

    filename: path of the FIMO output. The first line is always discarded
        as the column header; blank lines and lines starting with '#'
        (e.g. repeated headers in concatenated outputs) are skipped.
    rc: true if the sequences have been reverse complemented. When false,
        '+' regions are offset from the region start and any other strand
        from the region end, and the region strand is reported unchanged.
        When true, '-' regions are offset from the region start and
        reported as '+'; all other regions are offset from the region end
        and reported as '-'.

    Output columns: chrom, start, end, matched sequence, score, strand.
    Raises ValueError if a data row does not have exactly eight columns
    or a coordinate is not an integer.
    '''
    # 'with' guarantees the file is closed even if a malformed row raises.
    with open(filename) as f:
        f.readline()  # the first line is the column header
        for line in f:
            line = line.strip()
            # Blank separators and repeated '#...' headers are not data rows.
            if not line or line.startswith('#'):
                continue
            _pattern,posi,begin,stop,score,_pv,_qv,seq = line.split('\t')
            flds = posi.split('_')
            start = int(flds[-3])
            end = int(flds[-2])
            strand = flds[-1]
            chrom = '_'.join(flds[1:-3]) #'chrX_random'
            begin = int(begin)
            stop = int(stop)
            # Decide whether match offsets count up from the region start
            # (forward) or down from the region end, and which strand to report.
            if not rc:
                forward = strand == '+'
                out_strand = strand
            else:
                forward = strand == '-'
                out_strand = '+' if forward else '-'
            if forward:
                # begin/stop are 1-based inclusive; output start is 0-based.
                start1 = start + begin - 1
                end1 = start + stop
            else:
                start1 = end - stop
                end1 = end - begin + 1
            sys.stdout.write('\t'.join([chrom,str(start1),str(end1),seq,score,out_strand]) + '\n')

if __name__ == '__main__':
    # Command line: <script> <fimo_output> [rc]
    # Guarded so that importing this module does not trigger a conversion.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <fimo_output> [rc]' % sys.argv[0])
    # Only the literal second argument 'rc' selects reverse-complement mode;
    # a missing or different second argument means the sequences are as-is.
    fimo2bed(sys.argv[1], len(sys.argv) > 2 and sys.argv[2] == 'rc')