Mercurial > repos > vipints > fml_gff3togtf
comparison GFFtools-GX/gff_to_bed.py @ 3:ff2c2e6f4ab3
Uploaded version 2.0.0 of gfftools ready to import to local instance
author | vipints |
---|---|
date | Wed, 11 Jun 2014 16:29:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:db3c67b03d55 | 3:ff2c2e6f4ab3 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Convert genome annotation data in GFF/GTF to a 12 column BED format. | |
4 BED format typically represents the transcript models. | |
5 | |
6 Usage: python gff_to_bed.py in.gff > out.bed | |
7 | |
8 Requirement: | |
9 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py | |
10 | |
11 Copyright (C) | |
12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. | |
13 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA. | |
14 """ | |
15 | |
16 import re | |
17 import sys | |
18 import GFFParser | |
19 | |
def _bed_score(raw_score):
    """
    Return the value for the BED score column as a string.

    @args raw_score: the 'score' entry of a gene record
    @type raw_score: sequence (list/numpy array/str), scalar or None
    """
    try:
        if len(raw_score) == 0:
            return '0'
    except TypeError:
        # not a sequence: None or a bare scalar value
        return '0' if raw_score is None else str(raw_score)
    # str() because the caller joins all columns with '\t'
    return str(raw_score[0])


def _bed_blocks(exons):
    """
    Derive the BED coordinate columns from the exons of one transcript.

    The exon length is computed as end-start+1, i.e. the incoming exon
    coordinates are treated as 1-based and inclusive (GFF/GTF style). BED
    uses a 0-based start and an exclusive end, so only the start values
    are shifted by one.

    NOTE(review): exons are used in the order given; BED expects blocks in
    ascending order, so this assumes the parser already delivers them
    sorted by start - confirm against GFFParser.

    @args exons: non-empty sequence of (start, stop) pairs
    @type exons: list or numpy array
    @return: (chrom_start, chrom_end, block_sizes, block_starts) where the
             last two are comma terminated strings
    """
    chrom_start = int(exons[0][0]) - 1
    block_sizes = []
    block_starts = []
    for exon in exons:
        first, last = int(exon[0]), int(exon[1])
        block_sizes.append('%d,' % (last - first + 1))
        # offset of the block relative to the 0-based transcript start
        block_starts.append('%d,' % (first - 1 - chrom_start))
    chrom_end = int(exons[-1][1])
    return chrom_start, chrom_end, ''.join(block_sizes), ''.join(block_starts)


def writeBED(tinfo):
    """
    Write transcript models to stdout, one 12 column BED line each.

    Every gene record is read through the keys 'chr', 'strand', 'score',
    'transcripts' and 'exons'; exons[i] holds the (start, stop) pairs that
    belong to transcripts[i]. Transcripts without exons are skipped.

    @args tinfo: list of genes
    @type tinfo: numpy object (any iterable of dict-like records works)
    """

    for gene in tinfo:
        for idx, tid in enumerate(gene['transcripts']):
            exons = gene['exons'][idx]
            if len(exons) == 0:
                # nothing to report for a transcript without exons
                continue

            chrom_start, chrom_end, block_sizes, block_starts = _bed_blocks(exons)
            out_print = [gene['chr'],
                         str(chrom_start),
                         str(chrom_end),
                         tid[0],
                         _bed_score(gene['score']),
                         gene['strand'],
                         str(chrom_start),  # thickStart
                         str(chrom_end),    # thickEnd
                         '0',               # itemRgb
                         str(len(exons)),
                         block_sizes,
                         block_starts]
            # parenthesised single argument: valid in Python 2 and 3
            print('\t'.join(out_print))
61 | |
def __main__():
    """
    Command line entry point: parse the annotation file named by the first
    argument with GFFParser.Parse and print its transcripts as BED.

    Prints the module usage text and exits with status -1 when no input
    file is given.
    """
    try:
        query_file = sys.argv[1]
    except IndexError:
        # only a missing argument is expected here; anything else should surface
        print(__doc__)
        sys.exit(-1)

    Transcriptdb = GFFParser.Parse(query_file)
    writeBED(Transcriptdb)


if __name__ == "__main__":
    __main__()