Mercurial > repos > vipints > fml_gff3togtf
comparison gtf_to_gff.py @ 5:6e589f267c14
Uploaded
author | devteam |
---|---|
date | Tue, 04 Nov 2014 12:15:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:619e0fcd9126 | 5:6e589f267c14 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3]. | |
4 | |
5 Usage: python gtf_to_gff.py in.gtf > out.gff3 | |
6 | |
7 Requirement: | |
8 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py | |
9 helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py | |
10 | |
11 Copyright (C) | |
12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. | |
13 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA. | |
14 """ | |
15 | |
16 import re | |
17 import sys | |
18 import GFFParser | |
19 import helper | |
20 | |
def GFFWriter(gtf_content):
    """
    Write the parsed gene models to standard output in GFF3 format.

    A '##gff-version 3' header is printed first. For every gene record one
    'gene' line is printed, followed, for each of its transcripts, by the
    transcript line and then its five_prime_UTR, CDS, three_prime_UTR and
    exon lines (in that order).

    Fields read from each record: 'chr', 'strand', 'start', 'stop', 'source',
    'name', 'gene_info' (its 'Name' entry), 'transcripts', 'exons',
    'cds_exons' and 'transcript_type'.

    @args gtf_content: Parsed object from gtf file
    @type gtf_content: numpy array
    """

    # print() with one pre-formatted string gives the same output under
    # Python 2 (parenthesised expression) and Python 3 (function call).
    print('##gff-version 3')

    for ent1 in gtf_content:

        chr_name = ent1['chr']
        strand = ent1['strand']
        start = ent1['start']
        stop = ent1['stop']
        source = ent1['source']
        ID = ent1['name']

        # Fall back to the gene ID when the record carries no gene name.
        Name = ent1['gene_info']['Name'] or ID

        print('%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s' % (chr_name, source, start, stop, strand, ID, Name))

        # NOTE: only GFF records may be written to stdout here; the output is
        # meant to be redirected straight into a .gff3 file, so no debugging
        # output (transcript index / ID dumps) belongs in this loop.
        for idx, tid in enumerate(ent1['transcripts']):

            exons = ent1['exons'][idx]

            # Transcript span: first coordinate of the first exon up to the
            # last coordinate of the last exon.
            t_start = exons[0][0]
            t_stop = exons[-1][-1]
            t_type = ent1['transcript_type'][idx]

            cds_exons = ent1['cds_exons'][idx]

            # UTRs are only derived when both exon and CDS coordinates are
            # present; otherwise no UTR lines are emitted for the transcript.
            utr5_exons, utr3_exons = [], []
            if exons.any() and cds_exons.any():
                utr5_exons, utr3_exons = helper.buildUTR(cds_exons, exons, strand)

            # tid[0] holds the transcript identifier.
            print('%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s' % (chr_name, source, t_type, t_start, t_stop, strand, tid[0], ID))

            for ex_cod in utr5_exons:
                print('%s\t%s\tfive_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))

            # The third element of each CDS entry is written to the phase column.
            for ex_cod in cds_exons:
                print('%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, ex_cod[2], tid[0]))

            for ex_cod in utr3_exons:
                print('%s\t%s\tthree_prime_UTR\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))

            for ex_cod in exons:
                print('%s\t%s\texon\t%d\t%d\t.\t%s\t.\tParent=%s' % (chr_name, source, ex_cod[0], ex_cod[1], strand, tid[0]))
70 | |
71 | |
def __main__():
    """
    Command-line entry point: parse the GTF file named by the first
    argument and write it to standard output as GFF3.

    When no file name is given, the module docstring (usage text) is printed
    and the process exits with status -1.
    """

    try:
        gtf_fname = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        print(__doc__)
        sys.exit(-1)

    gtf_file_content = GFFParser.Parse(gtf_fname)

    GFFWriter(gtf_file_content)
83 | |
# Run the conversion only when the file is executed as a script (not imported).
if __name__ == '__main__':
    __main__()