Mercurial > repos > vipints > fml_gff3togtf
comparison gff_to_gtf.py @ 10:c42c69aa81f8
fixed manually the upload of version 2.1.0 - deleted accidentally added files to the repo
| author | vipints <vipin@cbio.mskcc.org> |
|---|---|
| date | Thu, 23 Apr 2015 18:01:45 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 9:7d67331368f3 | 10:c42c69aa81f8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Program to convert data from GFF to GTF | |
| 4 | |
| 5 Usage: python gff_to_gtf.py in.gff > out.gtf | |
| 6 | |
| 7 Requirement: | |
| 8 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py | |
| 9 | |
| 10 Copyright (C) | |
| 11 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. | |
| 12 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. | |
| 13 """ | |
| 14 | |
| 15 import re | |
| 16 import sys | |
| 17 import GFFParser | |
| 18 | |
def _write_gtf_record(ent, tid, feature, start, end, frame, exon_number):
    """
    write a single tab-separated GTF record to standard output

    @args ent: gene level entry (reads keys chr, source, strand, name, gene_info['Name'])
    @args tid: transcript identifier placed in the transcript_id attribute
    @args feature: value for the feature column (exon, CDS, start_codon, stop_codon)
    @args start: feature start coordinate
    @args end: feature end coordinate
    @args frame: already formatted text for the frame column ('.' or a number)
    @args exon_number: value placed in the exon_number attribute
    """

    sys.stdout.write('%s\t%s\t%s\t%d\t%d\t.\t%s\t%s\tgene_id "%s"; transcript_id "%s"; exon_number "%d"; gene_name "%s"; \n' % (ent['chr'], ent['source'], feature, start, end, ent['strand'], frame, ent['name'], tid, exon_number, ent['gene_info']['Name']))


def printGTF(tinfo):
    """
    writing result file in GTF format

    For every transcript of every gene entry the exon records are written to
    standard output; when the transcript carries CDS segments, CDS,
    start_codon and stop_codon records are written as well.

    @args tinfo: parsed object from gff file
    @type tinfo: numpy array (as stated by the original author; each entry is
                 indexed by the keys chr, source, strand, name, gene_info,
                 transcripts, exons and cds_exons)
    """

    for ent1 in tinfo:
        for idx, tid in enumerate(ent1['transcripts']):

            exons = ent1['exons'][idx]
            cds_exons = ent1['cds_exons'][idx]
            # loop invariant: does this transcript have any coding segment
            has_cds = cds_exons.any()

            stop_codon = start_codon = ()

            if ent1['strand'] == '+':
                if has_cds:
                    start_codon = (cds_exons[0][0], cds_exons[0][0]+2)
                    stop_codon = (cds_exons[-1][1]-2, cds_exons[-1][1])
            elif ent1['strand'] == '-':
                if has_cds:
                    start_codon = (cds_exons[-1][1]-2, cds_exons[-1][1])
                    stop_codon = (cds_exons[0][0], cds_exons[0][0]+2)
            else:
                # The notice is kept on stdout, exactly as before.
                sys.stdout.write('STRAND information missing - %s, skip the transcript - %s\n' % (ent1['strand'], tid[0]))
                # Actually skip the transcript as announced; falling through
                # used to index the empty start_codon tuple and crash.
                continue

            last_cds_cod = 0
            # exon_number reported on the stop_codon record: the number of
            # the last exon written (0 when the transcript has no exons).
            last_exon_number = 0
            for idz, ex_cod in enumerate(exons):
                last_exon_number = idz + 1

                _write_gtf_record(ent1, tid[0], 'exon', ex_cod[0], ex_cod[1], '.', idz+1)

                if not has_cds:
                    continue

                # CDS segments are paired with exons by position; there may be
                # fewer CDS segments than exons, in which case no CDS record
                # is written for this exon.
                try:
                    cds_start, cds_end, cds_phase = cds_exons[idz][0], cds_exons[idz][1], cds_exons[idz][2]
                except IndexError:
                    pass
                else:
                    _write_gtf_record(ent1, tid[0], 'CDS', cds_start, cds_end, '%d' % cds_phase, idz+1)
                    last_cds_cod = idz

                if idz == 0:
                    _write_gtf_record(ent1, tid[0], 'start_codon', start_codon[0], start_codon[1], '%d' % cds_exons[idz][2], idz+1)

            if stop_codon:
                _write_gtf_record(ent1, tid[0], 'stop_codon', stop_codon[0], stop_codon[1], '%d' % cds_exons[last_cds_cod][2], last_exon_number)
| 64 | |
| 65 | |
if __name__ == "__main__":

    # The single command line argument is the input GFF file; without it,
    # show the module usage text and exit with a failure status.
    try:
        gff_fname = sys.argv[1]
    except IndexError:
        # print(...) with one argument behaves the same on Python 2 and 3
        print(__doc__)
        sys.exit(-1)

    # Parse the GFF file and write the GTF records to standard output.
    Transcriptdb = GFFParser.Parse(gff_fname)

    printGTF(Transcriptdb)
