annotate GFFtools-GX/gff_to_bed.py @ 3:ff2c2e6f4ab3

Uploaded version 2.0.0 of gfftools ready to import to local instance
author vipints
date Wed, 11 Jun 2014 16:29:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
1 #!/usr/bin/env python
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
2 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
3 Convert genome annotation data in GFF/GTF to a 12 column BED format.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
4 BED format typically represents the transcript models.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
5
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
6 Usage: python gff_to_bed.py in.gff > out.bed
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
7
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
8 Requirement:
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
9 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
10
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
11 Copyright (C)
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
13 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
14 """
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
15
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
16 import re
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
17 import sys
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
18 import GFFParser
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
19
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def writeBED(tinfo):
    """
    Print transcript models to standard output as 12 column BED lines.

    One line is written per transcript that has at least one exon;
    transcripts without exons are skipped.

    Exon coordinates are taken to be 1-based and inclusive, as in GFF/GTF
    (the block size is computed as end - start + 1). BED expects a 0-based
    start and an exclusive end, so the transcript start is shifted down by
    one and the block starts are measured from that shifted start. This
    keeps chromStart + last blockStart + last blockSize equal to chromEnd.

    @args tinfo: iterable of gene records, each indexed by the keys
        'chr', 'strand', 'score', 'transcripts' and 'exons'
    @type tinfo: presumably the object returned by GFFParser.Parse
        (TODO confirm against the parser)
    """

    for gene in tinfo:
        for idx, tid in enumerate(gene['transcripts']):
            exons = gene['exons'][idx]  # exons of the corresponding transcript
            # len() instead of truthiness: the exon container may be an
            # array type whose truth value is ambiguous.
            if len(exons) == 0:
                continue

            # 1-based inclusive start -> 0-based BED start; the end is unchanged.
            chrom_start = int(exons[0][0]) - 1
            chrom_end = int(exons[-1][1])

            # Comma-terminated lists, one entry per exon.
            block_sizes = ''.join('%d,' % (ex[1] - ex[0] + 1) for ex in exons)
            block_starts = ''.join('%d,' % (ex[0] - 1 - chrom_start) for ex in exons)

            # Fall back to '0' when the gene record carries no score.
            score = gene['score'][0] if gene['score'] else '0'

            out_print = [gene['chr'],
                         str(chrom_start),
                         str(chrom_end),
                         tid[0],
                         str(score),  # str() so a numeric score cannot break the join
                         gene['strand'],
                         str(chrom_start),  # thickStart: whole transcript
                         str(chrom_end),  # thickEnd: whole transcript
                         '0',  # itemRgb
                         str(len(exons)),
                         block_sizes,
                         block_starts]
            print('\t'.join(out_print))
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
61
ff2c2e6f4ab3 Uploaded version 2.0.0 of gfftools ready to import to local instance
vipints
parents:
diff changeset
def __main__():
    """
    Command line entry point.

    Reads the GFF/GTF file named by the first command line argument with
    GFFParser.Parse and hands the result to writeBED. When no argument is
    given, the module docstring is printed as a usage message and the
    process exits with status -1.
    """
    try:
        query_file = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; anything else should
        # propagate instead of being mistaken for a usage error.
        print(__doc__)
        sys.exit(-1)

    Transcriptdb = GFFParser.Parse(query_file)
    writeBED(Transcriptdb)


if __name__ == "__main__":
    __main__()