Mercurial > repos > earlhaminst > gff3_to_json
annotate gff3_to_json.py @ 1:befe6021e476 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
author | earlhaminst |
---|---|
date | Tue, 28 Feb 2017 12:06:04 -0500 |
parents | be6cec883b02 |
children |
rev | line source |
---|---|
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
2 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
3 import json |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
5 import sys |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
6 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
7 gene_count = 0 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
8 |
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
9 |
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def remove_type_from_list_of_ids(l):
    """
    Strip the type prefix from every ID in a comma-separated list.

    ``l`` is a string of IDs separated by commas. Each ID is passed through
    remove_type_from_id() and the results are joined with commas again, in
    the same order.
    """
    stripped_ids = [remove_type_from_id(id_) for id_ in l.split(',')]
    return ','.join(stripped_ids)
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
12 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
13 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def remove_type_from_id(id_):
    """
    Return ``id_`` without its leading "<type>:" prefix.

    Everything up to and including the first colon is dropped. An ID that
    contains no colon is returned unchanged.
    """
    _, colon, remainder = id_.partition(':')
    # `colon` is the empty string when no ':' was found
    return remainder if colon else id_
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
20 |
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
21 |
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def feature_to_dict(cols, parent_dict=None):
    """
    Convert the columns of one GFF3 feature line into a dictionary.

    :param cols: sequence of column values; this function reads cols[3]
        (start), cols[4] (end), cols[6] (strand) and cols[8] (the
        ';'-separated list of tag=value attributes).
    :param parent_dict: optional dictionary mapping a parent ID to the list
        of child feature dictionaries. When given and the feature has a
        'Parent' attribute, the new dictionary is appended to the list of
        each of its parents (the list is created if missing).
    :return: dictionary with integer 'start' and 'end', 'strand' (1 or -1)
        and one entry per attribute. The 'ID' attribute is stored under the
        key 'id' with its type prefix removed; the IDs in 'Parent' also have
        their type prefix removed.
    :raises ValueError: if start or end cannot be converted to int.
    :raises Exception: if the strand is neither '+' nor '-'.
    """
    d = {
        'end': int(cols[4]),
        'start': int(cols[3]),
    }
    for attr in cols[8].split(';'):
        # Fragments without '=' (e.g. the empty string left by a trailing
        # ';') are ignored.
        if '=' in attr:
            # Split on the first '=' only, so that a value which itself
            # contains '=' is kept whole instead of breaking the unpacking.
            (tag, value) = attr.split('=', 1)
            if tag == 'ID':
                tag = 'id'
                value = remove_type_from_id(value)
            elif tag == 'Parent':
                value = remove_type_from_list_of_ids(value)
            d[tag] = value
    if cols[6] == '+':
        d['strand'] = 1
    elif cols[6] == '-':
        d['strand'] = -1
    else:
        raise Exception("Unrecognized strand '%s'" % cols[6])
    if parent_dict is not None and 'Parent' in d:
        # a 3' UTR can be split among multiple exons
        # a 5' UTR can be split among multiple exons
        # a CDS can be part of multiple transcripts
        for parent in d['Parent'].split(','):
            parent_dict.setdefault(parent, []).append(d)
    return d
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
52 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
53 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def add_gene_to_dict(cols, species, gene_dict):
    """
    Build a gene dictionary from a feature line and store it in gene_dict.

    The dictionary returned by feature_to_dict() is extended with the gene
    fields below and stored in ``gene_dict`` under the gene's 'id'. The
    module-level ``gene_count`` supplies 'member_id' and is incremented
    after the gene has been stored.
    """
    global gene_count
    gene = feature_to_dict(cols)
    gene['member_id'] = gene_count
    gene['object_type'] = 'Gene'
    gene['seq_region_name'] = cols[0]
    gene['species'] = species
    # starts empty; nothing in this function fills it
    gene['Transcript'] = []
    gene_dict[gene['id']] = gene
    gene_count += 1
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
66 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
67 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def add_transcript_to_dict(cols, species, transcript_dict):
    """
    Build a transcript dictionary from a feature line and store it in
    transcript_dict under the transcript's 'id'.

    The dictionary returned by feature_to_dict() is extended with
    'object_type', 'seq_region_name' (taken from cols[0]) and 'species'.
    """
    transcript = feature_to_dict(cols)
    extra_fields = (
        ('object_type', 'Transcript'),
        ('seq_region_name', cols[0]),
        ('species', species),
    )
    for key, value in extra_fields:
        transcript[key] = value
    transcript_dict[transcript['id']] = transcript
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
76 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
77 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def add_exon_to_dict(cols, species, exon_parent_dict):
    """
    Build an exon dictionary from a feature line.

    feature_to_dict() registers the exon in ``exon_parent_dict`` under each
    of its parents; the same dictionary object is then completed in place
    with 'length', 'object_type', 'seq_region_name' and 'species'. If the
    exon has no 'id' but has a 'Name', the name is used as its 'id'.
    """
    exon = feature_to_dict(cols, exon_parent_dict)
    # Mutate in place: exon_parent_dict already holds a reference to `exon`
    exon['length'] = int(cols[4]) - int(cols[3]) + 1
    exon['object_type'] = 'Exon'
    exon['seq_region_name'] = cols[0]
    exon['species'] = species
    if 'Name' in exon and 'id' not in exon:
        exon['id'] = exon['Name']
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
88 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
89 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def add_cds_to_dict(cols, cds_parent_dict):
    """
    Build a CDS dictionary from a feature line.

    feature_to_dict() registers the CDS in ``cds_parent_dict`` under each of
    its parents. If the CDS has no 'id', one is derived in place: its 'Name'
    if present, otherwise its 'Parent' when that names a single parent.
    When neither applies the CDS is left without an 'id'.
    """
    cds = feature_to_dict(cols, cds_parent_dict)
    if 'id' in cds:
        return
    if 'Name' in cds:
        cds['id'] = cds['Name']
        return
    parents = cds.get('Parent')
    # a comma means several parents, so there is no single ID to borrow
    if parents is not None and ',' not in parents:
        cds['id'] = parents
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
97 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
98 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
99 def join_dicts(gene_dict, transcript_dict, exon_parent_dict, cds_parent_dict, five_prime_utr_parent_dict, three_prime_utr_parent_dict): |
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
100 for parent, exon_list in exon_parent_dict.items(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
101 if parent in transcript_dict: |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
102 exon_list.sort(key=lambda _: _['start']) |
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
103 transcript_dict[parent]['Exon'] = exon_list |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
104 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
105 for transcript_id, transcript in transcript_dict.items(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
106 translation = { |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
107 'CDS': [], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
108 'id': None, |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
109 'end': transcript['end'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
110 'object_type': 'Translation', |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
111 'species': transcript['species'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
112 'start': transcript['start'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
113 } |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
114 found_cds = False |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
115 derived_translation_start = None |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
116 derived_translation_end = None |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
117 if transcript_id in cds_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
118 cds_list = cds_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
119 cds_ids = set(_['id'] for _ in cds_list) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
120 if len(cds_ids) > 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
121 raise Exception("Transcript %s has multiple CDSs: this is not supported by Ensembl JSON format" % parent) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
122 translation['id'] = cds_ids.pop() |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
123 cds_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
124 translation['CDS'] = cds_list |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
125 translation['start'] = cds_list[0]['start'] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
126 translation['end'] = cds_list[-1]['end'] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
127 found_cds = True |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
128 if transcript_id in five_prime_utr_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
129 five_prime_utr_list = five_prime_utr_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
130 five_prime_utr_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
131 if transcript['strand'] == 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
132 derived_translation_start = five_prime_utr_list[-1]['end'] + 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
133 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
134 derived_translation_end = five_prime_utr_list[0]['start'] - 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
135 if transcript_id in three_prime_utr_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
136 three_prime_utr_list = three_prime_utr_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
137 three_prime_utr_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
138 if transcript['strand'] == 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
139 derived_translation_end = three_prime_utr_list[0]['start'] - 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
140 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
141 derived_translation_start = three_prime_utr_list[-1]['end'] + 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
142 if derived_translation_start is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
143 if found_cds: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
144 if derived_translation_start > translation['start']: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
145 raise Exception("UTR overlaps with CDS") |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
146 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
147 translation['start'] = derived_translation_start |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
148 if derived_translation_end is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
149 if found_cds: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
150 if derived_translation_end < translation['end']: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
151 raise Exception("UTR overlaps with CDS") |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
152 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
153 translation['end'] = derived_translation_end |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
154 if found_cds or derived_translation_start is not None or derived_translation_end is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
155 transcript['Translation'] = translation |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
156 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
157 for transcript in transcript_dict.values(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
158 if 'Parent' in transcript: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
159 # A polycistronic transcript can have multiple parents |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
160 for parent in transcript['Parent'].split(','): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
161 if parent in gene_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
162 gene_dict[parent]['Transcript'].append(transcript) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
163 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
164 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def update_full_gene_dict_no_overwrite(full_gene_dict, gene_dict):
    """
    Merge ``gene_dict`` into ``full_gene_dict`` in place, refusing to overwrite.

    :param full_gene_dict: dictionary that accumulates the genes read so far;
        it is updated in place
    :param gene_dict: dictionary of genes to add
    :raises Exception: if any key of ``gene_dict`` is already a key of
        ``full_gene_dict``; in that case ``full_gene_dict`` is left untouched
    """
    gene_intersection = set(full_gene_dict) & set(gene_dict)
    if gene_intersection:
        # Sort the ids: set iteration order is not stable between runs, and a
        # reproducible message is easier to search for and to test against.
        raise Exception("Information for genes '%s' are present in multiple files" % ', '.join(sorted(gene_intersection)))
    full_gene_dict.update(gene_dict)
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
170 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
171 |
1
befe6021e476
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 5e5fbe362ed5a4714debda0f2c0834cbbfd34147
earlhaminst
parents:
0
diff
changeset
|
def write_json(full_gene_dict, outfile=None, sort_keys=False):
    """
    Serialise ``full_gene_dict`` as JSON.

    :param full_gene_dict: the object to dump
    :param outfile: path of the file to write; when empty or None the JSON
        is written to the standard output instead
    :param sort_keys: passed through to ``json.dump`` as ``sort_keys``
    """
    if not outfile:
        # No destination given: emit on the standard output
        json.dump(full_gene_dict, sys.stdout, sort_keys=sort_keys)
        return
    with open(outfile, 'w') as handle:
        json.dump(full_gene_dict, handle, sort_keys=sort_keys)
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
178 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
179 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def __main__():
    """
    Command-line entry point: convert GFF3 files and merge JSON files into one JSON.

    Options (all read from ``sys.argv`` through ``optparse``):

    * ``--gff3 SPECIES:FILENAME`` (repeatable): GFF3 file to convert
    * ``--json FILENAME`` (repeatable): JSON file to merge into the result
    * ``-s`` / ``--sort``: sort the keys in the JSON output
    * ``-o`` / ``--output``: output path; standard output when omitted

    :raises Exception: if positional arguments are given, if a ``--gff3``
        value is not in ``SPECIES:FILENAME`` format, if a GFF3 feature line
        does not have 9 tab-separated columns, if one of the feature handlers
        fails on a line (the error is re-raised prefixed with the line number
        and file name), or if the same gene is found in more than one input
    """
    parser = optparse.OptionParser()
    parser.add_option('--gff3', action='append', default=[], help='GFF3 file to convert, in SPECIES:FILENAME format. Use multiple times to add more files')
    parser.add_option('--json', action='append', default=[], help='JSON file to merge. Use multiple times to add more files')
    parser.add_option('-s', '--sort', action='store_true', help='Sort the keys in the JSON output')
    parser.add_option('-o', '--output', help='Path of the output file. If not specified, will print on the standard output')
    options, args = parser.parse_args()
    if args:
        raise Exception('Use options to provide inputs')

    full_gene_dict = dict()
    for gff3_arg in options.gff3:
        try:
            # Split on the first colon only, so that a FILENAME which itself
            # contains a colon (e.g. a Windows drive letter) is still accepted.
            (species, filename) = gff3_arg.split(':', 1)
        except ValueError:
            raise Exception("Argument for --gff3 '%s' is not in the SPECIES:FILENAME format" % gff3_arg)
        # Fresh per-file dictionaries: features of one file are joined together
        # before being merged into the global result.
        gene_dict = dict()
        transcript_dict = dict()
        exon_parent_dict = dict()
        cds_parent_dict = dict()
        five_prime_utr_parent_dict = dict()
        three_prime_utr_parent_dict = dict()
        with open(filename) as f:
            for i, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line[0] == '#':
                    # skip empty lines and comment lines
                    continue
                cols = line.split('\t')
                if len(cols) != 9:
                    raise Exception("Line %i in file '%s': '%s' does not have 9 columns" % (i, filename, line))
                feature_type = cols[2]
                try:
                    if feature_type == 'gene':
                        add_gene_to_dict(cols, species, gene_dict)
                    elif feature_type in ('mRNA', 'transcript'):
                        add_transcript_to_dict(cols, species, transcript_dict)
                    elif feature_type == 'exon':
                        add_exon_to_dict(cols, species, exon_parent_dict)
                    elif feature_type == 'five_prime_UTR':
                        feature_to_dict(cols, five_prime_utr_parent_dict)
                    elif feature_type == 'three_prime_UTR':
                        feature_to_dict(cols, three_prime_utr_parent_dict)
                    elif feature_type == 'CDS':
                        add_cds_to_dict(cols, cds_parent_dict)
                    else:
                        # Unknown feature types are reported but do not stop the conversion
                        print("Line %i in file '%s': '%s' is not an implemented feature type" % (i, filename, feature_type), file=sys.stderr)
                except Exception as e:
                    # Add the position in the input to whatever the handler reported
                    raise Exception("Line %i in file '%s': %s" % (i, filename, e))
        join_dicts(gene_dict, transcript_dict, exon_parent_dict, cds_parent_dict, five_prime_utr_parent_dict, three_prime_utr_parent_dict)
        update_full_gene_dict_no_overwrite(full_gene_dict, gene_dict)

    for json_arg in options.json:
        with open(json_arg) as f:
            update_full_gene_dict_no_overwrite(full_gene_dict, json.load(f))

    write_json(full_gene_dict, options.output, options.sort)


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()