Mercurial > repos > earlhaminst > gff3_to_json
annotate gff3_to_json.py @ 0:be6cec883b02 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
author | earlhaminst |
---|---|
date | Wed, 21 Dec 2016 10:02:59 -0500 |
parents | |
children | befe6021e476 |
rev | line source |
---|---|
0
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
2 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
3 import json |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
5 import sys |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
6 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
# Module-level registries filled in by the *_to_json functions below.
# The *_parent_dict mappings go from a parent feature ID to the list of child
# feature records (dicts) that name that parent in their 'Parent' attribute.
cds_parent_dict = {}
exon_parent_dict = {}
five_prime_utr_parent_dict = {}
# Running counter, used as the 'member_id' of each gene record.
gene_count = 0
# Gene and transcript records, keyed by their 'id'.
gene_dict = {}
transcript_dict = {}
three_prime_utr_parent_dict = {}
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
14 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
15 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def feature_to_json(cols):
    """Convert the columns of one GFF3 feature row into a dict.

    Parameters:
        cols: sequence of the row's column values (strings). Columns 4 and 5
            (indices 3 and 4) are parsed as the integer start and end,
            column 7 (index 6) is the strand and column 9 (index 8) holds the
            ';'-separated ``tag=value`` attributes.

    Returns:
        A dict with integer 'start' and 'end', 'strand' (1 or -1) and one
        entry per attribute. The 'ID' attribute is stored under the key 'id';
        every other attribute keeps its tag as the key.

    Raises:
        Exception: if the strand column is neither '+' nor '-'.
        ValueError: if the start or end column is not an integer.
    """
    d = {
        'end': int(cols[4]),
        'start': int(cols[3]),
    }
    for attr in cols[8].split(';'):
        if '=' in attr:
            # Split on the first '=' only: an attribute value may itself
            # contain '=' and must not break the two-element unpacking.
            (tag, value) = attr.split('=', 1)
            if tag == 'ID':
                d['id'] = value
            else:
                d[tag] = value
    if cols[6] == '+':
        d['strand'] = 1
    elif cols[6] == '-':
        d['strand'] = -1
    else:
        raise Exception("Unrecognized strand '%s'" % cols[6])
    return d
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
35 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
36 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def gene_to_json(cols, species):
    """Build a Gene record from a GFF3 row and register it in gene_dict.

    The dict returned by feature_to_json() is extended with a 'member_id'
    (the current value of the module-level gene_count), the object type,
    the sequence region name (first column), the species and an empty
    'Transcript' list. The record is stored in gene_dict under its 'id' and
    gene_count is then incremented.

    Raises KeyError if the row has no ID attribute.
    """
    global gene_count
    record = feature_to_json(cols)
    # These keys deliberately overwrite same-named GFF3 attributes, if any.
    record['member_id'] = gene_count
    record['object_type'] = 'Gene'
    record['seq_region_name'] = cols[0]
    record['species'] = species
    record['Transcript'] = []
    gene_dict[record['id']] = record
    gene_count += 1
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
49 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
50 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def transcript_to_json(cols, species):
    """Build a Transcript record from a GFF3 row and register it.

    The dict returned by feature_to_json() is extended with the object type,
    the sequence region name (first column) and the species, then stored in
    the module-level transcript_dict under its 'id'.

    Raises KeyError if the row has no ID attribute.
    """
    record = feature_to_json(cols)
    # These keys deliberately overwrite same-named GFF3 attributes, if any.
    record['object_type'] = 'Transcript'
    record['seq_region_name'] = cols[0]
    record['species'] = species
    transcript_dict[record['id']] = record
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
59 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
60 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def exon_to_json(cols, species):
    """Build an Exon record from a GFF3 row and file it under each parent.

    The dict returned by feature_to_json() is extended with the exon length
    (end - start + 1, taken from columns 5 and 4), the object type, the
    sequence region name (first column) and the species. When the row has no
    ID attribute, its Name attribute (if present) is used as the 'id'.

    The record is appended to the module-level exon_parent_dict once for each
    comma-separated ID in its 'Parent' attribute; rows without a 'Parent'
    attribute are not recorded.
    """
    record = feature_to_json(cols)
    # These keys deliberately overwrite same-named GFF3 attributes, if any.
    record['length'] = int(cols[4]) - int(cols[3]) + 1
    record['object_type'] = 'Exon'
    record['seq_region_name'] = cols[0]
    record['species'] = species

    if 'id' not in record and 'Name' in record:
        record['id'] = record['Name']

    parents = record.get('Parent')
    if parents is None:
        return
    for parent in parents.split(','):
        exon_parent_dict.setdefault(parent, []).append(record)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
78 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
79 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def five_prime_utr_to_json(cols):
    """Record a 5' UTR row under each of its parents.

    The dict returned by feature_to_json() is appended to the module-level
    five_prime_utr_parent_dict once for each comma-separated ID in its
    'Parent' attribute. Rows without a 'Parent' attribute are not recorded.
    """
    utr = feature_to_json(cols)
    parents = utr.get('Parent')
    if parents is None:
        return
    for parent in parents.split(','):
        # A list per parent: the 5' UTR can be split among multiple exons.
        five_prime_utr_parent_dict.setdefault(parent, []).append(utr)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
89 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
90 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def three_prime_utr_to_json(cols):
    """Record a 3' UTR row under each of its parents.

    The dict returned by feature_to_json() is appended to the module-level
    three_prime_utr_parent_dict once for each comma-separated ID in its
    'Parent' attribute. Rows without a 'Parent' attribute are not recorded.
    """
    utr = feature_to_json(cols)
    parents = utr.get('Parent')
    if parents is None:
        return
    for parent in parents.split(','):
        # A list per parent: the 3' UTR can be split among multiple exons.
        three_prime_utr_parent_dict.setdefault(parent, []).append(utr)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
100 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
101 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def cds_to_json(cols):
    """Record a CDS row under each of its parents.

    When the row has no ID attribute, the 'id' falls back to its Name
    attribute, or failing that to its Parent attribute. The record is then
    appended to the module-level cds_parent_dict once for each
    comma-separated ID in its 'Parent' attribute. Rows without a 'Parent'
    attribute are not recorded.
    """
    cds = feature_to_json(cols)
    if 'id' not in cds:
        # First available fallback wins: Name, then Parent.
        for fallback in ('Name', 'Parent'):
            if fallback in cds:
                cds['id'] = cds[fallback]
                break
    if 'Parent' not in cds:
        return
    # A 'Parent' attribute exists, so the fallback above guarantees that
    # 'id' is set by this point.
    for parent in cds['Parent'].split(','):
        cds_parent_dict.setdefault(parent, []).append(cds)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
116 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
117 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
118 def join_dicts(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
119 for parent, exon_list in exon_parent_dict.items(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
120 exon_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
121 if parent in transcript_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
122 transcript_dict[parent]['Exon'] = exon_list |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
123 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
124 for transcript_id, transcript in transcript_dict.items(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
125 translation = { |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
126 'CDS': [], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
127 'id': None, |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
128 'end': transcript['end'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
129 'object_type': 'Translation', |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
130 'species': transcript['species'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
131 'start': transcript['start'], |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
132 } |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
133 found_cds = False |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
134 derived_translation_start = None |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
135 derived_translation_end = None |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
136 if transcript_id in cds_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
137 cds_list = cds_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
138 cds_ids = set(_['id'] for _ in cds_list) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
139 if len(cds_ids) > 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
140 raise Exception("Transcript %s has multiple CDSs: this is not supported by Ensembl JSON format" % parent) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
141 translation['id'] = cds_ids.pop() |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
142 cds_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
143 translation['CDS'] = cds_list |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
144 translation['start'] = cds_list[0]['start'] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
145 translation['end'] = cds_list[-1]['end'] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
146 found_cds = True |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
147 if transcript_id in five_prime_utr_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
148 five_prime_utr_list = five_prime_utr_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
149 five_prime_utr_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
150 if transcript['strand'] == 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
151 derived_translation_start = five_prime_utr_list[-1]['end'] + 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
152 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
153 derived_translation_end = five_prime_utr_list[0]['start'] - 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
154 if transcript_id in three_prime_utr_parent_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
155 three_prime_utr_list = three_prime_utr_parent_dict[transcript_id] |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
156 three_prime_utr_list.sort(key=lambda _: _['start']) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
157 if transcript['strand'] == 1: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
158 derived_translation_end = three_prime_utr_list[0]['start'] - 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
159 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
160 derived_translation_start = three_prime_utr_list[-1]['end'] + 1 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
161 if derived_translation_start is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
162 if found_cds: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
163 if derived_translation_start > translation['start']: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
164 raise Exception("UTR overlaps with CDS") |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
165 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
166 translation['start'] = derived_translation_start |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
167 if derived_translation_end is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
168 if found_cds: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
169 if derived_translation_end < translation['end']: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
170 raise Exception("UTR overlaps with CDS") |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
171 else: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
172 translation['end'] = derived_translation_end |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
173 if found_cds or derived_translation_start is not None or derived_translation_end is not None: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
174 transcript['Translation'] = translation |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
175 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
176 for transcript in transcript_dict.values(): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
177 if 'Parent' in transcript: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
178 # A polycistronic transcript can have multiple parents |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
179 for parent in transcript['Parent'].split(','): |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
180 if parent in gene_dict: |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
181 gene_dict[parent]['Transcript'].append(transcript) |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
182 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
183 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def merge_dicts(json_arg):
    """Merge the content of a JSON file into the module-level ``gene_dict``.

    :param json_arg: path of the JSON file to load; its top-level keys are
        compared against the keys already in ``gene_dict``
    :raises Exception: if any top-level key of the file is already a key of
        ``gene_dict`` (nothing is merged in that case)
    """
    with open(json_arg) as json_handle:
        loaded_genes = json.load(json_handle)

    known_ids = set(gene_dict.keys())
    incoming_ids = set(loaded_genes.keys())
    clashing_ids = known_ids & incoming_ids
    if not clashing_ids:
        gene_dict.update(loaded_genes)
        return

    # Refuse to overwrite genes that were already collected from another input
    raise Exception("JSON file '%s' contains information for genes '%s', which are also present in other files" % (json_arg, ', '.join(clashing_ids)))
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
191 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
192 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def write_json(outfile=None, sort_keys=False):
    """Serialise the module-level ``gene_dict`` as JSON.

    :param outfile: path of the file to write; when empty or None the JSON
        is printed on the standard output instead
    :param sort_keys: passed through to the ``json`` module to sort the keys
        of every object in the output
    """
    if not outfile:
        # Standard output gets an indented rendering
        print(json.dumps(gene_dict, indent=3, sort_keys=sort_keys))
        return

    # File output is written without indentation
    with open(outfile, 'w') as out_handle:
        json.dump(gene_dict, out_handle, sort_keys=sort_keys)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
199 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
200 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def __main__():
    """Command-line entry point.

    Parses the command-line options, feeds every line of each ``--gff3``
    file to the converter matching its feature type (column 3), calls
    ``join_dicts()`` once all GFF3 files have been read, merges each
    ``--json`` file with ``merge_dicts()`` and finally emits the result
    with ``write_json()``.

    :raises Exception: if positional arguments are given, if a ``--gff3``
        argument is not in SPECIES:FILENAME format, if a feature line does
        not have 9 tab-separated columns, or if a converter fails (the
        error is re-raised prefixed with the line number and file name)
    """
    parser = optparse.OptionParser()
    parser.add_option('--gff3', action='append', default=[], help='GFF3 file to convert, in SPECIES:FILENAME format. Use multiple times to add more files')
    parser.add_option('--json', action='append', default=[], help='JSON file to merge. Use multiple times to add more files')
    parser.add_option('-s', '--sort', action='store_true', help='Sort the keys in the JSON output')
    parser.add_option('-o', '--output', help='Path of the output file. If not specified, will print on the standard output')
    options, args = parser.parse_args()

    if args:
        raise Exception('Use options to provide inputs')
    for gff3_arg in options.gff3:
        try:
            # Split on the first colon only, so that the FILENAME part may
            # itself contain ':' characters
            (species, filename) = gff3_arg.split(':', 1)
        except ValueError:
            raise Exception("Argument for --gff3 '%s' is not in the SPECIES:FILENAME format" % gff3_arg)
        with open(filename) as f:
            # Count lines from 1 so that the numbers reported in messages
            # match the ones shown by text editors
            for i, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    # skip empty lines
                    continue
                if line.startswith('#'):
                    # skip comment lines
                    continue
                cols = line.split('\t')
                if len(cols) != 9:
                    raise Exception("Line %i in file '%s': '%s' does not have 9 columns" % (i, filename, line))
                feature_type = cols[2]
                try:
                    if feature_type == 'gene':
                        gene_to_json(cols, species)
                    elif feature_type in ('mRNA', 'transcript'):
                        transcript_to_json(cols, species)
                    elif feature_type == 'exon':
                        exon_to_json(cols, species)
                    elif feature_type == 'five_prime_UTR':
                        five_prime_utr_to_json(cols)
                    elif feature_type == 'three_prime_UTR':
                        three_prime_utr_to_json(cols)
                    elif feature_type == 'CDS':
                        cds_to_json(cols)
                    else:
                        # Unknown feature types are reported but do not stop the conversion
                        print("Line %i in file '%s': '%s' is not an implemented feature type" % (i, filename, feature_type), file=sys.stderr)
                except Exception as e:
                    # Add the location of the offending line to the error
                    raise Exception("Line %i in file '%s': %s" % (i, filename, e))
    # Called once, after every GFF3 file has been parsed
    join_dicts()

    for json_arg in options.json:
        merge_dicts(json_arg)

    write_json(options.output, options.sort)
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
252 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
253 |
be6cec883b02
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module
if __name__ == '__main__':
    __main__()