Galaxy

Changeset 14:598e9172b8e7 (2020-12-18)

Previous changeset: 13:51a7a2a82902 (2020-10-06) | Next changeset: 15:9c62ad7dd113 (2021-04-29)

Commit message:
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit ea67c766934266e690d05e3f9ebb4cca12b8e3e7"

modified:
gstf_preparation.py

diff -r 51a7a2a82902 -r 598e9172b8e7 gstf_preparation.py
--- a/gstf_preparation.py Tue Oct 06 17:10:37 2020 +0000
+++ b/gstf_preparation.py Fri Dec 18 08:57:43 2020 +0000

[

@@ -215,10 +215,14 @@
         derived_translation_end = None
         if transcript_id in cds_parent_dict:
             cds_list = cds_parent_dict[transcript_id]
-            cds_ids = {_['id'] for _ in cds_list}
-            if len(cds_ids) > 1:
-                raise Exception("Transcript %s has multiple CDSs: this is not supported by Ensembl JSON format" % transcript_id)
-            cds_id = cds_ids.pop()
+            unique_cds_ids = {cds['id'] for cds in cds_list}
+            if len(unique_cds_ids) > 1:
+                msg = """Found multiple CDS IDs (%s) for transcript '%s'.
+This is not supported by the Ensembl JSON format. If a CDS is split across
+multiple discontinuous genomic locations, the GFF3 standard requires that all
+corresponding lines use the same ID attribute."""
+                raise Exception(msg % (unique_cds_ids, transcript_id))
+            cds_id = unique_cds_ids.pop()
             translation['id'] = cds_id
             cds_list.sort(key=lambda _: _['start'])
             translation['CDS'] = cds_list