Mercurial > repos > earlhaminst > gstf_preparation
changeset 14:598e9172b8e7 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit ea67c766934266e690d05e3f9ebb4cca12b8e3e7"
author | earlhaminst |
---|---|
date | Fri, 18 Dec 2020 08:57:43 +0000 |
parents | 51a7a2a82902 |
children | 9c62ad7dd113 |
files | gstf_preparation.py |
diffstat | 1 files changed, 8 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/gstf_preparation.py Tue Oct 06 17:10:37 2020 +0000
+++ b/gstf_preparation.py Fri Dec 18 08:57:43 2020 +0000
@@ -215,10 +215,14 @@
         derived_translation_end = None
         if transcript_id in cds_parent_dict:
             cds_list = cds_parent_dict[transcript_id]
-            cds_ids = {_['id'] for _ in cds_list}
-            if len(cds_ids) > 1:
-                raise Exception("Transcript %s has multiple CDSs: this is not supported by Ensembl JSON format" % transcript_id)
-            cds_id = cds_ids.pop()
+            unique_cds_ids = {cds['id'] for cds in cds_list}
+            if len(unique_cds_ids) > 1:
+                msg = """Found multiple CDS IDs (%s) for transcript '%s'.
+This is not supported by the Ensembl JSON format. If a CDS is split across
+multiple discontinuous genomic locations, the GFF3 standard requires that all
+corresponding lines use the same ID attribute."""
+                raise Exception(msg % (unique_cds_ids, transcript_id))
+            cds_id = unique_cds_ids.pop()
             translation['id'] = cds_id
             cds_list.sort(key=lambda _: _['start'])
             translation['CDS'] = cds_list