Repository 'cpt_fix_sixpack'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_fix_sixpack

Changeset 3:efa4dfc23549 (2023-06-05)
Previous changeset 2:6a9b026cb378 (2022-05-20) Next changeset 4:326429ea1d33 (2023-09-03)
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
added:
cpt-macros.xml
gff3.py
gff3_fix_sixpack.py
gff3_fix_sixpack.xml
macros.xml
test-data/miro.6pfix.gff3
test-data/miro.cds.gff3
test-data/miro.cds6pfix.gff3
test-data/miro.gff3
removed:
cpt_fix_sixpack/cpt-macros.xml
cpt_fix_sixpack/gff3.py
cpt_fix_sixpack/gff3_fix_sixpack.py
cpt_fix_sixpack/gff3_fix_sixpack.xml
cpt_fix_sixpack/macros.xml
cpt_fix_sixpack/test-data/miro.6pfix.gff3
cpt_fix_sixpack/test-data/miro.cds.gff3
cpt_fix_sixpack/test-data/miro.cds6pfix.gff3
cpt_fix_sixpack/test-data/miro.gff3
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:42:28 2023 +0000
[
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/cpt-macros.xml
--- a/cpt_fix_sixpack/cpt-macros.xml Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation> 
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/gff3.py
--- a/cpt_fix_sixpack/gff3.py Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n-    feature_list,\n-    test,\n-    test_kwargs,\n-    subfeatures=True,\n-    parent=None,\n-    invert=False,\n-    recurse=True,\n-):\n-    """Recursively search through features, testing each with a test function, yielding matches.\n-\n-    GFF3 is a hierachical data structure, so we need to be able to recursively\n-    search through features. E.g. if you\'re looking for a feature with\n-    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n-    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n-    :type feature_list: list\n-    :param feature_list: an iterable of features\n-\n-    :type test: function reference\n-    :param test: a closure with the method signature (feature, **kwargs) where\n-                 the kwargs are those passed in the next argument. This\n-                 function should return True or False, True if the feature is\n-                 to be yielded as part of the main feature_lambda function, or\n-                 False if it is to be ignored. This function CAN mutate the\n-                 features passed to it (think "apply").\n-\n-    :type test_kwargs: dictionary\n-    :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n-    :type subfeatures: boolean\n-    :param subfeatures: when a feature is matched, should just that feature be\n-                        yielded to the caller, or should the entire sub_feature\n-                        tree for that feature be included? 
subfeatures=True is\n-                        useful in cases such as searching for a gene feature,\n-                        and wanting to know what RBS/Shine_Dalgarno_sequences\n-                        are in the sub_feature tree (which can be accomplished\n-                        with two feature_lambda calls). subfeatures=False is\n-                        useful in cases when you want to process (and possibly\n-                        return) the entire feature tree, such as applying a\n-                        qualifier to every single feature.\n-\n-    :type invert: boolean\n-    :param invert: Negate/invert the result of the filter.\n-\n-    :rtype: yielded list\n-    :return: Yields a list of matching features.\n-    """\n-    # Either the top level set of [features] or the subfeature attribute\n-    for feature in feature_list:\n-        feature._parent = parent\n-        if not parent:\n-            # Set to self so we cannot go above root.\n-            feature._parent = feature\n-        test_result = test(feature, **test_kwargs)\n-        # if (not invert and test_result) or (invert and not test_result):\n-        if invert ^ test_result:\n-            if not subfeatures:\n-                feature_copy = copy.deepcopy(feature)\n-                feature_copy.sub_features = list()\n-                yield feature_copy\n-            else:\n-                yield feature\n-\n-        if recurse and hasattr(feature, "sub_features"):\n-            for x in feature_lambda(\n-                feature.sub_features,\n-                test,\n-                test_kwargs,\n-                subfeatures=subfeatures,\n-                parent=feature,\n-                invert=invert,\n-                recurse=recurse,\n-            ):\n-                yield x\n-\n-\n-def fetchParent(feature):\n-    if not hasattr(feature, "_parent") or feature._parent is None:\n-        return feature\n-    else:\n-        return fetchParent(feature._parent)\n-\n-\n-def 
feature_test_true(feature, **kwargs):\n-    return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n-    if "type" in kwargs:\n-        return str(feature.type).upper() == str(kwargs["type"]).upper()\n-    elif "types" in kwargs:\n-      for x in kwargs["types"]:\n-        if str(feature.type).upper() == str(x).upper():\n-          return True\n-      return False\n-    raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n-        # feature.location.end,\n-        # feature.location.strand\n-        # )\n-    return result\n-\n-\n-def get_gff3_id(gene):\n-    return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n-    # This prevents frameshift errors\n-    while start < 0:\n-        start += 3\n-    while end < 0:\n-        end += 3\n-    while start > parent_length:\n-        start -= 3\n-    while end > parent_length:\n-        end -= 3\n-    return (start, end)\n-\n-\n-def coding_genes(feature_list):\n-    for x in genes(feature_list):\n-        if (\n-            len(\n-                list(\n-                    feature_lambda(\n-                        x.sub_features,\n-                        feature_test_type,\n-                        {"type": "CDS"},\n-                        subfeatures=False,\n-                    )\n-                )\n-            )\n-            > 0\n-        ):\n-            yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n-    """\n-    Simple filter to extract gene features from the feature set.\n-    """\n-\n-    if not sort:\n-        for x in feature_lambda(\n-            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n-        ):\n-            yield x\n-    else:\n-        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n-        data = sorted(data, key=lambda feature: feature.location.start)\n-        for x in data:\n-            yield 
x\n-\n-\n-def wa_unified_product_name(feature):\n-    """\n-    Try and figure out a name. We gave conflicting instructions, so\n-    this isn\'t as trivial as it should be. Sometimes it will be in\n-    \'product\' or \'Product\', othertimes in \'Name\'\n-    """\n-    # Manually applied tags.\n-    protein_product = feature.qualifiers.get(\n-        "product", feature.qualifiers.get("Product", [None])\n-    )[0]\n-\n-    # If neither of those are available ...\n-    if protein_product is None:\n-        # And there\'s a name...\n-        if "Name" in feature.qualifiers:\n-            if not is_uuid(feature.qualifiers["Name"][0]):\n-                protein_product = feature.qualifiers["Name"][0]\n-\n-    return protein_product\n-\n-\n-def is_uuid(name):\n-    return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n-    # Normal RBS annotation types\n-    rbs_rbs = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n-        )\n-    )\n-    rbs_sds = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "Shine_Dalgarno_sequence"},\n-            subfeatures=False,\n-        )\n-    )\n-    # Fraking apollo\n-    apollo_exons = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n-        )\n-    )\n-    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n-    # These are more NCBI\'s style\n-    regulatory_elements = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "regulatory"},\n-            subfeatures=False,\n-        )\n-    )\n-    rbs_regulatory = list(\n-        feature_lambda(\n-            regulatory_elements,\n-            feature_test_quals,\n-            {"regulatory_class": ["ribosome_binding_site"]},\n-            subfeatures=False,\n-        )\n-    
)\n-    # Here\'s hoping you find just one ;)\n-    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n-    """\n-    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n-    """\n-    name = record.id\n-    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n-    if len(likely_parental_contig) == 1:\n-        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n-    return name\n-\n-\n-def fsort(it):\n-    for i in sorted(it, key=lambda x: int(x.location.start)):\n-        yield i\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/gff3_fix_sixpack.py
--- a/cpt_fix_sixpack/gff3_fix_sixpack.py Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-import sys
-import logging
-import argparse
-from CPT_GFFParser import gffParse, gffWrite
-from Bio.SeqFeature import SeqFeature
-from gff3 import feature_lambda, feature_test_type
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger(__name__)
-
-
-def fixed_feature(rec):
-    # Get all gene features to remove the mRNAs from
-    for feature in feature_lambda(
-        rec.features, feature_test_type, {"type": "gene"}, subfeatures=True
-    ):
-        gene = feature
-        sub_features = []
-        # Filter out mRNA subfeatures, save other ones to new gene object.
-        for sf in feature_lambda(
-            feature.sub_features,
-            feature_test_type,
-            {"type": "mRNA"},
-            subfeatures=True,
-            invert=True,
-        ):
-            sf.qualifiers["Parent"] = gene.qualifiers["ID"]
-            sub_features.append(sf)
-        # override original subfeatures with our filtered list
-        gene.sub_features = sub_features
-        yield gene
-
-
-def gff_filter(gff3):
-    for rec in gffParse(gff3):
-        rec.features = sorted(list(fixed_feature(rec)), key=lambda x: x.location.start)
-        rec.annotations = {}
-        gffWrite([rec], sys.stdout)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Fix gene model from naive ORF caller")
-    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
-    args = parser.parse_args()
-    gff_filter(**vars(args))
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/gff3_fix_sixpack.xml
--- a/cpt_fix_sixpack/gff3_fix_sixpack.xml Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,38 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.gff3.fixsixpack" name="GFF3 Add Gene to CDS for Sixpack" version="19.1.0.0">
-  <description>Properly formats naive ORF caller output for Apollo</description>
-  <macros>
-    <import>macros.xml</import>
- <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-$__tool_directory__/gff3_fix_sixpack.py
-@INPUT_GFF@
-> $output]]></command>
-  <inputs>
-      <expand macro="gff3_input" />
-  </inputs>
-  <outputs>
-    <data format="gff3" name="output"/>
-  </outputs>
-  <tests>
-      <test>
- <param name="gff3_data" value="miro.gff3" />
- <output name="output" file="miro.6pfix.gff3" />
- </test>
- <test>
- <param name="gff3_data" value="miro.cds.gff3" />
- <output name="output" file="miro.cds6pfix.gff3" />
-        </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-This tool **strips ALL the mRNA features** in a GFF3 file. It specifically formats 
-the output of the naïve ORF call Sixpack such that it will be compatible with 
-Apollo via JBrowse.
-
-      ]]></help>
- <expand macro="citations" />
-</tool>
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/macros.xml
--- a/cpt_fix_sixpack/macros.xml Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,66 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="requirements">
- <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
- <yield/>
- </requirements>
- </xml>
- <xml name="genome_selector">
- <conditional name="reference_genome">
- <param name="reference_genome_source" type="select" label="Reference Genome">
- <option value="history" selected="True">From History</option>
- <option value="cached">Locally Cached</option>
- </param>
- <when value="cached">
- <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
- <options from_data_table="all_fasta"/>
- </param>
- </when>
- <when value="history">
- <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
- </when>
- </conditional>
- </xml>
- <xml name="gff3_input">
- <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
- </xml>
- <xml name="input/gff3+fasta">
- <expand macro="gff3_input" />
- <expand macro="genome_selector" />
- </xml>
- <token name="@INPUT_GFF@">
- "$gff3_data"
- </token>
- <token name="@INPUT_FASTA@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
- <token name="@GENOME_SELECTOR_PRE@">
-#if $reference_genome.reference_genome_source == 'history':
- ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
- </token>
- <token name="@GENOME_SELECTOR@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
-        <xml name="input/fasta">
- <param label="Fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-
- <token name="@SEQUENCE@">
- "$sequences"
- </token>
- <xml name="input/fasta/protein">
- <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-</macros>
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/test-data/miro.6pfix.gff3
--- a/cpt_fix_sixpack/test-data/miro.6pfix.gff3 Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,820 +0,0 @@\n-##gff-version 3\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3975\t3979\t.\t-\t1\tAlias=Miro_4;ID=Miro_4.RBS;Name=Miro_4;Parent=Miro_4;\n-Miro\tfeature\tgene\t4038\t5334\t.\t-\t.\tID=Miro_5;\n-Miro\tGenBank\tCDS\t4038\t5324\t.\t-\t1\tID=Miro_5.CDS;Name=Miro_5;Note=T4 gp52-like;Parent=Miro_5;obsolete_name=Miro_159;product=DNA topoisomerase II medium subunit;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t5331\t5334\t.\t-\t1\tAlias=Miro_5;ID=Miro_5.RBS;Name=Miro_5;Parent=Miro_5;\n-Miro\tfeature\tgene\t5324\t7231\t.\t-\t.\tID=Miro_6;\n-Miro\tGenBank\tCDS\t5324\t7222\t.\t-\t1\tID=Miro_6.CDS;Name=Miro_6;Note=T4 gp39-like;Parent=Miro_6;obsolete_name=Miro_158;product=DNA topoisomerase II%2C large 
subunit;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7228\t7231\t.\t-\t1\tAlias=Miro_6;ID=Miro_6.RBS;Name=Miro_6;Parent=Miro_6;\n-Miro\tfeature\tgene\t7290\t7465\t.\t-\t.\tID=Miro_7;\n-Miro\tGenBank\tCDS\t7290\t7454\t.\t-\t1\tID=Miro_7.CDS;Name=Miro_7;Note=contains zinc ribbon domain;Parent=Miro_7;obsolete_name=Miro_157;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7463\t7465\t.\t-\t1\tAlias=Miro_7;ID=Miro_7.RBS;Name=Miro_7;Parent=Miro_7;\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n-Miro\tfeature\tgene\t8488\t8779\t.\t-\t.\tID=Miro_10;\n-Miro\tGenBank\tCDS\t8488\t8766\t.\t-\t1\tID=Miro_10.CDS;Name=Miro_10;Parent=Miro_10;obsolete_name=Miro_154;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8776\t8779\t.\t-\t1\tAlias=Miro_10;ID=Miro_10.RBS;Name=Miro_10;Parent=Miro_10;\n-Miro\tfeature\tgene\t8763\t9370\t.\t-\t.\tID=Miro_11;\n-Miro\tGenBank\tCDS\t8763\t9359\t.\t-\t1\tID=Miro_11.CDS;Name=Miro_11;Parent=Miro_11;obsolete_name=Miro_153;product=hypothetical conserved;tmhelix=1 TMD (10-29) N out%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t9367\t9370\t.\t-\t1\tAlias=Miro_11;ID=Miro_11.RBS;Name=Miro_11;Parent=Miro_11;\n-Miro\tfeature\tgene\t9399\t9740\t.\t-\t.\tID=Miro_12;\n-Miro\tGenBank\tCDS\t9399\t9728\t.\t-\t1\tID=Miro_12.CDS;Name=Miro_12;Parent=Miro_12;obsolete_name=Miro_152;product=hypothetical 
conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t9736\t9740\t.\t-\t1\tAlias=Miro_12;ID=Miro_12.RBS;Name=Miro_12;Parent=Miro_12;\n-Miro\tfeature\tgene\t9788\t10025\t.\t-\t.\tID=Miro_13;\n-Miro\tGenBank\tCDS\t9788\t10012\t.\t-\t1\tID=Miro_13.CDS;Name=Miro_13;Parent=Miro_13;obsolete_name=Miro_151;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t10021\t10025\t.\t-\t1\tAlias=Miro_13;ID=Miro_13.RBS;Name=Miro_13;Parent=Miro_13;\n-Miro\tfeature\tgene\t10086\t10361\t.\t-\t.\tID=Miro_14;\n-Miro\tGenBank\tCDS\t10086\t10349\t.\t-\t1\tID=Miro_14.CDS;Na'..b'product=tail fiber;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t166968\t166971\t.\t+\t1\tID=Miro_264.RBS;Name=Miro_264;Parent=Miro_264;\n-Miro\tfeature\tgene\t167645\t171466\t.\t+\t.\tID=Miro_265;\n-Miro\tGenBank\tCDS\t167654\t171466\t.\t+\t1\tAlias=Miro_265;ID=Miro_265.CDS;Name=Miro_265;Parent=Miro_265;obsolete_name=Miro_175;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t167645\t167648\t.\t+\t1\tID=Miro_265.RBS;Name=Miro_265;Parent=Miro_265;\n-Miro\tfeature\tgene\t171490\t172030\t.\t+\t.\tID=Miro_266;\n-Miro\tGenBank\tCDS\t171503\t172030\t.\t+\t1\tAlias=Miro_266;ID=Miro_266.CDS;Name=Miro_266;Note=T2 gp38-like;Parent=Miro_266;obsolete_name=Miro_174;product=receptor-recognizing protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t171490\t171494\t.\t+\t1\tID=Miro_266.RBS;Name=Miro_266;Parent=Miro_266;\n-Miro\tfeature\tgene\t172135\t172794\t.\t+\t.\tID=Miro_267;\n-Miro\tGenBank\tCDS\t172147\t172794\t.\t+\t1\tAlias=Miro_267;ID=Miro_267.CDS;Name=Miro_267;Note=T4 gpT-like;Parent=Miro_267;obsolete_name=Miro_173;product=holin;tmhelix=1 TMD (29-47) N in%2C C out;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t172135\t172137\t.\t+\t1\tID=Miro_267.RBS;Name=Miro_267;Parent=Miro_267;\n-Miro\tfeature\tgene\t172795\t173074\t.\t-\t.\tID=Miro_268;\n-Miro\tGenBank\tCDS\t172795\t173064\t.\t-\t1\tID=Miro_268.CDS;Name=Miro_268;Parent=Miro_268;obsolete_name=Miro_172;product=hypothetical 
conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t173071\t173074\t.\t-\t1\tAlias=Miro_268;ID=Miro_268.RBS;Name=Miro_268;Parent=Miro_268;\n-Miro\tfeature\tgene\t173061\t173361\t.\t-\t.\tID=Miro_269;\n-Miro\tGenBank\tCDS\t173061\t173351\t.\t-\t1\tID=Miro_269.CDS;Name=Miro_269;Parent=Miro_269;obsolete_name=Miro_171;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t173358\t173361\t.\t-\t1\tAlias=Miro_269;ID=Miro_269.RBS;Name=Miro_269;Parent=Miro_269;\n-Miro\tfeature\tgene\t173351\t174160\t.\t-\t.\tID=Miro_270;\n-Miro\tGenBank\tCDS\t173351\t174148\t.\t-\t1\tID=Miro_270.CDS;Name=Miro_270;Parent=Miro_270;obsolete_name=Miro_170;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t174156\t174160\t.\t-\t1\tAlias=Miro_270;ID=Miro_270.RBS;Name=Miro_270;Parent=Miro_270;\n-Miro\tfeature\tgene\t174229\t174467\t.\t-\t.\tID=Miro_271;\n-Miro\tGenBank\tCDS\t174229\t174456\t.\t-\t1\tID=Miro_271.CDS;Name=Miro_271;Parent=Miro_271;obsolete_name=Miro_169;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t174464\t174467\t.\t-\t1\tAlias=Miro_271;ID=Miro_271.RBS;Name=Miro_271;Parent=Miro_271;\n-Miro\tfeature\tgene\t174453\t174796\t.\t-\t.\tID=Miro_272;\n-Miro\tGenBank\tCDS\t174453\t174785\t.\t-\t1\tID=Miro_272.CDS;Name=Miro_272;Parent=Miro_272;obsolete_name=Miro_168;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t174793\t174796\t.\t-\t1\tAlias=Miro_272;ID=Miro_272.RBS;Name=Miro_272;Parent=Miro_272;\n-Miro\tfeature\tgene\t174760\t174903\t.\t-\t.\tID=Miro_273;\n-Miro\tGenBank\tCDS\t174760\t174894\t.\t-\t1\tID=Miro_273.CDS;Name=Miro_273;Parent=Miro_273;obsolete_name=Miro_167;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t174900\t174903\t.\t-\t1\tAlias=Miro_273;ID=Miro_273.RBS;Name=Miro_273;Parent=Miro_273;\n-Miro\tfeature\tgene\t174939\t175197\t.\t-\t.\tID=Miro_274;\n-Miro\tGenBank\tCDS\t174939\t175187\t.\t-\t1\tID=Miro_274.CDS;Name=Miro_274;Note=contains FCH and colicin 
domains;Parent=Miro_274;obsolete_name=Miro_166;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t175195\t175197\t.\t-\t1\tAlias=Miro_274;ID=Miro_274.RBS;Name=Miro_274;Parent=Miro_274;\n-Miro\tfeature\tgene\t175224\t175675\t.\t-\t.\tID=Miro_275;\n-Miro\tGenBank\tCDS\t175224\t175661\t.\t-\t1\tID=Miro_275.CDS;Name=Miro_275;Parent=Miro_275;obsolete_name=Miro_165;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t175673\t175675\t.\t-\t1\tAlias=Miro_275;ID=Miro_275.RBS;Name=Miro_275;Parent=Miro_275;\n-Miro\tfeature\tgene\t175636\t176021\t.\t-\t.\tID=Miro_276;\n-Miro\tGenBank\tCDS\t175636\t176010\t.\t-\t1\tID=Miro_276.CDS;Name=Miro_276;Parent=Miro_276;obsolete_name=Miro_164;product=hypothetical conserved;tmhelix=2TMDs (44-66%2C 76-98) N in%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t176017\t176021\t.\t-\t1\tAlias=Miro_276;ID=Miro_276.RBS;Name=Miro_276;Parent=Miro_276;\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/test-data/miro.cds.gff3
--- a/cpt_fix_sixpack/test-data/miro.cds.gff3 Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,278 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;obsolete_name=Miro_047;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;obsolete_name=Miro_240;product=baseplate structural protein;\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;obsolete_name=Miro_239;product=baseplate structural protein;\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;obsolete_name=Miro_238;product=baseplate structural protein;\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C in;\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;obsolete_name=Miro_163;product=rIIb;\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;obsolete_name=Miro_162;product=rIIa;\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT 
domain;obsolete_name=Miro_160;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t4038\t5324\t.\t-\t1\tID=Miro_5.CDS;Name=Miro_5;Note=T4 gp52-like;obsolete_name=Miro_159;product=DNA topoisomerase II medium subunit;\n-Miro\tGenBank\tCDS\t5324\t7222\t.\t-\t1\tID=Miro_6.CDS;Name=Miro_6;Note=T4 gp39-like;obsolete_name=Miro_158;product=DNA topoisomerase II%2C large subunit;\n-Miro\tGenBank\tCDS\t7290\t7454\t.\t-\t1\tID=Miro_7.CDS;Name=Miro_7;Note=contains zinc ribbon domain;obsolete_name=Miro_157;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t124222\t124968\t.\t+\t1\tAlias=Miro_207;ID=Miro_207.CDS;Name=Miro_207;Note=T4 gp14-like;obsolete_name=Miro_233;product=neck protein;\n-Miro\tGenBank\tCDS\t173061\t173351\t.\t-\t1\tID=Miro_269.CDS;Name=Miro_269;obsolete_name=Miro_171;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t66523\t66807\t.\t-\t1\tID=Miro_112.CDS;Name=Miro_112;obsolete_name=Miro_052;product=hypothetical conserved;signal=signal peptidase I cleavage site 17-18;\n-Miro\tGenBank\tCDS\t66879\t67163\t.\t-\t1\tID=Miro_113.CDS;Name=Miro_113;obsolete_name=Miro_051;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t44575\t45732\t.\t+\t1\tAlias=Miro_69;ID=Miro_69.CDS;Name=Miro_69;Note=T4 RNA ligase 1-like;obsolete_name=Miro_095;product=RNA ligase;\n-Miro\tGenBank\tCDS\t44229\t44594\t.\t+\t1\tAlias=Miro_68;ID=Miro_68.CDS;Name=Miro_68;Note=contains GIY-YIG domain;obsolete_name=Miro_096;product=homing endonuclease;\n-Miro\tGenBank\tCDS\t43905\t44225\t.\t+\t1\tID=Miro_67;Name=Miro_67;obsolete_name=Miro_097;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t42758\t43933\t.\t+\t1\tAlias=Miro_66;ID=Miro_66.CDS;Name=Miro_66;Note=T4 NrdB-like;obsolete_name=Miro_098;product=ribonucleotide reductase%2C small subunit;\n-Miro\tGenBank\tCDS\t40469\t42721\t.\t+\t1\tAlias=Miro_65;ID=Miro_65.CDS;Name=Miro_65;Note=T4 NrdA-like%3B E-val 0;obsolete_name=Miro_099;product=ribonucleotide reductase%2C large 
subunit;\n-Miro\tGenBank\tCDS\t39554\t40429\t.\t+\t1\tAlias=Miro_64;ID=Miro_64.CDS;Name=Miro_64;Note=T4-like;obsolete_name=Miro_100;product=thymidylate synthase;\n-Miro\tGenBank\tCDS\t38974\t39552\t.\t+\t1\tAlias=Miro_63;ID=Miro_63.CDS;Name=Miro_63;Note=T4-like;obsolete_name=Miro_101;product=dihydrofolate reductase;\n-Miro\tGenBank\tCDS\t38720'..b'ed;\n-Miro\tGenBank\tCDS\t9399\t9728\t.\t-\t1\tID=Miro_12.CDS;Name=Miro_12;obsolete_name=Miro_152;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t9788\t10012\t.\t-\t1\tID=Miro_13.CDS;Name=Miro_13;obsolete_name=Miro_151;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t8488\t8766\t.\t-\t1\tID=Miro_10.CDS;Name=Miro_10;obsolete_name=Miro_154;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t8763\t9359\t.\t-\t1\tID=Miro_11.CDS;Name=Miro_11;obsolete_name=Miro_153;product=hypothetical conserved;tmhelix=1 TMD (10-29) N out%2C C in;\n-Miro\tGenBank\tCDS\t10737\t10961\t.\t-\t1\tID=Miro_16.CDS;Name=Miro_16;obsolete_name=Miro_148;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t10965\t11114\t.\t-\t1\tID=Miro_17.CDS;Name=Miro_17;obsolete_name=Miro_147;product=hypothetical conserved;tmhelix=1 TMD (10-32) N out%2C C in;\n-Miro\tGenBank\tCDS\t10086\t10349\t.\t-\t1\tID=Miro_14.CDS;Name=Miro_14;obsolete_name=Miro_150;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t10438\t10737\t.\t-\t1\tID=Miro_15.CDS;Name=Miro_15;obsolete_name=Miro_149;product=hypothetical conserved;tmhelix=1 TMD (4-21) N out%2C C in;\n-Miro\tGenBank\tCDS\t53923\t54144\t.\t+\t1\tAlias=Miro_92;ID=Miro_92.CDS;Name=Miro_92;obsolete_name=Miro_072;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t54141\t54755\t.\t+\t1\tAlias=Miro_93;ID=Miro_93.CDS;Name=Miro_93;Note=contains haloacid dehydrogenase (HAD)-like domain;obsolete_name=Miro_071;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t53221\t53661\t.\t+\t1\tAlias=Miro_90;ID=Miro_90.CDS;Name=Miro_90;Note=T4 Y12G-like;obsolete_name=Miro_074;product=hypothetical 
conserved;\n-Miro\tGenBank\tCDS\t53658\t53921\t.\t+\t1\tAlias=Miro_91;ID=Miro_91.CDS;Name=Miro_91;obsolete_name=Miro_073;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t57000\t57203\t.\t+\t1\tAlias=Miro_96;ID=Miro_96.CDS;Name=Miro_96;obsolete_name=Miro_068;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t57206\t57382\t.\t+\t1\tAlias=Miro_97;ID=Miro_97.CDS;Name=Miro_97;obsolete_name=Miro_067;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t54736\t56265\t.\t+\t1\tAlias=Miro_94;ID=Miro_94.CDS;Name=Miro_94;Note=T4-like;obsolete_name=Miro_070;product=DNA ligase;\n-Miro\tGenBank\tCDS\t56342\t56911\t.\t+\t1\tAlias=Miro_95;ID=Miro_95.CDS;Name=Miro_95;obsolete_name=Miro_069;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t57379\t57543\t.\t+\t1\tAlias=Miro_98;ID=Miro_98.CDS;Name=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C in;\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;obsolete_name=Miro_023;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;obsolete_name=Miro_018;product=hypothetical 
conserved;\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C in;\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/test-data/miro.cds6pfix.gff3
--- a/cpt_fix_sixpack/test-data/miro.cds6pfix.gff3 Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-##gff-version 3
b
diff -r 6a9b026cb378 -r efa4dfc23549 cpt_fix_sixpack/test-data/miro.gff3
--- a/cpt_fix_sixpack/test-data/miro.gff3 Fri May 20 08:48:25 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,827 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n-Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n-Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n-Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n-Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n-Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n-Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n-Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n-Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n-Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n-Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n-Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n-Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3.py Mon Jun 05 02:42:28 2023 +0000
[
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+    feature_list,\n+    test,\n+    test_kwargs,\n+    subfeatures=True,\n+    parent=None,\n+    invert=False,\n+    recurse=True,\n+):\n+    """Recursively search through features, testing each with a test function, yielding matches.\n+\n+    GFF3 is a hierachical data structure, so we need to be able to recursively\n+    search through features. E.g. if you\'re looking for a feature with\n+    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+    :type feature_list: list\n+    :param feature_list: an iterable of features\n+\n+    :type test: function reference\n+    :param test: a closure with the method signature (feature, **kwargs) where\n+                 the kwargs are those passed in the next argument. This\n+                 function should return True or False, True if the feature is\n+                 to be yielded as part of the main feature_lambda function, or\n+                 False if it is to be ignored. This function CAN mutate the\n+                 features passed to it (think "apply").\n+\n+    :type test_kwargs: dictionary\n+    :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+    :type subfeatures: boolean\n+    :param subfeatures: when a feature is matched, should just that feature be\n+                        yielded to the caller, or should the entire sub_feature\n+                        tree for that feature be included? 
subfeatures=True is\n+                        useful in cases such as searching for a gene feature,\n+                        and wanting to know what RBS/Shine_Dalgarno_sequences\n+                        are in the sub_feature tree (which can be accomplished\n+                        with two feature_lambda calls). subfeatures=False is\n+                        useful in cases when you want to process (and possibly\n+                        return) the entire feature tree, such as applying a\n+                        qualifier to every single feature.\n+\n+    :type invert: boolean\n+    :param invert: Negate/invert the result of the filter.\n+\n+    :rtype: yielded list\n+    :return: Yields a list of matching features.\n+    """\n+    # Either the top level set of [features] or the subfeature attribute\n+    for feature in feature_list:\n+        feature._parent = parent\n+        if not parent:\n+            # Set to self so we cannot go above root.\n+            feature._parent = feature\n+        test_result = test(feature, **test_kwargs)\n+        # if (not invert and test_result) or (invert and not test_result):\n+        if invert ^ test_result:\n+            if not subfeatures:\n+                feature_copy = copy.deepcopy(feature)\n+                feature_copy.sub_features = list()\n+                yield feature_copy\n+            else:\n+                yield feature\n+\n+        if recurse and hasattr(feature, "sub_features"):\n+            for x in feature_lambda(\n+                feature.sub_features,\n+                test,\n+                test_kwargs,\n+                subfeatures=subfeatures,\n+                parent=feature,\n+                invert=invert,\n+                recurse=recurse,\n+            ):\n+                yield x\n+\n+\n+def fetchParent(feature):\n+    if not hasattr(feature, "_parent") or feature._parent is None:\n+        return feature\n+    else:\n+        return fetchParent(feature._parent)\n+\n+\n+def 
feature_test_true(feature, **kwargs):\n+    return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+    if "type" in kwargs:\n+        return str(feature.type).upper() == str(kwargs["type"]).upper()\n+    elif "types" in kwargs:\n+        for x in kwargs["types"]:\n+            if str(feature.type).upper() == str(x).upper():\n+                return True\n+        return False\n+    raise Exception("Incorrect feature_test'..b'feature.location.start,\n+        # feature.location.end,\n+        # feature.location.strand\n+        # )\n+    return result\n+\n+\n+def get_gff3_id(gene):\n+    return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+    # This prevents frameshift errors\n+    while start < 0:\n+        start += 3\n+    while end < 0:\n+        end += 3\n+    while start > parent_length:\n+        start -= 3\n+    while end > parent_length:\n+        end -= 3\n+    return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+    for x in genes(feature_list):\n+        if (\n+            len(\n+                list(\n+                    feature_lambda(\n+                        x.sub_features,\n+                        feature_test_type,\n+                        {"type": "CDS"},\n+                        subfeatures=False,\n+                    )\n+                )\n+            )\n+            > 0\n+        ):\n+            yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+    """\n+    Simple filter to extract gene features from the feature set.\n+    """\n+\n+    if not sort:\n+        for x in feature_lambda(\n+            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+        ):\n+            yield x\n+    else:\n+        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+        data = sorted(data, key=lambda feature: feature.location.start)\n+        for x in data:\n+            yield 
x\n+\n+\n+def wa_unified_product_name(feature):\n+    """\n+    Try and figure out a name. We gave conflicting instructions, so\n+    this isn\'t as trivial as it should be. Sometimes it will be in\n+    \'product\' or \'Product\', othertimes in \'Name\'\n+    """\n+    # Manually applied tags.\n+    protein_product = feature.qualifiers.get(\n+        "product", feature.qualifiers.get("Product", [None])\n+    )[0]\n+\n+    # If neither of those are available ...\n+    if protein_product is None:\n+        # And there\'s a name...\n+        if "Name" in feature.qualifiers:\n+            if not is_uuid(feature.qualifiers["Name"][0]):\n+                protein_product = feature.qualifiers["Name"][0]\n+\n+    return protein_product\n+\n+\n+def is_uuid(name):\n+    return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+    # Normal RBS annotation types\n+    rbs_rbs = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+        )\n+    )\n+    rbs_sds = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "Shine_Dalgarno_sequence"},\n+            subfeatures=False,\n+        )\n+    )\n+    # Fraking apollo\n+    apollo_exons = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n+        )\n+    )\n+    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+    # These are more NCBI\'s style\n+    regulatory_elements = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "regulatory"},\n+            subfeatures=False,\n+        )\n+    )\n+    rbs_regulatory = list(\n+        feature_lambda(\n+            regulatory_elements,\n+            feature_test_quals,\n+            {"regulatory_class": ["ribosome_binding_site"]},\n+            subfeatures=False,\n+        )\n+    
)\n+    # Here\'s hoping you find just one ;)\n+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+    """\n+    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+    """\n+    name = record.id\n+    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+    if len(likely_parental_contig) == 1:\n+        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+    return name\n+\n+\n+def fsort(it):\n+    for i in sorted(it, key=lambda x: int(x.location.start)):\n+        yield i\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 gff3_fix_sixpack.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_fix_sixpack.py Mon Jun 05 02:42:28 2023 +0000
[
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+import sys
+import logging
+import argparse
+from CPT_GFFParser import gffParse, gffWrite
+from Bio.SeqFeature import SeqFeature
+from gff3 import feature_lambda, feature_test_type
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def fixed_feature(rec):
+    # Get all gene features to remove the mRNAs from
+    for feature in feature_lambda(
+        rec.features, feature_test_type, {"type": "gene"}, subfeatures=True
+    ):
+        gene = feature
+        sub_features = []
+        # Filter out mRNA subfeatures, save other ones to new gene object.
+        for sf in feature_lambda(
+            feature.sub_features,
+            feature_test_type,
+            {"type": "mRNA"},
+            subfeatures=True,
+            invert=True,
+        ):
+            sf.qualifiers["Parent"] = gene.qualifiers["ID"]
+            sub_features.append(sf)
+        # override original subfeatures with our filtered list
+        gene.sub_features = sub_features
+        yield gene
+
+
+def gff_filter(gff3):
+    for rec in gffParse(gff3):
+        rec.features = sorted(list(fixed_feature(rec)), key=lambda x: x.location.start)
+        rec.annotations = {}
+        gffWrite([rec], sys.stdout)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Fix gene model from naive ORF caller")
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
+    args = parser.parse_args()
+    gff_filter(**vars(args))
b
diff -r 6a9b026cb378 -r efa4dfc23549 gff3_fix_sixpack.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_fix_sixpack.xml Mon Jun 05 02:42:28 2023 +0000
[
@@ -0,0 +1,37 @@
+<tool id="edu.tamu.cpt.gff3.fixsixpack" name="GFF3 Add Gene to CDS for Sixpack" version="19.1.0.0">
+  <description>Properly formats naive ORF caller output for Apollo</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <!-- gff3_fix_sixpack.py imports CPT_GFFParser, which the "requirements" macro
+       in macros.xml does not provide, so it is appended here through the
+       macro's yield slot. The version is the one pinned in cpt-macros.xml. -->
+  <expand macro="requirements">
+    <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+  </expand>
+  <command detect_errors="aggressive"><![CDATA[
+'$__tool_directory__/gff3_fix_sixpack.py'
+@INPUT_GFF@
+> '$output']]></command>
+  <inputs>
+    <expand macro="gff3_input"/>
+  </inputs>
+  <outputs>
+    <data format="gff3" name="output"/>
+  </outputs>
+  <tests>
+    <!-- Full gene models: genes are kept and re-sorted by start position. -->
+    <test>
+      <param name="gff3_data" value="miro.gff3"/>
+      <output name="output" file="miro.6pfix.gff3"/>
+    </test>
+    <!-- CDS-only input: the expected output file holds only the GFF3 header. -->
+    <test>
+      <param name="gff3_data" value="miro.cds.gff3"/>
+      <output name="output" file="miro.cds6pfix.gff3"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+This tool **strips ALL the mRNA features** in a GFF3 file. It specifically formats
+the output of the naïve ORF caller Sixpack such that it will be compatible with
+Apollo via JBrowse.
+
+Only ``gene`` features, together with the sub-features that remain beneath them,
+are written to the output; features that do not belong to a gene are omitted.
+
+      ]]></help>
+  <expand macro="citations"/>
+</tool>
b
diff -r 6a9b026cb378 -r efa4dfc23549 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:42:28 2023 +0000
b
@@ -0,0 +1,74 @@
+<macros>
+    <!-- Base package requirements shared by tools that import this file. A tool
+         can append further requirement elements where the yield element sits.
+         NOTE(review): cpt_gffparser is not listed here although
+         gff3_fix_sixpack.py imports CPT_GFFParser; a tool relying on this macro
+         has to supply that package itself. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Version token; gff3_fix_sixpack.xml hard-codes its own version instead. -->
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <!-- Citation macros, each wrapping a single DOI. -->
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <!-- XMFA alignment input: the token is the quoted dataset path for the
+         command line, the xml macro is the matching data parameter. -->
+    <token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <!-- FASTA sequences that accompany an XMFA alignment (token + parameter). -->
+    <token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <!-- Conditional that lets the user take the reference FASTA either from the
+         history or from the all_fasta data table ("Locally Cached"). -->
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- GFF3 annotation input; this is the parameter gff3_fix_sixpack.xml expands. -->
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <!-- Convenience macro: GFF3 input plus the reference genome selector. -->
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <!-- Quoted path of the gff3_data dataset for use in a command block. -->
+    <token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <!-- Reference FASTA path: the cached index path, or genomeref.fa when the
+         genome comes from the history (the link is created by
+         GENOME_SELECTOR_PRE below). -->
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Command prefix that symlinks a history FASTA to genomeref.fa.
+         NOTE(review): this comparison omits the str() wrapper that the two
+         sibling tokens use; confirm that is intentional. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <!-- Same body as the INPUT_FASTA token above. -->
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>
b
diff -r 6a9b026cb378 -r efa4dfc23549 test-data/miro.6pfix.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.6pfix.gff3 Mon Jun 05 02:42:28 2023 +0000
b
b'@@ -0,0 +1,820 @@\n+##gff-version 3\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3975\t3979\t.\t-\t1\tAlias=Miro_4;ID=Miro_4.RBS;Name=Miro_4;Parent=Miro_4;\n+Miro\tfeature\tgene\t4038\t5334\t.\t-\t.\tID=Miro_5;\n+Miro\tGenBank\tCDS\t4038\t5324\t.\t-\t1\tID=Miro_5.CDS;Name=Miro_5;Note=T4 gp52-like;Parent=Miro_5;obsolete_name=Miro_159;product=DNA topoisomerase II medium subunit;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t5331\t5334\t.\t-\t1\tAlias=Miro_5;ID=Miro_5.RBS;Name=Miro_5;Parent=Miro_5;\n+Miro\tfeature\tgene\t5324\t7231\t.\t-\t.\tID=Miro_6;\n+Miro\tGenBank\tCDS\t5324\t7222\t.\t-\t1\tID=Miro_6.CDS;Name=Miro_6;Note=T4 gp39-like;Parent=Miro_6;obsolete_name=Miro_158;product=DNA topoisomerase II%2C large 
subunit;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7228\t7231\t.\t-\t1\tAlias=Miro_6;ID=Miro_6.RBS;Name=Miro_6;Parent=Miro_6;\n+Miro\tfeature\tgene\t7290\t7465\t.\t-\t.\tID=Miro_7;\n+Miro\tGenBank\tCDS\t7290\t7454\t.\t-\t1\tID=Miro_7.CDS;Name=Miro_7;Note=contains zinc ribbon domain;Parent=Miro_7;obsolete_name=Miro_157;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7463\t7465\t.\t-\t1\tAlias=Miro_7;ID=Miro_7.RBS;Name=Miro_7;Parent=Miro_7;\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n+Miro\tfeature\tgene\t8488\t8779\t.\t-\t.\tID=Miro_10;\n+Miro\tGenBank\tCDS\t8488\t8766\t.\t-\t1\tID=Miro_10.CDS;Name=Miro_10;Parent=Miro_10;obsolete_name=Miro_154;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8776\t8779\t.\t-\t1\tAlias=Miro_10;ID=Miro_10.RBS;Name=Miro_10;Parent=Miro_10;\n+Miro\tfeature\tgene\t8763\t9370\t.\t-\t.\tID=Miro_11;\n+Miro\tGenBank\tCDS\t8763\t9359\t.\t-\t1\tID=Miro_11.CDS;Name=Miro_11;Parent=Miro_11;obsolete_name=Miro_153;product=hypothetical conserved;tmhelix=1 TMD (10-29) N out%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t9367\t9370\t.\t-\t1\tAlias=Miro_11;ID=Miro_11.RBS;Name=Miro_11;Parent=Miro_11;\n+Miro\tfeature\tgene\t9399\t9740\t.\t-\t.\tID=Miro_12;\n+Miro\tGenBank\tCDS\t9399\t9728\t.\t-\t1\tID=Miro_12.CDS;Name=Miro_12;Parent=Miro_12;obsolete_name=Miro_152;product=hypothetical 
conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t9736\t9740\t.\t-\t1\tAlias=Miro_12;ID=Miro_12.RBS;Name=Miro_12;Parent=Miro_12;\n+Miro\tfeature\tgene\t9788\t10025\t.\t-\t.\tID=Miro_13;\n+Miro\tGenBank\tCDS\t9788\t10012\t.\t-\t1\tID=Miro_13.CDS;Name=Miro_13;Parent=Miro_13;obsolete_name=Miro_151;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t10021\t10025\t.\t-\t1\tAlias=Miro_13;ID=Miro_13.RBS;Name=Miro_13;Parent=Miro_13;\n+Miro\tfeature\tgene\t10086\t10361\t.\t-\t.\tID=Miro_14;\n+Miro\tGenBank\tCDS\t10086\t10349\t.\t-\t1\tID=Miro_14.CDS;Na'..b'product=tail fiber;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t166968\t166971\t.\t+\t1\tID=Miro_264.RBS;Name=Miro_264;Parent=Miro_264;\n+Miro\tfeature\tgene\t167645\t171466\t.\t+\t.\tID=Miro_265;\n+Miro\tGenBank\tCDS\t167654\t171466\t.\t+\t1\tAlias=Miro_265;ID=Miro_265.CDS;Name=Miro_265;Parent=Miro_265;obsolete_name=Miro_175;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t167645\t167648\t.\t+\t1\tID=Miro_265.RBS;Name=Miro_265;Parent=Miro_265;\n+Miro\tfeature\tgene\t171490\t172030\t.\t+\t.\tID=Miro_266;\n+Miro\tGenBank\tCDS\t171503\t172030\t.\t+\t1\tAlias=Miro_266;ID=Miro_266.CDS;Name=Miro_266;Note=T2 gp38-like;Parent=Miro_266;obsolete_name=Miro_174;product=receptor-recognizing protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t171490\t171494\t.\t+\t1\tID=Miro_266.RBS;Name=Miro_266;Parent=Miro_266;\n+Miro\tfeature\tgene\t172135\t172794\t.\t+\t.\tID=Miro_267;\n+Miro\tGenBank\tCDS\t172147\t172794\t.\t+\t1\tAlias=Miro_267;ID=Miro_267.CDS;Name=Miro_267;Note=T4 gpT-like;Parent=Miro_267;obsolete_name=Miro_173;product=holin;tmhelix=1 TMD (29-47) N in%2C C out;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t172135\t172137\t.\t+\t1\tID=Miro_267.RBS;Name=Miro_267;Parent=Miro_267;\n+Miro\tfeature\tgene\t172795\t173074\t.\t-\t.\tID=Miro_268;\n+Miro\tGenBank\tCDS\t172795\t173064\t.\t-\t1\tID=Miro_268.CDS;Name=Miro_268;Parent=Miro_268;obsolete_name=Miro_172;product=hypothetical 
conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t173071\t173074\t.\t-\t1\tAlias=Miro_268;ID=Miro_268.RBS;Name=Miro_268;Parent=Miro_268;\n+Miro\tfeature\tgene\t173061\t173361\t.\t-\t.\tID=Miro_269;\n+Miro\tGenBank\tCDS\t173061\t173351\t.\t-\t1\tID=Miro_269.CDS;Name=Miro_269;Parent=Miro_269;obsolete_name=Miro_171;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t173358\t173361\t.\t-\t1\tAlias=Miro_269;ID=Miro_269.RBS;Name=Miro_269;Parent=Miro_269;\n+Miro\tfeature\tgene\t173351\t174160\t.\t-\t.\tID=Miro_270;\n+Miro\tGenBank\tCDS\t173351\t174148\t.\t-\t1\tID=Miro_270.CDS;Name=Miro_270;Parent=Miro_270;obsolete_name=Miro_170;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t174156\t174160\t.\t-\t1\tAlias=Miro_270;ID=Miro_270.RBS;Name=Miro_270;Parent=Miro_270;\n+Miro\tfeature\tgene\t174229\t174467\t.\t-\t.\tID=Miro_271;\n+Miro\tGenBank\tCDS\t174229\t174456\t.\t-\t1\tID=Miro_271.CDS;Name=Miro_271;Parent=Miro_271;obsolete_name=Miro_169;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t174464\t174467\t.\t-\t1\tAlias=Miro_271;ID=Miro_271.RBS;Name=Miro_271;Parent=Miro_271;\n+Miro\tfeature\tgene\t174453\t174796\t.\t-\t.\tID=Miro_272;\n+Miro\tGenBank\tCDS\t174453\t174785\t.\t-\t1\tID=Miro_272.CDS;Name=Miro_272;Parent=Miro_272;obsolete_name=Miro_168;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t174793\t174796\t.\t-\t1\tAlias=Miro_272;ID=Miro_272.RBS;Name=Miro_272;Parent=Miro_272;\n+Miro\tfeature\tgene\t174760\t174903\t.\t-\t.\tID=Miro_273;\n+Miro\tGenBank\tCDS\t174760\t174894\t.\t-\t1\tID=Miro_273.CDS;Name=Miro_273;Parent=Miro_273;obsolete_name=Miro_167;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t174900\t174903\t.\t-\t1\tAlias=Miro_273;ID=Miro_273.RBS;Name=Miro_273;Parent=Miro_273;\n+Miro\tfeature\tgene\t174939\t175197\t.\t-\t.\tID=Miro_274;\n+Miro\tGenBank\tCDS\t174939\t175187\t.\t-\t1\tID=Miro_274.CDS;Name=Miro_274;Note=contains FCH and colicin 
domains;Parent=Miro_274;obsolete_name=Miro_166;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t175195\t175197\t.\t-\t1\tAlias=Miro_274;ID=Miro_274.RBS;Name=Miro_274;Parent=Miro_274;\n+Miro\tfeature\tgene\t175224\t175675\t.\t-\t.\tID=Miro_275;\n+Miro\tGenBank\tCDS\t175224\t175661\t.\t-\t1\tID=Miro_275.CDS;Name=Miro_275;Parent=Miro_275;obsolete_name=Miro_165;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t175673\t175675\t.\t-\t1\tAlias=Miro_275;ID=Miro_275.RBS;Name=Miro_275;Parent=Miro_275;\n+Miro\tfeature\tgene\t175636\t176021\t.\t-\t.\tID=Miro_276;\n+Miro\tGenBank\tCDS\t175636\t176010\t.\t-\t1\tID=Miro_276.CDS;Name=Miro_276;Parent=Miro_276;obsolete_name=Miro_164;product=hypothetical conserved;tmhelix=2TMDs (44-66%2C 76-98) N in%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t176017\t176021\t.\t-\t1\tAlias=Miro_276;ID=Miro_276.RBS;Name=Miro_276;Parent=Miro_276;\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 test-data/miro.cds.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.cds.gff3 Mon Jun 05 02:42:28 2023 +0000
b
b'@@ -0,0 +1,278 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;obsolete_name=Miro_047;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;obsolete_name=Miro_240;product=baseplate structural protein;\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;obsolete_name=Miro_239;product=baseplate structural protein;\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;obsolete_name=Miro_238;product=baseplate structural protein;\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C in;\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;obsolete_name=Miro_163;product=rIIb;\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;obsolete_name=Miro_162;product=rIIa;\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT 
domain;obsolete_name=Miro_160;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t4038\t5324\t.\t-\t1\tID=Miro_5.CDS;Name=Miro_5;Note=T4 gp52-like;obsolete_name=Miro_159;product=DNA topoisomerase II medium subunit;\n+Miro\tGenBank\tCDS\t5324\t7222\t.\t-\t1\tID=Miro_6.CDS;Name=Miro_6;Note=T4 gp39-like;obsolete_name=Miro_158;product=DNA topoisomerase II%2C large subunit;\n+Miro\tGenBank\tCDS\t7290\t7454\t.\t-\t1\tID=Miro_7.CDS;Name=Miro_7;Note=contains zinc ribbon domain;obsolete_name=Miro_157;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t124222\t124968\t.\t+\t1\tAlias=Miro_207;ID=Miro_207.CDS;Name=Miro_207;Note=T4 gp14-like;obsolete_name=Miro_233;product=neck protein;\n+Miro\tGenBank\tCDS\t173061\t173351\t.\t-\t1\tID=Miro_269.CDS;Name=Miro_269;obsolete_name=Miro_171;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t66523\t66807\t.\t-\t1\tID=Miro_112.CDS;Name=Miro_112;obsolete_name=Miro_052;product=hypothetical conserved;signal=signal peptidase I cleavage site 17-18;\n+Miro\tGenBank\tCDS\t66879\t67163\t.\t-\t1\tID=Miro_113.CDS;Name=Miro_113;obsolete_name=Miro_051;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t44575\t45732\t.\t+\t1\tAlias=Miro_69;ID=Miro_69.CDS;Name=Miro_69;Note=T4 RNA ligase 1-like;obsolete_name=Miro_095;product=RNA ligase;\n+Miro\tGenBank\tCDS\t44229\t44594\t.\t+\t1\tAlias=Miro_68;ID=Miro_68.CDS;Name=Miro_68;Note=contains GIY-YIG domain;obsolete_name=Miro_096;product=homing endonuclease;\n+Miro\tGenBank\tCDS\t43905\t44225\t.\t+\t1\tID=Miro_67;Name=Miro_67;obsolete_name=Miro_097;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t42758\t43933\t.\t+\t1\tAlias=Miro_66;ID=Miro_66.CDS;Name=Miro_66;Note=T4 NrdB-like;obsolete_name=Miro_098;product=ribonucleotide reductase%2C small subunit;\n+Miro\tGenBank\tCDS\t40469\t42721\t.\t+\t1\tAlias=Miro_65;ID=Miro_65.CDS;Name=Miro_65;Note=T4 NrdA-like%3B E-val 0;obsolete_name=Miro_099;product=ribonucleotide reductase%2C large 
subunit;\n+Miro\tGenBank\tCDS\t39554\t40429\t.\t+\t1\tAlias=Miro_64;ID=Miro_64.CDS;Name=Miro_64;Note=T4-like;obsolete_name=Miro_100;product=thymidylate synthase;\n+Miro\tGenBank\tCDS\t38974\t39552\t.\t+\t1\tAlias=Miro_63;ID=Miro_63.CDS;Name=Miro_63;Note=T4-like;obsolete_name=Miro_101;product=dihydrofolate reductase;\n+Miro\tGenBank\tCDS\t38720'..b'ed;\n+Miro\tGenBank\tCDS\t9399\t9728\t.\t-\t1\tID=Miro_12.CDS;Name=Miro_12;obsolete_name=Miro_152;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t9788\t10012\t.\t-\t1\tID=Miro_13.CDS;Name=Miro_13;obsolete_name=Miro_151;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t8488\t8766\t.\t-\t1\tID=Miro_10.CDS;Name=Miro_10;obsolete_name=Miro_154;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t8763\t9359\t.\t-\t1\tID=Miro_11.CDS;Name=Miro_11;obsolete_name=Miro_153;product=hypothetical conserved;tmhelix=1 TMD (10-29) N out%2C C in;\n+Miro\tGenBank\tCDS\t10737\t10961\t.\t-\t1\tID=Miro_16.CDS;Name=Miro_16;obsolete_name=Miro_148;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t10965\t11114\t.\t-\t1\tID=Miro_17.CDS;Name=Miro_17;obsolete_name=Miro_147;product=hypothetical conserved;tmhelix=1 TMD (10-32) N out%2C C in;\n+Miro\tGenBank\tCDS\t10086\t10349\t.\t-\t1\tID=Miro_14.CDS;Name=Miro_14;obsolete_name=Miro_150;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t10438\t10737\t.\t-\t1\tID=Miro_15.CDS;Name=Miro_15;obsolete_name=Miro_149;product=hypothetical conserved;tmhelix=1 TMD (4-21) N out%2C C in;\n+Miro\tGenBank\tCDS\t53923\t54144\t.\t+\t1\tAlias=Miro_92;ID=Miro_92.CDS;Name=Miro_92;obsolete_name=Miro_072;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t54141\t54755\t.\t+\t1\tAlias=Miro_93;ID=Miro_93.CDS;Name=Miro_93;Note=contains haloacid dehydrogenase (HAD)-like domain;obsolete_name=Miro_071;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t53221\t53661\t.\t+\t1\tAlias=Miro_90;ID=Miro_90.CDS;Name=Miro_90;Note=T4 Y12G-like;obsolete_name=Miro_074;product=hypothetical 
conserved;\n+Miro\tGenBank\tCDS\t53658\t53921\t.\t+\t1\tAlias=Miro_91;ID=Miro_91.CDS;Name=Miro_91;obsolete_name=Miro_073;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t57000\t57203\t.\t+\t1\tAlias=Miro_96;ID=Miro_96.CDS;Name=Miro_96;obsolete_name=Miro_068;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t57206\t57382\t.\t+\t1\tAlias=Miro_97;ID=Miro_97.CDS;Name=Miro_97;obsolete_name=Miro_067;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t54736\t56265\t.\t+\t1\tAlias=Miro_94;ID=Miro_94.CDS;Name=Miro_94;Note=T4-like;obsolete_name=Miro_070;product=DNA ligase;\n+Miro\tGenBank\tCDS\t56342\t56911\t.\t+\t1\tAlias=Miro_95;ID=Miro_95.CDS;Name=Miro_95;obsolete_name=Miro_069;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t57379\t57543\t.\t+\t1\tAlias=Miro_98;ID=Miro_98.CDS;Name=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C in;\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;obsolete_name=Miro_023;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;obsolete_name=Miro_018;product=hypothetical 
conserved;\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C in;\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n'
b
diff -r 6a9b026cb378 -r efa4dfc23549 test-data/miro.cds6pfix.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.cds6pfix.gff3 Mon Jun 05 02:42:28 2023 +0000
b
@@ -0,0 +1,1 @@
+##gff-version 3
b
diff -r 6a9b026cb378 -r efa4dfc23549 test-data/miro.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/miro.gff3 Mon Jun 05 02:42:28 2023 +0000
b
b'@@ -0,0 +1,827 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n+Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n+Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n+Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n+Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n+Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n+Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n+Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n+Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n+Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n+Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n+Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n+Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n'