Repository 'cpt_gff_extract_seq'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_gff_extract_seq

Changeset 4:34b80e483fb8 (2023-06-05)
Previous changeset 3:73390562b5a2 (2022-05-20) Next changeset 5:ab40380424c9 (2023-07-23)
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
added:
cpt-macros.xml
gff3.py
gff3_extract_sequence.py
gff3_extract_sequence.xml
macros.xml
test-data/Miro_ExtSeqIn.fa
test-data/Miro_ExtSeqIn.gff3
test-data/Miro_ExtSeqOut1.fa
test-data/Miro_ExtSeqOut2.fa
test-data/T7_ExtSeqIn.fasta
test-data/T7_ExtSeqIn.gff3
test-data/T7_ExtSeqOut.fasta
removed:
cpt_gff_extract_seq/cpt-macros.xml
cpt_gff_extract_seq/gff3.py
cpt_gff_extract_seq/gff3_extract_sequence.py
cpt_gff_extract_seq/gff3_extract_sequence.xml
cpt_gff_extract_seq/macros.xml
cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.fa
cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.gff3
cpt_gff_extract_seq/test-data/Miro_ExtSeqOut1.fa
cpt_gff_extract_seq/test-data/Miro_ExtSeqOut2.fa
cpt_gff_extract_seq/test-data/T7_ExtSeqIn.fasta
cpt_gff_extract_seq/test-data/T7_ExtSeqIn.gff3
cpt_gff_extract_seq/test-data/T7_ExtSeqOut.fasta
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:43:58 2023 +0000
[
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/cpt-macros.xml
--- a/cpt_gff_extract_seq/cpt-macros.xml Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation> 
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/gff3.py
--- a/cpt_gff_extract_seq/gff3.py Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n-    feature_list,\n-    test,\n-    test_kwargs,\n-    subfeatures=True,\n-    parent=None,\n-    invert=False,\n-    recurse=True,\n-):\n-    """Recursively search through features, testing each with a test function, yielding matches.\n-\n-    GFF3 is a hierachical data structure, so we need to be able to recursively\n-    search through features. E.g. if you\'re looking for a feature with\n-    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n-    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n-    :type feature_list: list\n-    :param feature_list: an iterable of features\n-\n-    :type test: function reference\n-    :param test: a closure with the method signature (feature, **kwargs) where\n-                 the kwargs are those passed in the next argument. This\n-                 function should return True or False, True if the feature is\n-                 to be yielded as part of the main feature_lambda function, or\n-                 False if it is to be ignored. This function CAN mutate the\n-                 features passed to it (think "apply").\n-\n-    :type test_kwargs: dictionary\n-    :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n-    :type subfeatures: boolean\n-    :param subfeatures: when a feature is matched, should just that feature be\n-                        yielded to the caller, or should the entire sub_feature\n-                        tree for that feature be included? 
subfeatures=True is\n-                        useful in cases such as searching for a gene feature,\n-                        and wanting to know what RBS/Shine_Dalgarno_sequences\n-                        are in the sub_feature tree (which can be accomplished\n-                        with two feature_lambda calls). subfeatures=False is\n-                        useful in cases when you want to process (and possibly\n-                        return) the entire feature tree, such as applying a\n-                        qualifier to every single feature.\n-\n-    :type invert: boolean\n-    :param invert: Negate/invert the result of the filter.\n-\n-    :rtype: yielded list\n-    :return: Yields a list of matching features.\n-    """\n-    # Either the top level set of [features] or the subfeature attribute\n-    for feature in feature_list:\n-        feature._parent = parent\n-        if not parent:\n-            # Set to self so we cannot go above root.\n-            feature._parent = feature\n-        test_result = test(feature, **test_kwargs)\n-        # if (not invert and test_result) or (invert and not test_result):\n-        if invert ^ test_result:\n-            if not subfeatures:\n-                feature_copy = copy.deepcopy(feature)\n-                feature_copy.sub_features = list()\n-                yield feature_copy\n-            else:\n-                yield feature\n-\n-        if recurse and hasattr(feature, "sub_features"):\n-            for x in feature_lambda(\n-                feature.sub_features,\n-                test,\n-                test_kwargs,\n-                subfeatures=subfeatures,\n-                parent=feature,\n-                invert=invert,\n-                recurse=recurse,\n-            ):\n-                yield x\n-\n-\n-def fetchParent(feature):\n-    if not hasattr(feature, "_parent") or feature._parent is None:\n-        return feature\n-    else:\n-        return fetchParent(feature._parent)\n-\n-\n-def 
feature_test_true(feature, **kwargs):\n-    return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n-    if "type" in kwargs:\n-        return str(feature.type).upper() == str(kwargs["type"]).upper()\n-    elif "types" in kwargs:\n-      for x in kwargs["types"]:\n-        if str(feature.type).upper() == str(x).upper():\n-          return True\n-      return False\n-    raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n-        # feature.location.end,\n-        # feature.location.strand\n-        # )\n-    return result\n-\n-\n-def get_gff3_id(gene):\n-    return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n-    # This prevents frameshift errors\n-    while start < 0:\n-        start += 3\n-    while end < 0:\n-        end += 3\n-    while start > parent_length:\n-        start -= 3\n-    while end > parent_length:\n-        end -= 3\n-    return (start, end)\n-\n-\n-def coding_genes(feature_list):\n-    for x in genes(feature_list):\n-        if (\n-            len(\n-                list(\n-                    feature_lambda(\n-                        x.sub_features,\n-                        feature_test_type,\n-                        {"type": "CDS"},\n-                        subfeatures=False,\n-                    )\n-                )\n-            )\n-            > 0\n-        ):\n-            yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n-    """\n-    Simple filter to extract gene features from the feature set.\n-    """\n-\n-    if not sort:\n-        for x in feature_lambda(\n-            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n-        ):\n-            yield x\n-    else:\n-        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n-        data = sorted(data, key=lambda feature: feature.location.start)\n-        for x in data:\n-            yield 
x\n-\n-\n-def wa_unified_product_name(feature):\n-    """\n-    Try and figure out a name. We gave conflicting instructions, so\n-    this isn\'t as trivial as it should be. Sometimes it will be in\n-    \'product\' or \'Product\', othertimes in \'Name\'\n-    """\n-    # Manually applied tags.\n-    protein_product = feature.qualifiers.get(\n-        "product", feature.qualifiers.get("Product", [None])\n-    )[0]\n-\n-    # If neither of those are available ...\n-    if protein_product is None:\n-        # And there\'s a name...\n-        if "Name" in feature.qualifiers:\n-            if not is_uuid(feature.qualifiers["Name"][0]):\n-                protein_product = feature.qualifiers["Name"][0]\n-\n-    return protein_product\n-\n-\n-def is_uuid(name):\n-    return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n-    # Normal RBS annotation types\n-    rbs_rbs = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n-        )\n-    )\n-    rbs_sds = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "Shine_Dalgarno_sequence"},\n-            subfeatures=False,\n-        )\n-    )\n-    # Fraking apollo\n-    apollo_exons = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n-        )\n-    )\n-    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n-    # These are more NCBI\'s style\n-    regulatory_elements = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "regulatory"},\n-            subfeatures=False,\n-        )\n-    )\n-    rbs_regulatory = list(\n-        feature_lambda(\n-            regulatory_elements,\n-            feature_test_quals,\n-            {"regulatory_class": ["ribosome_binding_site"]},\n-            subfeatures=False,\n-        )\n-    
)\n-    # Here\'s hoping you find just one ;)\n-    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n-    """\n-    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n-    """\n-    name = record.id\n-    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n-    if len(likely_parental_contig) == 1:\n-        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n-    return name\n-\n-\n-def fsort(it):\n-    for i in sorted(it, key=lambda x: int(x.location.start)):\n-        yield i\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/gff3_extract_sequence.py
--- a/cpt_gff_extract_seq/gff3_extract_sequence.py Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,275 +0,0 @@\n-#!/usr/bin/env python\n-import sys\n-import argparse\n-import logging\n-import uuid\n-from CPT_GFFParser import gffParse, gffWrite\n-from Bio import SeqIO\n-from Bio.Seq import Seq\n-from Bio.SeqRecord import SeqRecord\n-from Bio.SeqFeature import FeatureLocation, CompoundLocation\n-from gff3 import feature_lambda, feature_test_type, get_id\n-\n-logging.basicConfig(level=logging.INFO)\n-log = logging.getLogger(__name__)\n-\n-\n-def main(fasta, gff3, feature_filter=None, nodesc=False):\n-    if feature_filter == "nice_cds":\n-        from gff2gb import gff3_to_genbank as cpt_Gff2Gbk\n-        \n-\n-        for rec in cpt_Gff2Gbk(gff3, fasta, 11):\n-            seenList = {}\n-            if rec.seq[0] == "?":\n-                sys.stderr.write("Error: No Fasta ID matches GFF ID \'" + rec.id + "\'\\n")\n-                exit(1)\n-            for feat in sorted(rec.features, key=lambda x: x.location.start):\n-                if feat.type != "CDS":\n-                    continue\n-\n-                ind = 0\n-                if (\n-                    str(\n-                        feat.qualifiers.get("locus_tag", get_id(feat)).replace(" ", "-")\n-                    )\n-                    in seenList.keys()\n-                ):\n-                    seenList[\n-                        str(\n-                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n-                                " ", "-"\n-                            )\n-                        )\n-                    ] += 1\n-                    ind = seenList[\n-                        str(\n-                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n-                                " ", "-"\n-                            )\n-                        )\n-                    ]\n-                else:\n-                    seenList[\n-                        str(\n-                            feat.qualifiers.get("locus_tag", 
get_id(feat)).replace(\n-                                " ", "-"\n-                            )\n-                        )\n-                    ] = 1\n-                append = ""\n-                if ind != 0:\n-                    append = "_" + str(ind)\n-\n-                if nodesc:\n-                    description = ""\n-                else:\n-                    feat.qualifiers["ID"] = [feat._ID]\n-                    product = feat.qualifiers.get("product", "")\n-                    description = "{1} [Location={0.location};ID={0.qualifiers[ID][0]}]".format(\n-                        feat, product\n-                    )\n-                yield [\n-                    SeqRecord(\n-                        feat.extract(rec).seq,\n-                        id=str(\n-                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n-                                " ", "-"\n-                            )\n-                        )\n-                        + append,\n-                        description=description,\n-                    )\n-                ]\n-\n-    elif feature_filter == "unique_cds":\n-        seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))\n-        seen_ids = {}\n-\n-        for rec in gffParse(gff3, base_dict=seq_dict):\n-            noMatch = True\n-            if "Alias" in rec.features[0].qualifiers.keys():\n-                lColumn = rec.features[0].qualifiers["Alias"][0]\n-            else:\n-                lColumn = ""\n-            for x in seq_dict:\n-                if x == rec.id or x == lColumn:\n-                    noMatch = False\n-            if noMatch:\n-                sys.stderr.write("Error: No Fasta ID matches GFF ID \'" + rec.id + "\'\\n")\n-                exit(1)\n-            newfeats = []\n-            for feat in sorted(\n-                feature_lambda(\n-                    rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False\n-                ),\n-               
 key=lambda f: f.location.start,\n-            ):\n-                nid = rec.id + "____" + feat.id\n-                if nid in seen_id'..b'umn = rec.features[0].qualifiers["Alias"][0]\n-            else:\n-                lColumn = ""\n-            for x in seq_dict:\n-                if x == rec.id or x == lColumn:\n-                    noMatch = False\n-            if noMatch:\n-                sys.stderr.write("Error: No Fasta ID matches GFF ID \'" + rec.id + "\'\\n")\n-                exit(1)\n-            for feat in sorted(\n-                feature_lambda(\n-                    rec.features,\n-                    feature_test_type,\n-                    {"type": feature_filter},\n-                    subfeatures=True,\n-                ),\n-                key=lambda f: f.location.start,\n-            ):\n-                id = feat.id\n-                if len(id) == 0:\n-                    id = get_id(feat)\n-\n-                if nodesc:\n-                    description = ""\n-                else:\n-                    if feat.strand == -1:\n-                      important_data = {"Location": FeatureLocation(feat.location.start + 1, feat.location.end - feat.phase, feat.strand)}\n-                    else:\n-                      important_data = {"Location": FeatureLocation(feat.location.start + 1 + feat.phase, feat.location.end, feat.strand)}\n-                    if "Name" in feat.qualifiers:\n-                        important_data["Name"] = feat.qualifiers.get("Name", [""])[0]\n-\n-                    description = "[{}]".format(\n-                        ";".join(\n-                            [\n-                                "{key}={value}".format(key=k, value=v)\n-                                for (k, v) in important_data.items()\n-                            ]\n-                        )\n-                    )\n-\n-                if isinstance(feat.location, CompoundLocation):\n-                  finSeq = ""\n-                  if 
feat.strand == -1:\n-                    for x in feat.location.parts:\n-                      finSeq += str((rec.seq[x.start: x.end - feat.phase]).reverse_complement())\n-                  else:\n-                    for x in feat.location.parts:\n-                      finSeq += str(rec.seq[x.start + feat.phase: x.end])\n-                  yield [\n-                    SeqRecord(\n-                        Seq(finSeq),\n-                        id=id.replace(" ", "-"),\n-                        description=description,\n-                    )\n-                  ]\n-\n-                else:\n-\n-                  if feat.strand == -1:\n-                    yield [\n-                      SeqRecord(\n-                          seq=Seq(str(rec.seq[feat.location.start: feat.location.end - feat.phase])).reverse_complement(),\n-                          id=id.replace(" ", "-"),\n-                          description=description,\n-                      )\n-                    ]\n-                  else:\n-                    yield [\n-                      SeqRecord(\n-                          #feat.extract(rec).seq,\n-                          seq=Seq(str(rec.seq[feat.location.start + feat.phase: feat.location.end])),\n-                          id=id.replace(" ", "-"),\n-                          description=description,\n-                      )\n-                    ]\n-\n-\n-if __name__ == "__main__":\n-    parser = argparse.ArgumentParser(\n-        description="Export corresponding sequence in genome from GFF3", epilog=""\n-    )\n-    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")\n-    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")\n-    parser.add_argument(\n-        "--feature_filter", default=None, help="Filter for specific feature types"\n-    )\n-    parser.add_argument(\n-        "--nodesc", action="store_true", help="Strip description field off"\n-    )\n-    args = parser.parse_args()\n-    
for seq in main(**vars(args)):\n-        #if isinstance(seq, list):\n-        #  for x in seq:\n-        #    print(type(x.seq))\n-        #    SeqIO.write(x, sys.stdout, "fasta")\n-        #else:\n-          SeqIO.write(seq, sys.stdout, "fasta")\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/gff3_extract_sequence.xml
--- a/cpt_gff_extract_seq/gff3_extract_sequence.xml Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.gff3.export_seq" name="GFF3 Feature Sequence Export" version="19.1.0.0">
-  <description>Export corresponding sequence in genome from GFF3</description>
-  <macros>
-    <import>macros.xml</import>
- <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-@GENOME_SELECTOR_PRE@
-
-$__tool_directory__/gff3_extract_sequence.py
-@GENOME_SELECTOR@
-
-@INPUT_GFF@
-
-#if $feature_filter and $feature_filter is not None:
---feature_filter $feature_filter
-#end if
-$nodesc
-> $default]]></command>
-  <inputs>
-    <expand macro="genome_selector" />
-    <expand macro="gff3_input" />
- <param label="Filter for specific feature types" name="feature_filter" type="text"
- help="Use 'nice_cds' if your features are coming from Apollo, however this will fail on non-Apollo data"/>
-    <param label="Remove description (use if blasting)" name="nodesc" type="boolean" truevalue="--nodesc" falsevalue=""/>
-  </inputs>
-  <outputs>
-    <data format="fasta" hidden="false" name="default"/>
-  </outputs>
-  <tests>
- <test>
- <param name="reference_genome_source" value="history" />
- <param name="genome_fasta" value="T7_ExtSeqIn.fasta" />
- <param name="gff3_data" value="T7_ExtSeqIn.gff3" />
- <param name="nodesc" value="" />
- <param name="feature_filter" value="CDS" />
- <output name="output" file="T7_ExtSeqOut.fasta" />
- </test>
-                <test>
- <param name="reference_genome_source" value="history" />
- <param name="genome_fasta" value="Miro_ExtSeqIn.fa" />
- <param name="gff3_data" value="Miro_ExtSeqIn.gff3" />
- <param name="nodesc" value="" />
- <param name="feature_filter" value="CDS" />
- <output name="output" file="Miro_ExtSeqOut1.fa" />
- </test>
- <test>
- <param name="reference_genome_source" value="history" />
- <param name="genome_fasta" value="Miro_ExtSeqIn.fa" />
- <param name="gff3_data" value="Miro_ExtSeqIn.gff3" />
- <param name="feature_filter" value="CDS" />
- <param name="nodesc" value="--nodesc" />
- <output name="output" file="Miro_ExtSeqOut2.fa" />
- </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-From the FASTA sequence for a genome, extracts the *nucleotide* sequences for 
-all CDSs in an input GFF3 and outputs them as a multi-FASTA formatted file.
-
-The filter for specific feature types was designed for data retrieved from Apollo. 
-Using ‘unique_cds’ extracts the sequence for all CDS. Using ‘nice_cds’ will extract 
-the sequence only for CDS features with a start codon (recommended). If a gene from
-Apollo has multiple CDSs, the tool will append a "_##" to the end of subsequent genes.
-
-      ]]></help>
- <expand macro="citations" />
-</tool>
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/macros.xml
--- a/cpt_gff_extract_seq/macros.xml Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,62 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="requirements">
- <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
- <yield/>
- </requirements>
- </xml>
- <token name="@BLAST_TSV@">
- "$blast_tsv"
- </token>
- <xml name="blast_tsv">
- <param label="Blast Results" help="TSV/tabular (25 Column)"
- name="blast_tsv" type="data" format="tabular" />
- </xml>
-
- <token name="@BLAST_XML@">
- "$blast_xml"
- </token>
- <xml name="blast_xml">
- <param label="Blast Results" help="XML format"
- name="blast_xml" type="data" format="blastxml" />
- </xml>
- <xml name="gff3_with_fasta">
- <param label="Genome Sequences" name="fasta" type="data" format="fasta" />
- <param label="Genome Annotations" name="gff3" type="data" format="gff3" />
- </xml>
- <xml name="genome_selector">
-     <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
- </xml>
- <xml name="gff3_input">
- <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
- </xml>
- <xml name="input/gff3+fasta">
- <expand macro="gff3_input" />
- <expand macro="genome_selector" />
- </xml>
- <token name="@INPUT_GFF@">
- "$gff3_data"
- </token>
- <token name="@INPUT_FASTA@">
- genomeref.fa
- </token>
- <token name="@GENOME_SELECTOR_PRE@">
- ln -s $genome_fasta genomeref.fa;
- </token>
- <token name="@GENOME_SELECTOR@">
- genomeref.fa
- </token>
-        <xml name="input/fasta">
- <param label="Fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-
- <token name="@SEQUENCE@">
- "$sequences"
- </token>
- <xml name="input/fasta/protein">
- <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-</macros>
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.fa
--- a/cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.fa Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2936 +0,0 @@\n->Miro\n-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n-CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n-AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n-GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n-TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n-TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n-TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n-GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n-CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n-CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n-AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n-AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n-TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n-TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n-AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n-CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n-GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n-TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n-TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n-TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n-CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n-TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n-TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n-TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n-AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n-ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n-ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n-GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n-GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n-ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n-GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n-ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n-AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n-TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n-AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n-GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n-TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n-ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n-ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n-TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n-CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n-GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n-TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n-ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n-GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n-GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n-ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n-ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n-
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n-ACGG'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.gff3
--- a/cpt_gff_extract_seq/test-data/Miro_ExtSeqIn.gff3 Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,827 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n-Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n-Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n-Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n-Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n-Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n-Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n-Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n-Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n-Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n-Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n-Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n-Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/Miro_ExtSeqOut1.fa
--- a/cpt_gff_extract_seq/test-data/Miro_ExtSeqOut1.fa Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,3212 +0,0 @@\n->Miro_1.CDS [Location=[1:899](-);Name=Miro_1]\n-TGGTCAAAATTCTCGACGAAGTACAACAGAAAGCTATCTATAATGGATGGTTGACGGGGG\n-CAAGTAAAACAGCACTGGCCCAACAGTTCAACGTAAGCGCCCGAACCATTGGGCGCGTTA\n-TCAACCGCAAGTTAGCGGAATTCCCTAAGAAAAGGGATGTAAAACAAAAGCCTAAAGTAT\n-CAGACACTGTACCGCGTATGATCGGATCTGAATCGTTCATTACGGTTGTATACGAAGGGC\n-GCGTTTTCATGGCGGGTGAAACACATCCGAACTTTAAGAAAGCGCATGAAATGCTGAAAG\n-CTGGTGATGTCAAAGGTGCTGTAACTTGTTTGGATACTCAAGAAGCGATCCGAACCTATA\n-GCAAAGGCAACATTAAAATCATTGGTCATCAGCTTCTATATAAAGATGTAGTGTTTGATT\n-CTGATATCACTCAACGAATCATTCGCGAAATGTATAACGATCGTCCGTATGAACATCTGG\n-TGAACTTCTTTGAACGGTTGATGCGAAACCCTTCACGCGATGCTGTATACCAGCTTTATG\n-GGTTCCTTGTACATAATGACATTGAACTGACCGATGATGGTTGTTTCCTTGCCTGGAAAC\n-GTGTGCGCGATAACTATAAAGATCTCGCTACTGGCAAATTTGATAATAGTCCTGGTGTGA\n-CTGTCTCCATGCCTCGAAACATGGTAGACGAAGATAAAACGCGCACCTGTTCTACTGGCC\n-TACACGTTGCGGCTAAATCATACCTTCCACACTACGGGGGCGGTGTTGGTAGAGTCATTC\n-AGGTAAAGGTGGATCCTGCTGACGTGGTAGCGATCCCAGTGGATTACAATAACGCAAAAA\n-TGCGTGTATGCCGTTATAAAGTCATGATTGATGTTACATATGGTTTTAGCCATTACTAA\n->Miro_2.CDS 
[Location=[900:3160](-);Name=Miro_2]\n-TGAAATTACGCGAAGATACACCGCAAACCATCTACCGTGTTGGTACAGATACCAGCAAGG\n-GACGAAACAAACTATCTCTACGCGCAAACAAGAAAGCGTATAAGATCCTTTCTTCGACAG\n-TATACAAATACAAGATCCGCGCAATCATTCGCGAACTGTCTTGTAATGCGATTGATGGAC\n-ACAAAGAAGCGGGAAATCAGAACCCGTTTGATGTCCAGTTGCCAACTGCTGTTGATCCTC\n-GTTTTGTTATTCGTGACTACGGGATCGGTATGTCTCCTGATTTCGTTAGTGATGCGTTTA\n-CCGTTTACTTCGAATCAACTAAAAATGATTCAAACGACCTGATCGGTTCTATGGGTCTGG\n-GTTGCAAATCTCCGCTTTGCTATTCCGATGCTTTCACGGTGGAATCCGTAAAAGACGGTA\n-TCAAATGCGGTTACACAATCTATATGGATGATGGGGAACCTTTCTGCGATCCTCTGTATG\n-AGATTGAAAGCGATGAACCTAACGGGGTTACTATCACTGTTCCGGTTAAGGTTGAAGATA\n-TCAAAGAATGGGAAAACGAAGCAGCAAGGGTATACGAATCATTTACTGATATTCGTCCTA\n-ACTTTGTTGGTGCTTCTATTCTCAAAATCAACTATCAGCCGAAAGAAGCAACCAACGATA\n-GCGGGGTGATCCGTCATAAATCAGCATACACTAGCGGTGTATATGCTCGCATGGGTAACA\n-TCATTTATCCTCTGGATAAAGATTTGTATGATACCTCGATGTTCTATTGCTATACAGAAA\n-GCCAGTATACGTATATTATTGACTTCCCGATCGGTGAACTTGATTTCATGCCTTCTCGCG\n-AAGAGTTGAGTATGGATAAAATGACAGTAGGGATTGTTAAAGAACGTCTGAAACAAATCA\n-GCCGGGTATATTTCAATCGAGTTAAATCAGAATTCGATAAGTTACAAACCGTGCGTGATA\n-AACTGACGTGGTTCCATTCTCTGCCTTCAATGGTACAAAACTTTGTTGGTAAAGATGCTA\n-ATTTTCGCATCAATGGTGATTCTATTGGCTGGATCCATAGCGAACTAGTCAAACCAAACA\n-AATATAATGATGATTATGTTGCTGGGTATTGGGCGAACGAATACGACGGTAAAGATGCAT\n-GGTATCAGGTAACTGGTAGCGGAGGCCGCTGGAGCAAGTATAAGCCAGAAACAACCAAAC\n-GCCAGGACATCACACGCATTTATTATCCGTGGAAACAGAAGAAACTCATTCTGTTAAAAG\n-TGGATACCAACACGGTTAAACCGTATATCGTCGGGTATGCTAAAATGCATAACCTAAGCC\n-GTGTTAGCTTTGTTGCATATTACGATAGTGATTCAAAGCGTGAGATTGTTAATGATATCG\n-TTCGGAAAGGTCATTTTGATGAATCGGAAATTGTGTATCTCCGTACCAGTGAAATGACAA\n-AAGAAAAAGAGATCTATGATGCAGATCGCGAAAAATCAAAAGCGCTATATGCACCGAAAA\n-ATTCAGAACCGCGCCCAAAAACCCCGACCGTATATCGTTATGAGTTGGATAGTAACGGCA\n-ATCTGGCAAAAACTTCTCTGTTTATGACAAAATCAGAGTTTTTATCACTGGATAAAGCCC\n-CAGGTGTTCGGTTGTATGGTATTGATGAATATAGCCGCTTAGATGGTGAAAATTCCGGGT\n-TGTCTATGGATTCAGCAATGAAAGAATCTACTTTATCGCGTATCATGCGCCATACTGGGA\n-TCCCTGTTGTGTTTGCTATACGTAACAGCCTCTGGAAGTGGATCCCGGATTCAAATCTGG\n-TATGCTTTGATGATATGCTGTGCAAGCAGTATGTTAAATCTGAAAAGGCATTGAAAGATA\n-ATTGCTTG
CCTGGATGGATTGGGAAGGATCACTCAACAGAAACAGACGCATTGCATAGCC\n-GTTTCGGTGTATCGCTTGATCGGATTGTGAAAAACCGATATAATGAAAAACTGTATAAGA\n-TTGTTGATACCCTCGAACGTATAGTGTGTCTGGAAGGGTATGAGAAAGATGGTAAAAATC\n-TTTCCAGGGTGCGTTGTCCGATTCTGCGTGAGTCCGTAGCATCAATGCGCGTTAAACGCA\n-GCAATATGAAAAAACGTGTTGATCAGGCATGGGAAATCTTTAAATCACTAAACCCGTTAC\n-TGGCTTCACTTGTTGAACATAGCGATTCTTATTCAATTCGTCCGATTTACAACAACGAAA\n-AGAATCTCAGTGAGTTTAAGAAATTAATACGGTGGAAATAA\n->Miro_3.CDS [Location=[3172:3407](-);Name=Miro_3]\n-TGACACGTTATAATGCGCCTAAGTTGGGAAAATATCTGACCATATTCGGATTTTGTGCGT\n-TCTTTTCTGTCATTATCGGGGCTATCGTGTGGGGTATCCTTGATATGAAGAAACAGCAGG\n-TCGAGGAAGAAAAATTGGTAAAATTCCTTGACACCTATTGTGAAGTAGTAGAATATGGTC\n-TGAACAAAAAGCCAACGAAGTACTCTTGTGACCAAGTTATTTTTAATGTTAAGTGA\n->Miro_4.CDS [Location=[3412:3965](-);Name=Miro_4]\n-TGTTGAAAACTCGTAGTGCAATGGAAGTAACCCACTGGGCGCGTACCATGATGGAAAAAC\n-ACGGTTTGATCAGTAATGGTTGGACATTCCGGATCAATGGACGCATTACAAAAACTCTGG\n-GCCGTTGCAGTTATACCAAAAAACTGATCGAACTGTCTGGTCGTCATGTTGCGGAAGATA\n-TCTACGAGGATATTTTAGACACTCTTTTACACGAAATCGCTCATGCTCTTGTCGGGCGCG\n-GTTATGAACATGGTAAAGTGTG'..b'GAATACACGA\n-ATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCGACACGTG\n-ATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAACTACTATT\n-CTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATGAAGATAA\n-ACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGA\n->Miro_268.CDS [Location=[172795:173063](-);Name=Miro_268]\n-TGACGTTCGATGAGGCTATGAGAGCCGCTAAGAGAGGTTGTAAGGTTGCGCCAATAGGTA\n-GGCCATTCTTCGTGTATTATCACGACAGCGCTGGATACAGGAGGGTTACGTTGGGGGTTT\n-CGGCAAGGAATGAAGATTATGTTCCGGATCCGTGGGACAAATTGAAAGTTTGGGATATTT\n-ACGAGAAAAATAGCCCGGTGATGAACTTCTTTAAAGCAATCAAACGGGCTATTGGAAGAT\n-CACACAATTTAACACCAGCATTACGTTAA\n->Miro_269.CDS 
[Location=[173061:173350](-);Name=Miro_269]\n-TGACATACGAAGATATGCTTAATGAAGTGTGCGCGGGTAAAACCGCGTACCGCACTACCA\n-ATCCTGATATGATTGTGTTTCGTGAGGGGGATACAATCATCAGACGTACACACCGCAAGT\n-GTGAAGTAAACCAGGTATTCATTGCCACGGTAGAAGAACAAAAGGCTACTGATTGGGATA\n-TCGTCATTGACGAAGAAAACCACGATGAGCTAGATCATCCGATATTCATTTTGAGTGAAA\n-CTATCGGAATTCGACCATTCATGCATAGAAATTTTTGGGGGAATCCATGA\n->Miro_270.CDS [Location=[173351:174147](-);Name=Miro_270]\n-TGACTAACGCAACCGTAGTTACTACCACTTACTTCATTTCTTCTGGTCGTAAAAATGCAA\n-TGTATACCTTGCGTGTTGAGCGTAGAGGCGGCGGTTATACTTCTGATAACTATATCTGCA\n-ACTTATCTACCGATCCGGAGAAAGCGGAAGCGAAAGCGCGTGAGTATTTCGACCGTGTTT\n-CAGCCCGTTTAACTGAAACTGATACTTTCAAAATGATATTCCAGGGCTTTGCAGATTTCG\n-ATCTGTTCGAACGTCGCGGTAAACTGTCAGTGTTCGATACTGAAAAACTGGAACTGCTGG\n-AAAAAGGGATCATGCCGATTGGCAAGCGTAAAGGCGAAGTAATTGCAGAAATGCCAATGT\n-TTACTGTTCTTTGGTGGGCTGACCAGTCTAAAGAAGACAACAAAAACAGCCCTGTATTCG\n-ATGCTGTGTGCGCTTACTGCATGGGTGTTGCACTGGAAAAGGATTATATCGCCAAACGCG\n-AAGAAATCCGCGAACAGTGGGAACAAGAGCGCCAGGAACGCATTTCTAAAGCTAACCACA\n-TCGGTGAAATCAAACAACGTCTGGAAATGACCGGAACCGTTGAGAAAGTGATTTCACTGG\n-GATATACTCAAGTTTCTTACTACACTTCTGTAGAAAGATTCATGACCAAAATTAATGTTG\n-ATGGTAATGTTGTTGTTTACTTCGGTAATAACATTGCAAACGAAGGTGATGAAATCACTT\n-TCAAAGCAACCGTTAAAGAGCACGGAGAATACAAAGACGTTAAACAAACCATCGTTCAAC\n-GTGTAAAGGTTTTATAA\n->Miro_271.CDS [Location=[174229:174455](-);Name=Miro_271]\n-TGAGTATACAAATTCTGTTCAGTAAAGAAGATCCATTAGGTACGGAATCATCGGTTCAGC\n-GCTTATTCTGGCATACAGATTATATAAAAAATCGATGTCGTGGCCCAAACAACACATGGG\n-GAAGCAACAACGATATTGACAAAGCAAAAATTTATCCTCGCGGAAAGGCGTACTGGAAAA\n-TTTTTGATATTCATCTTCATAAAACAATGAAAGATTCGAATGAATGA\n->Miro_272.CDS [Location=[174453:174784](-);Name=Miro_272]\n-TGAAAACCGCTTTGATCATTCTTGAAGAAATTCCTGAAAATACAACGCTTTACAAAGTCG\n-AAACTGACGATAGCGAAGTAATTGAGATCCTCAAAACCGCCCACGGGCATTATGTAAACG\n-GTTCCGGCAACACCAAAGAACAGGATTATGCTGTTGATGTTGTTAACCTCATGCTGGGGC\n-CGAATACCGATGATAACCTGAAATGGGCGCGTGAATCCAACATTCCTGAAAAGTATGTTG\n-GTATGTTCTATCAGTGTAACATCGATAGCAAATCACCGTTTGAACCAGAAAAGAAAATCG\n-ATCTGATTGTTCGCACTGGGTTCTTCTTATGA\n->Miro_273.CDS 
[Location=[174760:174893](-);Name=Miro_273]\n-TGAATAATTTGGTAGCGAAACATGATTTCAACCGCGCTTCTACTCACCGCGATCGCAAGC\n-GAGCATTTAAAGAAGCAAAACGTAAACAGAAACACAAAGGTAAGTGTGATGAAAACCGCT\n-TTGATCATTCTTGA\n->Miro_274.CDS [Location=[174939:175186](-);Name=Miro_274]\n-TGAAACTGTTTAATAAAATTCGTTCTGTAGATGAAATCGTAGCAACTTTCGACAAAACCC\n-TGTCTGAACTGGAAGCGCGTATTGCTCACGATAATGAGCAGGTTGCACAGGTTGCAGCAG\n-ATCGTCAAGCAGCAGAAGAAGAACATCAGCGTAAGCTGGCGGAACTGGCTTCAAAAGAAA\n-GCGATCATACTGCAAGTGCAACCCGCGCCGGACGTATCGCAGACAAGATCCGCAAATTGC\n-TGGATTAA\n->Miro_275.CDS [Location=[175224:175660](-);Name=Miro_275]\n-TGAAAAGAATTCTGACAAGACTTGACATTCACAAATATGCAATTAAAATTGGTGACATGA\n-AACGCGGAACCTTCAAGAAATTCGAAGGCGTGGATCTGAAAGAAAACGGGTTTTACCTGG\n-TAGTTGATGATAAAACAGGGCGCGTAGTATTCCGTTTCTACGTAGCACCAAATCAACGCC\n-GTAAAACTGGTGCTAGTCGTACTCTGTTACGTATCAAGCGCCGCGAACACGAACCGTCTG\n-CGATGGTTGCGATGCTGGAAAGTGCTTCTCTGTATTATGTAGCACATGACAAAATCAAAA\n-ACCTGTTCGGTTTCCCGAACCTCAACGGATCAACGTTCGAAAGTGCTATTCAAATGCGTT\n-ATGGTGCTGGTGAAAACTACACCGAGTGGAACCGCGCACTGAATGACATTTATGATTTTG\n-AATTTCAACCATATTGA\n->Miro_276.CDS [Location=[175636:176009](-);Name=Miro_276]\n-TGAAATTTAAAAACATGCCTTCGGCTAACGAAACTCGCGCTAGTGGTGAATCTATTCTGA\n-AAACTGCCCGTGAAATGTGGGCCGACGACCGGATAGAATCCGGGAAAGGTTACGTATTTC\n-GTAAAGGATTTGGTGCATTTTATGTTGCAACCACCTTTATAGTAGGGTATAGTTGTTTAG\n-GAATCGCGTGGATGCAACGATATCTTGAACGTGCTGTTCTTTTTGTGATACGTTTTCTGT\n-TTACATTGATCAGCGTAGTGATTAGCATCCCATTTCTGAGCATCGCCTTGTTTGTTAACG\n-TAAAAACAGGCGAAATTACAAAATTAGGCAAACTGGTATCTAAAAAATATGAAAAGAATT\n-CTGACAAGACTTGA\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/Miro_ExtSeqOut2.fa
--- a/cpt_gff_extract_seq/test-data/Miro_ExtSeqOut2.fa Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3212 +0,0 @@\n->Miro_1.CDS\n-TGGTCAAAATTCTCGACGAAGTACAACAGAAAGCTATCTATAATGGATGGTTGACGGGGG\n-CAAGTAAAACAGCACTGGCCCAACAGTTCAACGTAAGCGCCCGAACCATTGGGCGCGTTA\n-TCAACCGCAAGTTAGCGGAATTCCCTAAGAAAAGGGATGTAAAACAAAAGCCTAAAGTAT\n-CAGACACTGTACCGCGTATGATCGGATCTGAATCGTTCATTACGGTTGTATACGAAGGGC\n-GCGTTTTCATGGCGGGTGAAACACATCCGAACTTTAAGAAAGCGCATGAAATGCTGAAAG\n-CTGGTGATGTCAAAGGTGCTGTAACTTGTTTGGATACTCAAGAAGCGATCCGAACCTATA\n-GCAAAGGCAACATTAAAATCATTGGTCATCAGCTTCTATATAAAGATGTAGTGTTTGATT\n-CTGATATCACTCAACGAATCATTCGCGAAATGTATAACGATCGTCCGTATGAACATCTGG\n-TGAACTTCTTTGAACGGTTGATGCGAAACCCTTCACGCGATGCTGTATACCAGCTTTATG\n-GGTTCCTTGTACATAATGACATTGAACTGACCGATGATGGTTGTTTCCTTGCCTGGAAAC\n-GTGTGCGCGATAACTATAAAGATCTCGCTACTGGCAAATTTGATAATAGTCCTGGTGTGA\n-CTGTCTCCATGCCTCGAAACATGGTAGACGAAGATAAAACGCGCACCTGTTCTACTGGCC\n-TACACGTTGCGGCTAAATCATACCTTCCACACTACGGGGGCGGTGTTGGTAGAGTCATTC\n-AGGTAAAGGTGGATCCTGCTGACGTGGTAGCGATCCCAGTGGATTACAATAACGCAAAAA\n-TGCGTGTATGCCGTTATAAAGTCATGATTGATGTTACATATGGTTTTAGCCATTACTAA\n->Miro_2.CDS\n-TGAAATTACGCGAAGATACACCGCAAACCATCTACCGTGTTGGTACAGATACCAGCAAGG\n-GACGAAACAAACTATCTCTACGCGCAAACAAGAAAGCGTATAAGATCCTTTCTTCGACAG\n-TATACAAATACAAGATCCGCGCAATCATTCGCGAACTGTCTTGTAATGCGATTGATGGAC\n-ACAAAGAAGCGGGAAATCAGAACCCGTTTGATGTCCAGTTGCCAACTGCTGTTGATCCTC\n-GTTTTGTTATTCGTGACTACGGGATCGGTATGTCTCCTGATTTCGTTAGTGATGCGTTTA\n-CCGTTTACTTCGAATCAACTAAAAATGATTCAAACGACCTGATCGGTTCTATGGGTCTGG\n-GTTGCAAATCTCCGCTTTGCTATTCCGATGCTTTCACGGTGGAATCCGTAAAAGACGGTA\n-TCAAATGCGGTTACACAATCTATATGGATGATGGGGAACCTTTCTGCGATCCTCTGTATG\n-AGATTGAAAGCGATGAACCTAACGGGGTTACTATCACTGTTCCGGTTAAGGTTGAAGATA\n-TCAAAGAATGGGAAAACGAAGCAGCAAGGGTATACGAATCATTTACTGATATTCGTCCTA\n-ACTTTGTTGGTGCTTCTATTCTCAAAATCAACTATCAGCCGAAAGAAGCAACCAACGATA\n-GCGGGGTGATCCGTCATAAATCAGCATACACTAGCGGTGTATATGCTCGCATGGGTAACA\n-TCATTTATCCTCTGGATAAAGATTTGTATGATACCTCGATGTTCTATTGCTATACAGAAA\n-GCCAGTATACGTATATTATTGACTTCCCGATCGGTGAACTTGATTTCATGCCTTCTCGCG\n-AAGAGTTGAGTATGGATAAAATGACAGTAGGGATTGTTAAAGAACGTCTGAAACAAATCA\n-GCCGGGTATATTTCAATCGAGTTAAATCAGAATTCGATAAGTTACAAACCGTGCGTGATA
\n-AACTGACGTGGTTCCATTCTCTGCCTTCAATGGTACAAAACTTTGTTGGTAAAGATGCTA\n-ATTTTCGCATCAATGGTGATTCTATTGGCTGGATCCATAGCGAACTAGTCAAACCAAACA\n-AATATAATGATGATTATGTTGCTGGGTATTGGGCGAACGAATACGACGGTAAAGATGCAT\n-GGTATCAGGTAACTGGTAGCGGAGGCCGCTGGAGCAAGTATAAGCCAGAAACAACCAAAC\n-GCCAGGACATCACACGCATTTATTATCCGTGGAAACAGAAGAAACTCATTCTGTTAAAAG\n-TGGATACCAACACGGTTAAACCGTATATCGTCGGGTATGCTAAAATGCATAACCTAAGCC\n-GTGTTAGCTTTGTTGCATATTACGATAGTGATTCAAAGCGTGAGATTGTTAATGATATCG\n-TTCGGAAAGGTCATTTTGATGAATCGGAAATTGTGTATCTCCGTACCAGTGAAATGACAA\n-AAGAAAAAGAGATCTATGATGCAGATCGCGAAAAATCAAAAGCGCTATATGCACCGAAAA\n-ATTCAGAACCGCGCCCAAAAACCCCGACCGTATATCGTTATGAGTTGGATAGTAACGGCA\n-ATCTGGCAAAAACTTCTCTGTTTATGACAAAATCAGAGTTTTTATCACTGGATAAAGCCC\n-CAGGTGTTCGGTTGTATGGTATTGATGAATATAGCCGCTTAGATGGTGAAAATTCCGGGT\n-TGTCTATGGATTCAGCAATGAAAGAATCTACTTTATCGCGTATCATGCGCCATACTGGGA\n-TCCCTGTTGTGTTTGCTATACGTAACAGCCTCTGGAAGTGGATCCCGGATTCAAATCTGG\n-TATGCTTTGATGATATGCTGTGCAAGCAGTATGTTAAATCTGAAAAGGCATTGAAAGATA\n-ATTGCTTGCCTGGATGGATTGGGAAGGATCACTCAACAGAAACAGACGCATTGCATAGCC\n-GTTTCGGTGTATCGCTTGATCGGATTGTGAAAAACCGATATAATGAAAAACTGTATAAGA\n-TTGTTGATACCCTCGAACGTATAGTGTGTCTGGAAGGGTATGAGAAAGATGGTAAAAATC\n-TTTCCAGGGTGCGTTGTCCGATTCTGCGTGAGTCCGTAGCATCAATGCGCGTTAAACGCA\n-GCAATATGAAAAAACGTGTTGATCAGGCATGGGAAATCTTTAAATCACTAAACCCGTTAC\n-TGGCTTCACTTGTTGAACATAGCGATTCTTATTCAATTCGTCCGATTTACAACAACGAAA\n-AGAATCTCAGTGAGTTTAAGAAATTAATACGGTGGAAATAA\n->Miro_3.CDS\n-TGACACGTTATAATGCGCCTAAGTTGGGAAAATATCTGACCATATTCGGATTTTGTGCGT\n-TCTTTTCTGTCATTATCGGGGCTATCGTGTGGGGTATCCTTGATATGAAGAAACAGCAGG\n-TCGAGGAAGAAAAATTGGTAAAATTCCTTGACACCTATTGTGAAGTAGTAGAATATGGTC\n-TGAACAAAAAGCCAACGAAGTACTCTTGTGACCAAGTTATTTTTAATGTTAAGTGA\n->Miro_4.CDS\n-TGTTGAAAACTCGTAGTGCAATGGAAGTAACCCACTGGGCGCGTACCATGATGGAAAAAC\n-ACGGTTTGATCAGTAATGGTTGGACATTCCGGATCAATGGACGCATTACAAAAACTCTGG\n-GCCGTTGCAGTTATACCAAAAAACTGATCGAACTGTCTGGTCGTCATGTTGCGGAAGATA\n-TCTACGAGGATATTTTAGACACTCTTTTACACGAAATCGCTCATGCTCTTGTCGGGCGCG\n-GTTATGAACATGGTAAAGTGTGGCAAGCTATGGCCTTGCGCCTGGGTGCGAAACCTTCAC\n-CAAGTAAAACAACTACAAAGGATGCTAATCTAGTTGATA
AGAATGAGATCCTTTATTGTT\n-TGTTCATGAAAGATTATCAAGGCCGTGAAGTCTATCAGGCTAAAG'..b'CTAGCCTTATTTGATCGGGTAATATCGTTTGTGA\n-TGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTTATGTTTC\n-TTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGTGCCCTTG\n-TAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCCGCGTTGG\n-AACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTGGCATTCA\n-GACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTACCATCCC\n-AAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAATACACGA\n-ATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCGACACGTG\n-ATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAACTACTATT\n-CTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATGAAGATAA\n-ACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGA\n->Miro_268.CDS\n-TGACGTTCGATGAGGCTATGAGAGCCGCTAAGAGAGGTTGTAAGGTTGCGCCAATAGGTA\n-GGCCATTCTTCGTGTATTATCACGACAGCGCTGGATACAGGAGGGTTACGTTGGGGGTTT\n-CGGCAAGGAATGAAGATTATGTTCCGGATCCGTGGGACAAATTGAAAGTTTGGGATATTT\n-ACGAGAAAAATAGCCCGGTGATGAACTTCTTTAAAGCAATCAAACGGGCTATTGGAAGAT\n-CACACAATTTAACACCAGCATTACGTTAA\n->Miro_269.CDS\n-TGACATACGAAGATATGCTTAATGAAGTGTGCGCGGGTAAAACCGCGTACCGCACTACCA\n-ATCCTGATATGATTGTGTTTCGTGAGGGGGATACAATCATCAGACGTACACACCGCAAGT\n-GTGAAGTAAACCAGGTATTCATTGCCACGGTAGAAGAACAAAAGGCTACTGATTGGGATA\n-TCGTCATTGACGAAGAAAACCACGATGAGCTAGATCATCCGATATTCATTTTGAGTGAAA\n-CTATCGGAATTCGACCATTCATGCATAGAAATTTTTGGGGGAATCCATGA\n->Miro_270.CDS\n-TGACTAACGCAACCGTAGTTACTACCACTTACTTCATTTCTTCTGGTCGTAAAAATGCAA\n-TGTATACCTTGCGTGTTGAGCGTAGAGGCGGCGGTTATACTTCTGATAACTATATCTGCA\n-ACTTATCTACCGATCCGGAGAAAGCGGAAGCGAAAGCGCGTGAGTATTTCGACCGTGTTT\n-CAGCCCGTTTAACTGAAACTGATACTTTCAAAATGATATTCCAGGGCTTTGCAGATTTCG\n-ATCTGTTCGAACGTCGCGGTAAACTGTCAGTGTTCGATACTGAAAAACTGGAACTGCTGG\n-AAAAAGGGATCATGCCGATTGGCAAGCGTAAAGGCGAAGTAATTGCAGAAATGCCAATGT\n-TTACTGTTCTTTGGTGGGCTGACCAGTCTAAAGAAGACAACAAAAACAGCCCTGTATTCG\n-ATGCTGTGTGCGCTTACTGCATGGGTGTTGCACTGGAAAAGGATTATATCGCCAAACGCG\n-AAGAAATCCGCGAACAGTGGGAACAAGAGCGCCAGGAACGCATTTCTAAAGCTAACCACA\n-TCGGTGAAATCAAACAACGTCTGGAAATGACCGGAACCGTTGAGAAAGTGATTTCACTGG\n-GATAT
ACTCAAGTTTCTTACTACACTTCTGTAGAAAGATTCATGACCAAAATTAATGTTG\n-ATGGTAATGTTGTTGTTTACTTCGGTAATAACATTGCAAACGAAGGTGATGAAATCACTT\n-TCAAAGCAACCGTTAAAGAGCACGGAGAATACAAAGACGTTAAACAAACCATCGTTCAAC\n-GTGTAAAGGTTTTATAA\n->Miro_271.CDS\n-TGAGTATACAAATTCTGTTCAGTAAAGAAGATCCATTAGGTACGGAATCATCGGTTCAGC\n-GCTTATTCTGGCATACAGATTATATAAAAAATCGATGTCGTGGCCCAAACAACACATGGG\n-GAAGCAACAACGATATTGACAAAGCAAAAATTTATCCTCGCGGAAAGGCGTACTGGAAAA\n-TTTTTGATATTCATCTTCATAAAACAATGAAAGATTCGAATGAATGA\n->Miro_272.CDS\n-TGAAAACCGCTTTGATCATTCTTGAAGAAATTCCTGAAAATACAACGCTTTACAAAGTCG\n-AAACTGACGATAGCGAAGTAATTGAGATCCTCAAAACCGCCCACGGGCATTATGTAAACG\n-GTTCCGGCAACACCAAAGAACAGGATTATGCTGTTGATGTTGTTAACCTCATGCTGGGGC\n-CGAATACCGATGATAACCTGAAATGGGCGCGTGAATCCAACATTCCTGAAAAGTATGTTG\n-GTATGTTCTATCAGTGTAACATCGATAGCAAATCACCGTTTGAACCAGAAAAGAAAATCG\n-ATCTGATTGTTCGCACTGGGTTCTTCTTATGA\n->Miro_273.CDS\n-TGAATAATTTGGTAGCGAAACATGATTTCAACCGCGCTTCTACTCACCGCGATCGCAAGC\n-GAGCATTTAAAGAAGCAAAACGTAAACAGAAACACAAAGGTAAGTGTGATGAAAACCGCT\n-TTGATCATTCTTGA\n->Miro_274.CDS\n-TGAAACTGTTTAATAAAATTCGTTCTGTAGATGAAATCGTAGCAACTTTCGACAAAACCC\n-TGTCTGAACTGGAAGCGCGTATTGCTCACGATAATGAGCAGGTTGCACAGGTTGCAGCAG\n-ATCGTCAAGCAGCAGAAGAAGAACATCAGCGTAAGCTGGCGGAACTGGCTTCAAAAGAAA\n-GCGATCATACTGCAAGTGCAACCCGCGCCGGACGTATCGCAGACAAGATCCGCAAATTGC\n-TGGATTAA\n->Miro_275.CDS\n-TGAAAAGAATTCTGACAAGACTTGACATTCACAAATATGCAATTAAAATTGGTGACATGA\n-AACGCGGAACCTTCAAGAAATTCGAAGGCGTGGATCTGAAAGAAAACGGGTTTTACCTGG\n-TAGTTGATGATAAAACAGGGCGCGTAGTATTCCGTTTCTACGTAGCACCAAATCAACGCC\n-GTAAAACTGGTGCTAGTCGTACTCTGTTACGTATCAAGCGCCGCGAACACGAACCGTCTG\n-CGATGGTTGCGATGCTGGAAAGTGCTTCTCTGTATTATGTAGCACATGACAAAATCAAAA\n-ACCTGTTCGGTTTCCCGAACCTCAACGGATCAACGTTCGAAAGTGCTATTCAAATGCGTT\n-ATGGTGCTGGTGAAAACTACACCGAGTGGAACCGCGCACTGAATGACATTTATGATTTTG\n-AATTTCAACCATATTGA\n->Miro_276.CDS\n-TGAAATTTAAAAACATGCCTTCGGCTAACGAAACTCGCGCTAGTGGTGAATCTATTCTGA\n-AAACTGCCCGTGAAATGTGGGCCGACGACCGGATAGAATCCGGGAAAGGTTACGTATTTC\n-GTAAAGGATTTGGTGCATTTTATGTTGCAACCACCTTTATAGTAGGGTATAGTTGTTTAG\n-GAATCGCGTGGATGCAACGATATCTTGAACGTGCTGTTCTTTTTGTGATACGTTT
TCTGT\n-TTACATTGATCAGCGTAGTGATTAGCATCCCATTTCTGAGCATCGCCTTGTTTGTTAACG\n-TAAAAACAGGCGAAATTACAAAATTAGGCAAACTGGTATCTAAAAAATATGAAAAGAATT\n-CTGACAAGACTTGA\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/T7_ExtSeqIn.fasta
--- a/cpt_gff_extract_seq/test-data/T7_ExtSeqIn.fasta Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,667 +0,0 @@\n->NC_001604\n-TCTCACAGTGTACGGACCTAAAGTTCCCCCATAGGGGGTACCTAAAGCCCAGCCAATCAC\n-CTAAAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGT\n-TTGTCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCTATCTGTTACAGTCTCCTAAA\n-GTATCCTCCTAAAGTCACCTCCTAACGTCCATCCTAAAGCCAACACCTAAAGCCTACACC\n-TAAAGACCCATCAAGTCAACGCCTATCTTAAAGTTTAAACATAAAGACCAGACCTAAAGA\n-CCAGACCTAAAGACACTACATAAAGACCAGACCTAAAGACGCCTTGTTGTTAGCCATAAA\n-GTGATAACCTTTAATCATTGTCTTTATTAATACAACTCACTATAAGGAGAGACAACTTAA\n-AGAGACTTAAAAGATTAATTTAAAATTTATCAAAAAGAGTATTGACTTAAAGTCTAACCT\n-ATAGGATACTTACAGCCATCGAGAGGGACACGGCGAATAGCCATCCCAATCGACACCGGG\n-GTCAACCGGATAAGTAGACAGCCTGATAAGTCGCACGAAAAACAGGTATTGACAACATGA\n-AGTAACATGCAGTAAGATACAAATCGCTAGGTAACACTAGCAGCGTCAACCGGGCGCACA\n-GTGCCTTCTAGGTGACTTAAGCGCACCACGGCACATAAGGTGAAACAAAACGGTTGACAA\n-CATGAAGTAAACACGGTACGATGTACCACATGAAACGACAGTGAGTCACCACACTGAAAG\n-GTGATGCGGTCTAACGAAACCTGACCTAAGACGCTCTTTAACAATCTGGTAAATAGCTCT\n-TGAGTGCATGACTAGCGGATAACTCAAGGGTATCGCAAGGTGCCCTTTATGATATTCACT\n-AATAACTGCACGAGGTAACACAAGATGGCTATGTCTAACATGACTTACAACAACGTTTTC\n-GACCACGCTTACGAAATGCTGAAAGAAAACATCCGTTATGATGACATCCGTGACACTGAT\n-GACCTGCACGATGCTATTCACATGGCTGCCGATAATGCAGTTCCGCACTACTACGCTGAC\n-ATCTTTAGCGTAATGGCAAGTGAGGGCATTGACCTTGAGTTCGAAGACTCTGGTCTGATG\n-CCTGACACCAAGGACGTAATCCGCATCCTGCAAGCGCGTATCTATGAGCAATTAACGATT\n-GACCTCTGGGAAGACGCAGAAGACTTGCTCAATGAATACTTGGAGGAAGTCGAGGAGTAC\n-GAGGAGGATGAAGAGTAATGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTG\n-TACTTTTCTATAGCGACATGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGC\n-TCAAAGAACTGTACGAAAACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGATAGACTC\n-AAGGTCGCTCCTAGCGAGTGGCCTTTATGATTATCACTTTACTTATGAGGGAGTAATGTA\n-TATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTGGGAA\n-GGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCATCAA\n-AGGGGCACTACGCAAATGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCA\n-ACCGTATGTACACCTGATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAA\n-CTCCGCATTAACCGCAAGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGATGG\n-CTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAGTGAG\n-AACTTGGCGAGA
GAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCATAGA\n-CGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAGCCAACACA\n-CTGAACGCTATCTCATAACGAACATAAAGGACACAATGCAATGAACATTACCGACATCAT\n-GAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAACTTGACAAGCGTCAAGGTAT\n-GCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGTGTGATGGCGAGCTAACCGA\n-ACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCTTGAAGTGTCTCACGGCTGA\n-CGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTGCTTATAGTCACCCGCTGCT\n-ACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATTCAGGCGCAGCCTATACCGC\n-ATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACGTCTACGATGTACAGCGCCA\n-CGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATTGCGAGCGTTTCAACAATGA\n-TGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTGATTGCAATTCGGATGAGCA\n-TGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTTGTAAACTAATCCGCAAGTT\n-CTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACATCATGTTCTCAAATGGAGA\n-CGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGAAAGACGGTGGCGCATTCAG\n-CATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCGCACGACAGAAAGAAATTGA\n-CCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAGAGGCACGCAGATTCAAACG\n-TCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCGAAAGAATGCTTGCTGCGTG\n-GCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAGCTGTAGATGTACTAGGAAG\n-AACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGGACTTTAAGGCGCTTGAGGA\n-ACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTATCGCTAATGGTCTTACGCT\n-CAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGATAGTCTTATCTTACAGGTCA\n-TCTGCGGGTGGCCTGAATAGGTACGATTTACTAACTGGAAGAGGCACTAAATGAACACGA\n-TTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGTTCAACACTC\n-TGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGCATGAGTCTT\n-ACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAGCTGGTGAGG\n-TTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGATGATTGCAC\n-GCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGACAGCCTTCC\n-AGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGACCACTCTGG\n-CTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAATCGGTCGGG\n-CCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGCACTTCAAGA\n-AAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAGCATTTATGC\n-AAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGTGGTCTTCG
T\n-GGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCATTGAGTCAA\n-'..b'TAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGT\n-GCCAACAACTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCT\n-GATGGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCA\n-GACAGTCGTTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAATTGGTAAATCACAAG\n-GAAAGACGTGTAGTCCACGGATGGACTCTCAAGGAGGTACAAGGTGCTATCATTAGACTT\n-TAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTGTAGCAGATGTTAGTGC\n-TCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTGCTGCTATCGCCTACAC\n-AGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACTGGAAGAAAGCCAATAA\n-GGAGTGATATGTATGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACT\n-GCGATGGCTCAGCGTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTC\n-TATAATGCTATTAACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCG\n-GATGTTCACATCTTAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGT\n-GATAACGGTCTTACGGATGATGATATTTACACATTACAGTGATATACTCAAGGCCACTAC\n-AGATAGTGGTCTTTATGGATGTCATTGTCTATACGAGATGCTCCTACGTGAAATCTGAAA\n-GTTAACGGGAGGCATTATGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGG\n-GATGCTATTCGGGTTAGGATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACA\n-GGAGGTACACAATGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAAT\n-CGATGCGGTATCTGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAG\n-GATTATTTCTGATTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGG\n-AACCTCCGATGGTCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGA\n-TGCTAAACGTATTCTCGCAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGA\n-TACTATTCGTGAACTGCAACGTAAGTAGGAAATCAAGTAAGGAGGCAATGTGTCTACTCA\n-ATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGGCGTTCCTATT\n-CGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTGACATGGCTAA\n-GGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTGGTATCGGTAA\n-GTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTCAGTTGAAGAT\n-ACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTATTAAGAACAT\n-CATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGCGTGACTCGGT\n-AATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGAAATCAGTAGG\n-TATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATGACGTTGAGAT\n-TCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGAC
TCTGGTTCAGGAGTT\n-CGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTACACCTCAGAC\n-AGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCATTATCTGGCC\n-TGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTCTTGCTCCTAT\n-GTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAACAGACCCAGT\n-GCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGGCTGGCTTTAC\n-GCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGCTGAGGCTTCG\n-TGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACCAGTGGCTTCC\n-GAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTGATGACCTGCA\n-TACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTCTGGTCATTGA\n-CCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACACTGAACGGTTA\n-CATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGACCCTTGAGTT\n-ACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGAGTAACTTCGG\n-TGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACAACTGTGCGAT\n-GGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCCTTGAGCCAGT\n-CATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACTACCAGTCCGC\n-TCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGATGACCCGTAT\n-CACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTGCGTTAGGCAT\n-TGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTGAAGTACTTGC\n-TGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATATCATTGAGAT\n-GTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTACGTCTTTCAT\n-TGAGTGGTGATTTATGCATTAGGACTGCATAGGGATGCACTATAGACCACGGATGGTCAG\n-TTCTTTAAGTTACTGAAAAGACACGATAAATTAATACGACTCACTATAGGGAGAGGAGGG\n-ACGAAAGGTTACTATATAGATACTGAATGAATACTTATAGAGTGCATAAAGTATGCATAA\n-TGGTGTACCTAGAGTGACCTCTAAGAATGGTGATTATATTGTATTAGTATCACCTTAACT\n-TAAGGACCAACATAAAGGGAGGAGACTCATGTTCCGCTTATTGTTGAACCTACTGCGGCA\n-TAGAGTCACCTACCGATTTCTTGTGGTACTTTGTGCTGCCCTTGGGTACGCATCTCTTAC\n-TGGAGACCTCAGTTCACTGGAGTCTGTCGTTTGCTCTATACTCACTTGTAGCGATTAGGG\n-TCTTCCTGACCGACTGATGGCTCACCGAGGGATTCAGCGGTATGATTGCATCACACCACT\n-TCATCCCTATAGAGTCAAGTCCTAAGGTATACCCATAAAGAGCCTCTAATGGTCTATCCT\n-AAGGTCTATACCTAAAGATAGGCCATCCTATCAGTGTCACCTAAAGAGGGTCTTAGAGAG\n-GGCCTATGGAGTTCCTATAGGGTCCTTTAAAATATACCATAAAAATCTGAGTGACTATCT\n-CACAGTGTACGGACCTAAAGTTCCCCCAT
AGGGGGTACCTAAAGCCCAGCCAATCACCTA\n-AAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGTTTG\n-TCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCT\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/T7_ExtSeqIn.gff3
--- a/cpt_gff_extract_seq/test-data/T7_ExtSeqIn.gff3 Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b"@@ -1,171 +0,0 @@\n-##gff-version 3\n-NC_001604\tGenBank\tcontig\t1\t39937\t.\t+\t1\tID=NC_001604;Dbxref=BioProject:PRJNA485481,taxon:10760;Name=NC_001604;Note=Enterobacteria phage T7%2C complete genome.,VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3],[4],and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443,474,and 388 to 424. [4] inserted a T at nucleotide 17511,increasing the total sequence to 39937 bp. This change,originally found in T3 DNA [8],revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG,changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. Features have been extracted from [1] unless otherwise noted. The sequence shown is that of the l strand,which corresponds to the sequence of all mRNAs of known functional significance. Early mRNAs are produced by three major promoters for E. coli RNA polymerase A1,A2,and A3,located near the left end of the DNA. A fourth major E. coli promoter,A0 (also called D),that would direct transcription leftward,and several minor E. coli promoters (see Table 6 in [1]) function in vitro but have no known in vivo function. Late mRNAs are produced by 15 promoters for T7 RNA polymerase distributed across the right-most 85%25 of the DNA,and named e.g. phi10,for the first gene downstream of the promoter. There are also two T7 promoters,phiOL and phiOR,associated with possible origins of replication at the left and right ends of T7 DNA. The 23 base-pair consensus sequence for T7 promoters stretches from -17 to +6,where the initiating nucleotide is at +1. T7 DNA also contains a 160 base-pair terminal repetition. The beginning and end of RNAs are determined by the promoters,by a terminator for E. 
coli RNA polymerase,TE,located at the end of the early region,a terminator for T7 RNA polymerase,Tphi,located just downstream of gene 10,and a series of RNase III cleavage sites. Early mRNAs made by E. coli RNA polymerase are listed in Features. The many RNAs predicted to be made by T7 RNA polymerase are not listed but can be deduced from the position of the transcription signals (see Tables 8 and 9 in [1]). Promoters are listed in Features by the known or predicted first nucleotide of the RNA,terminators by the last nucleotide of the RNA,and RNase III sites by the nucleotide 5' of the position of cleavage. Genes are numbered 0.3 to 19.5 in order of their left-to-right position on the genome. Proteins are named by the gene number,e.g.,the gene 1 protein,or by a functional name,e.g.,T7 RNA polymerase. There is now genetic or biochemical evidence that proteins are produced from at least 52 of the 56 T7 genes. Gene 4 produces two proteins,4A and 4B,by initiating translation at two different sites in the same reading frame. Gene 10 produces two proteins,10A and 10B,by frameshifting during translation. Genes 0.6 and 5.5 probably also make two proteins by translational frameshifting,the gene 5.5 frameshift producing a gene 5.5-5.7 fusion protein. COMPLETENESS: full length. ;comment1=VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3]%2C [4]%2C and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443%2C 474%2C and 388 to 424. [4] inserted a T at nucleotide 17511%2C increasing the total sequence to 39937 bp. This change%2C originally found in T3 DNA [8]%2C revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG%2C changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. 
Features have been extracted from [1"..b' However%2C in phage T7 the holin protein gp17.5 does not appear to be essential and gp17.5 mutants only show a minor delay in lysis. Other names: gp17.5%3B lysis protein;codon_start=1;product=type II holin;protein_id=NP_042006.1;transl_table=11;translation=length.67;\n-NC_001604\tGenBank\tgene\t36344\t36547\t.\t+\t1\tID=T7p53.gene;Alias=T7p53;Dbxref=GeneID:1261022;Name=T7p53;Note=gene 17.5;\n-NC_001604\tGenBank\tCDS\t36553\t36822\t.\t+\t1\tID=T7p54;Dbxref=GOA:P03693,UniProtKB/Swiss-Prot:P03693,GeneID:1261042;Name=T7p54;Note=involved in the packaging of genome monomers into a procapsid using head-to-tail concatemers of genomes. other names: DNA packaging protein A%3B DNA maturation protein A%3B terminase%2C small subunit;codon_start=1;product=DNA packaging protein%2C small subunit;protein_id=NP_042007.1;transl_table=11;translation=length.89;\n-NC_001604\tGenBank\tregulatory\t36836\t36836\t.\t+\t1\tID=GenBank:regulatory:NC_001604:36836:36836;Note=E. coli promoter E[6];regulatory_class=promoter;\n-NC_001604\tGenBank\tsequence_secondary_structure\t36856\t36856\t.\t+\t1\tID=GenBank:sequence_secondary_structure:NC_001604:36856:36856;Note=RNase III site R18.5;\n-NC_001604\tGenBank\tgene\t36553\t36822\t.\t+\t1\tID=T7p54.gene;Alias=T7p54;Dbxref=GeneID:1261042;Name=T7p54;Note=gene 18;\n-NC_001604\tGenBank\tCDS\t36917\t37348\t.\t+\t1\tID=T7p55;Dbxref=GOA:P03803,UniProtKB/Swiss-Prot:P03803,GeneID:1261067;Name=T7p55;Note=analog of phage lambda protein Rz%2C a cell lysis protein. Rz and gp18.5 share distant sequence similarity%2C similar function%2C and a similar genome neighborhood. In T7%2C gp18.5 interacts with gp18.7%2C a lambda RZ1-like lysis protein. 
Other names: gp18.5;codon_start=1;product=phage lambda Rz-like lysis protein;protein_id=NP_042008.1;transl_table=11;translation=length.143;\n-NC_001604\tGenBank\tgene\t36917\t37348\t.\t+\t1\tID=T7p55.gene;Alias=T7p55;Dbxref=GeneID:1261067;Name=T7p55;Note=gene 18.5;\n-NC_001604\tGenBank\tCDS\t37032\t37283\t.\t+\t1\tID=T7p56;Dbxref=UniProtKB/Swiss-Prot:P03788,GeneID:1261057;Name=T7p56;Note=in Enterobacteria phage T7%2C this protein interacts with gp18.5 and is expressed from the -1 frame of a gene completely overlapping gene 18.5. This suggests that it may be an analog of lambda lysis protein Rz1. Other names: gp18.7.;codon_start=1;product=phage lambda Rz1-like protein;protein_id=NP_042009.1;transl_table=11;translation=length.83;\n-NC_001604\tGenBank\tgene\t37032\t37283\t.\t+\t1\tID=T7p56.gene;Alias=T7p56;Dbxref=GeneID:1261057;Name=T7p56;Note=gene 18.7;\n-NC_001604\tGenBank\tCDS\t37370\t39130\t.\t+\t1\tID=T7p57;Dbxref=GOA:P03694,UniProtKB/Swiss-Prot:P03694,GeneID:1261062;Name=T7p57;Note=gene 19;codon_start=1;product=DNA maturation protein;protein_id=NP_042010.1;transl_table=11;translation=length.586;\n-NC_001604\tGenBank\tgene\t37370\t39130\t.\t+\t1\tID=T7p57.gene;Alias=T7p57;Dbxref=GeneID:1261062;Name=T7p57;Note=gene 19;\n-NC_001604\tGenBank\tCDS\t38016\t38273\t.\t+\t1\tID=T7p58;Dbxref=UniProtKB/Swiss-Prot:P03789,GeneID:1261064;Name=T7p58;Note=gene 19.2;codon_start=1;product=hypothetical protein;protein_id=NP_042011.1;transl_table=11;translation=length.85;\n-NC_001604\tGenBank\tgene\t38016\t38273\t.\t+\t1\tID=T7p58.gene;Alias=T7p58;Dbxref=GeneID:1261064;Name=T7p58;Note=gene 19.2;\n-NC_001604\tGenBank\tCDS\t38553\t38726\t.\t+\t1\tID=T7p59;Dbxref=UniProtKB/Swiss-Prot:P03790,GeneID:1261066;Name=T7p59;Note=gene 19.3;codon_start=1;product=hypothetical protein;protein_id=NP_042012.1;transl_table=11;translation=length.57;\n-NC_001604\tGenBank\tregulatory\t39229\t39229\t.\t+\t1\tID=GenBank:regulatory:NC_001604:39229:39229;Note=T7 promoter 
phiOR;regulatory_class=promoter;\n-NC_001604\tGenBank\tgene\t38553\t38726\t.\t+\t1\tID=T7p59.gene;Alias=T7p59;Dbxref=GeneID:1261066;Name=T7p59;Note=gene 19.3;\n-NC_001604\tGenBank\tCDS\t39389\t39538\t.\t+\t1\tID=T7p60;Dbxref=UniProtKB/Swiss-Prot:P03804,GeneID:1261068;Name=T7p60;Note=gene 19.5;codon_start=1;product=hypothetical protein;protein_id=NP_042013.1;transl_table=11;translation=length.49;\n-NC_001604\tGenBank\tgene\t39389\t39538\t.\t+\t1\tID=T7p60.gene;Alias=T7p60;Dbxref=GeneID:1261068;Name=T7p60;Note=gene 19.5;\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 cpt_gff_extract_seq/test-data/T7_ExtSeqOut.fasta
--- a/cpt_gff_extract_seq/test-data/T7_ExtSeqOut.fasta Fri May 20 08:52:37 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,776 +0,0 @@\n->T7p01.p01 [Location=[926:1278](+);Name=T7p01]\n-TGGCTATGTCTAACATGACTTACAACAACGTTTTCGACCACGCTTACGAAATGCTGAAAG\n-AAAACATCCGTTATGATGACATCCGTGACACTGATGACCTGCACGATGCTATTCACATGG\n-CTGCCGATAATGCAGTTCCGCACTACTACGCTGACATCTTTAGCGTAATGGCAAGTGAGG\n-GCATTGACCTTGAGTTCGAAGACTCTGGTCTGATGCCTGACACCAAGGACGTAATCCGCA\n-TCCTGCAAGCGCGTATCTATGAGCAATTAACGATTGACCTCTGGGAAGACGCAGAAGACT\n-TGCTCAATGAATACTTGGAGGAAGTCGAGGAGTACGAGGAGGATGAAGAGTAA\n->T7p02 [Location=[1279:1433](+);Name=T7p02]\n-TGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTGTACTTTTCTATAGCGACA\n-TGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGCTCAAAGAACTGTACGAAA\n-ACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGA\n->T7p04 [Location=[1497:1639](+);Name=T7p04]\n-TGTATATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTG\n-GGAAGGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCA\n-TCAAAGGGGCACTACGCAAATGA\n->T7p05.cds1 [Location=[1637:1794](+);Name=T7p05]\n-TGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCAACCGTATGTACACCTG\n-ATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAACTCCGCATTAACCGCA\n-AGATTAACAAGATAGGTTCCGGCTATGACAGAACGCAC\n->T7p06 [Location=[1637:1797](+);Name=T7p06]\n-TGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCAACCGTATGTACACCTG\n-ATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAACTCCGCATTAACCGCA\n-AGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGA\n->T7p05.cds2 [Location=[1797:1972](+);Name=T7p05]\n-ATGGCTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAG\n-TGAGAACTTGGCGAGAGAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCA\n-TAGACGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAG\n->T7p03.p01 
[Location=[2022:3100](+);Name=T7p03]\n-TGAACATTACCGACATCATGAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAAC\n-TTGACAAGCGTCAAGGTATGCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGT\n-GTGATGGCGAGCTAACCGAACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCT\n-TGAAGTGTCTCACGGCTGACGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTG\n-CTTATAGTCACCCGCTGCTACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATT\n-CAGGCGCAGCCTATACCGCATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACG\n-TCTACGATGTACAGCGCCACGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATT\n-GCGAGCGTTTCAACAATGATGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTG\n-ATTGCAATTCGGATGAGCATGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTT\n-GTAAACTAATCCGCAAGTTCTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACA\n-TCATGTTCTCAAATGGAGACGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGA\n-AAGACGGTGGCGCATTCAGCATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCG\n-CACGACAGAAAGAAATTGACCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAG\n-AGGCACGCAGATTCAAACGTCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCG\n-AAAGAATGCTTGCTGCGTGGCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAG\n-CTGTAGATGTACTAGGAAGAACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGG\n-ACTTTAAGGCGCTTGAGGAACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTA\n-TCGCTAATGGTCTTACGCTCAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGA\n->T7p07.p01 
[Location=[3172:5822](+);Name=T7p07]\n-TGAACACGATTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGT\n-TCAACACTCTGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGC\n-ATGAGTCTTACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAG\n-CTGGTGAGGTTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGA\n-TGATTGCACGCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGA\n-CAGCCTTCCAGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGA\n-CCACTCTGGCTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAA\n-TCGGTCGGGCCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGC\n-ACTTCAAGAAAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAG\n-CATTTATGCAAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGT\n-GGTCTTCGTGGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCA\n-TTGAGTCAACCGGAATGGTTAGCTTACACCGCCAAAATGCTGGCGTAGTAGGTCAAGACT\n-CTGAGACTATCGAACTCGCACCTGAATACGCTGAGGCTATCGCAACCCGTGCAGGTGCGC\n-TGGCTGGCATCTCTCCGATGTTCCAACCTTGCGTAGTTCCTCCTAAGCCGTGGACTGGCA\n-TTACTGGTGGTGGCTATTGGGCTAACGGTCGTCGTCCTCTGGCGCTGGTGCGTACTCACA\n-GTAAGAAAGCACTGATGCGCTACGAAGACGTTTACATGCCTGAGGTGTACAAAGCGATTA\n-ACATTGCGCAAAACACCGCATGGAAAATCAACAAGAAAGTCCTAGCGGTCGCCAACGTAA\n-TCACCAAGTGGAAGCATTGTCCGGTCGAGGACATCCCTGCGATTGAGCGTGAAGAACTCC\n-CGATGAAACCGGAAGACATCGACATGAATCCTGAGGCTCTCACCGCGTGGAAACGTGCTG\n-CCGCTGCTGTGTACCGCAAGGACAAGGCTCGCAAGTCTCGCCGTATCAGCCTTGAGTTCA\n-TGCTTGAGCAAGCCAATAAGTTTGCTAACCATAAGGCCATCTGGTTCCCTTACAAC'..b'TAA\n->T7p53 [Location=[36345:36547](+);Name=T7p53]\n-TGCTATCATTAGACTTTAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTG\n-TAGCAGATGTTAGTGCTCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTG\n-CTGCTATCGCCTACACAGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACT\n-GGAAGAAAGCCAATAAGGAGTGA\n->T7p54 [Location=[36554:36822](+);Name=T7p54]\n-TGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACTGCGATGGCTCAGC\n-GTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTCTATAATGCTATTA\n-ACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCGGATGTTCACATCT\n-TAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGTGATAACGGTCTTA\n-CGGATGATGATATTTACACATTACAGTGA\n->T7p55 
[Location=[36918:37348](+);Name=T7p55]\n-TGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGGGATGCTATTCGGGTTAG\n-GATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACAGGAGGTACACAATGAGT\n-ACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAATCGATGCGGTATCTGCTA\n-AGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAGGATTATTTCTGATTTGC\n-GTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGGAACCTCCGATGGTCAGT\n-GTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGATGCTAAACGTATTCTCG\n-CAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGATACTATTCGTGAACTGC\n-AACGTAAGTAG\n->T7p56 [Location=[37033:37283](+);Name=T7p56]\n-TGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAATCGATGCGGTATC\n-TGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAGGATTATTTCTGA\n-TTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGGAACCTCCGATGG\n-TCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGATGCTAAACGTAT\n-TCTCGCAGTGA\n->T7p57 [Location=[37371:39130](+);Name=T7p57]\n-TGTCTACTCAATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGG\n-CGTTCCTATTCGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTG\n-ACATGGCTAAGGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTG\n-GTATCGGTAAGTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTC\n-AGTTGAAGATACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTA\n-TTAAGAACATCATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGC\n-GTGACTCGGTAATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGA\n-AATCAGTAGGTATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATG\n-ACGTTGAGATTCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGACTCTGG\n-TTCAGGAGTTCGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTA\n-CACCTCAGACAGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCA\n-TTATCTGGCCTGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTC\n-TTGCTCCTATGTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAA\n-CAGACCCAGTGCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGG\n-CTGGCTTTACGCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGC\n-TGAGGCTTCGTGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACC\n-AGTGGCTTCCGAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTG\n-ATGACCTGCATACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTC\n-TGGTCATT
GACCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACAC\n-TGAACGGTTACATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGA\n-CCCTTGAGTTACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGA\n-GTAACTTCGGTGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACA\n-ACTGTGCGATGGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCC\n-TTGAGCCAGTCATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACT\n-ACCAGTCCGCTCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGA\n-TGACCCGTATCACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTG\n-CGTTAGGCATTGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTG\n-AAGTACTTGCTGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATA\n-TCATTGAGATGTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTA\n-CGTCTTTCATTGAGTGGTGA\n->T7p58 [Location=[38017:38273](+);Name=T7p58]\n-TGGGTACACAACCATTATCTGGCCTGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTA\n-TTACTCACAGCGTCTTGCTCCTATGTTACGCGCTGAGTACGATGAGAACCCTGAGGCACT\n-TGCTGGGACTCCAACAGACCCAGTGCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTT\n-GGAATACGGTAAGGCTGGCTTTACGCTACAGTTCATGCTTAACCCTAACCTTAGTGATGC\n-CGAGAAGTACCCGCTGA\n->T7p59 [Location=[38554:38726](+);Name=T7p59]\n-TGGCTACTCCGATAAGACCCTTGAGTTACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCA\n-GACGGTTGTCTACGAGAGTAACTTCGGTGACGGTATGTTCGGTAAGGTATTCAGTCCTAT\n-CCTTCTTAAACACCACAACTGTGCGATGGAAGAGATTCGTGCCCGTGGTATGA\n->T7p60 [Location=[39390:39538](+);Name=T7p60]\n-TGTTCCGCTTATTGTTGAACCTACTGCGGCATAGAGTCACCTACCGATTTCTTGTGGTAC\n-TTTGTGCTGCCCTTGGGTACGCATCTCTTACTGGAGACCTCAGTTCACTGGAGTCTGTCG\n-TTTGCTCTATACTCACTTGTAGCGATTAG\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3.py Mon Jun 05 02:43:58 2023 +0000
[
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+    feature_list,\n+    test,\n+    test_kwargs,\n+    subfeatures=True,\n+    parent=None,\n+    invert=False,\n+    recurse=True,\n+):\n+    """Recursively search through features, testing each with a test function, yielding matches.\n+\n+    GFF3 is a hierachical data structure, so we need to be able to recursively\n+    search through features. E.g. if you\'re looking for a feature with\n+    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+    :type feature_list: list\n+    :param feature_list: an iterable of features\n+\n+    :type test: function reference\n+    :param test: a closure with the method signature (feature, **kwargs) where\n+                 the kwargs are those passed in the next argument. This\n+                 function should return True or False, True if the feature is\n+                 to be yielded as part of the main feature_lambda function, or\n+                 False if it is to be ignored. This function CAN mutate the\n+                 features passed to it (think "apply").\n+\n+    :type test_kwargs: dictionary\n+    :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+    :type subfeatures: boolean\n+    :param subfeatures: when a feature is matched, should just that feature be\n+                        yielded to the caller, or should the entire sub_feature\n+                        tree for that feature be included? 
subfeatures=True is\n+                        useful in cases such as searching for a gene feature,\n+                        and wanting to know what RBS/Shine_Dalgarno_sequences\n+                        are in the sub_feature tree (which can be accomplished\n+                        with two feature_lambda calls). subfeatures=False is\n+                        useful in cases when you want to process (and possibly\n+                        return) the entire feature tree, such as applying a\n+                        qualifier to every single feature.\n+\n+    :type invert: boolean\n+    :param invert: Negate/invert the result of the filter.\n+\n+    :rtype: yielded list\n+    :return: Yields a list of matching features.\n+    """\n+    # Either the top level set of [features] or the subfeature attribute\n+    for feature in feature_list:\n+        feature._parent = parent\n+        if not parent:\n+            # Set to self so we cannot go above root.\n+            feature._parent = feature\n+        test_result = test(feature, **test_kwargs)\n+        # if (not invert and test_result) or (invert and not test_result):\n+        if invert ^ test_result:\n+            if not subfeatures:\n+                feature_copy = copy.deepcopy(feature)\n+                feature_copy.sub_features = list()\n+                yield feature_copy\n+            else:\n+                yield feature\n+\n+        if recurse and hasattr(feature, "sub_features"):\n+            for x in feature_lambda(\n+                feature.sub_features,\n+                test,\n+                test_kwargs,\n+                subfeatures=subfeatures,\n+                parent=feature,\n+                invert=invert,\n+                recurse=recurse,\n+            ):\n+                yield x\n+\n+\n+def fetchParent(feature):\n+    if not hasattr(feature, "_parent") or feature._parent is None:\n+        return feature\n+    else:\n+        return fetchParent(feature._parent)\n+\n+\n+def 
feature_test_true(feature, **kwargs):\n+    return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+    if "type" in kwargs:\n+        return str(feature.type).upper() == str(kwargs["type"]).upper()\n+    elif "types" in kwargs:\n+        for x in kwargs["types"]:\n+            if str(feature.type).upper() == str(x).upper():\n+                return True\n+        return False\n+    raise Exception("Incorrect feature_test'..b'feature.location.start,\n+        # feature.location.end,\n+        # feature.location.strand\n+        # )\n+    return result\n+\n+\n+def get_gff3_id(gene):\n+    return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+    # This prevents frameshift errors\n+    while start < 0:\n+        start += 3\n+    while end < 0:\n+        end += 3\n+    while start > parent_length:\n+        start -= 3\n+    while end > parent_length:\n+        end -= 3\n+    return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+    for x in genes(feature_list):\n+        if (\n+            len(\n+                list(\n+                    feature_lambda(\n+                        x.sub_features,\n+                        feature_test_type,\n+                        {"type": "CDS"},\n+                        subfeatures=False,\n+                    )\n+                )\n+            )\n+            > 0\n+        ):\n+            yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+    """\n+    Simple filter to extract gene features from the feature set.\n+    """\n+\n+    if not sort:\n+        for x in feature_lambda(\n+            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+        ):\n+            yield x\n+    else:\n+        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+        data = sorted(data, key=lambda feature: feature.location.start)\n+        for x in data:\n+            yield 
x\n+\n+\n+def wa_unified_product_name(feature):\n+    """\n+    Try and figure out a name. We gave conflicting instructions, so\n+    this isn\'t as trivial as it should be. Sometimes it will be in\n+    \'product\' or \'Product\', othertimes in \'Name\'\n+    """\n+    # Manually applied tags.\n+    protein_product = feature.qualifiers.get(\n+        "product", feature.qualifiers.get("Product", [None])\n+    )[0]\n+\n+    # If neither of those are available ...\n+    if protein_product is None:\n+        # And there\'s a name...\n+        if "Name" in feature.qualifiers:\n+            if not is_uuid(feature.qualifiers["Name"][0]):\n+                protein_product = feature.qualifiers["Name"][0]\n+\n+    return protein_product\n+\n+\n+def is_uuid(name):\n+    return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+    # Normal RBS annotation types\n+    rbs_rbs = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+        )\n+    )\n+    rbs_sds = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "Shine_Dalgarno_sequence"},\n+            subfeatures=False,\n+        )\n+    )\n+    # Fraking apollo\n+    apollo_exons = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n+        )\n+    )\n+    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+    # These are more NCBI\'s style\n+    regulatory_elements = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "regulatory"},\n+            subfeatures=False,\n+        )\n+    )\n+    rbs_regulatory = list(\n+        feature_lambda(\n+            regulatory_elements,\n+            feature_test_quals,\n+            {"regulatory_class": ["ribosome_binding_site"]},\n+            subfeatures=False,\n+        )\n+    
)\n+    # Here\'s hoping you find just one ;)\n+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+    """\n+    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+    """\n+    name = record.id\n+    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+    if len(likely_parental_contig) == 1:\n+        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+    return name\n+\n+\n+def fsort(it):\n+    for i in sorted(it, key=lambda x: int(x.location.start)):\n+        yield i\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 gff3_extract_sequence.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_extract_sequence.py Mon Jun 05 02:43:58 2023 +0000
[
b'@@ -0,0 +1,335 @@\n+#!/usr/bin/env python\n+import sys\n+import argparse\n+import logging\n+import uuid\n+from CPT_GFFParser import gffParse, gffWrite\n+from Bio import SeqIO\n+from Bio.Seq import Seq\n+from Bio.SeqRecord import SeqRecord\n+from Bio.SeqFeature import FeatureLocation, CompoundLocation\n+from gff3 import feature_lambda, feature_test_type, get_id\n+\n+logging.basicConfig(level=logging.INFO)\n+log = logging.getLogger(__name__)\n+\n+\n+def main(fasta, gff3, feature_filter=None, nodesc=False):\n+    if feature_filter == "nice_cds":\n+        from gff2gb import gff3_to_genbank as cpt_Gff2Gbk\n+\n+        for rec in cpt_Gff2Gbk(gff3, fasta, 11):\n+            seenList = {}\n+            if rec.seq[0] == "?":\n+                sys.stderr.write("Error: No Fasta ID matches GFF ID \'" + rec.id + "\'\\n")\n+                exit(1)\n+            for feat in sorted(rec.features, key=lambda x: x.location.start):\n+                if feat.type != "CDS":\n+                    continue\n+\n+                ind = 0\n+                if (\n+                    str(\n+                        feat.qualifiers.get("locus_tag", get_id(feat)).replace(" ", "-")\n+                    )\n+                    in seenList.keys()\n+                ):\n+                    seenList[\n+                        str(\n+                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n+                                " ", "-"\n+                            )\n+                        )\n+                    ] += 1\n+                    ind = seenList[\n+                        str(\n+                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n+                                " ", "-"\n+                            )\n+                        )\n+                    ]\n+                else:\n+                    seenList[\n+                        str(\n+                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n+     
                           " ", "-"\n+                            )\n+                        )\n+                    ] = 1\n+                append = ""\n+                if ind != 0:\n+                    append = "_" + str(ind)\n+\n+                if nodesc:\n+                    description = ""\n+                else:\n+                    feat.qualifiers["ID"] = [feat._ID]\n+                    product = feat.qualifiers.get("product", "")\n+                    description = (\n+                        "{1} [Location={0.location};ID={0.qualifiers[ID][0]}]".format(\n+                            feat, product\n+                        )\n+                    )\n+                yield [\n+                    SeqRecord(\n+                        feat.extract(rec).seq,\n+                        id=str(\n+                            feat.qualifiers.get("locus_tag", get_id(feat)).replace(\n+                                " ", "-"\n+                            )\n+                        )\n+                        + append,\n+                        description=description,\n+                    )\n+                ]\n+\n+    elif feature_filter == "unique_cds":\n+        seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))\n+        seen_ids = {}\n+\n+        for rec in gffParse(gff3, base_dict=seq_dict):\n+            noMatch = True\n+            if "Alias" in rec.features[0].qualifiers.keys():\n+                lColumn = rec.features[0].qualifiers["Alias"][0]\n+            else:\n+                lColumn = ""\n+            for x in seq_dict:\n+                if x == rec.id or x == lColumn:\n+                    noMatch = False\n+            if noMatch:\n+                sys.stderr.write("Error: No Fasta ID matches GFF ID \'" + rec.id + "\'\\n")\n+                exit(1)\n+            newfeats = []\n+            for feat in sorted(\n+                feature_lambda(\n+                    rec.features, feature_test_type, {"type": "CDS"}, subfeatures=False\n+      
          ),\n+                key=lambda f: f.location.start,\n+            ):\n+                nid = rec.id + "__'..b'   }\n+                    else:\n+                        important_data = {\n+                            "Location": FeatureLocation(\n+                                feat.location.start + 1 + feat.phase,\n+                                feat.location.end,\n+                                feat.strand,\n+                            )\n+                        }\n+                    if "Name" in feat.qualifiers:\n+                        important_data["Name"] = feat.qualifiers.get("Name", [""])[0]\n+\n+                    description = "[{}]".format(\n+                        ";".join(\n+                            [\n+                                "{key}={value}".format(key=k, value=v)\n+                                for (k, v) in important_data.items()\n+                            ]\n+                        )\n+                    )\n+\n+                if isinstance(feat.location, CompoundLocation):\n+                    finSeq = ""\n+                    if feat.strand == -1:\n+                        for x in feat.location.parts:\n+                            finSeq += str(\n+                                (\n+                                    rec.seq[x.start : x.end - feat.phase]\n+                                ).reverse_complement()\n+                            )\n+                    else:\n+                        for x in feat.location.parts:\n+                            finSeq += str(rec.seq[x.start + feat.phase : x.end])\n+                    yield [\n+                        SeqRecord(\n+                            Seq(finSeq),\n+                            id=id.replace(" ", "-"),\n+                            description=description,\n+                        )\n+                    ]\n+\n+                else:\n+\n+                    if feat.strand == -1:\n+                        yield [\n+                      
      SeqRecord(\n+                                seq=Seq(\n+                                    str(\n+                                        rec.seq[\n+                                            feat.location.start : feat.location.end\n+                                            - feat.phase\n+                                        ]\n+                                    )\n+                                ).reverse_complement(),\n+                                id=id.replace(" ", "-"),\n+                                description=description,\n+                            )\n+                        ]\n+                    else:\n+                        yield [\n+                            SeqRecord(\n+                                # feat.extract(rec).seq,\n+                                seq=Seq(\n+                                    str(\n+                                        rec.seq[\n+                                            feat.location.start\n+                                            + feat.phase : feat.location.end\n+                                        ]\n+                                    )\n+                                ),\n+                                id=id.replace(" ", "-"),\n+                                description=description,\n+                            )\n+                        ]\n+\n+\n+if __name__ == "__main__":\n+    parser = argparse.ArgumentParser(\n+        description="Export corresponding sequence in genome from GFF3", epilog=""\n+    )\n+    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")\n+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")\n+    parser.add_argument(\n+        "--feature_filter", default=None, help="Filter for specific feature types"\n+    )\n+    parser.add_argument(\n+        "--nodesc", action="store_true", help="Strip description field off"\n+    )\n+    args = parser.parse_args()\n+    for seq in 
main(**vars(args)):\n+        # if isinstance(seq, list):\n+        #  for x in seq:\n+        #    print(type(x.seq))\n+        #    SeqIO.write(x, sys.stdout, "fasta")\n+        # else:\n+        SeqIO.write(seq, sys.stdout, "fasta")\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 gff3_extract_sequence.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_extract_sequence.xml Mon Jun 05 02:43:58 2023 +0000
[
@@ -0,0 +1,77 @@
+<tool id="edu.tamu.cpt.gff3.export_seq" name="GFF3 Feature Sequence Export" version="19.1.0.0">
+    <description>Export corresponding sequence in genome from GFF3</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Runs gff3_extract_sequence.py with the FASTA and GFF3 as positional
+         arguments and redirects the script's stdout into the "default" output.
+         The nodesc boolean is interpolated unquoted: when it is unset it expands
+         to nothing, whereas a quoted empty string would reach argparse as a
+         stray positional argument. -->
+    <command detect_errors="aggressive"><![CDATA[
+@GENOME_SELECTOR_PRE@
+
+python '$__tool_directory__/gff3_extract_sequence.py'
+@GENOME_SELECTOR@
+
+@INPUT_GFF@
+
+#if $feature_filter and $feature_filter is not None:
+--feature_filter '$feature_filter'
+#end if
+$nodesc
+> '$default']]></command>
+    <inputs>
+        <expand macro="genome_selector"/>
+        <expand macro="gff3_input"/>
+        <param label="Filter for specific feature types" name="feature_filter" type="text" help="Use 'nice_cds' if your features are coming from Apollo, however this will fail on non-Apollo data"/>
+        <!-- truevalue/falsevalue are the literal command-line text substituted for $nodesc -->
+        <param label="Remove description (use if blasting)" name="nodesc" type="boolean" truevalue="--nodesc" falsevalue=""/>
+    </inputs>
+    <outputs>
+        <data format="fasta" hidden="false" name="default"/>
+    </outputs>
+    <!-- Test outputs are matched by the name declared in <outputs> ("default");
+         boolean parameters are set with true/false rather than their truevalue. -->
+    <tests>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="T7_ExtSeqIn.fasta"/>
+            <param name="gff3_data" value="T7_ExtSeqIn.gff3"/>
+            <param name="nodesc" value="false"/>
+            <param name="feature_filter" value="CDS"/>
+            <output name="default" file="T7_ExtSeqOut.fasta"/>
+        </test>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="Miro_ExtSeqIn.fa"/>
+            <param name="gff3_data" value="Miro_ExtSeqIn.gff3"/>
+            <param name="nodesc" value="false"/>
+            <param name="feature_filter" value="CDS"/>
+            <output name="default" file="Miro_ExtSeqOut1.fa"/>
+        </test>
+        <test>
+            <param name="reference_genome_source" value="history"/>
+            <param name="genome_fasta" value="Miro_ExtSeqIn.fa"/>
+            <param name="gff3_data" value="Miro_ExtSeqIn.gff3"/>
+            <param name="feature_filter" value="CDS"/>
+            <param name="nodesc" value="true"/>
+            <output name="default" file="Miro_ExtSeqOut2.fa"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+From the FASTA sequence for a genome, extracts the *nucleotide* sequences for
+all CDSs in an input GFF3 and outputs them as a multi-FASTA formatted file.
+
+The filter for specific feature types was designed for data retrieved from Apollo.
+Using 'unique_cds' extracts the sequence for all CDS. Using 'nice_cds' will extract
+the sequence only for CDS features with a start codon (recommended). If a gene from
+Apollo has multiple CDSs, the tool will append a "_##" to the end of subsequent genes.
+
+      ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 73390562b5a2 -r 34b80e483fb8 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:43:58 2023 +0000
b
@@ -0,0 +1,84 @@
+<macros>
+    <!-- Package requirements for tools that expand "requirements"; callers can
+         append further <requirement> elements through the <yield/> slot. -->
+    <xml name="requirements">
+        <requirements>
+            <!-- NOTE(review): no version is pinned for progressivemauve; confirm the intended version and pin it -->
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+        '$xmfa'
+    </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+        '$sequences'
+    </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <!-- Reference genome chooser: either a FASTA dataset from the history or an
+         entry of the all_fasta data table. Read back by the tokens further down. -->
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+        '$gff3_data'
+    </token>
+    <!-- Path argument for the selected genome: the cached entry's path field, or
+         genomeref.fa (the symlink made by @GENOME_SELECTOR_PRE@) for history input. -->
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+    </token>
+    <!-- Setup step placed before the tool command: links the history FASTA to
+         genomeref.fa. The select value is compared via str() like the other tokens. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+    </token>
+    <!-- Same expansion as @INPUT_FASTA@. -->
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+    </token>
+</macros>
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/Miro_ExtSeqIn.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_ExtSeqIn.fa Mon Jun 05 02:43:58 2023 +0000
b
b'@@ -0,0 +1,2936 @@\n+>Miro\n+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n+CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n+AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n+GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n+TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n+TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n+TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n+GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n+CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n+CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n+AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n+AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n+TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n+TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n+AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n+CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n+GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n+TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n+TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n+TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n+CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n+TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n+TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n+TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n+AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n+ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n+ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n+GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n+GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n+ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n+GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n+ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n+AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n+TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n+AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n+GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n+TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n+ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n+ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n+TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n+CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n+GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n+TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n+ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n+GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n+GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n+ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n+ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n+
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n+ACGG'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/Miro_ExtSeqIn.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_ExtSeqIn.gff3 Mon Jun 05 02:43:58 2023 +0000
b
b'@@ -0,0 +1,827 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n+Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n+Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n+Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n+Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n+Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n+Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n+Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n+Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n+Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n+Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n+Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n+Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/Miro_ExtSeqOut1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_ExtSeqOut1.fa Mon Jun 05 02:43:58 2023 +0000
[
b'@@ -0,0 +1,3212 @@\n+>Miro_1.CDS [Location=[1:899](-);Name=Miro_1]\n+TGGTCAAAATTCTCGACGAAGTACAACAGAAAGCTATCTATAATGGATGGTTGACGGGGG\n+CAAGTAAAACAGCACTGGCCCAACAGTTCAACGTAAGCGCCCGAACCATTGGGCGCGTTA\n+TCAACCGCAAGTTAGCGGAATTCCCTAAGAAAAGGGATGTAAAACAAAAGCCTAAAGTAT\n+CAGACACTGTACCGCGTATGATCGGATCTGAATCGTTCATTACGGTTGTATACGAAGGGC\n+GCGTTTTCATGGCGGGTGAAACACATCCGAACTTTAAGAAAGCGCATGAAATGCTGAAAG\n+CTGGTGATGTCAAAGGTGCTGTAACTTGTTTGGATACTCAAGAAGCGATCCGAACCTATA\n+GCAAAGGCAACATTAAAATCATTGGTCATCAGCTTCTATATAAAGATGTAGTGTTTGATT\n+CTGATATCACTCAACGAATCATTCGCGAAATGTATAACGATCGTCCGTATGAACATCTGG\n+TGAACTTCTTTGAACGGTTGATGCGAAACCCTTCACGCGATGCTGTATACCAGCTTTATG\n+GGTTCCTTGTACATAATGACATTGAACTGACCGATGATGGTTGTTTCCTTGCCTGGAAAC\n+GTGTGCGCGATAACTATAAAGATCTCGCTACTGGCAAATTTGATAATAGTCCTGGTGTGA\n+CTGTCTCCATGCCTCGAAACATGGTAGACGAAGATAAAACGCGCACCTGTTCTACTGGCC\n+TACACGTTGCGGCTAAATCATACCTTCCACACTACGGGGGCGGTGTTGGTAGAGTCATTC\n+AGGTAAAGGTGGATCCTGCTGACGTGGTAGCGATCCCAGTGGATTACAATAACGCAAAAA\n+TGCGTGTATGCCGTTATAAAGTCATGATTGATGTTACATATGGTTTTAGCCATTACTAA\n+>Miro_2.CDS 
[Location=[900:3160](-);Name=Miro_2]\n+TGAAATTACGCGAAGATACACCGCAAACCATCTACCGTGTTGGTACAGATACCAGCAAGG\n+GACGAAACAAACTATCTCTACGCGCAAACAAGAAAGCGTATAAGATCCTTTCTTCGACAG\n+TATACAAATACAAGATCCGCGCAATCATTCGCGAACTGTCTTGTAATGCGATTGATGGAC\n+ACAAAGAAGCGGGAAATCAGAACCCGTTTGATGTCCAGTTGCCAACTGCTGTTGATCCTC\n+GTTTTGTTATTCGTGACTACGGGATCGGTATGTCTCCTGATTTCGTTAGTGATGCGTTTA\n+CCGTTTACTTCGAATCAACTAAAAATGATTCAAACGACCTGATCGGTTCTATGGGTCTGG\n+GTTGCAAATCTCCGCTTTGCTATTCCGATGCTTTCACGGTGGAATCCGTAAAAGACGGTA\n+TCAAATGCGGTTACACAATCTATATGGATGATGGGGAACCTTTCTGCGATCCTCTGTATG\n+AGATTGAAAGCGATGAACCTAACGGGGTTACTATCACTGTTCCGGTTAAGGTTGAAGATA\n+TCAAAGAATGGGAAAACGAAGCAGCAAGGGTATACGAATCATTTACTGATATTCGTCCTA\n+ACTTTGTTGGTGCTTCTATTCTCAAAATCAACTATCAGCCGAAAGAAGCAACCAACGATA\n+GCGGGGTGATCCGTCATAAATCAGCATACACTAGCGGTGTATATGCTCGCATGGGTAACA\n+TCATTTATCCTCTGGATAAAGATTTGTATGATACCTCGATGTTCTATTGCTATACAGAAA\n+GCCAGTATACGTATATTATTGACTTCCCGATCGGTGAACTTGATTTCATGCCTTCTCGCG\n+AAGAGTTGAGTATGGATAAAATGACAGTAGGGATTGTTAAAGAACGTCTGAAACAAATCA\n+GCCGGGTATATTTCAATCGAGTTAAATCAGAATTCGATAAGTTACAAACCGTGCGTGATA\n+AACTGACGTGGTTCCATTCTCTGCCTTCAATGGTACAAAACTTTGTTGGTAAAGATGCTA\n+ATTTTCGCATCAATGGTGATTCTATTGGCTGGATCCATAGCGAACTAGTCAAACCAAACA\n+AATATAATGATGATTATGTTGCTGGGTATTGGGCGAACGAATACGACGGTAAAGATGCAT\n+GGTATCAGGTAACTGGTAGCGGAGGCCGCTGGAGCAAGTATAAGCCAGAAACAACCAAAC\n+GCCAGGACATCACACGCATTTATTATCCGTGGAAACAGAAGAAACTCATTCTGTTAAAAG\n+TGGATACCAACACGGTTAAACCGTATATCGTCGGGTATGCTAAAATGCATAACCTAAGCC\n+GTGTTAGCTTTGTTGCATATTACGATAGTGATTCAAAGCGTGAGATTGTTAATGATATCG\n+TTCGGAAAGGTCATTTTGATGAATCGGAAATTGTGTATCTCCGTACCAGTGAAATGACAA\n+AAGAAAAAGAGATCTATGATGCAGATCGCGAAAAATCAAAAGCGCTATATGCACCGAAAA\n+ATTCAGAACCGCGCCCAAAAACCCCGACCGTATATCGTTATGAGTTGGATAGTAACGGCA\n+ATCTGGCAAAAACTTCTCTGTTTATGACAAAATCAGAGTTTTTATCACTGGATAAAGCCC\n+CAGGTGTTCGGTTGTATGGTATTGATGAATATAGCCGCTTAGATGGTGAAAATTCCGGGT\n+TGTCTATGGATTCAGCAATGAAAGAATCTACTTTATCGCGTATCATGCGCCATACTGGGA\n+TCCCTGTTGTGTTTGCTATACGTAACAGCCTCTGGAAGTGGATCCCGGATTCAAATCTGG\n+TATGCTTTGATGATATGCTGTGCAAGCAGTATGTTAAATCTGAAAAGGCATTGAAAGATA\n+ATTGCTTG
CCTGGATGGATTGGGAAGGATCACTCAACAGAAACAGACGCATTGCATAGCC\n+GTTTCGGTGTATCGCTTGATCGGATTGTGAAAAACCGATATAATGAAAAACTGTATAAGA\n+TTGTTGATACCCTCGAACGTATAGTGTGTCTGGAAGGGTATGAGAAAGATGGTAAAAATC\n+TTTCCAGGGTGCGTTGTCCGATTCTGCGTGAGTCCGTAGCATCAATGCGCGTTAAACGCA\n+GCAATATGAAAAAACGTGTTGATCAGGCATGGGAAATCTTTAAATCACTAAACCCGTTAC\n+TGGCTTCACTTGTTGAACATAGCGATTCTTATTCAATTCGTCCGATTTACAACAACGAAA\n+AGAATCTCAGTGAGTTTAAGAAATTAATACGGTGGAAATAA\n+>Miro_3.CDS [Location=[3172:3407](-);Name=Miro_3]\n+TGACACGTTATAATGCGCCTAAGTTGGGAAAATATCTGACCATATTCGGATTTTGTGCGT\n+TCTTTTCTGTCATTATCGGGGCTATCGTGTGGGGTATCCTTGATATGAAGAAACAGCAGG\n+TCGAGGAAGAAAAATTGGTAAAATTCCTTGACACCTATTGTGAAGTAGTAGAATATGGTC\n+TGAACAAAAAGCCAACGAAGTACTCTTGTGACCAAGTTATTTTTAATGTTAAGTGA\n+>Miro_4.CDS [Location=[3412:3965](-);Name=Miro_4]\n+TGTTGAAAACTCGTAGTGCAATGGAAGTAACCCACTGGGCGCGTACCATGATGGAAAAAC\n+ACGGTTTGATCAGTAATGGTTGGACATTCCGGATCAATGGACGCATTACAAAAACTCTGG\n+GCCGTTGCAGTTATACCAAAAAACTGATCGAACTGTCTGGTCGTCATGTTGCGGAAGATA\n+TCTACGAGGATATTTTAGACACTCTTTTACACGAAATCGCTCATGCTCTTGTCGGGCGCG\n+GTTATGAACATGGTAAAGTGTG'..b'GAATACACGA\n+ATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCGACACGTG\n+ATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAACTACTATT\n+CTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATGAAGATAA\n+ACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGA\n+>Miro_268.CDS [Location=[172795:173063](-);Name=Miro_268]\n+TGACGTTCGATGAGGCTATGAGAGCCGCTAAGAGAGGTTGTAAGGTTGCGCCAATAGGTA\n+GGCCATTCTTCGTGTATTATCACGACAGCGCTGGATACAGGAGGGTTACGTTGGGGGTTT\n+CGGCAAGGAATGAAGATTATGTTCCGGATCCGTGGGACAAATTGAAAGTTTGGGATATTT\n+ACGAGAAAAATAGCCCGGTGATGAACTTCTTTAAAGCAATCAAACGGGCTATTGGAAGAT\n+CACACAATTTAACACCAGCATTACGTTAA\n+>Miro_269.CDS 
[Location=[173061:173350](-);Name=Miro_269]\n+TGACATACGAAGATATGCTTAATGAAGTGTGCGCGGGTAAAACCGCGTACCGCACTACCA\n+ATCCTGATATGATTGTGTTTCGTGAGGGGGATACAATCATCAGACGTACACACCGCAAGT\n+GTGAAGTAAACCAGGTATTCATTGCCACGGTAGAAGAACAAAAGGCTACTGATTGGGATA\n+TCGTCATTGACGAAGAAAACCACGATGAGCTAGATCATCCGATATTCATTTTGAGTGAAA\n+CTATCGGAATTCGACCATTCATGCATAGAAATTTTTGGGGGAATCCATGA\n+>Miro_270.CDS [Location=[173351:174147](-);Name=Miro_270]\n+TGACTAACGCAACCGTAGTTACTACCACTTACTTCATTTCTTCTGGTCGTAAAAATGCAA\n+TGTATACCTTGCGTGTTGAGCGTAGAGGCGGCGGTTATACTTCTGATAACTATATCTGCA\n+ACTTATCTACCGATCCGGAGAAAGCGGAAGCGAAAGCGCGTGAGTATTTCGACCGTGTTT\n+CAGCCCGTTTAACTGAAACTGATACTTTCAAAATGATATTCCAGGGCTTTGCAGATTTCG\n+ATCTGTTCGAACGTCGCGGTAAACTGTCAGTGTTCGATACTGAAAAACTGGAACTGCTGG\n+AAAAAGGGATCATGCCGATTGGCAAGCGTAAAGGCGAAGTAATTGCAGAAATGCCAATGT\n+TTACTGTTCTTTGGTGGGCTGACCAGTCTAAAGAAGACAACAAAAACAGCCCTGTATTCG\n+ATGCTGTGTGCGCTTACTGCATGGGTGTTGCACTGGAAAAGGATTATATCGCCAAACGCG\n+AAGAAATCCGCGAACAGTGGGAACAAGAGCGCCAGGAACGCATTTCTAAAGCTAACCACA\n+TCGGTGAAATCAAACAACGTCTGGAAATGACCGGAACCGTTGAGAAAGTGATTTCACTGG\n+GATATACTCAAGTTTCTTACTACACTTCTGTAGAAAGATTCATGACCAAAATTAATGTTG\n+ATGGTAATGTTGTTGTTTACTTCGGTAATAACATTGCAAACGAAGGTGATGAAATCACTT\n+TCAAAGCAACCGTTAAAGAGCACGGAGAATACAAAGACGTTAAACAAACCATCGTTCAAC\n+GTGTAAAGGTTTTATAA\n+>Miro_271.CDS [Location=[174229:174455](-);Name=Miro_271]\n+TGAGTATACAAATTCTGTTCAGTAAAGAAGATCCATTAGGTACGGAATCATCGGTTCAGC\n+GCTTATTCTGGCATACAGATTATATAAAAAATCGATGTCGTGGCCCAAACAACACATGGG\n+GAAGCAACAACGATATTGACAAAGCAAAAATTTATCCTCGCGGAAAGGCGTACTGGAAAA\n+TTTTTGATATTCATCTTCATAAAACAATGAAAGATTCGAATGAATGA\n+>Miro_272.CDS [Location=[174453:174784](-);Name=Miro_272]\n+TGAAAACCGCTTTGATCATTCTTGAAGAAATTCCTGAAAATACAACGCTTTACAAAGTCG\n+AAACTGACGATAGCGAAGTAATTGAGATCCTCAAAACCGCCCACGGGCATTATGTAAACG\n+GTTCCGGCAACACCAAAGAACAGGATTATGCTGTTGATGTTGTTAACCTCATGCTGGGGC\n+CGAATACCGATGATAACCTGAAATGGGCGCGTGAATCCAACATTCCTGAAAAGTATGTTG\n+GTATGTTCTATCAGTGTAACATCGATAGCAAATCACCGTTTGAACCAGAAAAGAAAATCG\n+ATCTGATTGTTCGCACTGGGTTCTTCTTATGA\n+>Miro_273.CDS 
[Location=[174760:174893](-);Name=Miro_273]\n+TGAATAATTTGGTAGCGAAACATGATTTCAACCGCGCTTCTACTCACCGCGATCGCAAGC\n+GAGCATTTAAAGAAGCAAAACGTAAACAGAAACACAAAGGTAAGTGTGATGAAAACCGCT\n+TTGATCATTCTTGA\n+>Miro_274.CDS [Location=[174939:175186](-);Name=Miro_274]\n+TGAAACTGTTTAATAAAATTCGTTCTGTAGATGAAATCGTAGCAACTTTCGACAAAACCC\n+TGTCTGAACTGGAAGCGCGTATTGCTCACGATAATGAGCAGGTTGCACAGGTTGCAGCAG\n+ATCGTCAAGCAGCAGAAGAAGAACATCAGCGTAAGCTGGCGGAACTGGCTTCAAAAGAAA\n+GCGATCATACTGCAAGTGCAACCCGCGCCGGACGTATCGCAGACAAGATCCGCAAATTGC\n+TGGATTAA\n+>Miro_275.CDS [Location=[175224:175660](-);Name=Miro_275]\n+TGAAAAGAATTCTGACAAGACTTGACATTCACAAATATGCAATTAAAATTGGTGACATGA\n+AACGCGGAACCTTCAAGAAATTCGAAGGCGTGGATCTGAAAGAAAACGGGTTTTACCTGG\n+TAGTTGATGATAAAACAGGGCGCGTAGTATTCCGTTTCTACGTAGCACCAAATCAACGCC\n+GTAAAACTGGTGCTAGTCGTACTCTGTTACGTATCAAGCGCCGCGAACACGAACCGTCTG\n+CGATGGTTGCGATGCTGGAAAGTGCTTCTCTGTATTATGTAGCACATGACAAAATCAAAA\n+ACCTGTTCGGTTTCCCGAACCTCAACGGATCAACGTTCGAAAGTGCTATTCAAATGCGTT\n+ATGGTGCTGGTGAAAACTACACCGAGTGGAACCGCGCACTGAATGACATTTATGATTTTG\n+AATTTCAACCATATTGA\n+>Miro_276.CDS [Location=[175636:176009](-);Name=Miro_276]\n+TGAAATTTAAAAACATGCCTTCGGCTAACGAAACTCGCGCTAGTGGTGAATCTATTCTGA\n+AAACTGCCCGTGAAATGTGGGCCGACGACCGGATAGAATCCGGGAAAGGTTACGTATTTC\n+GTAAAGGATTTGGTGCATTTTATGTTGCAACCACCTTTATAGTAGGGTATAGTTGTTTAG\n+GAATCGCGTGGATGCAACGATATCTTGAACGTGCTGTTCTTTTTGTGATACGTTTTCTGT\n+TTACATTGATCAGCGTAGTGATTAGCATCCCATTTCTGAGCATCGCCTTGTTTGTTAACG\n+TAAAAACAGGCGAAATTACAAAATTAGGCAAACTGGTATCTAAAAAATATGAAAAGAATT\n+CTGACAAGACTTGA\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/Miro_ExtSeqOut2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_ExtSeqOut2.fa Mon Jun 05 02:43:58 2023 +0000
b
b'@@ -0,0 +1,3212 @@\n+>Miro_1.CDS\n+TGGTCAAAATTCTCGACGAAGTACAACAGAAAGCTATCTATAATGGATGGTTGACGGGGG\n+CAAGTAAAACAGCACTGGCCCAACAGTTCAACGTAAGCGCCCGAACCATTGGGCGCGTTA\n+TCAACCGCAAGTTAGCGGAATTCCCTAAGAAAAGGGATGTAAAACAAAAGCCTAAAGTAT\n+CAGACACTGTACCGCGTATGATCGGATCTGAATCGTTCATTACGGTTGTATACGAAGGGC\n+GCGTTTTCATGGCGGGTGAAACACATCCGAACTTTAAGAAAGCGCATGAAATGCTGAAAG\n+CTGGTGATGTCAAAGGTGCTGTAACTTGTTTGGATACTCAAGAAGCGATCCGAACCTATA\n+GCAAAGGCAACATTAAAATCATTGGTCATCAGCTTCTATATAAAGATGTAGTGTTTGATT\n+CTGATATCACTCAACGAATCATTCGCGAAATGTATAACGATCGTCCGTATGAACATCTGG\n+TGAACTTCTTTGAACGGTTGATGCGAAACCCTTCACGCGATGCTGTATACCAGCTTTATG\n+GGTTCCTTGTACATAATGACATTGAACTGACCGATGATGGTTGTTTCCTTGCCTGGAAAC\n+GTGTGCGCGATAACTATAAAGATCTCGCTACTGGCAAATTTGATAATAGTCCTGGTGTGA\n+CTGTCTCCATGCCTCGAAACATGGTAGACGAAGATAAAACGCGCACCTGTTCTACTGGCC\n+TACACGTTGCGGCTAAATCATACCTTCCACACTACGGGGGCGGTGTTGGTAGAGTCATTC\n+AGGTAAAGGTGGATCCTGCTGACGTGGTAGCGATCCCAGTGGATTACAATAACGCAAAAA\n+TGCGTGTATGCCGTTATAAAGTCATGATTGATGTTACATATGGTTTTAGCCATTACTAA\n+>Miro_2.CDS\n+TGAAATTACGCGAAGATACACCGCAAACCATCTACCGTGTTGGTACAGATACCAGCAAGG\n+GACGAAACAAACTATCTCTACGCGCAAACAAGAAAGCGTATAAGATCCTTTCTTCGACAG\n+TATACAAATACAAGATCCGCGCAATCATTCGCGAACTGTCTTGTAATGCGATTGATGGAC\n+ACAAAGAAGCGGGAAATCAGAACCCGTTTGATGTCCAGTTGCCAACTGCTGTTGATCCTC\n+GTTTTGTTATTCGTGACTACGGGATCGGTATGTCTCCTGATTTCGTTAGTGATGCGTTTA\n+CCGTTTACTTCGAATCAACTAAAAATGATTCAAACGACCTGATCGGTTCTATGGGTCTGG\n+GTTGCAAATCTCCGCTTTGCTATTCCGATGCTTTCACGGTGGAATCCGTAAAAGACGGTA\n+TCAAATGCGGTTACACAATCTATATGGATGATGGGGAACCTTTCTGCGATCCTCTGTATG\n+AGATTGAAAGCGATGAACCTAACGGGGTTACTATCACTGTTCCGGTTAAGGTTGAAGATA\n+TCAAAGAATGGGAAAACGAAGCAGCAAGGGTATACGAATCATTTACTGATATTCGTCCTA\n+ACTTTGTTGGTGCTTCTATTCTCAAAATCAACTATCAGCCGAAAGAAGCAACCAACGATA\n+GCGGGGTGATCCGTCATAAATCAGCATACACTAGCGGTGTATATGCTCGCATGGGTAACA\n+TCATTTATCCTCTGGATAAAGATTTGTATGATACCTCGATGTTCTATTGCTATACAGAAA\n+GCCAGTATACGTATATTATTGACTTCCCGATCGGTGAACTTGATTTCATGCCTTCTCGCG\n+AAGAGTTGAGTATGGATAAAATGACAGTAGGGATTGTTAAAGAACGTCTGAAACAAATCA\n+GCCGGGTATATTTCAATCGAGTTAAATCAGAATTCGATAAGTTACAAACCGTGCGTGATA
\n+AACTGACGTGGTTCCATTCTCTGCCTTCAATGGTACAAAACTTTGTTGGTAAAGATGCTA\n+ATTTTCGCATCAATGGTGATTCTATTGGCTGGATCCATAGCGAACTAGTCAAACCAAACA\n+AATATAATGATGATTATGTTGCTGGGTATTGGGCGAACGAATACGACGGTAAAGATGCAT\n+GGTATCAGGTAACTGGTAGCGGAGGCCGCTGGAGCAAGTATAAGCCAGAAACAACCAAAC\n+GCCAGGACATCACACGCATTTATTATCCGTGGAAACAGAAGAAACTCATTCTGTTAAAAG\n+TGGATACCAACACGGTTAAACCGTATATCGTCGGGTATGCTAAAATGCATAACCTAAGCC\n+GTGTTAGCTTTGTTGCATATTACGATAGTGATTCAAAGCGTGAGATTGTTAATGATATCG\n+TTCGGAAAGGTCATTTTGATGAATCGGAAATTGTGTATCTCCGTACCAGTGAAATGACAA\n+AAGAAAAAGAGATCTATGATGCAGATCGCGAAAAATCAAAAGCGCTATATGCACCGAAAA\n+ATTCAGAACCGCGCCCAAAAACCCCGACCGTATATCGTTATGAGTTGGATAGTAACGGCA\n+ATCTGGCAAAAACTTCTCTGTTTATGACAAAATCAGAGTTTTTATCACTGGATAAAGCCC\n+CAGGTGTTCGGTTGTATGGTATTGATGAATATAGCCGCTTAGATGGTGAAAATTCCGGGT\n+TGTCTATGGATTCAGCAATGAAAGAATCTACTTTATCGCGTATCATGCGCCATACTGGGA\n+TCCCTGTTGTGTTTGCTATACGTAACAGCCTCTGGAAGTGGATCCCGGATTCAAATCTGG\n+TATGCTTTGATGATATGCTGTGCAAGCAGTATGTTAAATCTGAAAAGGCATTGAAAGATA\n+ATTGCTTGCCTGGATGGATTGGGAAGGATCACTCAACAGAAACAGACGCATTGCATAGCC\n+GTTTCGGTGTATCGCTTGATCGGATTGTGAAAAACCGATATAATGAAAAACTGTATAAGA\n+TTGTTGATACCCTCGAACGTATAGTGTGTCTGGAAGGGTATGAGAAAGATGGTAAAAATC\n+TTTCCAGGGTGCGTTGTCCGATTCTGCGTGAGTCCGTAGCATCAATGCGCGTTAAACGCA\n+GCAATATGAAAAAACGTGTTGATCAGGCATGGGAAATCTTTAAATCACTAAACCCGTTAC\n+TGGCTTCACTTGTTGAACATAGCGATTCTTATTCAATTCGTCCGATTTACAACAACGAAA\n+AGAATCTCAGTGAGTTTAAGAAATTAATACGGTGGAAATAA\n+>Miro_3.CDS\n+TGACACGTTATAATGCGCCTAAGTTGGGAAAATATCTGACCATATTCGGATTTTGTGCGT\n+TCTTTTCTGTCATTATCGGGGCTATCGTGTGGGGTATCCTTGATATGAAGAAACAGCAGG\n+TCGAGGAAGAAAAATTGGTAAAATTCCTTGACACCTATTGTGAAGTAGTAGAATATGGTC\n+TGAACAAAAAGCCAACGAAGTACTCTTGTGACCAAGTTATTTTTAATGTTAAGTGA\n+>Miro_4.CDS\n+TGTTGAAAACTCGTAGTGCAATGGAAGTAACCCACTGGGCGCGTACCATGATGGAAAAAC\n+ACGGTTTGATCAGTAATGGTTGGACATTCCGGATCAATGGACGCATTACAAAAACTCTGG\n+GCCGTTGCAGTTATACCAAAAAACTGATCGAACTGTCTGGTCGTCATGTTGCGGAAGATA\n+TCTACGAGGATATTTTAGACACTCTTTTACACGAAATCGCTCATGCTCTTGTCGGGCGCG\n+GTTATGAACATGGTAAAGTGTGGCAAGCTATGGCCTTGCGCCTGGGTGCGAAACCTTCAC\n+CAAGTAAAACAACTACAAAGGATGCTAATCTAGTTGATA
AGAATGAGATCCTTTATTGTT\n+TGTTCATGAAAGATTATCAAGGCCGTGAAGTCTATCAGGCTAAAG'..b'CTAGCCTTATTTGATCGGGTAATATCGTTTGTGA\n+TGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTTATGTTTC\n+TTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGTGCCCTTG\n+TAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCCGCGTTGG\n+AACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTGGCATTCA\n+GACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTACCATCCC\n+AAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAATACACGA\n+ATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCGACACGTG\n+ATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAACTACTATT\n+CTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATGAAGATAA\n+ACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGA\n+>Miro_268.CDS\n+TGACGTTCGATGAGGCTATGAGAGCCGCTAAGAGAGGTTGTAAGGTTGCGCCAATAGGTA\n+GGCCATTCTTCGTGTATTATCACGACAGCGCTGGATACAGGAGGGTTACGTTGGGGGTTT\n+CGGCAAGGAATGAAGATTATGTTCCGGATCCGTGGGACAAATTGAAAGTTTGGGATATTT\n+ACGAGAAAAATAGCCCGGTGATGAACTTCTTTAAAGCAATCAAACGGGCTATTGGAAGAT\n+CACACAATTTAACACCAGCATTACGTTAA\n+>Miro_269.CDS\n+TGACATACGAAGATATGCTTAATGAAGTGTGCGCGGGTAAAACCGCGTACCGCACTACCA\n+ATCCTGATATGATTGTGTTTCGTGAGGGGGATACAATCATCAGACGTACACACCGCAAGT\n+GTGAAGTAAACCAGGTATTCATTGCCACGGTAGAAGAACAAAAGGCTACTGATTGGGATA\n+TCGTCATTGACGAAGAAAACCACGATGAGCTAGATCATCCGATATTCATTTTGAGTGAAA\n+CTATCGGAATTCGACCATTCATGCATAGAAATTTTTGGGGGAATCCATGA\n+>Miro_270.CDS\n+TGACTAACGCAACCGTAGTTACTACCACTTACTTCATTTCTTCTGGTCGTAAAAATGCAA\n+TGTATACCTTGCGTGTTGAGCGTAGAGGCGGCGGTTATACTTCTGATAACTATATCTGCA\n+ACTTATCTACCGATCCGGAGAAAGCGGAAGCGAAAGCGCGTGAGTATTTCGACCGTGTTT\n+CAGCCCGTTTAACTGAAACTGATACTTTCAAAATGATATTCCAGGGCTTTGCAGATTTCG\n+ATCTGTTCGAACGTCGCGGTAAACTGTCAGTGTTCGATACTGAAAAACTGGAACTGCTGG\n+AAAAAGGGATCATGCCGATTGGCAAGCGTAAAGGCGAAGTAATTGCAGAAATGCCAATGT\n+TTACTGTTCTTTGGTGGGCTGACCAGTCTAAAGAAGACAACAAAAACAGCCCTGTATTCG\n+ATGCTGTGTGCGCTTACTGCATGGGTGTTGCACTGGAAAAGGATTATATCGCCAAACGCG\n+AAGAAATCCGCGAACAGTGGGAACAAGAGCGCCAGGAACGCATTTCTAAAGCTAACCACA\n+TCGGTGAAATCAAACAACGTCTGGAAATGACCGGAACCGTTGAGAAAGTGATTTCACTGG\n+GATAT
ACTCAAGTTTCTTACTACACTTCTGTAGAAAGATTCATGACCAAAATTAATGTTG\n+ATGGTAATGTTGTTGTTTACTTCGGTAATAACATTGCAAACGAAGGTGATGAAATCACTT\n+TCAAAGCAACCGTTAAAGAGCACGGAGAATACAAAGACGTTAAACAAACCATCGTTCAAC\n+GTGTAAAGGTTTTATAA\n+>Miro_271.CDS\n+TGAGTATACAAATTCTGTTCAGTAAAGAAGATCCATTAGGTACGGAATCATCGGTTCAGC\n+GCTTATTCTGGCATACAGATTATATAAAAAATCGATGTCGTGGCCCAAACAACACATGGG\n+GAAGCAACAACGATATTGACAAAGCAAAAATTTATCCTCGCGGAAAGGCGTACTGGAAAA\n+TTTTTGATATTCATCTTCATAAAACAATGAAAGATTCGAATGAATGA\n+>Miro_272.CDS\n+TGAAAACCGCTTTGATCATTCTTGAAGAAATTCCTGAAAATACAACGCTTTACAAAGTCG\n+AAACTGACGATAGCGAAGTAATTGAGATCCTCAAAACCGCCCACGGGCATTATGTAAACG\n+GTTCCGGCAACACCAAAGAACAGGATTATGCTGTTGATGTTGTTAACCTCATGCTGGGGC\n+CGAATACCGATGATAACCTGAAATGGGCGCGTGAATCCAACATTCCTGAAAAGTATGTTG\n+GTATGTTCTATCAGTGTAACATCGATAGCAAATCACCGTTTGAACCAGAAAAGAAAATCG\n+ATCTGATTGTTCGCACTGGGTTCTTCTTATGA\n+>Miro_273.CDS\n+TGAATAATTTGGTAGCGAAACATGATTTCAACCGCGCTTCTACTCACCGCGATCGCAAGC\n+GAGCATTTAAAGAAGCAAAACGTAAACAGAAACACAAAGGTAAGTGTGATGAAAACCGCT\n+TTGATCATTCTTGA\n+>Miro_274.CDS\n+TGAAACTGTTTAATAAAATTCGTTCTGTAGATGAAATCGTAGCAACTTTCGACAAAACCC\n+TGTCTGAACTGGAAGCGCGTATTGCTCACGATAATGAGCAGGTTGCACAGGTTGCAGCAG\n+ATCGTCAAGCAGCAGAAGAAGAACATCAGCGTAAGCTGGCGGAACTGGCTTCAAAAGAAA\n+GCGATCATACTGCAAGTGCAACCCGCGCCGGACGTATCGCAGACAAGATCCGCAAATTGC\n+TGGATTAA\n+>Miro_275.CDS\n+TGAAAAGAATTCTGACAAGACTTGACATTCACAAATATGCAATTAAAATTGGTGACATGA\n+AACGCGGAACCTTCAAGAAATTCGAAGGCGTGGATCTGAAAGAAAACGGGTTTTACCTGG\n+TAGTTGATGATAAAACAGGGCGCGTAGTATTCCGTTTCTACGTAGCACCAAATCAACGCC\n+GTAAAACTGGTGCTAGTCGTACTCTGTTACGTATCAAGCGCCGCGAACACGAACCGTCTG\n+CGATGGTTGCGATGCTGGAAAGTGCTTCTCTGTATTATGTAGCACATGACAAAATCAAAA\n+ACCTGTTCGGTTTCCCGAACCTCAACGGATCAACGTTCGAAAGTGCTATTCAAATGCGTT\n+ATGGTGCTGGTGAAAACTACACCGAGTGGAACCGCGCACTGAATGACATTTATGATTTTG\n+AATTTCAACCATATTGA\n+>Miro_276.CDS\n+TGAAATTTAAAAACATGCCTTCGGCTAACGAAACTCGCGCTAGTGGTGAATCTATTCTGA\n+AAACTGCCCGTGAAATGTGGGCCGACGACCGGATAGAATCCGGGAAAGGTTACGTATTTC\n+GTAAAGGATTTGGTGCATTTTATGTTGCAACCACCTTTATAGTAGGGTATAGTTGTTTAG\n+GAATCGCGTGGATGCAACGATATCTTGAACGTGCTGTTCTTTTTGTGATACGTTT
TCTGT\n+TTACATTGATCAGCGTAGTGATTAGCATCCCATTTCTGAGCATCGCCTTGTTTGTTAACG\n+TAAAAACAGGCGAAATTACAAAATTAGGCAAACTGGTATCTAAAAAATATGAAAAGAATT\n+CTGACAAGACTTGA\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/T7_ExtSeqIn.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_ExtSeqIn.fasta Mon Jun 05 02:43:58 2023 +0000
b
b'@@ -0,0 +1,667 @@\n+>NC_001604\n+TCTCACAGTGTACGGACCTAAAGTTCCCCCATAGGGGGTACCTAAAGCCCAGCCAATCAC\n+CTAAAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGT\n+TTGTCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCTATCTGTTACAGTCTCCTAAA\n+GTATCCTCCTAAAGTCACCTCCTAACGTCCATCCTAAAGCCAACACCTAAAGCCTACACC\n+TAAAGACCCATCAAGTCAACGCCTATCTTAAAGTTTAAACATAAAGACCAGACCTAAAGA\n+CCAGACCTAAAGACACTACATAAAGACCAGACCTAAAGACGCCTTGTTGTTAGCCATAAA\n+GTGATAACCTTTAATCATTGTCTTTATTAATACAACTCACTATAAGGAGAGACAACTTAA\n+AGAGACTTAAAAGATTAATTTAAAATTTATCAAAAAGAGTATTGACTTAAAGTCTAACCT\n+ATAGGATACTTACAGCCATCGAGAGGGACACGGCGAATAGCCATCCCAATCGACACCGGG\n+GTCAACCGGATAAGTAGACAGCCTGATAAGTCGCACGAAAAACAGGTATTGACAACATGA\n+AGTAACATGCAGTAAGATACAAATCGCTAGGTAACACTAGCAGCGTCAACCGGGCGCACA\n+GTGCCTTCTAGGTGACTTAAGCGCACCACGGCACATAAGGTGAAACAAAACGGTTGACAA\n+CATGAAGTAAACACGGTACGATGTACCACATGAAACGACAGTGAGTCACCACACTGAAAG\n+GTGATGCGGTCTAACGAAACCTGACCTAAGACGCTCTTTAACAATCTGGTAAATAGCTCT\n+TGAGTGCATGACTAGCGGATAACTCAAGGGTATCGCAAGGTGCCCTTTATGATATTCACT\n+AATAACTGCACGAGGTAACACAAGATGGCTATGTCTAACATGACTTACAACAACGTTTTC\n+GACCACGCTTACGAAATGCTGAAAGAAAACATCCGTTATGATGACATCCGTGACACTGAT\n+GACCTGCACGATGCTATTCACATGGCTGCCGATAATGCAGTTCCGCACTACTACGCTGAC\n+ATCTTTAGCGTAATGGCAAGTGAGGGCATTGACCTTGAGTTCGAAGACTCTGGTCTGATG\n+CCTGACACCAAGGACGTAATCCGCATCCTGCAAGCGCGTATCTATGAGCAATTAACGATT\n+GACCTCTGGGAAGACGCAGAAGACTTGCTCAATGAATACTTGGAGGAAGTCGAGGAGTAC\n+GAGGAGGATGAAGAGTAATGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTG\n+TACTTTTCTATAGCGACATGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGC\n+TCAAAGAACTGTACGAAAACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGATAGACTC\n+AAGGTCGCTCCTAGCGAGTGGCCTTTATGATTATCACTTTACTTATGAGGGAGTAATGTA\n+TATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTGGGAA\n+GGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCATCAA\n+AGGGGCACTACGCAAATGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCA\n+ACCGTATGTACACCTGATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAA\n+CTCCGCATTAACCGCAAGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGATGG\n+CTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAGTGAG\n+AACTTGGCGAGA
GAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCATAGA\n+CGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAGCCAACACA\n+CTGAACGCTATCTCATAACGAACATAAAGGACACAATGCAATGAACATTACCGACATCAT\n+GAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAACTTGACAAGCGTCAAGGTAT\n+GCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGTGTGATGGCGAGCTAACCGA\n+ACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCTTGAAGTGTCTCACGGCTGA\n+CGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTGCTTATAGTCACCCGCTGCT\n+ACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATTCAGGCGCAGCCTATACCGC\n+ATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACGTCTACGATGTACAGCGCCA\n+CGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATTGCGAGCGTTTCAACAATGA\n+TGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTGATTGCAATTCGGATGAGCA\n+TGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTTGTAAACTAATCCGCAAGTT\n+CTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACATCATGTTCTCAAATGGAGA\n+CGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGAAAGACGGTGGCGCATTCAG\n+CATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCGCACGACAGAAAGAAATTGA\n+CCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAGAGGCACGCAGATTCAAACG\n+TCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCGAAAGAATGCTTGCTGCGTG\n+GCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAGCTGTAGATGTACTAGGAAG\n+AACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGGACTTTAAGGCGCTTGAGGA\n+ACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTATCGCTAATGGTCTTACGCT\n+CAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGATAGTCTTATCTTACAGGTCA\n+TCTGCGGGTGGCCTGAATAGGTACGATTTACTAACTGGAAGAGGCACTAAATGAACACGA\n+TTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGTTCAACACTC\n+TGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGCATGAGTCTT\n+ACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAGCTGGTGAGG\n+TTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGATGATTGCAC\n+GCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGACAGCCTTCC\n+AGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGACCACTCTGG\n+CTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAATCGGTCGGG\n+CCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGCACTTCAAGA\n+AAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAGCATTTATGC\n+AAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGTGGTCTTCG
T\n+GGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCATTGAGTCAA\n+'..b'TAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGT\n+GCCAACAACTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCT\n+GATGGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCA\n+GACAGTCGTTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAATTGGTAAATCACAAG\n+GAAAGACGTGTAGTCCACGGATGGACTCTCAAGGAGGTACAAGGTGCTATCATTAGACTT\n+TAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTGTAGCAGATGTTAGTGC\n+TCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTGCTGCTATCGCCTACAC\n+AGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACTGGAAGAAAGCCAATAA\n+GGAGTGATATGTATGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACT\n+GCGATGGCTCAGCGTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTC\n+TATAATGCTATTAACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCG\n+GATGTTCACATCTTAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGT\n+GATAACGGTCTTACGGATGATGATATTTACACATTACAGTGATATACTCAAGGCCACTAC\n+AGATAGTGGTCTTTATGGATGTCATTGTCTATACGAGATGCTCCTACGTGAAATCTGAAA\n+GTTAACGGGAGGCATTATGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGG\n+GATGCTATTCGGGTTAGGATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACA\n+GGAGGTACACAATGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAAT\n+CGATGCGGTATCTGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAG\n+GATTATTTCTGATTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGG\n+AACCTCCGATGGTCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGA\n+TGCTAAACGTATTCTCGCAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGA\n+TACTATTCGTGAACTGCAACGTAAGTAGGAAATCAAGTAAGGAGGCAATGTGTCTACTCA\n+ATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGGCGTTCCTATT\n+CGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTGACATGGCTAA\n+GGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTGGTATCGGTAA\n+GTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTCAGTTGAAGAT\n+ACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTATTAAGAACAT\n+CATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGCGTGACTCGGT\n+AATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGAAATCAGTAGG\n+TATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATGACGTTGAGAT\n+TCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGAC
TCTGGTTCAGGAGTT\n+CGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTACACCTCAGAC\n+AGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCATTATCTGGCC\n+TGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTCTTGCTCCTAT\n+GTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAACAGACCCAGT\n+GCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGGCTGGCTTTAC\n+GCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGCTGAGGCTTCG\n+TGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACCAGTGGCTTCC\n+GAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTGATGACCTGCA\n+TACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTCTGGTCATTGA\n+CCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACACTGAACGGTTA\n+CATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGACCCTTGAGTT\n+ACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGAGTAACTTCGG\n+TGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACAACTGTGCGAT\n+GGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCCTTGAGCCAGT\n+CATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACTACCAGTCCGC\n+TCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGATGACCCGTAT\n+CACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTGCGTTAGGCAT\n+TGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTGAAGTACTTGC\n+TGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATATCATTGAGAT\n+GTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTACGTCTTTCAT\n+TGAGTGGTGATTTATGCATTAGGACTGCATAGGGATGCACTATAGACCACGGATGGTCAG\n+TTCTTTAAGTTACTGAAAAGACACGATAAATTAATACGACTCACTATAGGGAGAGGAGGG\n+ACGAAAGGTTACTATATAGATACTGAATGAATACTTATAGAGTGCATAAAGTATGCATAA\n+TGGTGTACCTAGAGTGACCTCTAAGAATGGTGATTATATTGTATTAGTATCACCTTAACT\n+TAAGGACCAACATAAAGGGAGGAGACTCATGTTCCGCTTATTGTTGAACCTACTGCGGCA\n+TAGAGTCACCTACCGATTTCTTGTGGTACTTTGTGCTGCCCTTGGGTACGCATCTCTTAC\n+TGGAGACCTCAGTTCACTGGAGTCTGTCGTTTGCTCTATACTCACTTGTAGCGATTAGGG\n+TCTTCCTGACCGACTGATGGCTCACCGAGGGATTCAGCGGTATGATTGCATCACACCACT\n+TCATCCCTATAGAGTCAAGTCCTAAGGTATACCCATAAAGAGCCTCTAATGGTCTATCCT\n+AAGGTCTATACCTAAAGATAGGCCATCCTATCAGTGTCACCTAAAGAGGGTCTTAGAGAG\n+GGCCTATGGAGTTCCTATAGGGTCCTTTAAAATATACCATAAAAATCTGAGTGACTATCT\n+CACAGTGTACGGACCTAAAGTTCCCCCAT
AGGGGGTACCTAAAGCCCAGCCAATCACCTA\n+AAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGTTTG\n+TCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCT\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/T7_ExtSeqIn.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_ExtSeqIn.gff3 Mon Jun 05 02:43:58 2023 +0000
[
b"@@ -0,0 +1,171 @@\n+##gff-version 3\n+NC_001604\tGenBank\tcontig\t1\t39937\t.\t+\t1\tID=NC_001604;Dbxref=BioProject:PRJNA485481,taxon:10760;Name=NC_001604;Note=Enterobacteria phage T7%2C complete genome.,VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3],[4],and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443,474,and 388 to 424. [4] inserted a T at nucleotide 17511,increasing the total sequence to 39937 bp. This change,originally found in T3 DNA [8],revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG,changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. Features have been extracted from [1] unless otherwise noted. The sequence shown is that of the l strand,which corresponds to the sequence of all mRNAs of known functional significance. Early mRNAs are produced by three major promoters for E. coli RNA polymerase A1,A2,and A3,located near the left end of the DNA. A fourth major E. coli promoter,A0 (also called D),that would direct transcription leftward,and several minor E. coli promoters (see Table 6 in [1]) function in vitro but have no known in vivo function. Late mRNAs are produced by 15 promoters for T7 RNA polymerase distributed across the right-most 85%25 of the DNA,and named e.g. phi10,for the first gene downstream of the promoter. There are also two T7 promoters,phiOL and phiOR,associated with possible origins of replication at the left and right ends of T7 DNA. The 23 base-pair consensus sequence for T7 promoters stretches from -17 to +6,where the initiating nucleotide is at +1. T7 DNA also contains a 160 base-pair terminal repetition. The beginning and end of RNAs are determined by the promoters,by a terminator for E. 
coli RNA polymerase,TE,located at the end of the early region,a terminator for T7 RNA polymerase,Tphi,located just downstream of gene 10,and a series of RNase III cleavage sites. Early mRNAs made by E. coli RNA polymerase are listed in Features. The many RNAs predicted to be made by T7 RNA polymerase are not listed but can be deduced from the position of the transcription signals (see Tables 8 and 9 in [1]). Promoters are listed in Features by the known or predicted first nucleotide of the RNA,terminators by the last nucleotide of the RNA,and RNase III sites by the nucleotide 5' of the position of cleavage. Genes are numbered 0.3 to 19.5 in order of their left-to-right position on the genome. Proteins are named by the gene number,e.g.,the gene 1 protein,or by a functional name,e.g.,T7 RNA polymerase. There is now genetic or biochemical evidence that proteins are produced from at least 52 of the 56 T7 genes. Gene 4 produces two proteins,4A and 4B,by initiating translation at two different sites in the same reading frame. Gene 10 produces two proteins,10A and 10B,by frameshifting during translation. Genes 0.6 and 5.5 probably also make two proteins by translational frameshifting,the gene 5.5 frameshift producing a gene 5.5-5.7 fusion protein. COMPLETENESS: full length. ;comment1=VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3]%2C [4]%2C and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443%2C 474%2C and 388 to 424. [4] inserted a T at nucleotide 17511%2C increasing the total sequence to 39937 bp. This change%2C originally found in T3 DNA [8]%2C revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG%2C changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. 
Features have been extracted from [1"..b' However%2C in phage T7 the holin protein gp17.5 does not appear to be essential and gp17.5 mutants only show a minor delay in lysis. Other names: gp17.5%3B lysis protein;codon_start=1;product=type II holin;protein_id=NP_042006.1;transl_table=11;translation=length.67;\n+NC_001604\tGenBank\tgene\t36344\t36547\t.\t+\t1\tID=T7p53.gene;Alias=T7p53;Dbxref=GeneID:1261022;Name=T7p53;Note=gene 17.5;\n+NC_001604\tGenBank\tCDS\t36553\t36822\t.\t+\t1\tID=T7p54;Dbxref=GOA:P03693,UniProtKB/Swiss-Prot:P03693,GeneID:1261042;Name=T7p54;Note=involved in the packaging of genome monomers into a procapsid using head-to-tail concatemers of genomes. other names: DNA packaging protein A%3B DNA maturation protein A%3B terminase%2C small subunit;codon_start=1;product=DNA packaging protein%2C small subunit;protein_id=NP_042007.1;transl_table=11;translation=length.89;\n+NC_001604\tGenBank\tregulatory\t36836\t36836\t.\t+\t1\tID=GenBank:regulatory:NC_001604:36836:36836;Note=E. coli promoter E[6];regulatory_class=promoter;\n+NC_001604\tGenBank\tsequence_secondary_structure\t36856\t36856\t.\t+\t1\tID=GenBank:sequence_secondary_structure:NC_001604:36856:36856;Note=RNase III site R18.5;\n+NC_001604\tGenBank\tgene\t36553\t36822\t.\t+\t1\tID=T7p54.gene;Alias=T7p54;Dbxref=GeneID:1261042;Name=T7p54;Note=gene 18;\n+NC_001604\tGenBank\tCDS\t36917\t37348\t.\t+\t1\tID=T7p55;Dbxref=GOA:P03803,UniProtKB/Swiss-Prot:P03803,GeneID:1261067;Name=T7p55;Note=analog of phage lambda protein Rz%2C a cell lysis protein. Rz and gp18.5 share distant sequence similarity%2C similar function%2C and a similar genome neighborhood. In T7%2C gp18.5 interacts with gp18.7%2C a lambda RZ1-like lysis protein. 
Other names: gp18.5;codon_start=1;product=phage lambda Rz-like lysis protein;protein_id=NP_042008.1;transl_table=11;translation=length.143;\n+NC_001604\tGenBank\tgene\t36917\t37348\t.\t+\t1\tID=T7p55.gene;Alias=T7p55;Dbxref=GeneID:1261067;Name=T7p55;Note=gene 18.5;\n+NC_001604\tGenBank\tCDS\t37032\t37283\t.\t+\t1\tID=T7p56;Dbxref=UniProtKB/Swiss-Prot:P03788,GeneID:1261057;Name=T7p56;Note=in Enterobacteria phage T7%2C this protein interacts with gp18.5 and is expressed from the -1 frame of a gene completely overlapping gene 18.5. This suggests that it may be an analog of lambda lysis protein Rz1. Other names: gp18.7.;codon_start=1;product=phage lambda Rz1-like protein;protein_id=NP_042009.1;transl_table=11;translation=length.83;\n+NC_001604\tGenBank\tgene\t37032\t37283\t.\t+\t1\tID=T7p56.gene;Alias=T7p56;Dbxref=GeneID:1261057;Name=T7p56;Note=gene 18.7;\n+NC_001604\tGenBank\tCDS\t37370\t39130\t.\t+\t1\tID=T7p57;Dbxref=GOA:P03694,UniProtKB/Swiss-Prot:P03694,GeneID:1261062;Name=T7p57;Note=gene 19;codon_start=1;product=DNA maturation protein;protein_id=NP_042010.1;transl_table=11;translation=length.586;\n+NC_001604\tGenBank\tgene\t37370\t39130\t.\t+\t1\tID=T7p57.gene;Alias=T7p57;Dbxref=GeneID:1261062;Name=T7p57;Note=gene 19;\n+NC_001604\tGenBank\tCDS\t38016\t38273\t.\t+\t1\tID=T7p58;Dbxref=UniProtKB/Swiss-Prot:P03789,GeneID:1261064;Name=T7p58;Note=gene 19.2;codon_start=1;product=hypothetical protein;protein_id=NP_042011.1;transl_table=11;translation=length.85;\n+NC_001604\tGenBank\tgene\t38016\t38273\t.\t+\t1\tID=T7p58.gene;Alias=T7p58;Dbxref=GeneID:1261064;Name=T7p58;Note=gene 19.2;\n+NC_001604\tGenBank\tCDS\t38553\t38726\t.\t+\t1\tID=T7p59;Dbxref=UniProtKB/Swiss-Prot:P03790,GeneID:1261066;Name=T7p59;Note=gene 19.3;codon_start=1;product=hypothetical protein;protein_id=NP_042012.1;transl_table=11;translation=length.57;\n+NC_001604\tGenBank\tregulatory\t39229\t39229\t.\t+\t1\tID=GenBank:regulatory:NC_001604:39229:39229;Note=T7 promoter 
phiOR;regulatory_class=promoter;\n+NC_001604\tGenBank\tgene\t38553\t38726\t.\t+\t1\tID=T7p59.gene;Alias=T7p59;Dbxref=GeneID:1261066;Name=T7p59;Note=gene 19.3;\n+NC_001604\tGenBank\tCDS\t39389\t39538\t.\t+\t1\tID=T7p60;Dbxref=UniProtKB/Swiss-Prot:P03804,GeneID:1261068;Name=T7p60;Note=gene 19.5;codon_start=1;product=hypothetical protein;protein_id=NP_042013.1;transl_table=11;translation=length.49;\n+NC_001604\tGenBank\tgene\t39389\t39538\t.\t+\t1\tID=T7p60.gene;Alias=T7p60;Dbxref=GeneID:1261068;Name=T7p60;Note=gene 19.5;\n'
b
diff -r 73390562b5a2 -r 34b80e483fb8 test-data/T7_ExtSeqOut.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_ExtSeqOut.fasta Mon Jun 05 02:43:58 2023 +0000
[
b'@@ -0,0 +1,776 @@\n+>T7p01.p01 [Location=[926:1278](+);Name=T7p01]\n+TGGCTATGTCTAACATGACTTACAACAACGTTTTCGACCACGCTTACGAAATGCTGAAAG\n+AAAACATCCGTTATGATGACATCCGTGACACTGATGACCTGCACGATGCTATTCACATGG\n+CTGCCGATAATGCAGTTCCGCACTACTACGCTGACATCTTTAGCGTAATGGCAAGTGAGG\n+GCATTGACCTTGAGTTCGAAGACTCTGGTCTGATGCCTGACACCAAGGACGTAATCCGCA\n+TCCTGCAAGCGCGTATCTATGAGCAATTAACGATTGACCTCTGGGAAGACGCAGAAGACT\n+TGCTCAATGAATACTTGGAGGAAGTCGAGGAGTACGAGGAGGATGAAGAGTAA\n+>T7p02 [Location=[1279:1433](+);Name=T7p02]\n+TGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTGTACTTTTCTATAGCGACA\n+TGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGCTCAAAGAACTGTACGAAA\n+ACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGA\n+>T7p04 [Location=[1497:1639](+);Name=T7p04]\n+TGTATATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTG\n+GGAAGGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCA\n+TCAAAGGGGCACTACGCAAATGA\n+>T7p05.cds1 [Location=[1637:1794](+);Name=T7p05]\n+TGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCAACCGTATGTACACCTG\n+ATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAACTCCGCATTAACCGCA\n+AGATTAACAAGATAGGTTCCGGCTATGACAGAACGCAC\n+>T7p06 [Location=[1637:1797](+);Name=T7p06]\n+TGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCAACCGTATGTACACCTG\n+ATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAACTCCGCATTAACCGCA\n+AGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGA\n+>T7p05.cds2 [Location=[1797:1972](+);Name=T7p05]\n+ATGGCTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAG\n+TGAGAACTTGGCGAGAGAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCA\n+TAGACGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAG\n+>T7p03.p01 
[Location=[2022:3100](+);Name=T7p03]\n+TGAACATTACCGACATCATGAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAAC\n+TTGACAAGCGTCAAGGTATGCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGT\n+GTGATGGCGAGCTAACCGAACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCT\n+TGAAGTGTCTCACGGCTGACGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTG\n+CTTATAGTCACCCGCTGCTACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATT\n+CAGGCGCAGCCTATACCGCATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACG\n+TCTACGATGTACAGCGCCACGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATT\n+GCGAGCGTTTCAACAATGATGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTG\n+ATTGCAATTCGGATGAGCATGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTT\n+GTAAACTAATCCGCAAGTTCTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACA\n+TCATGTTCTCAAATGGAGACGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGA\n+AAGACGGTGGCGCATTCAGCATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCG\n+CACGACAGAAAGAAATTGACCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAG\n+AGGCACGCAGATTCAAACGTCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCG\n+AAAGAATGCTTGCTGCGTGGCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAG\n+CTGTAGATGTACTAGGAAGAACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGG\n+ACTTTAAGGCGCTTGAGGAACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTA\n+TCGCTAATGGTCTTACGCTCAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGA\n+>T7p07.p01 
[Location=[3172:5822](+);Name=T7p07]\n+TGAACACGATTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGT\n+TCAACACTCTGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGC\n+ATGAGTCTTACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAG\n+CTGGTGAGGTTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGA\n+TGATTGCACGCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGA\n+CAGCCTTCCAGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGA\n+CCACTCTGGCTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAA\n+TCGGTCGGGCCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGC\n+ACTTCAAGAAAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAG\n+CATTTATGCAAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGT\n+GGTCTTCGTGGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCA\n+TTGAGTCAACCGGAATGGTTAGCTTACACCGCCAAAATGCTGGCGTAGTAGGTCAAGACT\n+CTGAGACTATCGAACTCGCACCTGAATACGCTGAGGCTATCGCAACCCGTGCAGGTGCGC\n+TGGCTGGCATCTCTCCGATGTTCCAACCTTGCGTAGTTCCTCCTAAGCCGTGGACTGGCA\n+TTACTGGTGGTGGCTATTGGGCTAACGGTCGTCGTCCTCTGGCGCTGGTGCGTACTCACA\n+GTAAGAAAGCACTGATGCGCTACGAAGACGTTTACATGCCTGAGGTGTACAAAGCGATTA\n+ACATTGCGCAAAACACCGCATGGAAAATCAACAAGAAAGTCCTAGCGGTCGCCAACGTAA\n+TCACCAAGTGGAAGCATTGTCCGGTCGAGGACATCCCTGCGATTGAGCGTGAAGAACTCC\n+CGATGAAACCGGAAGACATCGACATGAATCCTGAGGCTCTCACCGCGTGGAAACGTGCTG\n+CCGCTGCTGTGTACCGCAAGGACAAGGCTCGCAAGTCTCGCCGTATCAGCCTTGAGTTCA\n+TGCTTGAGCAAGCCAATAAGTTTGCTAACCATAAGGCCATCTGGTTCCCTTACAAC'..b'TAA\n+>T7p53 [Location=[36345:36547](+);Name=T7p53]\n+TGCTATCATTAGACTTTAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTG\n+TAGCAGATGTTAGTGCTCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTG\n+CTGCTATCGCCTACACAGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACT\n+GGAAGAAAGCCAATAAGGAGTGA\n+>T7p54 [Location=[36554:36822](+);Name=T7p54]\n+TGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACTGCGATGGCTCAGC\n+GTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTCTATAATGCTATTA\n+ACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCGGATGTTCACATCT\n+TAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGTGATAACGGTCTTA\n+CGGATGATGATATTTACACATTACAGTGA\n+>T7p55 
[Location=[36918:37348](+);Name=T7p55]\n+TGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGGGATGCTATTCGGGTTAG\n+GATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACAGGAGGTACACAATGAGT\n+ACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAATCGATGCGGTATCTGCTA\n+AGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAGGATTATTTCTGATTTGC\n+GTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGGAACCTCCGATGGTCAGT\n+GTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGATGCTAAACGTATTCTCG\n+CAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGATACTATTCGTGAACTGC\n+AACGTAAGTAG\n+>T7p56 [Location=[37033:37283](+);Name=T7p56]\n+TGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAATCGATGCGGTATC\n+TGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAGGATTATTTCTGA\n+TTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGGAACCTCCGATGG\n+TCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGATGCTAAACGTAT\n+TCTCGCAGTGA\n+>T7p57 [Location=[37371:39130](+);Name=T7p57]\n+TGTCTACTCAATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGG\n+CGTTCCTATTCGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTG\n+ACATGGCTAAGGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTG\n+GTATCGGTAAGTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTC\n+AGTTGAAGATACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTA\n+TTAAGAACATCATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGC\n+GTGACTCGGTAATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGA\n+AATCAGTAGGTATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATG\n+ACGTTGAGATTCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGACTCTGG\n+TTCAGGAGTTCGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTA\n+CACCTCAGACAGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCA\n+TTATCTGGCCTGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTC\n+TTGCTCCTATGTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAA\n+CAGACCCAGTGCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGG\n+CTGGCTTTACGCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGC\n+TGAGGCTTCGTGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACC\n+AGTGGCTTCCGAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTG\n+ATGACCTGCATACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTC\n+TGGTCATT
GACCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACAC\n+TGAACGGTTACATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGA\n+CCCTTGAGTTACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGA\n+GTAACTTCGGTGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACA\n+ACTGTGCGATGGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCC\n+TTGAGCCAGTCATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACT\n+ACCAGTCCGCTCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGA\n+TGACCCGTATCACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTG\n+CGTTAGGCATTGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTG\n+AAGTACTTGCTGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATA\n+TCATTGAGATGTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTA\n+CGTCTTTCATTGAGTGGTGA\n+>T7p58 [Location=[38017:38273](+);Name=T7p58]\n+TGGGTACACAACCATTATCTGGCCTGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTA\n+TTACTCACAGCGTCTTGCTCCTATGTTACGCGCTGAGTACGATGAGAACCCTGAGGCACT\n+TGCTGGGACTCCAACAGACCCAGTGCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTT\n+GGAATACGGTAAGGCTGGCTTTACGCTACAGTTCATGCTTAACCCTAACCTTAGTGATGC\n+CGAGAAGTACCCGCTGA\n+>T7p59 [Location=[38554:38726](+);Name=T7p59]\n+TGGCTACTCCGATAAGACCCTTGAGTTACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCA\n+GACGGTTGTCTACGAGAGTAACTTCGGTGACGGTATGTTCGGTAAGGTATTCAGTCCTAT\n+CCTTCTTAAACACCACAACTGTGCGATGGAAGAGATTCGTGCCCGTGGTATGA\n+>T7p60 [Location=[39390:39538](+);Name=T7p60]\n+TGTTCCGCTTATTGTTGAACCTACTGCGGCATAGAGTCACCTACCGATTTCTTGTGGTAC\n+TTTGTGCTGCCCTTGGGTACGCATCTCTTACTGGAGACCTCAGTTCACTGGAGTCTGTCG\n+TTTGCTCTATACTCACTTGTAGCGATTAG\n'