Repository 'cpt_lipory'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_lipory

Changeset 4:b79df4966ebb (2023-06-05)
Previous changeset 3:68e1e56e338a (2022-05-20) Next changeset 5:2654bcd9b14d (2024-01-05)
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
added:
cpt-macros.xml
gff3.py
lipory.py
lipory.xml
macros.xml
test-data/T7_LiporyIn.fasta
test-data/T7_LiporyIn.gff3
test-data/T7_LiporyOut.gff3
removed:
cpt_lipory/cpt-macros.xml
cpt_lipory/gff3.py
cpt_lipory/lipory.py
cpt_lipory/lipory.xml
cpt_lipory/macros.xml
cpt_lipory/test-data/T7_LiporyIn.fasta
cpt_lipory/test-data/T7_LiporyIn.gff3
cpt_lipory/test-data/T7_LiporyOut.gff3
b
diff -r 68e1e56e338a -r b79df4966ebb cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:45:43 2023 +0000
[
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/cpt-macros.xml
--- a/cpt_lipory/cpt-macros.xml Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation> 
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/gff3.py
--- a/cpt_lipory/gff3.py Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n-    feature_list,\n-    test,\n-    test_kwargs,\n-    subfeatures=True,\n-    parent=None,\n-    invert=False,\n-    recurse=True,\n-):\n-    """Recursively search through features, testing each with a test function, yielding matches.\n-\n-    GFF3 is a hierachical data structure, so we need to be able to recursively\n-    search through features. E.g. if you\'re looking for a feature with\n-    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n-    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n-    :type feature_list: list\n-    :param feature_list: an iterable of features\n-\n-    :type test: function reference\n-    :param test: a closure with the method signature (feature, **kwargs) where\n-                 the kwargs are those passed in the next argument. This\n-                 function should return True or False, True if the feature is\n-                 to be yielded as part of the main feature_lambda function, or\n-                 False if it is to be ignored. This function CAN mutate the\n-                 features passed to it (think "apply").\n-\n-    :type test_kwargs: dictionary\n-    :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n-    :type subfeatures: boolean\n-    :param subfeatures: when a feature is matched, should just that feature be\n-                        yielded to the caller, or should the entire sub_feature\n-                        tree for that feature be included? 
subfeatures=True is\n-                        useful in cases such as searching for a gene feature,\n-                        and wanting to know what RBS/Shine_Dalgarno_sequences\n-                        are in the sub_feature tree (which can be accomplished\n-                        with two feature_lambda calls). subfeatures=False is\n-                        useful in cases when you want to process (and possibly\n-                        return) the entire feature tree, such as applying a\n-                        qualifier to every single feature.\n-\n-    :type invert: boolean\n-    :param invert: Negate/invert the result of the filter.\n-\n-    :rtype: yielded list\n-    :return: Yields a list of matching features.\n-    """\n-    # Either the top level set of [features] or the subfeature attribute\n-    for feature in feature_list:\n-        feature._parent = parent\n-        if not parent:\n-            # Set to self so we cannot go above root.\n-            feature._parent = feature\n-        test_result = test(feature, **test_kwargs)\n-        # if (not invert and test_result) or (invert and not test_result):\n-        if invert ^ test_result:\n-            if not subfeatures:\n-                feature_copy = copy.deepcopy(feature)\n-                feature_copy.sub_features = list()\n-                yield feature_copy\n-            else:\n-                yield feature\n-\n-        if recurse and hasattr(feature, "sub_features"):\n-            for x in feature_lambda(\n-                feature.sub_features,\n-                test,\n-                test_kwargs,\n-                subfeatures=subfeatures,\n-                parent=feature,\n-                invert=invert,\n-                recurse=recurse,\n-            ):\n-                yield x\n-\n-\n-def fetchParent(feature):\n-    if not hasattr(feature, "_parent") or feature._parent is None:\n-        return feature\n-    else:\n-        return fetchParent(feature._parent)\n-\n-\n-def 
feature_test_true(feature, **kwargs):\n-    return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n-    if "type" in kwargs:\n-        return str(feature.type).upper() == str(kwargs["type"]).upper()\n-    elif "types" in kwargs:\n-      for x in kwargs["types"]:\n-        if str(feature.type).upper() == str(x).upper():\n-          return True\n-      return False\n-    raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n-        # feature.location.end,\n-        # feature.location.strand\n-        # )\n-    return result\n-\n-\n-def get_gff3_id(gene):\n-    return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n-    # This prevents frameshift errors\n-    while start < 0:\n-        start += 3\n-    while end < 0:\n-        end += 3\n-    while start > parent_length:\n-        start -= 3\n-    while end > parent_length:\n-        end -= 3\n-    return (start, end)\n-\n-\n-def coding_genes(feature_list):\n-    for x in genes(feature_list):\n-        if (\n-            len(\n-                list(\n-                    feature_lambda(\n-                        x.sub_features,\n-                        feature_test_type,\n-                        {"type": "CDS"},\n-                        subfeatures=False,\n-                    )\n-                )\n-            )\n-            > 0\n-        ):\n-            yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n-    """\n-    Simple filter to extract gene features from the feature set.\n-    """\n-\n-    if not sort:\n-        for x in feature_lambda(\n-            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n-        ):\n-            yield x\n-    else:\n-        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n-        data = sorted(data, key=lambda feature: feature.location.start)\n-        for x in data:\n-            yield 
x\n-\n-\n-def wa_unified_product_name(feature):\n-    """\n-    Try and figure out a name. We gave conflicting instructions, so\n-    this isn\'t as trivial as it should be. Sometimes it will be in\n-    \'product\' or \'Product\', othertimes in \'Name\'\n-    """\n-    # Manually applied tags.\n-    protein_product = feature.qualifiers.get(\n-        "product", feature.qualifiers.get("Product", [None])\n-    )[0]\n-\n-    # If neither of those are available ...\n-    if protein_product is None:\n-        # And there\'s a name...\n-        if "Name" in feature.qualifiers:\n-            if not is_uuid(feature.qualifiers["Name"][0]):\n-                protein_product = feature.qualifiers["Name"][0]\n-\n-    return protein_product\n-\n-\n-def is_uuid(name):\n-    return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n-    # Normal RBS annotation types\n-    rbs_rbs = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n-        )\n-    )\n-    rbs_sds = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "Shine_Dalgarno_sequence"},\n-            subfeatures=False,\n-        )\n-    )\n-    # Fraking apollo\n-    apollo_exons = list(\n-        feature_lambda(\n-            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n-        )\n-    )\n-    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n-    # These are more NCBI\'s style\n-    regulatory_elements = list(\n-        feature_lambda(\n-            gene.sub_features,\n-            feature_test_type,\n-            {"type": "regulatory"},\n-            subfeatures=False,\n-        )\n-    )\n-    rbs_regulatory = list(\n-        feature_lambda(\n-            regulatory_elements,\n-            feature_test_quals,\n-            {"regulatory_class": ["ribosome_binding_site"]},\n-            subfeatures=False,\n-        )\n-    
)\n-    # Here\'s hoping you find just one ;)\n-    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n-    """\n-    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n-    """\n-    name = record.id\n-    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n-    if len(likely_parental_contig) == 1:\n-        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n-    return name\n-\n-\n-def fsort(it):\n-    for i in sorted(it, key=lambda x: int(x.location.start)):\n-        yield i\n'
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/lipory.py
--- a/cpt_lipory/lipory.py Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-import re
-import sys
-import argparse
-import logging
-from Bio import SeqIO
-from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
-from gff3 import feature_lambda, feature_test_type, get_id
-from Bio.SeqFeature import SeqFeature, FeatureLocation
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger(__name__)
-
-
-def find_lipoprotein(gff3_file, fasta_genome, lipobox_mindist=10, lipobox_maxdist=40):
-    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta_genome, "fasta"))
-
-    CASES = [
-        re.compile(
-            "^.{%s,%s}[ILMFTV][^REKD][GAS]C" % (lipobox_mindist, lipobox_maxdist)
-        ),
-        re.compile(
-            "^.{%s,%s}AW[AGS]C" % (lipobox_mindist, lipobox_maxdist)
-        ),
-        # Make sure to not have multiple cases that share matches, will introduce duplicate features into gff3 file
-    ]
-
-    for record in gffParse(gff3_file, base_dict=seq_dict):
-        good_features = []
-
-        genes = list(
-            feature_lambda(
-                record.features, feature_test_type, {"type": "gene"}, subfeatures=True
-            )
-        )
-        for gene in genes:
-            cdss = list(
-                feature_lambda(
-                    gene.sub_features,
-                    feature_test_type,
-                    {"type": "CDS"},
-                    subfeatures=False,
-                )
-            )
-            if len(cdss) == 0:
-                continue
-
-            for cds in cdss:
-                try:
-                    tmpseq = str(
-                        cds.extract(record.seq).translate(table=11, cds=True)
-                    ).replace("*", "")
-                except:
-                    continue
-
-                for case in CASES:
-                    m = case.search(tmpseq)
-                    if m:
-                        if cds.location.strand > 0:
-                            start = cds.location.start + (3 * (m.end() - 4))
-                            end = cds.location.start + (3 * m.end())
-                        else:
-                            start = cds.location.end - (3 * (m.end() - 4))
-                            end = cds.location.end - (3 * m.end())
-
-                        tmp = gffSeqFeature(
-                            FeatureLocation(
-                                min(start, end),
-                                max(start, end),
-                                strand=cds.location.strand,
-                            ),
-                            type="Lipobox",
-                            qualifiers={
-                                "source": "CPT_LipoRy",
-                                "ID": "%s.lipobox" % get_id(gene),
-                            },
-                        )
-                        tmp.qualifiers["sequence"] = str(
-                            tmp.extract(record).seq.translate()
-                        )
-
-                        gene.sub_features.append(tmp)
-                        good_features.append(gene)
-
-            record.features = good_features
-        yield [record]
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Filter out lipoproteins", epilog="")
-    parser.add_argument(
-        "gff3_file", type=argparse.FileType("r"), help="Naive ORF Calls"
-    )
-    parser.add_argument(
-        "fasta_genome", type=argparse.FileType("r"), help="Fasta genome sequence"
-    )
-
-    parser.add_argument(
-        "--lipobox_mindist",
-        type=int,
-        help="Minimum distance in codons to start of lipobox",
-        default=10,
-    )
-    parser.add_argument(
-        "--lipobox_maxdist",
-        type=int,
-        help="Maximum distance in codons to start of lipobox",
-        default=40,
-    )
-
-    args = parser.parse_args()
-
-    args = vars(parser.parse_args())
-    for record in find_lipoprotein(**args):
-        record[0].annotations = {}
-        gffWrite(record, sys.stdout)
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/lipory.xml
--- a/cpt_lipory/lipory.xml Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,60 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.fasta.lipory" name="Identify Lipoboxes" version="19.1.0.0">
-  <description> in protein sequences</description>
-  <macros>
-    <import>macros.xml</import>
-    <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements">
-    <requirement type="package" version="2022.1.18">regex</requirement>
-  </expand>
-  <command interpreter="python" detect_errors="aggressive"><![CDATA[lipory.py
-$positional_1
-$positional_2
-
---lipobox_mindist $lipobox_mindist
---lipobox_maxdist $lipobox_maxdist
-
-> $default]]></command>
-  <inputs>
-    <param label="Naive orf calls" name="positional_1" type="data" format="gff3"/>
-    <param label="Genome" name="positional_2" type="data" format="fasta"/>
-
-    <param label="Minimum distance in codons to start of lipobox" name="lipobox_mindist" type="integer" value="10"/>
-    <param label="Maximum distance in codons to start of lipobox" name="lipobox_maxdist" type="integer" value="40"/>
-  </inputs>
-  <outputs>
-      <data format="gff3" name="default" label="Lipoboxes from ${on_string}"/>
-  </outputs>
-  <tests>
-      <test>
-          <param name="positional_1" value="T7_LiporyIn.gff3" />
-          <param name="positional_2" value="T7_LiporyIn.fasta" />
-          <param name="lipobox_mindist" value="10" />
-          <param name="lipobox_maxdist" value="60" />
-          <output name="default" value="T7_LiporyOut.gff3" />
-      </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-Identifies possible LipoBoxes from an input GFF3 and FASTA.
-
-**How it works**
-
-Searches in the first 10-40 amino acids of an input protein sequence using regular expressions 
-for a 4-amino acid motif based on the consensus sequences described in (**Babu** et al. 2006. *J 
-Bacteriol.* 188(8):2761-2773 and **Kongari** *et al.* 2018 *BMC Bioinformatics*. 19:326). The 
-amino acids allowed here are relaxed to allow for the diversity of amino acids known to occur in lipoboxes.
-
-Position 1: ILMFTV or only A
-
-Position 2: any residue except REKD or only W
-
-Position 3: GAS 
-
-Position 4: C
-
-      ]]></help>
- <expand macro="citations-2020" />
-</tool>
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/macros.xml
--- a/cpt_lipory/macros.xml Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,85 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="requirements">
- <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
- <yield/>
- </requirements>
- </xml>
- <token name="@BLAST_TSV@">
- "$blast_tsv"
- </token>
- <xml name="blast_tsv">
- <param label="Blast Results" help="TSV/tabular (25 Column)"
- name="blast_tsv" type="data" format="tabular" />
- </xml>
-
- <token name="@BLAST_XML@">
- "$blast_xml"
- </token>
- <xml name="blast_xml">
- <param label="Blast Results" help="XML format"
- name="blast_xml" type="data" format="blastxml" />
- </xml>
- <xml name="gff3_with_fasta">
- <param label="Genome Sequences" name="fasta" type="data" format="fasta" />
- <param label="Genome Annotations" name="gff3" type="data" format="gff3" />
- </xml>
- <xml name="genome_selector">
- <conditional name="reference_genome">
- <param name="reference_genome_source" type="select" label="Reference Genome">
- <option value="history" selected="True">From History</option>
- <option value="cached">Locally Cached</option>
- </param>
- <when value="cached">
- <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
- <options from_data_table="all_fasta"/>
- </param>
- </when>
- <when value="history">
- <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
- </when>
- </conditional>
- </xml>
- <xml name="gff3_input">
- <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
- </xml>
- <xml name="input/gff3+fasta">
- <expand macro="gff3_input" />
- <expand macro="genome_selector" />
- </xml>
- <token name="@INPUT_GFF@">
- "$gff3_data"
- </token>
- <token name="@INPUT_FASTA@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
- <token name="@GENOME_SELECTOR_PRE@">
-#if $reference_genome.reference_genome_source == 'history':
- ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
- </token>
- <token name="@GENOME_SELECTOR@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
-        <xml name="input/fasta">
- <param label="Fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-
- <token name="@SEQUENCE@">
- "$sequences"
- </token>
- <xml name="input/fasta/protein">
- <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-</macros>
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/test-data/T7_LiporyIn.fasta
--- a/cpt_lipory/test-data/T7_LiporyIn.fasta Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,667 +0,0 @@\n->NC_001604\n-TCTCACAGTGTACGGACCTAAAGTTCCCCCATAGGGGGTACCTAAAGCCCAGCCAATCAC\n-CTAAAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGT\n-TTGTCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCTATCTGTTACAGTCTCCTAAA\n-GTATCCTCCTAAAGTCACCTCCTAACGTCCATCCTAAAGCCAACACCTAAAGCCTACACC\n-TAAAGACCCATCAAGTCAACGCCTATCTTAAAGTTTAAACATAAAGACCAGACCTAAAGA\n-CCAGACCTAAAGACACTACATAAAGACCAGACCTAAAGACGCCTTGTTGTTAGCCATAAA\n-GTGATAACCTTTAATCATTGTCTTTATTAATACAACTCACTATAAGGAGAGACAACTTAA\n-AGAGACTTAAAAGATTAATTTAAAATTTATCAAAAAGAGTATTGACTTAAAGTCTAACCT\n-ATAGGATACTTACAGCCATCGAGAGGGACACGGCGAATAGCCATCCCAATCGACACCGGG\n-GTCAACCGGATAAGTAGACAGCCTGATAAGTCGCACGAAAAACAGGTATTGACAACATGA\n-AGTAACATGCAGTAAGATACAAATCGCTAGGTAACACTAGCAGCGTCAACCGGGCGCACA\n-GTGCCTTCTAGGTGACTTAAGCGCACCACGGCACATAAGGTGAAACAAAACGGTTGACAA\n-CATGAAGTAAACACGGTACGATGTACCACATGAAACGACAGTGAGTCACCACACTGAAAG\n-GTGATGCGGTCTAACGAAACCTGACCTAAGACGCTCTTTAACAATCTGGTAAATAGCTCT\n-TGAGTGCATGACTAGCGGATAACTCAAGGGTATCGCAAGGTGCCCTTTATGATATTCACT\n-AATAACTGCACGAGGTAACACAAGATGGCTATGTCTAACATGACTTACAACAACGTTTTC\n-GACCACGCTTACGAAATGCTGAAAGAAAACATCCGTTATGATGACATCCGTGACACTGAT\n-GACCTGCACGATGCTATTCACATGGCTGCCGATAATGCAGTTCCGCACTACTACGCTGAC\n-ATCTTTAGCGTAATGGCAAGTGAGGGCATTGACCTTGAGTTCGAAGACTCTGGTCTGATG\n-CCTGACACCAAGGACGTAATCCGCATCCTGCAAGCGCGTATCTATGAGCAATTAACGATT\n-GACCTCTGGGAAGACGCAGAAGACTTGCTCAATGAATACTTGGAGGAAGTCGAGGAGTAC\n-GAGGAGGATGAAGAGTAATGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTG\n-TACTTTTCTATAGCGACATGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGC\n-TCAAAGAACTGTACGAAAACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGATAGACTC\n-AAGGTCGCTCCTAGCGAGTGGCCTTTATGATTATCACTTTACTTATGAGGGAGTAATGTA\n-TATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTGGGAA\n-GGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCATCAA\n-AGGGGCACTACGCAAATGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCA\n-ACCGTATGTACACCTGATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAA\n-CTCCGCATTAACCGCAAGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGATGG\n-CTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAGTGAG\n-AACTTGGCGAGA
GAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCATAGA\n-CGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAGCCAACACA\n-CTGAACGCTATCTCATAACGAACATAAAGGACACAATGCAATGAACATTACCGACATCAT\n-GAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAACTTGACAAGCGTCAAGGTAT\n-GCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGTGTGATGGCGAGCTAACCGA\n-ACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCTTGAAGTGTCTCACGGCTGA\n-CGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTGCTTATAGTCACCCGCTGCT\n-ACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATTCAGGCGCAGCCTATACCGC\n-ATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACGTCTACGATGTACAGCGCCA\n-CGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATTGCGAGCGTTTCAACAATGA\n-TGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTGATTGCAATTCGGATGAGCA\n-TGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTTGTAAACTAATCCGCAAGTT\n-CTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACATCATGTTCTCAAATGGAGA\n-CGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGAAAGACGGTGGCGCATTCAG\n-CATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCGCACGACAGAAAGAAATTGA\n-CCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAGAGGCACGCAGATTCAAACG\n-TCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCGAAAGAATGCTTGCTGCGTG\n-GCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAGCTGTAGATGTACTAGGAAG\n-AACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGGACTTTAAGGCGCTTGAGGA\n-ACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTATCGCTAATGGTCTTACGCT\n-CAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGATAGTCTTATCTTACAGGTCA\n-TCTGCGGGTGGCCTGAATAGGTACGATTTACTAACTGGAAGAGGCACTAAATGAACACGA\n-TTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGTTCAACACTC\n-TGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGCATGAGTCTT\n-ACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAGCTGGTGAGG\n-TTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGATGATTGCAC\n-GCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGACAGCCTTCC\n-AGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGACCACTCTGG\n-CTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAATCGGTCGGG\n-CCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGCACTTCAAGA\n-AAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAGCATTTATGC\n-AAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGTGGTCTTCG
T\n-GGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCATTGAGTCAA\n-'..b'TAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGT\n-GCCAACAACTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCT\n-GATGGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCA\n-GACAGTCGTTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAATTGGTAAATCACAAG\n-GAAAGACGTGTAGTCCACGGATGGACTCTCAAGGAGGTACAAGGTGCTATCATTAGACTT\n-TAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTGTAGCAGATGTTAGTGC\n-TCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTGCTGCTATCGCCTACAC\n-AGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACTGGAAGAAAGCCAATAA\n-GGAGTGATATGTATGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACT\n-GCGATGGCTCAGCGTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTC\n-TATAATGCTATTAACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCG\n-GATGTTCACATCTTAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGT\n-GATAACGGTCTTACGGATGATGATATTTACACATTACAGTGATATACTCAAGGCCACTAC\n-AGATAGTGGTCTTTATGGATGTCATTGTCTATACGAGATGCTCCTACGTGAAATCTGAAA\n-GTTAACGGGAGGCATTATGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGG\n-GATGCTATTCGGGTTAGGATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACA\n-GGAGGTACACAATGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAAT\n-CGATGCGGTATCTGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAG\n-GATTATTTCTGATTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGG\n-AACCTCCGATGGTCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGA\n-TGCTAAACGTATTCTCGCAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGA\n-TACTATTCGTGAACTGCAACGTAAGTAGGAAATCAAGTAAGGAGGCAATGTGTCTACTCA\n-ATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGGCGTTCCTATT\n-CGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTGACATGGCTAA\n-GGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTGGTATCGGTAA\n-GTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTCAGTTGAAGAT\n-ACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTATTAAGAACAT\n-CATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGCGTGACTCGGT\n-AATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGAAATCAGTAGG\n-TATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATGACGTTGAGAT\n-TCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGAC
TCTGGTTCAGGAGTT\n-CGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTACACCTCAGAC\n-AGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCATTATCTGGCC\n-TGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTCTTGCTCCTAT\n-GTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAACAGACCCAGT\n-GCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGGCTGGCTTTAC\n-GCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGCTGAGGCTTCG\n-TGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACCAGTGGCTTCC\n-GAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTGATGACCTGCA\n-TACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTCTGGTCATTGA\n-CCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACACTGAACGGTTA\n-CATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGACCCTTGAGTT\n-ACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGAGTAACTTCGG\n-TGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACAACTGTGCGAT\n-GGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCCTTGAGCCAGT\n-CATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACTACCAGTCCGC\n-TCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGATGACCCGTAT\n-CACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTGCGTTAGGCAT\n-TGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTGAAGTACTTGC\n-TGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATATCATTGAGAT\n-GTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTACGTCTTTCAT\n-TGAGTGGTGATTTATGCATTAGGACTGCATAGGGATGCACTATAGACCACGGATGGTCAG\n-TTCTTTAAGTTACTGAAAAGACACGATAAATTAATACGACTCACTATAGGGAGAGGAGGG\n-ACGAAAGGTTACTATATAGATACTGAATGAATACTTATAGAGTGCATAAAGTATGCATAA\n-TGGTGTACCTAGAGTGACCTCTAAGAATGGTGATTATATTGTATTAGTATCACCTTAACT\n-TAAGGACCAACATAAAGGGAGGAGACTCATGTTCCGCTTATTGTTGAACCTACTGCGGCA\n-TAGAGTCACCTACCGATTTCTTGTGGTACTTTGTGCTGCCCTTGGGTACGCATCTCTTAC\n-TGGAGACCTCAGTTCACTGGAGTCTGTCGTTTGCTCTATACTCACTTGTAGCGATTAGGG\n-TCTTCCTGACCGACTGATGGCTCACCGAGGGATTCAGCGGTATGATTGCATCACACCACT\n-TCATCCCTATAGAGTCAAGTCCTAAGGTATACCCATAAAGAGCCTCTAATGGTCTATCCT\n-AAGGTCTATACCTAAAGATAGGCCATCCTATCAGTGTCACCTAAAGAGGGTCTTAGAGAG\n-GGCCTATGGAGTTCCTATAGGGTCCTTTAAAATATACCATAAAAATCTGAGTGACTATCT\n-CACAGTGTACGGACCTAAAGTTCCCCCAT
AGGGGGTACCTAAAGCCCAGCCAATCACCTA\n-AAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGTTTG\n-TCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCT\n'
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/test-data/T7_LiporyIn.gff3
--- a/cpt_lipory/test-data/T7_LiporyIn.gff3 Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3145 +0,0 @@\n-##gff-version 3\n-NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n-NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n-NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n-NC_001604\tcpt.fixModel\tgene\t542\t651\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825;\n-NC_001604\tcpt.fixModel\tmRNA\t542\t637\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825.mRNA;Parent=ORF.0.2506_0.7313234548298825;\n-NC_001604\tgetOrfsOrCds\tCDS\t542\t637\t.\t-\t0\tID=ORF.0.2506_0.7313234548298825.CDS;Parent=ORF.0.2506_0.7313234548298825.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t649\t651\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2506_0.7313234548298825.rbs-0;Parent=ORF.0.2506_0.7313234548298825;\n-NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n-NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n-NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n-NC_001604\tcpt.fixModel\tgene\t627\t747\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481;\n-NC_001604\tcpt.fixModel\tmRNA\t627\t734\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481.mRNA;Parent=ORF.0.3367_0.1254781548971481;\n-NC_001604\tgetOrfsOrCds\tCDS\t627\t734\t.\t-\t0\tID=ORF.0.3367_0.1254781548971481.CDS;Parent=ORF.0.3367_0.1254781548971481.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t745\t747\t.\t-\t.\tsource=CPT_ShineFind;ID
=ORF.0.3367_0.1254781548971481.rbs-0;Parent=ORF.0.3367_0.1254781548971481;\n-NC_001604\tcpt.fixModel\tgene\t766\t1206\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498;\n-NC_001604\tcpt.fixModel\tmRNA\t766\t1191\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498.mRNA;Parent=ORF.0.2960_0.21096600108012498;\n-NC_001604\tgetOrfsOrCds\tCDS\t766\t1191\t.\t-\t0\tID=ORF.0.2960_0.21096600108012498.CDS;Parent=ORF.0.2960_0.21096600108012498.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t1202\t1206\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2960_0.21096600108012498.rbs-0;Parent=ORF.0.2960_0.21096600108012498;\n-NC_001604\tcpt.fixModel\tgene\t766\t885\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563;\n-NC_001604\tcpt.fixModel\tmRNA\t766\t867\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563.mRNA;Parent=ORF.0.2976_0.8667531510652563;\n-NC_001604\tgetOrfsOrCds\tCDS\t766\t867\t.\t-\t0\tID=ORF.0.2976_0.8667531510652563.CDS;Parent=ORF.0.2976_0.8667531510652563.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t883\t885\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2976_0.8667531510652563.rbs-0;Parent=ORF.0.2976_0.8667531510652563;\n-NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266;\n-NC_001604\tcpt.fixModel\tmRNA\t925\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266.mRNA;Parent=ORF.0.2_0.39432314427019266;\n-NC_001604\tgetOrfsOrCds\tCDS\t925\t1278\t.\t+\t0\tID=ORF.0.2_0.39432314427019266.CDS;Parent=ORF.0.2_0.39432314427019266.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2_0.39432314427019266.rbs-0;Parent=ORF.0.2_0.39432314427019266;\n-NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601;\n-NC_001604\tcpt.fixModel\tmRNA\t931\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601.mRNA;Parent=ORF.0.3_0.8154113297998601;\n-NC_001604\tgetOrfsOrCds\tCDS\t931\t1278\t.\t+\t0\tID=ORF.0.3_0.8154113297998601.CDS;Parent=ORF.0.3_0.8154113297998601.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.
\tsource=CPT_ShineFind;ID=ORF.0.3_0.8154113297998601.rbs-0;Parent=ORF.0.3_0.8154113297998601;\n-NC_001604\tcpt.fixModel\tgene\t1182\t1310\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964;\n-NC_001604\tcpt.fixModel\tmRNA\t1182\t1298\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964.mRNA;Parent=ORF.0.3359_0.64499511887229'..b'93689042043441.rbs-0;Parent=ORF.0.1411_0.9793689042043441;\n-NC_001604\tcpt.fixModel\tgene\t39011\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695;\n-NC_001604\tcpt.fixModel\tmRNA\t39020\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695.mRNA;Parent=ORF.0.1412_0.30622712641637695;\n-NC_001604\tgetOrfsOrCds\tCDS\t39020\t39130\t.\t+\t0\tID=ORF.0.1412_0.30622712641637695.CDS;Parent=ORF.0.1412_0.30622712641637695.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39011\t39014\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1412_0.30622712641637695.rbs-0;Parent=ORF.0.1412_0.30622712641637695;\n-NC_001604\tcpt.fixModel\tgene\t39012\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776;\n-NC_001604\tcpt.fixModel\tmRNA\t39023\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776.mRNA;Parent=ORF.0.1413_0.6484168178188776;\n-NC_001604\tgetOrfsOrCds\tCDS\t39023\t39130\t.\t+\t0\tID=ORF.0.1413_0.6484168178188776.CDS;Parent=ORF.0.1413_0.6484168178188776.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39012\t39015\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1413_0.6484168178188776.rbs-0;Parent=ORF.0.1413_0.6484168178188776;\n-NC_001604\tcpt.fixModel\tgene\t39378\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005;\n-NC_001604\tcpt.fixModel\tmRNA\t39389\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005.mRNA;Parent=ORF.0.1414_0.38280168913440005;\n-NC_001604\tgetOrfsOrCds\tCDS\t39389\t39538\t.\t+\t0\tID=ORF.0.1414_0.38280168913440005.CDS;Parent=ORF.0.1414_0.38280168913440005.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39378\t39382\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1414_0.38280168913440005.rbs-0;Parent=ORF.0.1414_0.38280168913440005;\n-NC_001604\tcpt.fixModel\tgene\t39423\
t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985;\n-NC_001604\tcpt.fixModel\tmRNA\t39441\t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985.mRNA;Parent=ORF.0.2020_0.5190345053482985;\n-NC_001604\tgetOrfsOrCds\tCDS\t39441\t39557\t.\t+\t0\tID=ORF.0.2020_0.5190345053482985.CDS;Parent=ORF.0.2020_0.5190345053482985.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39423\t39425\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2020_0.5190345053482985.rbs-0;Parent=ORF.0.2020_0.5190345053482985;\n-NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282;\n-NC_001604\tcpt.fixModel\tmRNA\t39453\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282.mRNA;Parent=ORF.0.2021_0.3406547997303282;\n-NC_001604\tgetOrfsOrCds\tCDS\t39453\t39557\t.\t+\t0\tID=ORF.0.2021_0.3406547997303282.CDS;Parent=ORF.0.2021_0.3406547997303282.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2021_0.3406547997303282.rbs-0;Parent=ORF.0.2021_0.3406547997303282;\n-NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707;\n-NC_001604\tcpt.fixModel\tmRNA\t39462\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707.mRNA;Parent=ORF.0.2023_0.2547887662353707;\n-NC_001604\tgetOrfsOrCds\tCDS\t39462\t39557\t.\t+\t0\tID=ORF.0.2023_0.2547887662353707.CDS;Parent=ORF.0.2023_0.2547887662353707.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2023_0.2547887662353707.rbs-0;Parent=ORF.0.2023_0.2547887662353707;\n-NC_001604\tcpt.fixModel\tgene\t39494\t39623\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638;\n-NC_001604\tcpt.fixModel\tmRNA\t39494\t39604\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638.mRNA;Parent=ORF.0.2029_0.06575596254471638;\n-NC_001604\tgetOrfsOrCds\tCDS\t39494\t39604\t.\t-\t0\tID=ORF.0.2029_0.06575596254471638.CDS;Parent=ORF.0.2029_0.06575596254471638.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39620\t39623\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.20
29_0.06575596254471638.rbs-0;Parent=ORF.0.2029_0.06575596254471638;\n-NC_001604\tcpt.fixModel\tgene\t39713\t39861\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771;\n-NC_001604\tcpt.fixModel\tmRNA\t39713\t39847\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771.mRNA;Parent=ORF.0.2026_0.08836418353296771;\n-NC_001604\tgetOrfsOrCds\tCDS\t39713\t39847\t.\t-\t0\tID=ORF.0.2026_0.08836418353296771.CDS;Parent=ORF.0.2026_0.08836418353296771.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t39858\t39861\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2026_0.08836418353296771.rbs-0;Parent=ORF.0.2026_0.08836418353296771;\n'
b
diff -r 68e1e56e338a -r b79df4966ebb cpt_lipory/test-data/T7_LiporyOut.gff3
--- a/cpt_lipory/test-data/T7_LiporyOut.gff3 Fri May 20 08:58:15 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,296 +0,0 @@\n-##gff-version 3\n-NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n-NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n-NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n-NC_001604\tfeature\tLipobox\t605\t616\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2504_0.5545204186518331.lipobox;sequence=LTAC;Parent=ORF.0.2504_0.5545204186518331;\n-NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n-NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n-NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n-NC_001604\tfeature\tLipobox\t720\t731\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.3363_0.9803284230217932.lipobox;sequence=FTSC;Parent=ORF.0.3363_0.9803284230217932;\n-NC_001604\tcpt.fixModel\tgene\t1487\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776;\n-NC_001604\tcpt.fixModel\tmRNA\t1496\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776.mRNA;Parent=ORF.0.566_0.7631590264556776;\n-NC_001604\tgetOrfsOrCds\tCDS\t1496\t1639\t.\t+\t0\tID=ORF.0.566_0.7631590264556776.CDS;Parent=ORF.0.566_0.7631590264556776.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t1487\t1490\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.566_0.7631590264556776.rbs-0;Parent=ORF.0.566_0.7631590264556776;\n-NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.566_0.7631590264556776.lipobox;se
quence=FTAC;Parent=ORF.0.566_0.7631590264556776;\n-NC_001604\tcpt.fixModel\tgene\t1490\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767;\n-NC_001604\tcpt.fixModel\tmRNA\t1502\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767.mRNA;Parent=ORF.0.567_0.10768222865442767;\n-NC_001604\tgetOrfsOrCds\tCDS\t1502\t1639\t.\t+\t0\tID=ORF.0.567_0.10768222865442767.CDS;Parent=ORF.0.567_0.10768222865442767.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t1490\t1493\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.567_0.10768222865442767.rbs-0;Parent=ORF.0.567_0.10768222865442767;\n-NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.567_0.10768222865442767.lipobox;sequence=FTAC;Parent=ORF.0.567_0.10768222865442767;\n-NC_001604\tcpt.fixModel\tgene\t3341\t3547\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397;\n-NC_001604\tcpt.fixModel\tmRNA\t3341\t3535\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397.mRNA;Parent=ORF.0.2469_0.7331780084741397;\n-NC_001604\tgetOrfsOrCds\tCDS\t3341\t3535\t.\t-\t0\tID=ORF.0.2469_0.7331780084741397.CDS;Parent=ORF.0.2469_0.7331780084741397.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t3545\t3547\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2469_0.7331780084741397.rbs-0;Parent=ORF.0.2469_0.7331780084741397;\n-NC_001604\tfeature\tLipobox\t3488\t3499\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2469_0.7331780084741397.lipobox;sequence=LISC;Parent=ORF.0.2469_0.7331780084741397;\n-NC_001604\tcpt.fixModel\tgene\t3433\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531;\n-NC_001604\tcpt.fixModel\tmRNA\t3444\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531.mRNA;Parent=ORF.0.1457_0.7756036756597531;\n-NC_001604\tgetOrfsOrCds\tCDS\t3444\t5822\t.\t+\t0\tID=ORF.0.1457_0.7756036756597531.CDS;Parent=ORF.0.1457_0.7756036756597531.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t3433\t3435\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1457_0.7756036756597531.rbs-0;Parent=ORF.0.1457_0.7756036756597531;\n-NC_001604\tfeature\tLipobox\t3534\t3545\t.\t+\t.\tsource=CPT_LipoRy;ID=O
RF.0.1457_0.7756036756597531.lipobox;sequence=TLAC;Parent=ORF.0.1457_0.7756036756597531;\n-NC_001604\tcpt.fixModel\tgene\t4440\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771;\n-NC_001604\tcpt.fixModel\tmRNA\t4455\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771.mRNA;Parent=ORF.0.1500_0.6'..b'ID=ORF.0.1322_0.7094403889052515.lipobox;sequence=LYGC;Parent=ORF.0.1322_0.7094403889052515;\n-NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005;\n-NC_001604\tcpt.fixModel\tmRNA\t36797\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005.mRNA;Parent=ORF.0.1324_0.11087411288527005;\n-NC_001604\tgetOrfsOrCds\tCDS\t36797\t36898\t.\t+\t0\tID=ORF.0.1324_0.11087411288527005.CDS;Parent=ORF.0.1324_0.11087411288527005.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1324_0.11087411288527005.rbs-0;Parent=ORF.0.1324_0.11087411288527005;\n-NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1324_0.11087411288527005.lipobox;sequence=LYGC;Parent=ORF.0.1324_0.11087411288527005;\n-NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433;\n-NC_001604\tcpt.fixModel\tmRNA\t36800\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433.mRNA;Parent=ORF.0.1325_0.22902888411750433;\n-NC_001604\tgetOrfsOrCds\tCDS\t36800\t36898\t.\t+\t0\tID=ORF.0.1325_0.22902888411750433.CDS;Parent=ORF.0.1325_0.22902888411750433.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1325_0.22902888411750433.rbs-0;Parent=ORF.0.1325_0.22902888411750433;\n-NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1325_0.22902888411750433.lipobox;sequence=LYGC;Parent=ORF.0.1325_0.22902888411750433;\n-NC_001604\tcpt.fixModel\tgene\t37020\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165;\n-NC_001604\tcpt.fixModel\tmRNA\t37032\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165.mRNA;Parent=ORF.0.1961_0.03848108
116896165;\n-NC_001604\tgetOrfsOrCds\tCDS\t37032\t37283\t.\t+\t0\tID=ORF.0.1961_0.03848108116896165.CDS;Parent=ORF.0.1961_0.03848108116896165.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t37020\t37026\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1961_0.03848108116896165.rbs-0;Parent=ORF.0.1961_0.03848108116896165;\n-NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1961_0.03848108116896165.lipobox;sequence=ISGC;Parent=ORF.0.1961_0.03848108116896165;\n-NC_001604\tcpt.fixModel\tgene\t37034\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436;\n-NC_001604\tcpt.fixModel\tmRNA\t37050\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436.mRNA;Parent=ORF.0.1962_0.6590821562203436;\n-NC_001604\tgetOrfsOrCds\tCDS\t37050\t37283\t.\t+\t0\tID=ORF.0.1962_0.6590821562203436.CDS;Parent=ORF.0.1962_0.6590821562203436.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t37034\t37036\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1962_0.6590821562203436.rbs-0;Parent=ORF.0.1962_0.6590821562203436;\n-NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1962_0.6590821562203436.lipobox;sequence=ISGC;Parent=ORF.0.1962_0.6590821562203436;\n-NC_001604\tcpt.fixModel\tgene\t37074\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861;\n-NC_001604\tcpt.fixModel\tmRNA\t37083\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861.mRNA;Parent=ORF.0.1964_0.6899335526754861;\n-NC_001604\tgetOrfsOrCds\tCDS\t37083\t37283\t.\t+\t0\tID=ORF.0.1964_0.6899335526754861.CDS;Parent=ORF.0.1964_0.6899335526754861.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t37074\t37076\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1964_0.6899335526754861.rbs-0;Parent=ORF.0.1964_0.6899335526754861;\n-NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1964_0.6899335526754861.lipobox;sequence=ISGC;Parent=ORF.0.1964_0.6899335526754861;\n-NC_001604\tcpt.fixModel\tgene\t37213\t37379\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225;\n-NC_001604\tcpt.fixModel\tmRNA\t37213\t3736
8\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225.mRNA;Parent=ORF.0.2528_0.10906489943882225;\n-NC_001604\tgetOrfsOrCds\tCDS\t37213\t37368\t.\t-\t0\tID=ORF.0.2528_0.10906489943882225.CDS;Parent=ORF.0.2528_0.10906489943882225.mRNA;\n-NC_001604\tfeature\tShine_Dalgarno_sequence\t37377\t37379\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2528_0.10906489943882225.rbs-0;Parent=ORF.0.2528_0.10906489943882225;\n-NC_001604\tfeature\tLipobox\t37315\t37326\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2528_0.10906489943882225.lipobox;sequence=IVSC;Parent=ORF.0.2528_0.10906489943882225;\n'
b
diff -r 68e1e56e338a -r b79df4966ebb gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3.py Mon Jun 05 02:45:43 2023 +0000
[
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+    feature_list,\n+    test,\n+    test_kwargs,\n+    subfeatures=True,\n+    parent=None,\n+    invert=False,\n+    recurse=True,\n+):\n+    """Recursively search through features, testing each with a test function, yielding matches.\n+\n+    GFF3 is a hierachical data structure, so we need to be able to recursively\n+    search through features. E.g. if you\'re looking for a feature with\n+    ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+    case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+    :type feature_list: list\n+    :param feature_list: an iterable of features\n+\n+    :type test: function reference\n+    :param test: a closure with the method signature (feature, **kwargs) where\n+                 the kwargs are those passed in the next argument. This\n+                 function should return True or False, True if the feature is\n+                 to be yielded as part of the main feature_lambda function, or\n+                 False if it is to be ignored. This function CAN mutate the\n+                 features passed to it (think "apply").\n+\n+    :type test_kwargs: dictionary\n+    :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+    :type subfeatures: boolean\n+    :param subfeatures: when a feature is matched, should just that feature be\n+                        yielded to the caller, or should the entire sub_feature\n+                        tree for that feature be included? 
subfeatures=True is\n+                        useful in cases such as searching for a gene feature,\n+                        and wanting to know what RBS/Shine_Dalgarno_sequences\n+                        are in the sub_feature tree (which can be accomplished\n+                        with two feature_lambda calls). subfeatures=False is\n+                        useful in cases when you want to process (and possibly\n+                        return) the entire feature tree, such as applying a\n+                        qualifier to every single feature.\n+\n+    :type invert: boolean\n+    :param invert: Negate/invert the result of the filter.\n+\n+    :rtype: yielded list\n+    :return: Yields a list of matching features.\n+    """\n+    # Either the top level set of [features] or the subfeature attribute\n+    for feature in feature_list:\n+        feature._parent = parent\n+        if not parent:\n+            # Set to self so we cannot go above root.\n+            feature._parent = feature\n+        test_result = test(feature, **test_kwargs)\n+        # if (not invert and test_result) or (invert and not test_result):\n+        if invert ^ test_result:\n+            if not subfeatures:\n+                feature_copy = copy.deepcopy(feature)\n+                feature_copy.sub_features = list()\n+                yield feature_copy\n+            else:\n+                yield feature\n+\n+        if recurse and hasattr(feature, "sub_features"):\n+            for x in feature_lambda(\n+                feature.sub_features,\n+                test,\n+                test_kwargs,\n+                subfeatures=subfeatures,\n+                parent=feature,\n+                invert=invert,\n+                recurse=recurse,\n+            ):\n+                yield x\n+\n+\n+def fetchParent(feature):\n+    if not hasattr(feature, "_parent") or feature._parent is None:\n+        return feature\n+    else:\n+        return fetchParent(feature._parent)\n+\n+\n+def 
feature_test_true(feature, **kwargs):\n+    return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+    if "type" in kwargs:\n+        return str(feature.type).upper() == str(kwargs["type"]).upper()\n+    elif "types" in kwargs:\n+        for x in kwargs["types"]:\n+            if str(feature.type).upper() == str(x).upper():\n+                return True\n+        return False\n+    raise Exception("Incorrect feature_test'..b'feature.location.start,\n+        # feature.location.end,\n+        # feature.location.strand\n+        # )\n+    return result\n+\n+\n+def get_gff3_id(gene):\n+    return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+    # This prevents frameshift errors\n+    while start < 0:\n+        start += 3\n+    while end < 0:\n+        end += 3\n+    while start > parent_length:\n+        start -= 3\n+    while end > parent_length:\n+        end -= 3\n+    return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+    for x in genes(feature_list):\n+        if (\n+            len(\n+                list(\n+                    feature_lambda(\n+                        x.sub_features,\n+                        feature_test_type,\n+                        {"type": "CDS"},\n+                        subfeatures=False,\n+                    )\n+                )\n+            )\n+            > 0\n+        ):\n+            yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+    """\n+    Simple filter to extract gene features from the feature set.\n+    """\n+\n+    if not sort:\n+        for x in feature_lambda(\n+            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+        ):\n+            yield x\n+    else:\n+        data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+        data = sorted(data, key=lambda feature: feature.location.start)\n+        for x in data:\n+            yield 
x\n+\n+\n+def wa_unified_product_name(feature):\n+    """\n+    Try and figure out a name. We gave conflicting instructions, so\n+    this isn\'t as trivial as it should be. Sometimes it will be in\n+    \'product\' or \'Product\', othertimes in \'Name\'\n+    """\n+    # Manually applied tags.\n+    protein_product = feature.qualifiers.get(\n+        "product", feature.qualifiers.get("Product", [None])\n+    )[0]\n+\n+    # If neither of those are available ...\n+    if protein_product is None:\n+        # And there\'s a name...\n+        if "Name" in feature.qualifiers:\n+            if not is_uuid(feature.qualifiers["Name"][0]):\n+                protein_product = feature.qualifiers["Name"][0]\n+\n+    return protein_product\n+\n+\n+def is_uuid(name):\n+    return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+    # Normal RBS annotation types\n+    rbs_rbs = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+        )\n+    )\n+    rbs_sds = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "Shine_Dalgarno_sequence"},\n+            subfeatures=False,\n+        )\n+    )\n+    # Fraking apollo\n+    apollo_exons = list(\n+        feature_lambda(\n+            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n+        )\n+    )\n+    apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+    # These are more NCBI\'s style\n+    regulatory_elements = list(\n+        feature_lambda(\n+            gene.sub_features,\n+            feature_test_type,\n+            {"type": "regulatory"},\n+            subfeatures=False,\n+        )\n+    )\n+    rbs_regulatory = list(\n+        feature_lambda(\n+            regulatory_elements,\n+            feature_test_quals,\n+            {"regulatory_class": ["ribosome_binding_site"]},\n+            subfeatures=False,\n+        )\n+    
)\n+    # Here\'s hoping you find just one ;)\n+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+    """\n+    get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+    """\n+    name = record.id\n+    likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+    if len(likely_parental_contig) == 1:\n+        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+    return name\n+\n+\n+def fsort(it):\n+    for i in sorted(it, key=lambda x: int(x.location.start)):\n+        yield i\n'
b
diff -r 68e1e56e338a -r b79df4966ebb lipory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lipory.py Mon Jun 05 02:45:43 2023 +0000
[
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+import re
+import sys
+import argparse
+import logging
+from Bio import SeqIO
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+from gff3 import feature_lambda, feature_test_type, get_id
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def find_lipoprotein(gff3_file, fasta_genome, lipobox_mindist=10, lipobox_maxdist=40):
+    """Annotate lipoboxes on CDS translations and yield only the genes that have one.
+
+    Every CDS below a gene is translated (table 11, strict ``cds=True``
+    validation) and searched with the lipobox patterns defined below. For each
+    hit a ``Lipobox`` feature spanning the four motif codons is appended to the
+    parent gene's sub_features.
+
+    :type gff3_file: file handle
+    :param gff3_file: GFF3 gene calls, read with gffParse.
+
+    :type fasta_genome: file handle
+    :param fasta_genome: FASTA sequences the GFF3 coordinates refer to.
+
+    :type lipobox_mindist: int
+    :param lipobox_mindist: minimum number of residues preceding the motif.
+
+    :type lipobox_maxdist: int
+    :param lipobox_maxdist: maximum number of residues preceding the motif.
+
+    :rtype: yielded list
+    :return: Yields one single-element list ``[record]`` per parsed record,
+             with ``record.features`` replaced by the genes that received at
+             least one Lipobox feature (each gene listed once).
+    """
+    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta_genome, "fasta"))
+
+    # Both patterns are anchored at the protein start and require
+    # mindist..maxdist residues before the 4-residue motif, so m.end() is the
+    # index just past the final cysteine of the motif.
+    CASES = [
+        re.compile(
+            "^.{%s,%s}[ILMFTV][^REKD][GAS]C" % (lipobox_mindist, lipobox_maxdist)
+        ),
+        re.compile("^.{%s,%s}AW[AGS]C" % (lipobox_mindist, lipobox_maxdist)),
+        # Make sure to not have multiple cases that share matches, will introduce duplicate features into gff3 file
+    ]
+
+    for record in gffParse(gff3_file, base_dict=seq_dict):
+        good_features = []
+
+        genes = list(
+            feature_lambda(
+                record.features, feature_test_type, {"type": "gene"}, subfeatures=True
+            )
+        )
+        for gene in genes:
+            cdss = list(
+                feature_lambda(
+                    gene.sub_features,
+                    feature_test_type,
+                    {"type": "CDS"},
+                    subfeatures=False,
+                )
+            )
+
+            # Track hits per gene so the gene is kept exactly once, no matter
+            # how many CDS/pattern combinations match.
+            has_lipobox = False
+            for cds in cdss:
+                try:
+                    tmpseq = str(
+                        cds.extract(record.seq).translate(table=11, cds=True)
+                    ).replace("*", "")
+                except Exception as exc:
+                    # cds=True rejects sequences that are not a complete CDS;
+                    # such features are skipped rather than aborting the run.
+                    log.debug("Skipping untranslatable CDS on %s: %s", record.id, exc)
+                    continue
+
+                for case in CASES:
+                    m = case.search(tmpseq)
+                    if not m:
+                        continue
+
+                    # Convert the last four matched residues to nucleotide
+                    # coordinates, counting from the CDS start on the plus
+                    # strand and back from the CDS end on the minus strand.
+                    if cds.location.strand > 0:
+                        start = cds.location.start + (3 * (m.end() - 4))
+                        end = cds.location.start + (3 * m.end())
+                    else:
+                        start = cds.location.end - (3 * (m.end() - 4))
+                        end = cds.location.end - (3 * m.end())
+
+                    tmp = gffSeqFeature(
+                        FeatureLocation(
+                            min(start, end),
+                            max(start, end),
+                            strand=cds.location.strand,
+                        ),
+                        type="Lipobox",
+                        qualifiers={
+                            "source": "CPT_LipoRy",
+                            "ID": "%s.lipobox" % get_id(gene),
+                        },
+                    )
+                    tmp.qualifiers["sequence"] = str(
+                        tmp.extract(record).seq.translate()
+                    )
+
+                    gene.sub_features.append(tmp)
+                    has_lipobox = True
+
+            if has_lipobox:
+                good_features.append(gene)
+
+        # Assign once per record, after all genes were examined, so records
+        # without any hit are emitted with an empty feature list instead of
+        # keeping their unfiltered features.
+        record.features = good_features
+        yield [record]
+
+
+if __name__ == "__main__":
+    # Command-line entry point: the argument names must match the parameter
+    # names of find_lipoprotein, because the parsed namespace is passed on as
+    # keyword arguments below.
+    parser = argparse.ArgumentParser(description="Filter out lipoproteins", epilog="")
+    parser.add_argument(
+        "gff3_file", type=argparse.FileType("r"), help="Naive ORF Calls"
+    )
+    parser.add_argument(
+        "fasta_genome", type=argparse.FileType("r"), help="Fasta genome sequence"
+    )
+
+    parser.add_argument(
+        "--lipobox_mindist",
+        type=int,
+        help="Minimum distance in codons to start of lipobox",
+        default=10,
+    )
+    parser.add_argument(
+        "--lipobox_maxdist",
+        type=int,
+        help="Maximum distance in codons to start of lipobox",
+        default=40,
+    )
+
+    # Parse once; vars() turns the namespace into a dict for ** expansion.
+    args = vars(parser.parse_args())
+    for record in find_lipoprotein(**args):
+        # Annotations are cleared before writing.
+        # NOTE(review): presumably to keep record-level metadata out of the
+        # GFF3 output; confirm against gffWrite.
+        record[0].annotations = {}
+        gffWrite(record, sys.stdout)
b
diff -r 68e1e56e338a -r b79df4966ebb lipory.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lipory.xml Mon Jun 05 02:45:43 2023 +0000
[
@@ -0,0 +1,58 @@
+<tool id="edu.tamu.cpt.fasta.lipory" name="Identify Lipoboxes" version="19.1.0.0">
+  <description> in protein sequences</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <!-- NOTE(review): lipory.py imports the standard-library "re" module, not "regex"; confirm whether the extra requirement below is still needed. -->
+  <expand macro="requirements">
+    <requirement type="package" version="2022.1.18">regex</requirement>
+  </expand>
+  <!-- The script is invoked explicitly through $__tool_directory__ instead of the deprecated "interpreter" attribute. -->
+  <command detect_errors="aggressive"><![CDATA[python '$__tool_directory__/lipory.py'
+'$positional_1'
+'$positional_2'
+
+--lipobox_mindist '$lipobox_mindist'
+--lipobox_maxdist '$lipobox_maxdist'
+
+> '$default']]></command>
+  <inputs>
+    <param label="Naive orf calls" name="positional_1" type="data" format="gff3"/>
+    <param label="Genome" name="positional_2" type="data" format="fasta"/>
+    <param label="Minimum distance in codons to start of lipobox" name="lipobox_mindist" type="integer" value="10"/>
+    <param label="Maximum distance in codons to start of lipobox" name="lipobox_maxdist" type="integer" value="40"/>
+  </inputs>
+  <outputs>
+    <data format="gff3" name="default" label="Lipoboxes from ${on_string}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="positional_1" value="T7_LiporyIn.gff3"/>
+      <param name="positional_2" value="T7_LiporyIn.fasta"/>
+      <param name="lipobox_mindist" value="10"/>
+      <param name="lipobox_maxdist" value="60"/>
+      <output name="default" value="T7_LiporyOut.gff3"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+Identifies possible LipoBoxes from an input GFF3 and FASTA.
+
+**How it works**
+
+Translates each CDS in the input GFF3 and searches the protein sequence using regular expressions
+for a 4-amino acid motif that starts between the minimum and maximum distance (10-40 residues by
+default) from the start of the protein. The motif is based on the consensus sequences described in
+(**Babu** *et al.* 2006. *J Bacteriol.* 188(8):2761-2773 and **Kongari** *et al.* 2018 *BMC
+Bioinformatics*. 19:326). The amino acids allowed here are relaxed to allow for the diversity of
+amino acids known to occur in lipoboxes.
+
+Position 1: ILMFTV or only A
+
+Position 2: any residue except REKD or only W
+
+Position 3: GAS
+
+Position 4: C
+
+      ]]></help>
+  <expand macro="citations-2020"/>
+</tool>
b
diff -r 68e1e56e338a -r b79df4966ebb macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:45:43 2023 +0000
b
@@ -0,0 +1,85 @@
+<macros>
+    <!-- Shared requirements: progressivemauve (no version pinned) and bcbiogff 0.6.4; callers can append more requirements through the yield. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Version string substituted wherever @WRAPPER_VERSION@ appears. -->
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <!-- Citation snippets, each identified by DOI. -->
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <!-- XMFA alignment input: the token is the quoted command-line reference to the "xmfa" parameter declared by the macro that follows (format defaults to xmfa through token_formats). -->
+    <token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <!-- FASTA sequences accompanying the alignment: command-line token and the matching "sequences" parameter. -->
+    <token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <!-- Reference genome chooser: a history dataset (selected by default) or an entry of the all_fasta data table. -->
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- GFF3 annotation input ("gff3_data") and a convenience macro that combines it with the genome selector. -->
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <!-- Quoted command-line reference to the "gff3_data" parameter. -->
+    <token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <!-- Path of the selected genome: the cached entry's path field, or genomeref.fa (the symlink made by @GENOME_SELECTOR_PRE@) for a history dataset. -->
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Symlinks a history genome to genomeref.fa before the main command; the select value is wrapped in str() to match the other genome tokens. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <!-- Same body as @INPUT_FASTA@. -->
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>
b
diff -r 68e1e56e338a -r b79df4966ebb test-data/T7_LiporyIn.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_LiporyIn.fasta Mon Jun 05 02:45:43 2023 +0000
b
b'@@ -0,0 +1,667 @@\n+>NC_001604\n+TCTCACAGTGTACGGACCTAAAGTTCCCCCATAGGGGGTACCTAAAGCCCAGCCAATCAC\n+CTAAAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGT\n+TTGTCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCTATCTGTTACAGTCTCCTAAA\n+GTATCCTCCTAAAGTCACCTCCTAACGTCCATCCTAAAGCCAACACCTAAAGCCTACACC\n+TAAAGACCCATCAAGTCAACGCCTATCTTAAAGTTTAAACATAAAGACCAGACCTAAAGA\n+CCAGACCTAAAGACACTACATAAAGACCAGACCTAAAGACGCCTTGTTGTTAGCCATAAA\n+GTGATAACCTTTAATCATTGTCTTTATTAATACAACTCACTATAAGGAGAGACAACTTAA\n+AGAGACTTAAAAGATTAATTTAAAATTTATCAAAAAGAGTATTGACTTAAAGTCTAACCT\n+ATAGGATACTTACAGCCATCGAGAGGGACACGGCGAATAGCCATCCCAATCGACACCGGG\n+GTCAACCGGATAAGTAGACAGCCTGATAAGTCGCACGAAAAACAGGTATTGACAACATGA\n+AGTAACATGCAGTAAGATACAAATCGCTAGGTAACACTAGCAGCGTCAACCGGGCGCACA\n+GTGCCTTCTAGGTGACTTAAGCGCACCACGGCACATAAGGTGAAACAAAACGGTTGACAA\n+CATGAAGTAAACACGGTACGATGTACCACATGAAACGACAGTGAGTCACCACACTGAAAG\n+GTGATGCGGTCTAACGAAACCTGACCTAAGACGCTCTTTAACAATCTGGTAAATAGCTCT\n+TGAGTGCATGACTAGCGGATAACTCAAGGGTATCGCAAGGTGCCCTTTATGATATTCACT\n+AATAACTGCACGAGGTAACACAAGATGGCTATGTCTAACATGACTTACAACAACGTTTTC\n+GACCACGCTTACGAAATGCTGAAAGAAAACATCCGTTATGATGACATCCGTGACACTGAT\n+GACCTGCACGATGCTATTCACATGGCTGCCGATAATGCAGTTCCGCACTACTACGCTGAC\n+ATCTTTAGCGTAATGGCAAGTGAGGGCATTGACCTTGAGTTCGAAGACTCTGGTCTGATG\n+CCTGACACCAAGGACGTAATCCGCATCCTGCAAGCGCGTATCTATGAGCAATTAACGATT\n+GACCTCTGGGAAGACGCAGAAGACTTGCTCAATGAATACTTGGAGGAAGTCGAGGAGTAC\n+GAGGAGGATGAAGAGTAATGTCTACTACCAACGTGCAATACGGTCTGACCGCTCAAACTG\n+TACTTTTCTATAGCGACATGGTGCGCTGTGGCTTTAACTGGTCACTCGCAATGGCACAGC\n+TCAAAGAACTGTACGAAAACAACAAGGCAATAGCTTTAGAATCTGCTGAGTGATAGACTC\n+AAGGTCGCTCCTAGCGAGTGGCCTTTATGATTATCACTTTACTTATGAGGGAGTAATGTA\n+TATGCTTACTATCGGTCTACTCACCGCTCTAGGTCTAGCTGTAGGTGCATCCTTTGGGAA\n+GGCTTTAGGTGTAGCTGTAGGTTCCTACTTTACCGCTTGCATCATCATAGGAATCATCAA\n+AGGGGCACTACGCAAATGATGAAGCACTACGTTATGCCAATCCACACGTCCAACGGGGCA\n+ACCGTATGTACACCTGATGGGTTCGCAATGAAACAACGAATCGAACGCCTTAAGCGTGAA\n+CTCCGCATTAACCGCAAGATTAACAAGATAGGTTCCGGCTATGACAGAACGCACTGATGG\n+CTTAAAGAAAGGTTATATGCCCAATGGCACACTATACGCTGCAAATCGGCGAATAGTGAG\n+AACTTGGCGAGA
GAACAACCTCGAACGCCGCAAGGACAAGAGAGGGCGGCGTGGCATAGA\n+CGAAAGGAAAAGGTTAAAGCCAAGAAACTCGCCGCACTTGAACAGGCACTAGCCAACACA\n+CTGAACGCTATCTCATAACGAACATAAAGGACACAATGCAATGAACATTACCGACATCAT\n+GAACGCTATCGACGCAATCAAAGCACTGCCAATCTGTGAACTTGACAAGCGTCAAGGTAT\n+GCTTATCGACTTACTGGTCGAGATGGTCAACAGCGAGACGTGTGATGGCGAGCTAACCGA\n+ACTAAATCAGGCACTTGAGCATCAAGATTGGTGGACTACCTTGAAGTGTCTCACGGCTGA\n+CGCAGGGTTCAAGATGCTCGGTAATGGTCACTTCTCGGCTGCTTATAGTCACCCGCTGCT\n+ACCTAACAGAGTGATTAAGGTGGGCTTTAAGAAAGAGGATTCAGGCGCAGCCTATACCGC\n+ATTCTGCCGCATGTATCAGGGTCGTCCTGGTATCCCTAACGTCTACGATGTACAGCGCCA\n+CGCTGGATGCTATACGGTGGTACTTGACGCACTTAAGGATTGCGAGCGTTTCAACAATGA\n+TGCCCATTATAAATACGCTGAGATTGCAAGCGACATCATTGATTGCAATTCGGATGAGCA\n+TGATGAGTTAACTGGATGGGATGGTGAGTTTGTTGAAACTTGTAAACTAATCCGCAAGTT\n+CTTTGAGGGCATCGCCTCATTCGACATGCATAGCGGGAACATCATGTTCTCAAATGGAGA\n+CGTACCATACATCACCGACCCGGTATCATTCTCGCAGAAGAAAGACGGTGGCGCATTCAG\n+CATCGACCCTGAGGAACTCATCAAGGAAGTCGAGGAAGTCGCACGACAGAAAGAAATTGA\n+CCGCGCTAAGGCCCGTAAAGAACGTCACGAGGGGCGCTTAGAGGCACGCAGATTCAAACG\n+TCGCAACCGCAAGGCACGTAAAGCACACAAAGCTAAGCGCGAAAGAATGCTTGCTGCGTG\n+GCGATGGGCTGAACGTCAAGAACGGCGTAACCATGAGGTAGCTGTAGATGTACTAGGAAG\n+AACCAATAACGCTATGCTCTGGGTCAACATGTTCTCTGGGGACTTTAAGGCGCTTGAGGA\n+ACGAATCGCGCTGCACTGGCGTAATGCTGACCGGATGGCTATCGCTAATGGTCTTACGCT\n+CAACATTGATAAGCAACTTGACGCAATGTTAATGGGCTGATAGTCTTATCTTACAGGTCA\n+TCTGCGGGTGGCCTGAATAGGTACGATTTACTAACTGGAAGAGGCACTAAATGAACACGA\n+TTAACATCGCTAAGAACGACTTCTCTGACATCGAACTGGCTGCTATCCCGTTCAACACTC\n+TGGCTGACCATTACGGTGAGCGTTTAGCTCGCGAACAGTTGGCCCTTGAGCATGAGTCTT\n+ACGAGATGGGTGAAGCACGCTTCCGCAAGATGTTTGAGCGTCAACTTAAAGCTGGTGAGG\n+TTGCGGATAACGCTGCCGCCAAGCCTCTCATCACTACCCTACTCCCTAAGATGATTGCAC\n+GCATCAACGACTGGTTTGAGGAAGTGAAAGCTAAGCGCGGCAAGCGCCCGACAGCCTTCC\n+AGTTCCTGCAAGAAATCAAGCCGGAAGCCGTAGCGTACATCACCATTAAGACCACTCTGG\n+CTTGCCTAACCAGTGCTGACAATACAACCGTTCAGGCTGTAGCAAGCGCAATCGGTCGGG\n+CCATTGAGGACGAGGCTCGCTTCGGTCGTATCCGTGACCTTGAAGCTAAGCACTTCAAGA\n+AAAACGTTGAGGAACAACTCAACAAGCGCGTAGGGCACGTCTACAAGAAAGCATTTATGC\n+AAGTTGTCGAGGCTGACATGCTCTCTAAGGGTCTACTCGGTGGCGAGGCGTGGTCTTCG
T\n+GGCATAAGGAAGACTCTATTCATGTAGGAGTACGCTGCATCGAGATGCTCATTGAGTCAA\n+'..b'TAAGTGTGACTGTTTCACAGGATCTCCGCTTCCGCAATATCTGGATTAAGTGT\n+GCCAACAACTCTTGGAACTTCTTCCGTACTGGCCCCGATGGAATCTACTTCATAGCCTCT\n+GATGGTGGATGGTTACGATTCCAAATACACTCCAACGGTCTCGGATTCAAGAATATTGCA\n+GACAGTCGTTCAGTACCTAATGCAATCATGGTGGAGAACGAGTAATTGGTAAATCACAAG\n+GAAAGACGTGTAGTCCACGGATGGACTCTCAAGGAGGTACAAGGTGCTATCATTAGACTT\n+TAACAACGAATTGATTAAGGCTGCTCCAATTGTTGGGACGGGTGTAGCAGATGTTAGTGC\n+TCGACTGTTCTTTGGGTTAAGCCTTAACGAATGGTTCTACGTTGCTGCTATCGCCTACAC\n+AGTGGTTCAGATTGGTGCCAAGGTAGTCGATAAGATGATTGACTGGAAGAAAGCCAATAA\n+GGAGTGATATGTATGGAAAAGGATAAGAGCCTTATTACATTCTTAGAGATGTTGGACACT\n+GCGATGGCTCAGCGTATGCTTGCGGACCTTTCGGACCATGAGCGTCGCTCTCCGCAACTC\n+TATAATGCTATTAACAAACTGTTAGACCGCCACAAGTTCCAGATTGGTAAGTTGCAGCCG\n+GATGTTCACATCTTAGGTGGCCTTGCTGGTGCTCTTGAAGAGTACAAAGAGAAAGTCGGT\n+GATAACGGTCTTACGGATGATGATATTTACACATTACAGTGATATACTCAAGGCCACTAC\n+AGATAGTGGTCTTTATGGATGTCATTGTCTATACGAGATGCTCCTACGTGAAATCTGAAA\n+GTTAACGGGAGGCATTATGCTAGAATTTTTACGTAAGCTAATCCCTTGGGTTCTCGCTGG\n+GATGCTATTCGGGTTAGGATGGCATCTAGGGTCAGACTCAATGGACGCTAAATGGAAACA\n+GGAGGTACACAATGAGTACGTTAAGAGAGTTGAGGCTGCGAAGAGCACTCAAAGAGCAAT\n+CGATGCGGTATCTGCTAAGTATCAAGAAGACCTTGCCGCGCTGGAAGGGAGCACTGATAG\n+GATTATTTCTGATTTGCGTAGCGACAATAAGCGGTTGCGCGTCAGAGTCAAAACTACCGG\n+AACCTCCGATGGTCAGTGTGGATTCGAGCCTGATGGTCGAGCCGAACTTGACGACCGAGA\n+TGCTAAACGTATTCTCGCAGTGACCCAGAAGGGTGACGCATGGATTCGTGCGTTACAGGA\n+TACTATTCGTGAACTGCAACGTAAGTAGGAAATCAAGTAAGGAGGCAATGTGTCTACTCA\n+ATCCAATCGTAATGCGCTCGTAGTGGCGCAACTGAAAGGAGACTTCGTGGCGTTCCTATT\n+CGTCTTATGGAAGGCGCTAAACCTACCGGTGCCCACTAAGTGTCAGATTGACATGGCTAA\n+GGTGCTGGCGAATGGAGACAACAAGAAGTTCATCTTACAGGCTTTCCGTGGTATCGGTAA\n+GTCGTTCATCACATGTGCGTTCGTTGTGTGGTCCTTATGGAGAGACCCTCAGTTGAAGAT\n+ACTTATCGTATCAGCCTCTAAGGAGCGTGCAGACGCTAACTCCATCTTTATTAAGAACAT\n+CATTGACCTGCTGCCATTCCTATCTGAGTTAAAGCCAAGACCCGGACAGCGTGACTCGGT\n+AATCAGCTTTGATGTAGGCCCAGCCAATCCTGACCACTCTCCTAGTGTGAAATCAGTAGG\n+TATCACTGGTCAGTTAACTGGTAGCCGTGCTGACATTATCATTGCGGATGACGTTGAGAT\n+TCCGTCTAACAGCGCAACTATGGGTGCCCGTGAGAAGCTATGGAC
TCTGGTTCAGGAGTT\n+CGCTGCGTTACTTAAACCGCTGCCTTCCTCTCGCGTTATCTACCTTGGTACACCTCAGAC\n+AGAGATGACTCTCTATAAGGAACTTGAGGATAACCGTGGGTACACAACCATTATCTGGCC\n+TGCTCTGTACCCAAGGACACGTGAAGAGAACCTCTATTACTCACAGCGTCTTGCTCCTAT\n+GTTACGCGCTGAGTACGATGAGAACCCTGAGGCACTTGCTGGGACTCCAACAGACCCAGT\n+GCGCTTTGACCGTGATGACCTGCGCGAGCGTGAGTTGGAATACGGTAAGGCTGGCTTTAC\n+GCTACAGTTCATGCTTAACCCTAACCTTAGTGATGCCGAGAAGTACCCGCTGAGGCTTCG\n+TGACGCTATCGTAGCGGCCTTAGACTTAGAGAAGGCCCCAATGCATTACCAGTGGCTTCC\n+GAACCGTCAGAACATCATTGAGGACCTTCCTAACGTTGGCCTTAAGGGTGATGACCTGCA\n+TACGTACCACGATTGTTCCAACAACTCAGGTCAGTACCAACAGAAGATTCTGGTCATTGA\n+CCCTAGTGGTCGCGGTAAGGACGAAACAGGTTACGCTGTGCTGTACACACTGAACGGTTA\n+CATCTACCTTATGGAAGCTGGAGGTTTCCGTGATGGCTACTCCGATAAGACCCTTGAGTT\n+ACTCGCTAAGAAGGCAAAGCAATGGGGAGTCCAGACGGTTGTCTACGAGAGTAACTTCGG\n+TGACGGTATGTTCGGTAAGGTATTCAGTCCTATCCTTCTTAAACACCACAACTGTGCGAT\n+GGAAGAGATTCGTGCCCGTGGTATGAAAGAGATGCGTATTTGCGATACCCTTGAGCCAGT\n+CATGCAGACTCACCGCCTTGTAATTCGTGATGAGGTCATTAGGGCCGACTACCAGTCCGC\n+TCGTGACGTAGACGGTAAGCATGACGTTAAGTACTCGTTGTTCTACCAGATGACCCGTAT\n+CACTCGTGAGAAAGGCGCTCTGGCTCATGATGACCGATTGGATGCCCTTGCGTTAGGCAT\n+TGAGTATCTCCGTGAGTCCATGCAGTTGGATTCCGTTAAGGTCGAGGGTGAAGTACTTGC\n+TGACTTCCTTGAGGAACACATGATGCGTCCTACGGTTGCTGCTACGCATATCATTGAGAT\n+GTCTGTGGGAGGAGTTGATGTGTACTCTGAGGACGATGAGGGTTACGGTACGTCTTTCAT\n+TGAGTGGTGATTTATGCATTAGGACTGCATAGGGATGCACTATAGACCACGGATGGTCAG\n+TTCTTTAAGTTACTGAAAAGACACGATAAATTAATACGACTCACTATAGGGAGAGGAGGG\n+ACGAAAGGTTACTATATAGATACTGAATGAATACTTATAGAGTGCATAAAGTATGCATAA\n+TGGTGTACCTAGAGTGACCTCTAAGAATGGTGATTATATTGTATTAGTATCACCTTAACT\n+TAAGGACCAACATAAAGGGAGGAGACTCATGTTCCGCTTATTGTTGAACCTACTGCGGCA\n+TAGAGTCACCTACCGATTTCTTGTGGTACTTTGTGCTGCCCTTGGGTACGCATCTCTTAC\n+TGGAGACCTCAGTTCACTGGAGTCTGTCGTTTGCTCTATACTCACTTGTAGCGATTAGGG\n+TCTTCCTGACCGACTGATGGCTCACCGAGGGATTCAGCGGTATGATTGCATCACACCACT\n+TCATCCCTATAGAGTCAAGTCCTAAGGTATACCCATAAAGAGCCTCTAATGGTCTATCCT\n+AAGGTCTATACCTAAAGATAGGCCATCCTATCAGTGTCACCTAAAGAGGGTCTTAGAGAG\n+GGCCTATGGAGTTCCTATAGGGTCCTTTAAAATATACCATAAAAATCTGAGTGACTATCT\n+CACAGTGTACGGACCTAAAGTTCCCCCAT
AGGGGGTACCTAAAGCCCAGCCAATCACCTA\n+AAGTCAACCTTCGGTTGACCTTGAGGGTTCCCTAAGGGTTGGGGATGACCCTTGGGTTTG\n+TCTTTGGGTGTTACCTTGAGTGTCTCTCTGTGTCCCT\n'
b
diff -r 68e1e56e338a -r b79df4966ebb test-data/T7_LiporyIn.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_LiporyIn.gff3 Mon Jun 05 02:45:43 2023 +0000
b
b'@@ -0,0 +1,3145 @@\n+##gff-version 3\n+NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tgene\t542\t651\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t637\t.\t-\t.\tID=ORF.0.2506_0.7313234548298825.mRNA;Parent=ORF.0.2506_0.7313234548298825;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t637\t.\t-\t0\tID=ORF.0.2506_0.7313234548298825.CDS;Parent=ORF.0.2506_0.7313234548298825.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t649\t651\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2506_0.7313234548298825.rbs-0;Parent=ORF.0.2506_0.7313234548298825;\n+NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tgene\t627\t747\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t734\t.\t-\t.\tID=ORF.0.3367_0.1254781548971481.mRNA;Parent=ORF.0.3367_0.1254781548971481;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t734\t.\t-\t0\tID=ORF.0.3367_0.1254781548971481.CDS;Parent=ORF.0.3367_0.1254781548971481.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t745\t747\t.\t-\t.\tsource=CPT_ShineFind;ID
=ORF.0.3367_0.1254781548971481.rbs-0;Parent=ORF.0.3367_0.1254781548971481;\n+NC_001604\tcpt.fixModel\tgene\t766\t1206\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498;\n+NC_001604\tcpt.fixModel\tmRNA\t766\t1191\t.\t-\t.\tID=ORF.0.2960_0.21096600108012498.mRNA;Parent=ORF.0.2960_0.21096600108012498;\n+NC_001604\tgetOrfsOrCds\tCDS\t766\t1191\t.\t-\t0\tID=ORF.0.2960_0.21096600108012498.CDS;Parent=ORF.0.2960_0.21096600108012498.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1202\t1206\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2960_0.21096600108012498.rbs-0;Parent=ORF.0.2960_0.21096600108012498;\n+NC_001604\tcpt.fixModel\tgene\t766\t885\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563;\n+NC_001604\tcpt.fixModel\tmRNA\t766\t867\t.\t-\t.\tID=ORF.0.2976_0.8667531510652563.mRNA;Parent=ORF.0.2976_0.8667531510652563;\n+NC_001604\tgetOrfsOrCds\tCDS\t766\t867\t.\t-\t0\tID=ORF.0.2976_0.8667531510652563.CDS;Parent=ORF.0.2976_0.8667531510652563.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t883\t885\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2976_0.8667531510652563.rbs-0;Parent=ORF.0.2976_0.8667531510652563;\n+NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266;\n+NC_001604\tcpt.fixModel\tmRNA\t925\t1278\t.\t+\t.\tID=ORF.0.2_0.39432314427019266.mRNA;Parent=ORF.0.2_0.39432314427019266;\n+NC_001604\tgetOrfsOrCds\tCDS\t925\t1278\t.\t+\t0\tID=ORF.0.2_0.39432314427019266.CDS;Parent=ORF.0.2_0.39432314427019266.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2_0.39432314427019266.rbs-0;Parent=ORF.0.2_0.39432314427019266;\n+NC_001604\tcpt.fixModel\tgene\t912\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601;\n+NC_001604\tcpt.fixModel\tmRNA\t931\t1278\t.\t+\t.\tID=ORF.0.3_0.8154113297998601.mRNA;Parent=ORF.0.3_0.8154113297998601;\n+NC_001604\tgetOrfsOrCds\tCDS\t931\t1278\t.\t+\t0\tID=ORF.0.3_0.8154113297998601.CDS;Parent=ORF.0.3_0.8154113297998601.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t912\t916\t.\t+\t.
\tsource=CPT_ShineFind;ID=ORF.0.3_0.8154113297998601.rbs-0;Parent=ORF.0.3_0.8154113297998601;\n+NC_001604\tcpt.fixModel\tgene\t1182\t1310\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964;\n+NC_001604\tcpt.fixModel\tmRNA\t1182\t1298\t.\t-\t.\tID=ORF.0.3359_0.6449951188722964.mRNA;Parent=ORF.0.3359_0.64499511887229'..b'93689042043441.rbs-0;Parent=ORF.0.1411_0.9793689042043441;\n+NC_001604\tcpt.fixModel\tgene\t39011\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695;\n+NC_001604\tcpt.fixModel\tmRNA\t39020\t39130\t.\t+\t.\tID=ORF.0.1412_0.30622712641637695.mRNA;Parent=ORF.0.1412_0.30622712641637695;\n+NC_001604\tgetOrfsOrCds\tCDS\t39020\t39130\t.\t+\t0\tID=ORF.0.1412_0.30622712641637695.CDS;Parent=ORF.0.1412_0.30622712641637695.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39011\t39014\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1412_0.30622712641637695.rbs-0;Parent=ORF.0.1412_0.30622712641637695;\n+NC_001604\tcpt.fixModel\tgene\t39012\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776;\n+NC_001604\tcpt.fixModel\tmRNA\t39023\t39130\t.\t+\t.\tID=ORF.0.1413_0.6484168178188776.mRNA;Parent=ORF.0.1413_0.6484168178188776;\n+NC_001604\tgetOrfsOrCds\tCDS\t39023\t39130\t.\t+\t0\tID=ORF.0.1413_0.6484168178188776.CDS;Parent=ORF.0.1413_0.6484168178188776.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39012\t39015\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1413_0.6484168178188776.rbs-0;Parent=ORF.0.1413_0.6484168178188776;\n+NC_001604\tcpt.fixModel\tgene\t39378\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005;\n+NC_001604\tcpt.fixModel\tmRNA\t39389\t39538\t.\t+\t.\tID=ORF.0.1414_0.38280168913440005.mRNA;Parent=ORF.0.1414_0.38280168913440005;\n+NC_001604\tgetOrfsOrCds\tCDS\t39389\t39538\t.\t+\t0\tID=ORF.0.1414_0.38280168913440005.CDS;Parent=ORF.0.1414_0.38280168913440005.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39378\t39382\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1414_0.38280168913440005.rbs-0;Parent=ORF.0.1414_0.38280168913440005;\n+NC_001604\tcpt.fixModel\tgene\t39423\
t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985;\n+NC_001604\tcpt.fixModel\tmRNA\t39441\t39557\t.\t+\t.\tID=ORF.0.2020_0.5190345053482985.mRNA;Parent=ORF.0.2020_0.5190345053482985;\n+NC_001604\tgetOrfsOrCds\tCDS\t39441\t39557\t.\t+\t0\tID=ORF.0.2020_0.5190345053482985.CDS;Parent=ORF.0.2020_0.5190345053482985.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39423\t39425\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2020_0.5190345053482985.rbs-0;Parent=ORF.0.2020_0.5190345053482985;\n+NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282;\n+NC_001604\tcpt.fixModel\tmRNA\t39453\t39557\t.\t+\t.\tID=ORF.0.2021_0.3406547997303282.mRNA;Parent=ORF.0.2021_0.3406547997303282;\n+NC_001604\tgetOrfsOrCds\tCDS\t39453\t39557\t.\t+\t0\tID=ORF.0.2021_0.3406547997303282.CDS;Parent=ORF.0.2021_0.3406547997303282.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2021_0.3406547997303282.rbs-0;Parent=ORF.0.2021_0.3406547997303282;\n+NC_001604\tcpt.fixModel\tgene\t39445\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707;\n+NC_001604\tcpt.fixModel\tmRNA\t39462\t39557\t.\t+\t.\tID=ORF.0.2023_0.2547887662353707.mRNA;Parent=ORF.0.2023_0.2547887662353707;\n+NC_001604\tgetOrfsOrCds\tCDS\t39462\t39557\t.\t+\t0\tID=ORF.0.2023_0.2547887662353707.CDS;Parent=ORF.0.2023_0.2547887662353707.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39445\t39447\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.2023_0.2547887662353707.rbs-0;Parent=ORF.0.2023_0.2547887662353707;\n+NC_001604\tcpt.fixModel\tgene\t39494\t39623\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638;\n+NC_001604\tcpt.fixModel\tmRNA\t39494\t39604\t.\t-\t.\tID=ORF.0.2029_0.06575596254471638.mRNA;Parent=ORF.0.2029_0.06575596254471638;\n+NC_001604\tgetOrfsOrCds\tCDS\t39494\t39604\t.\t-\t0\tID=ORF.0.2029_0.06575596254471638.CDS;Parent=ORF.0.2029_0.06575596254471638.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39620\t39623\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.20
29_0.06575596254471638.rbs-0;Parent=ORF.0.2029_0.06575596254471638;\n+NC_001604\tcpt.fixModel\tgene\t39713\t39861\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771;\n+NC_001604\tcpt.fixModel\tmRNA\t39713\t39847\t.\t-\t.\tID=ORF.0.2026_0.08836418353296771.mRNA;Parent=ORF.0.2026_0.08836418353296771;\n+NC_001604\tgetOrfsOrCds\tCDS\t39713\t39847\t.\t-\t0\tID=ORF.0.2026_0.08836418353296771.CDS;Parent=ORF.0.2026_0.08836418353296771.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t39858\t39861\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2026_0.08836418353296771.rbs-0;Parent=ORF.0.2026_0.08836418353296771;\n'
b
diff -r 68e1e56e338a -r b79df4966ebb test-data/T7_LiporyOut.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/T7_LiporyOut.gff3 Mon Jun 05 02:45:43 2023 +0000
b
b'@@ -0,0 +1,296 @@\n+##gff-version 3\n+NC_001604\tcpt.fixModel\tgene\t542\t666\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tmRNA\t542\t649\t.\t-\t.\tID=ORF.0.2504_0.5545204186518331.mRNA;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tgetOrfsOrCds\tCDS\t542\t649\t.\t-\t0\tID=ORF.0.2504_0.5545204186518331.CDS;Parent=ORF.0.2504_0.5545204186518331.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t664\t666\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2504_0.5545204186518331.rbs-0;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tfeature\tLipobox\t605\t616\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2504_0.5545204186518331.lipobox;sequence=LTAC;Parent=ORF.0.2504_0.5545204186518331;\n+NC_001604\tcpt.fixModel\tgene\t627\t839\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tmRNA\t627\t824\t.\t-\t.\tID=ORF.0.3363_0.9803284230217932.mRNA;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tgetOrfsOrCds\tCDS\t627\t824\t.\t-\t0\tID=ORF.0.3363_0.9803284230217932.CDS;Parent=ORF.0.3363_0.9803284230217932.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t837\t839\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.3363_0.9803284230217932.rbs-0;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tfeature\tLipobox\t720\t731\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.3363_0.9803284230217932.lipobox;sequence=FTSC;Parent=ORF.0.3363_0.9803284230217932;\n+NC_001604\tcpt.fixModel\tgene\t1487\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776;\n+NC_001604\tcpt.fixModel\tmRNA\t1496\t1639\t.\t+\t.\tID=ORF.0.566_0.7631590264556776.mRNA;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tgetOrfsOrCds\tCDS\t1496\t1639\t.\t+\t0\tID=ORF.0.566_0.7631590264556776.CDS;Parent=ORF.0.566_0.7631590264556776.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1487\t1490\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.566_0.7631590264556776.rbs-0;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.566_0.7631590264556776.lipobox;se
quence=FTAC;Parent=ORF.0.566_0.7631590264556776;\n+NC_001604\tcpt.fixModel\tgene\t1490\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767;\n+NC_001604\tcpt.fixModel\tmRNA\t1502\t1639\t.\t+\t.\tID=ORF.0.567_0.10768222865442767.mRNA;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tgetOrfsOrCds\tCDS\t1502\t1639\t.\t+\t0\tID=ORF.0.567_0.10768222865442767.CDS;Parent=ORF.0.567_0.10768222865442767.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t1490\t1493\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.567_0.10768222865442767.rbs-0;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tfeature\tLipobox\t1589\t1600\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.567_0.10768222865442767.lipobox;sequence=FTAC;Parent=ORF.0.567_0.10768222865442767;\n+NC_001604\tcpt.fixModel\tgene\t3341\t3547\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397;\n+NC_001604\tcpt.fixModel\tmRNA\t3341\t3535\t.\t-\t.\tID=ORF.0.2469_0.7331780084741397.mRNA;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tgetOrfsOrCds\tCDS\t3341\t3535\t.\t-\t0\tID=ORF.0.2469_0.7331780084741397.CDS;Parent=ORF.0.2469_0.7331780084741397.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t3545\t3547\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2469_0.7331780084741397.rbs-0;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tfeature\tLipobox\t3488\t3499\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2469_0.7331780084741397.lipobox;sequence=LISC;Parent=ORF.0.2469_0.7331780084741397;\n+NC_001604\tcpt.fixModel\tgene\t3433\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531;\n+NC_001604\tcpt.fixModel\tmRNA\t3444\t5822\t.\t+\t.\tID=ORF.0.1457_0.7756036756597531.mRNA;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tgetOrfsOrCds\tCDS\t3444\t5822\t.\t+\t0\tID=ORF.0.1457_0.7756036756597531.CDS;Parent=ORF.0.1457_0.7756036756597531.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t3433\t3435\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1457_0.7756036756597531.rbs-0;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tfeature\tLipobox\t3534\t3545\t.\t+\t.\tsource=CPT_LipoRy;ID=O
RF.0.1457_0.7756036756597531.lipobox;sequence=TLAC;Parent=ORF.0.1457_0.7756036756597531;\n+NC_001604\tcpt.fixModel\tgene\t4440\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771;\n+NC_001604\tcpt.fixModel\tmRNA\t4455\t5822\t.\t+\t.\tID=ORF.0.1500_0.6062249049400771.mRNA;Parent=ORF.0.1500_0.6'..b'ID=ORF.0.1322_0.7094403889052515.lipobox;sequence=LYGC;Parent=ORF.0.1322_0.7094403889052515;\n+NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005;\n+NC_001604\tcpt.fixModel\tmRNA\t36797\t36898\t.\t+\t.\tID=ORF.0.1324_0.11087411288527005.mRNA;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tgetOrfsOrCds\tCDS\t36797\t36898\t.\t+\t0\tID=ORF.0.1324_0.11087411288527005.CDS;Parent=ORF.0.1324_0.11087411288527005.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1324_0.11087411288527005.rbs-0;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1324_0.11087411288527005.lipobox;sequence=LYGC;Parent=ORF.0.1324_0.11087411288527005;\n+NC_001604\tcpt.fixModel\tgene\t36787\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433;\n+NC_001604\tcpt.fixModel\tmRNA\t36800\t36898\t.\t+\t.\tID=ORF.0.1325_0.22902888411750433.mRNA;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tgetOrfsOrCds\tCDS\t36800\t36898\t.\t+\t0\tID=ORF.0.1325_0.22902888411750433.CDS;Parent=ORF.0.1325_0.22902888411750433.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t36787\t36789\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1325_0.22902888411750433.rbs-0;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tfeature\tLipobox\t36851\t36862\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1325_0.22902888411750433.lipobox;sequence=LYGC;Parent=ORF.0.1325_0.22902888411750433;\n+NC_001604\tcpt.fixModel\tgene\t37020\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165;\n+NC_001604\tcpt.fixModel\tmRNA\t37032\t37283\t.\t+\t.\tID=ORF.0.1961_0.03848108116896165.mRNA;Parent=ORF.0.1961_0.03848108
116896165;\n+NC_001604\tgetOrfsOrCds\tCDS\t37032\t37283\t.\t+\t0\tID=ORF.0.1961_0.03848108116896165.CDS;Parent=ORF.0.1961_0.03848108116896165.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37020\t37026\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1961_0.03848108116896165.rbs-0;Parent=ORF.0.1961_0.03848108116896165;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1961_0.03848108116896165.lipobox;sequence=ISGC;Parent=ORF.0.1961_0.03848108116896165;\n+NC_001604\tcpt.fixModel\tgene\t37034\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436;\n+NC_001604\tcpt.fixModel\tmRNA\t37050\t37283\t.\t+\t.\tID=ORF.0.1962_0.6590821562203436.mRNA;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tgetOrfsOrCds\tCDS\t37050\t37283\t.\t+\t0\tID=ORF.0.1962_0.6590821562203436.CDS;Parent=ORF.0.1962_0.6590821562203436.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37034\t37036\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1962_0.6590821562203436.rbs-0;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1962_0.6590821562203436.lipobox;sequence=ISGC;Parent=ORF.0.1962_0.6590821562203436;\n+NC_001604\tcpt.fixModel\tgene\t37074\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861;\n+NC_001604\tcpt.fixModel\tmRNA\t37083\t37283\t.\t+\t.\tID=ORF.0.1964_0.6899335526754861.mRNA;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tgetOrfsOrCds\tCDS\t37083\t37283\t.\t+\t0\tID=ORF.0.1964_0.6899335526754861.CDS;Parent=ORF.0.1964_0.6899335526754861.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37074\t37076\t.\t+\t.\tsource=CPT_ShineFind;ID=ORF.0.1964_0.6899335526754861.rbs-0;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tfeature\tLipobox\t37167\t37178\t.\t+\t.\tsource=CPT_LipoRy;ID=ORF.0.1964_0.6899335526754861.lipobox;sequence=ISGC;Parent=ORF.0.1964_0.6899335526754861;\n+NC_001604\tcpt.fixModel\tgene\t37213\t37379\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225;\n+NC_001604\tcpt.fixModel\tmRNA\t37213\t3736
8\t.\t-\t.\tID=ORF.0.2528_0.10906489943882225.mRNA;Parent=ORF.0.2528_0.10906489943882225;\n+NC_001604\tgetOrfsOrCds\tCDS\t37213\t37368\t.\t-\t0\tID=ORF.0.2528_0.10906489943882225.CDS;Parent=ORF.0.2528_0.10906489943882225.mRNA;\n+NC_001604\tfeature\tShine_Dalgarno_sequence\t37377\t37379\t.\t-\t.\tsource=CPT_ShineFind;ID=ORF.0.2528_0.10906489943882225.rbs-0;Parent=ORF.0.2528_0.10906489943882225;\n+NC_001604\tfeature\tLipobox\t37315\t37326\t.\t-\t.\tsource=CPT_LipoRy;ID=ORF.0.2528_0.10906489943882225.lipobox;sequence=IVSC;Parent=ORF.0.2528_0.10906489943882225;\n'