Previous changeset 3:4d4a4b603d33 (2022-05-20) Next changeset 5:e7e82d0ae286 (2023-07-23) |
Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c |
added:
cpt-macros.xml gff3.py macros.xml shinefind.py shinefind.xml test-data/Miro_ShineFindIn.fa test-data/Miro_ShineFindIn.gff3 test-data/Miro_ShineFindOut.tbl test-data/Miro_ShineFindOut1.gff3 test-data/Miro_ShineFindOut2.gff3 |
removed:
cpt_shinefind/cpt-macros.xml cpt_shinefind/gff3.py cpt_shinefind/macros.xml cpt_shinefind/shinefind.py cpt_shinefind/shinefind.xml cpt_shinefind/test-data/Miro_ShineFindIn.fa cpt_shinefind/test-data/Miro_ShineFindIn.gff3 cpt_shinefind/test-data/Miro_ShineFindOut.tbl cpt_shinefind/test-data/Miro_ShineFindOut1.gff3 cpt_shinefind/test-data/Miro_ShineFindOut2.gff3 |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt-macros.xml Mon Jun 05 02:53:31 2023 +0000 |
[ |
@@ -0,0 +1,115 @@ +<macros> + <xml name="gff_requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.12.1">requests</requirement> + <requirement type="package" version="1.2.2">cpt_gffparser</requirement> + <yield/> + </requirements> + <version_command> + <![CDATA[ + cd '$__tool_directory__' && git rev-parse HEAD + ]]> + </version_command> + </xml> + <xml name="citation/mijalisrasche"> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex">@unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-crr"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. 
Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020-AJC-solo"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-clm"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="sl-citations-clm"> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </xml> +</macros> |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/cpt-macros.xml --- a/cpt_shinefind/cpt-macros.xml Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,115 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="gff_requirements"> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.65">biopython</requirement> - <requirement type="package" version="2.12.1">requests</requirement> - <yield/> - </requirements> - <version_command> - <![CDATA[ - cd $__tool_directory__ && git rev-parse HEAD - ]]> - </version_command> - </xml> - <xml name="citation/mijalisrasche"> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex">@unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - </xml> - <xml name="citations"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-crr"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. 
Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-2020-AJC-solo"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="citations-clm"> - <citations> - <citation type="doi">10.1371/journal.pcbi.1008214</citation> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </citations> - </xml> - <xml name="sl-citations-clm"> - <citation type="bibtex"> - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - </citation> - <yield/> - </xml> -</macros> |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/gff3.py --- a/cpt_shinefind/gff3.py Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,346 +0,0 @@\n-import copy\n-import logging\n-\n-log = logging.getLogger()\n-log.setLevel(logging.WARN)\n-\n-\n-def feature_lambda(\n- feature_list,\n- test,\n- test_kwargs,\n- subfeatures=True,\n- parent=None,\n- invert=False,\n- recurse=True,\n-):\n- """Recursively search through features, testing each with a test function, yielding matches.\n-\n- GFF3 is a hierachical data structure, so we need to be able to recursively\n- search through features. E.g. if you\'re looking for a feature with\n- ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n- case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n-\n- :type feature_list: list\n- :param feature_list: an iterable of features\n-\n- :type test: function reference\n- :param test: a closure with the method signature (feature, **kwargs) where\n- the kwargs are those passed in the next argument. This\n- function should return True or False, True if the feature is\n- to be yielded as part of the main feature_lambda function, or\n- False if it is to be ignored. This function CAN mutate the\n- features passed to it (think "apply").\n-\n- :type test_kwargs: dictionary\n- :param test_kwargs: kwargs to pass to your closure when it is called.\n-\n- :type subfeatures: boolean\n- :param subfeatures: when a feature is matched, should just that feature be\n- yielded to the caller, or should the entire sub_feature\n- tree for that feature be included? subfeatures=True is\n- useful in cases such as searching for a gene feature,\n- and wanting to know what RBS/Shine_Dalgarno_sequences\n- are in the sub_feature tree (which can be accomplished\n- with two feature_lambda calls). 
subfeatures=False is\n- useful in cases when you want to process (and possibly\n- return) the entire feature tree, such as applying a\n- qualifier to every single feature.\n-\n- :type invert: boolean\n- :param invert: Negate/invert the result of the filter.\n-\n- :rtype: yielded list\n- :return: Yields a list of matching features.\n- """\n- # Either the top level set of [features] or the subfeature attribute\n- for feature in feature_list:\n- feature._parent = parent\n- if not parent:\n- # Set to self so we cannot go above root.\n- feature._parent = feature\n- test_result = test(feature, **test_kwargs)\n- # if (not invert and test_result) or (invert and not test_result):\n- if invert ^ test_result:\n- if not subfeatures:\n- feature_copy = copy.deepcopy(feature)\n- feature_copy.sub_features = list()\n- yield feature_copy\n- else:\n- yield feature\n-\n- if recurse and hasattr(feature, "sub_features"):\n- for x in feature_lambda(\n- feature.sub_features,\n- test,\n- test_kwargs,\n- subfeatures=subfeatures,\n- parent=feature,\n- invert=invert,\n- recurse=recurse,\n- ):\n- yield x\n-\n-\n-def fetchParent(feature):\n- if not hasattr(feature, "_parent") or feature._parent is None:\n- return feature\n- else:\n- return fetchParent(feature._parent)\n-\n-\n-def feature_test_true(feature, **kwargs):\n- return True\n-\n-\n-def feature_test_type(feature, **kwargs):\n- if "type" in kwargs:\n- return str(feature.type).upper() == str(kwargs["type"]).upper()\n- elif "types" in kwargs:\n- for x in kwargs["types"]:\n- if str(feature.type).upper() == str(x).upper():\n- return True\n- return False\n- raise Exception("Incorrect feature_test_type call, ne'..b'feature.location.start,\n- # feature.location.end,\n- # feature.location.strand\n- # )\n- return result\n-\n-\n-def get_gff3_id(gene):\n- return gene.qualifiers.get("Name", [gene.id])[0]\n-\n-\n-def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n- # This prevents frameshift errors\n- while start < 0:\n- start += 3\n- 
while end < 0:\n- end += 3\n- while start > parent_length:\n- start -= 3\n- while end > parent_length:\n- end -= 3\n- return (start, end)\n-\n-\n-def coding_genes(feature_list):\n- for x in genes(feature_list):\n- if (\n- len(\n- list(\n- feature_lambda(\n- x.sub_features,\n- feature_test_type,\n- {"type": "CDS"},\n- subfeatures=False,\n- )\n- )\n- )\n- > 0\n- ):\n- yield x\n-\n-\n-def genes(feature_list, feature_type="gene", sort=False):\n- """\n- Simple filter to extract gene features from the feature set.\n- """\n-\n- if not sort:\n- for x in feature_lambda(\n- feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n- ):\n- yield x\n- else:\n- data = list(genes(feature_list, feature_type=feature_type, sort=False))\n- data = sorted(data, key=lambda feature: feature.location.start)\n- for x in data:\n- yield x\n-\n-\n-def wa_unified_product_name(feature):\n- """\n- Try and figure out a name. We gave conflicting instructions, so\n- this isn\'t as trivial as it should be. 
Sometimes it will be in\n- \'product\' or \'Product\', othertimes in \'Name\'\n- """\n- # Manually applied tags.\n- protein_product = feature.qualifiers.get(\n- "product", feature.qualifiers.get("Product", [None])\n- )[0]\n-\n- # If neither of those are available ...\n- if protein_product is None:\n- # And there\'s a name...\n- if "Name" in feature.qualifiers:\n- if not is_uuid(feature.qualifiers["Name"][0]):\n- protein_product = feature.qualifiers["Name"][0]\n-\n- return protein_product\n-\n-\n-def is_uuid(name):\n- return name.count("-") == 4 and len(name) == 36\n-\n-\n-def get_rbs_from(gene):\n- # Normal RBS annotation types\n- rbs_rbs = list(\n- feature_lambda(\n- gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n- )\n- )\n- rbs_sds = list(\n- feature_lambda(\n- gene.sub_features,\n- feature_test_type,\n- {"type": "Shine_Dalgarno_sequence"},\n- subfeatures=False,\n- )\n- )\n- # Fraking apollo\n- apollo_exons = list(\n- feature_lambda(\n- gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False\n- )\n- )\n- apollo_exons = [x for x in apollo_exons if len(x) < 10]\n- # These are more NCBI\'s style\n- regulatory_elements = list(\n- feature_lambda(\n- gene.sub_features,\n- feature_test_type,\n- {"type": "regulatory"},\n- subfeatures=False,\n- )\n- )\n- rbs_regulatory = list(\n- feature_lambda(\n- regulatory_elements,\n- feature_test_quals,\n- {"regulatory_class": ["ribosome_binding_site"]},\n- subfeatures=False,\n- )\n- )\n- # Here\'s hoping you find just one ;)\n- return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n-\n-\n-def nice_name(record):\n- """\n- get the real name rather than NCBI IDs and so on. 
If fails, will return record.id\n- """\n- name = record.id\n- likely_parental_contig = list(genes(record.features, feature_type="contig"))\n- if len(likely_parental_contig) == 1:\n- name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n- return name\n-\n-\n-def fsort(it):\n- for i in sorted(it, key=lambda x: int(x.location.start)):\n- yield i\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/macros.xml --- a/cpt_shinefind/macros.xml Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -<?xml version="1.0"?> -<macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="3.8.13">python</requirement> - <requirement type="package" version="1.79">biopython</requirement> - <requirement type="package" version="1.2.2">cpt_gffparser</requirement> - <yield/> - </requirements> - </xml> - <xml name="genome_selector"> - <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> - </xml> - <xml name="gff3_input"> - <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> - </xml> - <xml name="input/gff3+fasta"> - <expand macro="gff3_input" /> - <expand macro="genome_selector" /> - </xml> - <token name="@INPUT_GFF@"> - "$gff3_data" - </token> - <token name="@INPUT_FASTA@"> - genomeref.fa - </token> - <token name="@GENOME_SELECTOR_PRE@"> - ln -s $genome_fasta genomeref.fa; - </token> - <token name="@GENOME_SELECTOR@"> - genomeref.fa - </token> - <xml name="input/fasta"> - <param label="Fasta file" name="sequences" type="data" format="fasta"/> - </xml> - - <token name="@SEQUENCE@"> - "$sequences" - </token> - <xml name="input/fasta/protein"> - <param label="Protein fasta file" name="sequences" type="data" format="fasta"/> - </xml> -</macros> |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/shinefind.py --- a/cpt_shinefind/shinefind.py Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,420 +0,0 @@\n-#!/usr/bin/env python\n-import re\n-import sys\n-import argparse\n-import logging\n-from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature\n-from Bio import SeqIO\n-from Bio.SeqRecord import SeqRecord\n-from Bio.SeqFeature import FeatureLocation\n-from gff3 import (\n- feature_lambda,\n- feature_test_type,\n- feature_test_true,\n- feature_test_quals,\n- get_id,\n- ensure_location_in_bounds,\n-)\n-\n-logging.basicConfig(level=logging.INFO)\n-log = logging.getLogger()\n-\n-\n-class NaiveSDCaller(object):\n-\n- # TODO May make switch for different sequence sets\n- SD_SEQUENCES = (\n- "AGGAGGT",\n- "GGAGGT",\n- "AGGAGG",\n- "GGGGGG",\n- "AGGAG",\n- "GAGGT",\n- "GGAGG",\n- "GGGGG",\n- "AGGT",\n- "GGGT",\n- "GAGG",\n- "GGGG",\n- "AGGA",\n- "GGAG",\n- "GGA",\n- "GAG",\n- "AGG",\n- "GGT",\n- "GGG",\n- )\n-\n- def __init__(self):\n- self.sd_reg = [re.compile(x, re.IGNORECASE) for x in self.SD_SEQUENCES]\n-\n- def list_sds(self, sequence, sd_min=3, sd_max=17):\n- hits = []\n- for regex in self.sd_reg:\n- for match in regex.finditer(sequence):\n- spacing = len(sequence) - len(match.group()) - match.start()\n- if sd_max >= spacing+sd_min and spacing+sd_min >= sd_min:\n- #if the spacing is within gap limits, add \n- #(search space is [sd_max+7 .. 
sd_min] so actual gap is spacing+sd_min)\n- #print(\'min %d max %d - adding SD with gap %d\' % (sd_min, sd_max, spacing+sd_min))\n- hits.append(\n- {\n- "spacing": spacing,\n- "hit": match.group(),\n- "start": match.start(),\n- "end": match.end(),\n- "len": len(match.group()),\n- }\n- )\n- hits = sorted(hits, key= lambda x: (-x[\'len\'],x[\'spacing\']))\n- return hits\n-\n- @classmethod\n- def highlight_sd(cls, sequence, start, end):\n- return " ".join(\n- [\n- sequence[0:start].lower(),\n- sequence[start:end].upper(),\n- sequence[end:].lower(),\n- ]\n- )\n-\n- @classmethod\n- def to_features(cls, hits, strand, parent_start, parent_end, feature_id=None, sd_min=3, sd_max=17):\n- results = []\n- for idx, hit in enumerate(hits):\n- # gene complement(124..486)\n- # -1 491 501 0 5 5\n- # -1 491 501 0 4 5\n- # -1 491 501 1 4 5\n- # -1 491 501 2 3 5\n- # -1 491 501 1 3 5\n- # -1 491 501 0 3 5\n- \n- qualifiers = {\n- "source": "CPT_ShineFind",\n- "ID": "%s.rbs-%s" % (feature_id, idx),\n- }\n-\n- if strand > 0:\n- start = parent_end - hit["spacing"] - hit["len"]\n- end = parent_end - hit["spacing"]\n- else:\n- start = parent_start + hit["spacing"]\n- end = parent_start + hit["spacing"] + hit["len"]\n- # check that the END of the SD sequence is within the given min/max of parent start/end\n-\n- # gap is either the sd_start-cds_end (neg strand) or the sd_end-cds_start (pos strand)\n- # minimum absolute value of these two will be the proper gap regardless of strand\n- tmp = gffSeqFeature(\n- FeatureLocation(min(start, end), max(start, end), strand=strand),\n- #FeatureLocation(min(start, end), max(start, end), strand=strand),\n- type="Shine_Dalgarno_sequence",\n- qualifiers=qualifiers,\n- )\n- '..b'and fake a\n- # break, because an actual break triggers the else: block\n- table_output.write(\n- "\\t".join(\n- map(\n- str,\n- [\n- feature.id,\n- feature_id,\n- feature.location.start,\n- feature.location.end,\n- human_strand,\n- sd_finder.highlight_sd(seq, sd["start"], 
sd["end"]),\n- sd["hit"],\n- int(sd["spacing"]) + lookahead_min,\n- ],\n- )\n- )\n- + "\\n"\n- )\n-\n- if add:\n- # Append the top RBS to the gene feature\n- gene.sub_features.append(sd_feature)\n- # Pick out start/end locations for all sub_features\n- locations = [x.location.start for x in gene.sub_features] + [\n- x.location.end for x in gene.sub_features\n- ]\n- # Update gene\'s start/end to be inclusive\n- gene.location._start = min(locations)\n- gene.location._end = max(locations)\n- # Also register the feature with the separate GFF3 output\n- sd_feature = fix_gene_boundaries(sd_feature)\n- gff3_output_record.features.append(sd_feature)\n-\n- if top_only or sd == (sds[-1]):\n- break\n- else:\n- table_output.write(\n- "\\t".join(\n- map(\n- str,\n- [\n- feature.id,\n- feature_id,\n- feature.location.start,\n- feature.location.end,\n- human_strand,\n- seq,\n- None,\n- -1,\n- ],\n- )\n- )\n- + "\\n"\n- )\n-\n- record.annotations = {}\n- gffWrite([record], sys.stdout)\n-\n- gff3_output_record.features = sorted(\n- gff3_output_record.features, key=lambda x: x.location.start\n- )\n- gff3_output_record.annotations = {}\n- gffWrite([gff3_output_record], gff3_output)\n-\n-\n-if __name__ == "__main__":\n- parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")\n- parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")\n- parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")\n-\n- parser.add_argument(\n- "--gff3_output",\n- type=argparse.FileType("w"),\n- help="GFF3 Output",\n- default="shinefind.gff3",\n- )\n- parser.add_argument(\n- "--table_output",\n- type=argparse.FileType("w"),\n- help="Tabular Output",\n- default="shinefind.tbl",\n- )\n-\n- parser.add_argument(\n- "--lookahead_min",\n- nargs="?",\n- type=int,\n- help="Number of bases upstream of CDSs to end search",\n- default=3,\n- )\n- parser.add_argument(\n- "--lookahead_max",\n- nargs="?",\n- type=int,\n- help="Number of bases 
upstream of CDSs to begin search",\n- default=17,\n- )\n-\n- parser.add_argument("--top_only", action="store_true", help="Only report best hits")\n- parser.add_argument(\n- "--add",\n- action="store_true",\n- help=\'Function in "addition" mode whereby the \'\n- + "RBSs are added directly to the gene model.",\n- )\n-\n- args = parser.parse_args()\n- shinefind(**vars(args))\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/shinefind.xml --- a/cpt_shinefind/shinefind.xml Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,98 +0,0 @@ -<tool id="edu.tamu.cpt.genbank.shinefind" name="Shine Find" version="21.1.0.0"> - <description>Identify shine-dalgarno sequences</description> - <macros> - <import>macros.xml</import> - <import>cpt-macros.xml</import> - </macros> - <expand macro="requirements"/> - <command detect_errors="aggressive"><![CDATA[ -@GENOME_SELECTOR_PRE@ - -python $__tool_directory__/shinefind.py - -@GENOME_SELECTOR@ -$gff3_data - ---table_output $default_output ---gff3_output $gff3_output - ---lookahead_min $lookahead_min ---lookahead_max $lookahead_max -$add -$top_only - - -> $stdout - -]]></command> - <inputs> - <expand macro="genome_selector" /> - <expand macro="gff3_input" /> - - <param label="Minimum number of bases upstream of CDS for gap (--lookahead_min)" name="lookahead_min" type="integer" value="3"/> - <param label="Maximum number of bases upstream of CDS for gap (--lookahead_max)" name="lookahead_max" type="integer" value="17"/> - <param checked="true" label="Automatically add RBSs to input GFF3" name="add" - type="boolean" truevalue="--add" falsevalue="" /> - <param checked="true" label="Only report best hits (--top_only)" - name="top_only" type="boolean" falsevalue="" truevalue="--top_only"/> - </inputs> - <outputs> - <data format="tabular" name="default_output" label="ShineFind RBS list from $gff3_data.name"/> - <data format="gff3" name="gff3_output" label="ShineFind GFF3 RBSs from $gff3_data.name"/> - <data format="gff3" name="stdout" label="$gff3_data.name with RBSs"/> - </outputs> - <tests> - <test> - <param name="genome_fasta" value="Miro_ShineFindIn.fa" /> - <param name="reference_genome_source" value="history" /> - <param name="gff3_data" value="Miro_ShineFindIn.gff3" /> - <param name="lookahead_max" value="15" /> - <param name="lookahead_min" value="5" /> - <param name="add" value="True"/> - <param name="top_only" value="False" /> - <output name="default_output" file="Miro_ShineFindOut.tbl" /> - <output name="gff3_output" 
file="Miro_ShineFindOut1.gff3" /> - <output name="stdout" file="Miro_ShineFindOut2.gff3" /> - </test> - </tests> - <help><![CDATA[ -**What it does** - -From an input GFF3 and FASTA file, the upstream sequence within user-specified bounds is -extracted for all CDS features. Input CDS features with an RBS are not re-analyzed. For -the remaining CDS features, the upstream sequence is searched against the following -possible Shine-Dalgarno sequences: -- AGGAGGT -- GGAGGT -- AGGAGG -- AGGAG -- GAGGT -- GGAGG -- AGGT -- GGGT -- GAGG -- GGGG -- AGGA -- GGAG -- GGA -- GAG -- AGG -- GGT - -**Gap Min/Max Settings** - In previous versions of this tool, the min/max was set for the window to find a Shine-Dalgarno sequence, but - currently we specify the minimum and maxium GAP between the end of the SD and the start of the closest CDS. - -By default, only the first (closest to consensus) hit to the list is returned to the GFF3. In the case of a tie, it will select the smaller gap. -By selecting ‘No’ for ‘Only report best hits’, **all hits** to the possible SD list will -be added to the GFF3 file. - -**March 10, 2021 Update** - Previous to this date, when using "Only report best hits" option the algorithm would pick the "first" one (moving downstream) found in the case of tied sequence length. After this date, it now will report the sequence with the smaller gap between the END of the SD sequence and the START of the CDS. - - ]]></help> - <expand macro="citations" > - <expand macro="sl-citations-clm"/> - </expand> -</tool> - |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/test-data/Miro_ShineFindIn.fa --- a/cpt_shinefind/test-data/Miro_ShineFindIn.fa Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2936 +0,0 @@\n->Miro\n-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n-CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n-AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n-GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n-TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n-TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n-TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n-GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n-CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n-CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n-AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n-AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n-TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n-TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n-AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n-CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n-GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n-TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n-TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n-TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n-CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n-TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n-TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n-TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n-AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n-ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n-ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n-GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n-GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n-ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n-GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n-ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n-AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n-TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n-AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n-GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n-TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n-ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n-ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n-TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n-CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n-GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n-TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n-ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n-GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n-GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n-ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n-ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n-
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n-ACGG'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/test-data/Miro_ShineFindIn.gff3 --- a/cpt_shinefind/test-data/Miro_ShineFindIn.gff3 Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,827 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n-Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n-Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n-Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n-Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n-Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n-Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n-Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n-Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n-Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n-Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n-Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n-Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/test-data/Miro_ShineFindOut.tbl --- a/cpt_shinefind/test-data/Miro_ShineFindOut.tbl Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -ID Name Terminus Terminus Strand Upstream Sequence SD Spacing |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/test-data/Miro_ShineFindOut1.gff3 --- a/cpt_shinefind/test-data/Miro_ShineFindOut1.gff3 Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -##gff-version 3 |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 cpt_shinefind/test-data/Miro_ShineFindOut2.gff3 --- a/cpt_shinefind/test-data/Miro_ShineFindOut2.gff3 Fri May 20 09:04:04 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,824 +0,0 @@\n-##gff-version 3\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206;\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206;\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117;\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117;\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200;\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200;\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201;\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201;\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202;\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202;\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203;\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203;\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142;\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142;\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical 
conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical con'..b'ro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98;\n-Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99;\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99;\n-Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143;\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143;\n-Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114;\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114;\n-Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141;\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141;\n-Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140;\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140;\n-Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147;\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147;\n-Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146;\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146;\n-Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145;\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145;\n-Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115;\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C 
in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115;\n-Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149;\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149;\n-Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148;\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148;\n-Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116;\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116;\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3.py Mon Jun 05 02:53:31 2023 +0000 |
[ |
b'@@ -0,0 +1,346 @@\n+import copy\n+import logging\n+\n+log = logging.getLogger()\n+log.setLevel(logging.WARN)\n+\n+\n+def feature_lambda(\n+ feature_list,\n+ test,\n+ test_kwargs,\n+ subfeatures=True,\n+ parent=None,\n+ invert=False,\n+ recurse=True,\n+):\n+ """Recursively search through features, testing each with a test function, yielding matches.\n+\n+ GFF3 is a hierachical data structure, so we need to be able to recursively\n+ search through features. E.g. if you\'re looking for a feature with\n+ ID=\'bob.42\', you can\'t just do a simple list comprehension with a test\n+ case. You don\'t know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.\n+\n+ :type feature_list: list\n+ :param feature_list: an iterable of features\n+\n+ :type test: function reference\n+ :param test: a closure with the method signature (feature, **kwargs) where\n+ the kwargs are those passed in the next argument. This\n+ function should return True or False, True if the feature is\n+ to be yielded as part of the main feature_lambda function, or\n+ False if it is to be ignored. This function CAN mutate the\n+ features passed to it (think "apply").\n+\n+ :type test_kwargs: dictionary\n+ :param test_kwargs: kwargs to pass to your closure when it is called.\n+\n+ :type subfeatures: boolean\n+ :param subfeatures: when a feature is matched, should just that feature be\n+ yielded to the caller, or should the entire sub_feature\n+ tree for that feature be included? subfeatures=True is\n+ useful in cases such as searching for a gene feature,\n+ and wanting to know what RBS/Shine_Dalgarno_sequences\n+ are in the sub_feature tree (which can be accomplished\n+ with two feature_lambda calls). 
subfeatures=False is\n+ useful in cases when you want to process (and possibly\n+ return) the entire feature tree, such as applying a\n+ qualifier to every single feature.\n+\n+ :type invert: boolean\n+ :param invert: Negate/invert the result of the filter.\n+\n+ :rtype: yielded list\n+ :return: Yields a list of matching features.\n+ """\n+ # Either the top level set of [features] or the subfeature attribute\n+ for feature in feature_list:\n+ feature._parent = parent\n+ if not parent:\n+ # Set to self so we cannot go above root.\n+ feature._parent = feature\n+ test_result = test(feature, **test_kwargs)\n+ # if (not invert and test_result) or (invert and not test_result):\n+ if invert ^ test_result:\n+ if not subfeatures:\n+ feature_copy = copy.deepcopy(feature)\n+ feature_copy.sub_features = list()\n+ yield feature_copy\n+ else:\n+ yield feature\n+\n+ if recurse and hasattr(feature, "sub_features"):\n+ for x in feature_lambda(\n+ feature.sub_features,\n+ test,\n+ test_kwargs,\n+ subfeatures=subfeatures,\n+ parent=feature,\n+ invert=invert,\n+ recurse=recurse,\n+ ):\n+ yield x\n+\n+\n+def fetchParent(feature):\n+ if not hasattr(feature, "_parent") or feature._parent is None:\n+ return feature\n+ else:\n+ return fetchParent(feature._parent)\n+\n+\n+def feature_test_true(feature, **kwargs):\n+ return True\n+\n+\n+def feature_test_type(feature, **kwargs):\n+ if "type" in kwargs:\n+ return str(feature.type).upper() == str(kwargs["type"]).upper()\n+ elif "types" in kwargs:\n+ for x in kwargs["types"]:\n+ if str(feature.type).upper() == str(x).upper():\n+ return True\n+ return False\n+ raise Exception("Incorrect feature_test'..b'feature.location.start,\n+ # feature.location.end,\n+ # feature.location.strand\n+ # )\n+ return result\n+\n+\n+def get_gff3_id(gene):\n+ return gene.qualifiers.get("Name", [gene.id])[0]\n+\n+\n+def ensure_location_in_bounds(start=0, end=0, parent_length=0):\n+ # This prevents frameshift errors\n+ while start < 0:\n+ start += 3\n+ while end < 
0:\n+ end += 3\n+ while start > parent_length:\n+ start -= 3\n+ while end > parent_length:\n+ end -= 3\n+ return (start, end)\n+\n+\n+def coding_genes(feature_list):\n+ for x in genes(feature_list):\n+ if (\n+ len(\n+ list(\n+ feature_lambda(\n+ x.sub_features,\n+ feature_test_type,\n+ {"type": "CDS"},\n+ subfeatures=False,\n+ )\n+ )\n+ )\n+ > 0\n+ ):\n+ yield x\n+\n+\n+def genes(feature_list, feature_type="gene", sort=False):\n+ """\n+ Simple filter to extract gene features from the feature set.\n+ """\n+\n+ if not sort:\n+ for x in feature_lambda(\n+ feature_list, feature_test_type, {"type": feature_type}, subfeatures=True\n+ ):\n+ yield x\n+ else:\n+ data = list(genes(feature_list, feature_type=feature_type, sort=False))\n+ data = sorted(data, key=lambda feature: feature.location.start)\n+ for x in data:\n+ yield x\n+\n+\n+def wa_unified_product_name(feature):\n+ """\n+ Try and figure out a name. We gave conflicting instructions, so\n+ this isn\'t as trivial as it should be. Sometimes it will be in\n+ \'product\' or \'Product\', othertimes in \'Name\'\n+ """\n+ # Manually applied tags.\n+ protein_product = feature.qualifiers.get(\n+ "product", feature.qualifiers.get("Product", [None])\n+ )[0]\n+\n+ # If neither of those are available ...\n+ if protein_product is None:\n+ # And there\'s a name...\n+ if "Name" in feature.qualifiers:\n+ if not is_uuid(feature.qualifiers["Name"][0]):\n+ protein_product = feature.qualifiers["Name"][0]\n+\n+ return protein_product\n+\n+\n+def is_uuid(name):\n+ return name.count("-") == 4 and len(name) == 36\n+\n+\n+def get_rbs_from(gene):\n+ # Normal RBS annotation types\n+ rbs_rbs = list(\n+ feature_lambda(\n+ gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False\n+ )\n+ )\n+ rbs_sds = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "Shine_Dalgarno_sequence"},\n+ subfeatures=False,\n+ )\n+ )\n+ # Fraking apollo\n+ apollo_exons = list(\n+ feature_lambda(\n+ gene.sub_features, 
feature_test_type, {"type": "exon"}, subfeatures=False\n+ )\n+ )\n+ apollo_exons = [x for x in apollo_exons if len(x) < 10]\n+ # These are more NCBI\'s style\n+ regulatory_elements = list(\n+ feature_lambda(\n+ gene.sub_features,\n+ feature_test_type,\n+ {"type": "regulatory"},\n+ subfeatures=False,\n+ )\n+ )\n+ rbs_regulatory = list(\n+ feature_lambda(\n+ regulatory_elements,\n+ feature_test_quals,\n+ {"regulatory_class": ["ribosome_binding_site"]},\n+ subfeatures=False,\n+ )\n+ )\n+ # Here\'s hoping you find just one ;)\n+ return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons\n+\n+\n+def nice_name(record):\n+ """\n+ get the real name rather than NCBI IDs and so on. If fails, will return record.id\n+ """\n+ name = record.id\n+ likely_parental_contig = list(genes(record.features, feature_type="contig"))\n+ if len(likely_parental_contig) == 1:\n+ name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]\n+ return name\n+\n+\n+def fsort(it):\n+ for i in sorted(it, key=lambda x: int(x.location.start)):\n+ yield i\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jun 05 02:53:31 2023 +0000 |
b |
@@ -0,0 +1,74 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package">progressivemauve</requirement> + <!--<requirement type="package" version="2.7">python</requirement>--> + <requirement type="package" version="0.6.4">bcbiogff</requirement> + <yield/> + </requirements> + </xml> + <token name="@WRAPPER_VERSION@">2.4.0</token> + <xml name="citation/progressive_mauve"> + <citation type="doi">10.1371/journal.pone.0011147</citation> + </xml> + <xml name="citation/gepard"> + <citation type="doi">10.1093/bioinformatics/btm039</citation> + </xml> + <token name="@XMFA_INPUT@"> + '$xmfa' + </token> + <xml name="xmfa_input" token_formats="xmfa"> + <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/> + </xml> + <token name="@XMFA_FA_INPUT@"> + '$sequences' + </token> + <xml name="xmfa_fa_input"> + <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. 
Failing that, they should be provided in the same order as in original progressiveMauve run"/> + </xml> + <xml name="genome_selector"> + <conditional name="reference_genome"> + <param name="reference_genome_source" type="select" label="Reference Genome"> + <option value="history" selected="True">From History</option> + <option value="cached">Locally Cached</option> + </param> + <when value="cached"> + <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + </when> + <when value="history"> + <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> + </when> + </conditional> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <xml name="input/gff3+fasta"> + <expand macro="gff3_input"/> + <expand macro="genome_selector"/> + </xml> + <token name="@INPUT_GFF@"> + '$gff3_data' + </token> + <token name="@INPUT_FASTA@"> + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + </token> + <token name="@GENOME_SELECTOR_PRE@"> + #if $reference_genome.reference_genome_source == 'history': + ln -s '$reference_genome.genome_fasta' genomeref.fa; + #end if + </token> + <token name="@GENOME_SELECTOR@"> + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + </token> +</macros> |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 shinefind.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shinefind.py Mon Jun 05 02:53:31 2023 +0000 |
[ |
b'@@ -0,0 +1,433 @@\n+#!/usr/bin/env python\n+import re\n+import sys\n+import argparse\n+import logging\n+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature\n+from Bio import SeqIO\n+from Bio.SeqRecord import SeqRecord\n+from Bio.SeqFeature import FeatureLocation\n+from gff3 import (\n+ feature_lambda,\n+ feature_test_type,\n+ feature_test_true,\n+ feature_test_quals,\n+ get_id,\n+ ensure_location_in_bounds,\n+)\n+\n+logging.basicConfig(level=logging.INFO)\n+log = logging.getLogger()\n+\n+\n+class NaiveSDCaller(object):\n+\n+ # TODO May make switch for different sequence sets\n+ SD_SEQUENCES = (\n+ "AGGAGGT",\n+ "GGAGGT",\n+ "AGGAGG",\n+ "GGGGGG",\n+ "AGGAG",\n+ "GAGGT",\n+ "GGAGG",\n+ "GGGGG",\n+ "AGGT",\n+ "GGGT",\n+ "GAGG",\n+ "GGGG",\n+ "AGGA",\n+ "GGAG",\n+ "GGA",\n+ "GAG",\n+ "AGG",\n+ "GGT",\n+ "GGG",\n+ )\n+\n+ def __init__(self):\n+ self.sd_reg = [re.compile(x, re.IGNORECASE) for x in self.SD_SEQUENCES]\n+\n+ def list_sds(self, sequence, sd_min=3, sd_max=17):\n+ hits = []\n+ for regex in self.sd_reg:\n+ for match in regex.finditer(sequence):\n+ spacing = len(sequence) - len(match.group()) - match.start()\n+ if sd_max >= spacing + sd_min and spacing + sd_min >= sd_min:\n+ # if the spacing is within gap limits, add\n+ # (search space is [sd_max+7 .. 
sd_min] so actual gap is spacing+sd_min)\n+ # print(\'min %d max %d - adding SD with gap %d\' % (sd_min, sd_max, spacing+sd_min))\n+ hits.append(\n+ {\n+ "spacing": spacing,\n+ "hit": match.group(),\n+ "start": match.start(),\n+ "end": match.end(),\n+ "len": len(match.group()),\n+ }\n+ )\n+ hits = sorted(hits, key=lambda x: (-x["len"], x["spacing"]))\n+ return hits\n+\n+ @classmethod\n+ def highlight_sd(cls, sequence, start, end):\n+ return " ".join(\n+ [\n+ sequence[0:start].lower(),\n+ sequence[start:end].upper(),\n+ sequence[end:].lower(),\n+ ]\n+ )\n+\n+ @classmethod\n+ def to_features(\n+ cls,\n+ hits,\n+ strand,\n+ parent_start,\n+ parent_end,\n+ feature_id=None,\n+ sd_min=3,\n+ sd_max=17,\n+ ):\n+ results = []\n+ for idx, hit in enumerate(hits):\n+ # gene complement(124..486)\n+ # -1 491 501 0 5 5\n+ # -1 491 501 0 4 5\n+ # -1 491 501 1 4 5\n+ # -1 491 501 2 3 5\n+ # -1 491 501 1 3 5\n+ # -1 491 501 0 3 5\n+\n+ qualifiers = {\n+ "source": "CPT_ShineFind",\n+ "ID": "%s.rbs-%s" % (feature_id, idx),\n+ }\n+\n+ if strand > 0:\n+ start = parent_end - hit["spacing"] - hit["len"]\n+ end = parent_end - hit["spacing"]\n+ else:\n+ start = parent_start + hit["spacing"]\n+ end = parent_start + hit["spacing"] + hit["len"]\n+ # check that the END of the SD sequence is within the given min/max of parent start/end\n+\n+ # gap is either the sd_start-cds_end (neg strand) or the sd_end-cds_start (pos strand)\n+ # minimum absolute value of these two will be the proper gap regardless of strand\n+ tmp = gffSeqFeature(\n+ FeatureLocation(min(start, end), max(start, end), strand=strand),\n+ # FeatureLocation(min(start, end), max(start, end), strand=strand),\n+ type="Shine_Dalgarno_'..b'and fake a\n+ # break, because an actual break triggers the else: block\n+ table_output.write(\n+ "\\t".join(\n+ map(\n+ str,\n+ [\n+ feature.id,\n+ feature_id,\n+ feature.location.start,\n+ feature.location.end,\n+ human_strand,\n+ sd_finder.highlight_sd(seq, sd["start"], sd["end"]),\n+ 
sd["hit"],\n+ int(sd["spacing"]) + lookahead_min,\n+ ],\n+ )\n+ )\n+ + "\\n"\n+ )\n+\n+ if add:\n+ # Append the top RBS to the gene feature\n+ gene.sub_features.append(sd_feature)\n+ # Pick out start/end locations for all sub_features\n+ locations = [x.location.start for x in gene.sub_features] + [\n+ x.location.end for x in gene.sub_features\n+ ]\n+ # Update gene\'s start/end to be inclusive\n+ gene.location._start = min(locations)\n+ gene.location._end = max(locations)\n+ # Also register the feature with the separate GFF3 output\n+ sd_feature = fix_gene_boundaries(sd_feature)\n+ gff3_output_record.features.append(sd_feature)\n+\n+ if top_only or sd == (sds[-1]):\n+ break\n+ else:\n+ table_output.write(\n+ "\\t".join(\n+ map(\n+ str,\n+ [\n+ feature.id,\n+ feature_id,\n+ feature.location.start,\n+ feature.location.end,\n+ human_strand,\n+ seq,\n+ None,\n+ -1,\n+ ],\n+ )\n+ )\n+ + "\\n"\n+ )\n+\n+ record.annotations = {}\n+ gffWrite([record], sys.stdout)\n+\n+ gff3_output_record.features = sorted(\n+ gff3_output_record.features, key=lambda x: x.location.start\n+ )\n+ gff3_output_record.annotations = {}\n+ gffWrite([gff3_output_record], gff3_output)\n+\n+\n+if __name__ == "__main__":\n+ parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")\n+ parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")\n+ parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")\n+\n+ parser.add_argument(\n+ "--gff3_output",\n+ type=argparse.FileType("w"),\n+ help="GFF3 Output",\n+ default="shinefind.gff3",\n+ )\n+ parser.add_argument(\n+ "--table_output",\n+ type=argparse.FileType("w"),\n+ help="Tabular Output",\n+ default="shinefind.tbl",\n+ )\n+\n+ parser.add_argument(\n+ "--lookahead_min",\n+ nargs="?",\n+ type=int,\n+ help="Number of bases upstream of CDSs to end search",\n+ default=3,\n+ )\n+ parser.add_argument(\n+ "--lookahead_max",\n+ nargs="?",\n+ type=int,\n+ help="Number of bases upstream of CDSs to 
begin search",\n+ default=17,\n+ )\n+\n+ parser.add_argument("--top_only", action="store_true", help="Only report best hits")\n+ parser.add_argument(\n+ "--add",\n+ action="store_true",\n+ help=\'Function in "addition" mode whereby the \'\n+ + "RBSs are added directly to the gene model.",\n+ )\n+\n+ args = parser.parse_args()\n+ shinefind(**vars(args))\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 shinefind.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shinefind.xml Mon Jun 05 02:53:31 2023 +0000 |
[ |
@@ -0,0 +1,94 @@ +<tool id="edu.tamu.cpt.genbank.shinefind" name="Shine Find" version="21.1.0.0"> + <description>Identify shine-dalgarno sequences</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +@GENOME_SELECTOR_PRE@ + +python '$__tool_directory__/shinefind.py' + +@GENOME_SELECTOR@ +'$gff3_data' + +--table_output '$default_output' +--gff3_output '$gff3_output' + +--lookahead_min '$lookahead_min' +--lookahead_max '$lookahead_max' +$add +$top_only +## The two boolean flags above are deliberately unquoted: an unchecked box renders as nothing instead of an empty '' argument that argparse would reject. + +> '$stdout' + +]]></command> + <inputs> + <expand macro="genome_selector"/> + <expand macro="gff3_input"/> + <param label="Minimum number of bases upstream of CDS for gap (--lookahead_min)" name="lookahead_min" type="integer" value="3"/> + <param label="Maximum number of bases upstream of CDS for gap (--lookahead_max)" name="lookahead_max" type="integer" value="17"/> + <param checked="true" label="Automatically add RBSs to input GFF3" name="add" type="boolean" truevalue="--add" falsevalue=""/> + <param checked="true" label="Only report best hits (--top_only)" name="top_only" type="boolean" falsevalue="" truevalue="--top_only"/> + </inputs> + <outputs> + <data format="tabular" name="default_output" label="ShineFind RBS list from $gff3_data.name"/> + <data format="gff3" name="gff3_output" label="ShineFind GFF3 RBSs from $gff3_data.name"/> + <data format="gff3" name="stdout" label="$gff3_data.name with RBSs"/> + </outputs> + <tests> + <test> + <param name="genome_fasta" value="Miro_ShineFindIn.fa"/> + <param name="reference_genome_source" value="history"/> + <param name="gff3_data" value="Miro_ShineFindIn.gff3"/> + <param name="lookahead_max" value="15"/> + <param name="lookahead_min" value="5"/> + <param name="add" value="True"/> + <param name="top_only" value="False"/> + <output name="default_output" file="Miro_ShineFindOut.tbl"/> + <output name="gff3_output" 
file="Miro_ShineFindOut1.gff3"/> + <output name="stdout" file="Miro_ShineFindOut2.gff3"/> + </test> + </tests> + <help><![CDATA[ +**What it does** + +From an input GFF3 and FASTA file, the upstream sequence within user-specified bounds is +extracted for all CDS features. Input CDS features with an RBS are not re-analyzed. For +the remaining CDS features, the upstream sequence is searched against the following +possible Shine-Dalgarno sequences: +- AGGAGGT +- GGAGGT +- AGGAGG +- AGGAG +- GAGGT +- GGAGG +- AGGT +- GGGT +- GAGG +- GGGG +- AGGA +- GGAG +- GGA +- GAG +- AGG +- GGT + +**Gap Min/Max Settings** + In previous versions of this tool, the min/max was set for the window to find a Shine-Dalgarno sequence, but + currently we specify the minimum and maximum GAP between the end of the SD and the start of the closest CDS. + +By default, only the first (closest to consensus) hit to the list is returned to the GFF3. In the case of a tie, it will select the smaller gap. +By selecting ‘No’ for ‘Only report best hits’, **all hits** to the possible SD list will +be added to the GFF3 file. + +**March 10, 2021 Update** + Previous to this date, when using the "Only report best hits" option the algorithm would pick the "first" one (moving downstream) found in the case of tied sequence length. After this date, it now will report the sequence with the smaller gap between the END of the SD sequence and the START of the CDS. + + ]]></help> + <expand macro="citations"> + <expand macro="sl-citations-clm"/> + </expand> +</tool> |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 test-data/Miro_ShineFindIn.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Miro_ShineFindIn.fa Mon Jun 05 02:53:31 2023 +0000 |
b |
b'@@ -0,0 +1,2936 @@\n+>Miro\n+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n+CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n+AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n+GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n+TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n+TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n+TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n+GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n+CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n+CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n+AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n+AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n+TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n+TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n+AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n+CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n+GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n+TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n+TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n+TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n+CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n+TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n+TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n+TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n+AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n+ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n+ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n+GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n+GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n+ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n+GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n+ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n+AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n+TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n+AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n+GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n+TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n+ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n+ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n+TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n+CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n+GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n+TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n+ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n+GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n+GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n+ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n+ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n+
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n+ACGG'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 test-data/Miro_ShineFindIn.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Miro_ShineFindIn.gff3 Mon Jun 05 02:53:31 2023 +0000 |
b |
b'@@ -0,0 +1,827 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical'..b't=Miro_98;obsolete_name=Miro_066;product=hypothetical conserved;tmhelix=2TMDs %287-26%2C 31-53%29 N in%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98\n+Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99\n+Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD %2815-37%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143\n+Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114\n+Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141\n+Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140\n+Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147\n+Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146\n+Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145\n+Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs %282-21%2C 31-50%29 N in%2C C 
in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115\n+Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149\n+Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148\n+Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116\n' |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 test-data/Miro_ShineFindOut.tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Miro_ShineFindOut.tbl Mon Jun 05 02:53:31 2023 +0000 |
b |
@@ -0,0 +1,1 @@ +ID Name Terminus Terminus Strand Upstream Sequence SD Spacing |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 test-data/Miro_ShineFindOut1.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Miro_ShineFindOut1.gff3 Mon Jun 05 02:53:31 2023 +0000 |
b |
@@ -0,0 +1,1 @@ +##gff-version 3 |
b |
diff -r 4d4a4b603d33 -r 5004ddb62700 test-data/Miro_ShineFindOut2.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Miro_ShineFindOut2.gff3 Mon Jun 05 02:53:31 2023 +0000 |
b |
b'@@ -0,0 +1,824 @@\n+##gff-version 3\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206;\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206;\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117;\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117;\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200;\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200;\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201;\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201;\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202;\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202;\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203;\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203;\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142;\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142;\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical 
conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical con'..b'ro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98;\n+Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99;\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99;\n+Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143;\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143;\n+Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114;\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114;\n+Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141;\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical 
conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141;\n+Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140;\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140;\n+Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147;\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147;\n+Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146;\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146;\n+Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145;\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145;\n+Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115;\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C 
in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115;\n+Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149;\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149;\n+Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148;\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148;\n+Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116;\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116;\n' |