Repository 'cpt_gff_split'
hg clone https://toolshed.g2.bx.psu.edu/repos/cpt/cpt_gff_split

Changeset 3:8bd03ba8510a (2023-04-28)
Previous changeset 2:e7762a585e3d (2022-05-20) Next changeset 4:4b60e0b6a150 (2024-01-05)
Commit message:
planemo upload commit edc74553919d09dcbe27fcadf144612c1ad3a2a2
added:
cpt-macros.xml
gff3_splitgff.py
gff3_splitgff.xml
macros.xml
test-data/Miro_SplitIn.gff3
test-data/Miro_SplitOut.fa
test-data/Miro_SplitOut.gff3
removed:
cpt_gff_split/cpt-macros.xml
cpt_gff_split/gff3_splitgff.py
cpt_gff_split/gff3_splitgff.xml
cpt_gff_split/macros.xml
cpt_gff_split/test-data/Miro_SplitIn.gff3
cpt_gff_split/test-data/Miro_SplitOut.fa
cpt_gff_split/test-data/Miro_SplitOut.gff3
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Fri Apr 28 01:35:25 2023 +0000
[
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/cpt-macros.xml
--- a/cpt_gff_split/cpt-macros.xml Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation> 
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/gff3_splitgff.py
--- a/cpt_gff_split/gff3_splitgff.py Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,35 +0,0 @@
-#!/usr/bin/env python
-import sys
-import argparse
-from Bio import SeqIO
-from Bio.Seq import Seq
-from CPT_GFFParser import gffParse, gffWrite
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Sample script to add an attribute to a feature via web services"
-    )
-    parser.add_argument("data", type=argparse.FileType("r"), help="GFF3 File")
-    parser.add_argument(
-        "--gff",
-        type=argparse.FileType("w"),
-        help="Output Annotations",
-        default="data.gff3",
-    )
-    parser.add_argument(
-        "--fasta",
-        type=argparse.FileType("w"),
-        help="Output Sequence",
-        default="data.fa",
-    )
-    args = parser.parse_args()
-
-    for record in gffParse(args.data):
-        gffWrite([record], args.gff)
-        record.description = ""
-        
-        if isinstance(record.seq, str):
-          record.seq = Seq(record.seq)
-        
-        SeqIO.write([record], args.fasta, "fasta")
-        sys.exit()
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/gff3_splitgff.xml
--- a/cpt_gff_split/gff3_splitgff.xml Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,36 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt2.gff3.splitGff" name="Split GFF3+Fasta into separate parts" version="22.0.0">
-  <description></description>
-  <macros>
-    <import>macros.xml</import>
- <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-python $__tool_directory__/gff3_splitgff.py
-$input
-
---gff $gff_out
---fasta $fasta_out
-]]></command>
-  <inputs>
-    <param label="Combined GFF3/Fasta File" name="input" type="data" format="gff,gff3"/>
-  </inputs>
-  <outputs>
-    <data format="gff3" label="GFF Portion of ${input.name}" name="gff_out"/>
-    <data format="fasta" label="Fasta Portion of ${input.name}" name="fasta_out"/>
-  </outputs>
-  <tests>
- <test>
- <param name="input" value="Miro_SplitIn.gff3" />
- <output name="gff" file="Miro_SplitOut.gff3" />
- <output name="fasta" file="Miro_SplitOut.fa" />
- </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-Splits apart the GFF3/Fasta data in a combined file into separate fasta and gff3 files.
-      ]]></help>
- <expand macro="citations" />
-</tool>
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/macros.xml
--- a/cpt_gff_split/macros.xml Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,85 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="requirements">
- <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
- <yield/>
- </requirements>
- </xml>
- <token name="@BLAST_TSV@">
- "$blast_tsv"
- </token>
- <xml name="blast_tsv">
- <param label="Blast Results" help="TSV/tabular (25 Column)"
- name="blast_tsv" type="data" format="tabular" />
- </xml>
-
- <token name="@BLAST_XML@">
- "$blast_xml"
- </token>
- <xml name="blast_xml">
- <param label="Blast Results" help="XML format"
- name="blast_xml" type="data" format="blastxml" />
- </xml>
- <xml name="gff3_with_fasta">
- <param label="Genome Sequences" name="fasta" type="data" format="fasta" />
- <param label="Genome Annotations" name="gff3" type="data" format="gff3" />
- </xml>
- <xml name="genome_selector">
- <conditional name="reference_genome">
- <param name="reference_genome_source" type="select" label="Reference Genome">
- <option value="history" selected="True">From History</option>
- <option value="cached">Locally Cached</option>
- </param>
- <when value="cached">
- <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
- <options from_data_table="all_fasta"/>
- </param>
- </when>
- <when value="history">
- <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
- </when>
- </conditional>
- </xml>
- <xml name="gff3_input">
- <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
- </xml>
- <xml name="input/gff3+fasta">
- <expand macro="gff3_input" />
- <expand macro="genome_selector" />
- </xml>
- <token name="@INPUT_GFF@">
- "$gff3_data"
- </token>
- <token name="@INPUT_FASTA@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
- <token name="@GENOME_SELECTOR_PRE@">
-#if $reference_genome.reference_genome_source == 'history':
- ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
- </token>
- <token name="@GENOME_SELECTOR@">
-#if str($reference_genome.reference_genome_source) == 'cached':
- "${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
- genomeref.fa
-#end if
- </token>
-        <xml name="input/fasta">
- <param label="Fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-
- <token name="@SEQUENCE@">
- "$sequences"
- </token>
- <xml name="input/fasta/protein">
- <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
- </xml>
-</macros>
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/test-data/Miro_SplitIn.gff3
--- a/cpt_gff_split/test-data/Miro_SplitIn.gff3 Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3763 +0,0 @@\n-##gff-version 3\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical conserved\n-Miro\tGenBank\tShine_D'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCG
CACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGATAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-G
CATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTCGGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n'
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/test-data/Miro_SplitOut.fa
--- a/cpt_gff_split/test-data/Miro_SplitOut.fa Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2936 +0,0 @@\n->Miro\n-TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n-TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n-AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n-GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n-CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n-TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n-ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n-CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n-ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n-ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n-TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n-CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n-TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n-AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n-CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n-TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n-CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n-AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n-GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n-TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n-TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n-TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n-GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n-CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n-CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n-AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n-AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n-TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n-TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n-AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n-CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n-GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n-TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n-TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n-TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n-CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n-TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n-TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n-TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n-AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n-ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n-ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n-GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n-GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n-ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n-GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n-ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n-AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n-TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n-AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n-GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n-TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n-ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n-ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n-TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n-CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n-GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n-TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n-ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n-GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n-GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n-ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n-ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n-
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n-ACGG'..b'TCGGGTAATATCG\n-TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n-ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n-GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n-GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n-GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n-CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n-TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n-ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n-TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n-AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n-TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n-CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n-AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n-GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n-TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n-ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n-TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n-TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n-ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n-TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n-CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n-TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n-AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n-TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n-CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n-CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n-ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n-GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n-ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n-AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n-CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n-CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n-TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n-TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n-ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n-ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n-TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n-TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n-AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n-ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n-TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n-GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n-GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n-TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n-ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n-CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n-TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n-AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n-GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n-GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n-TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n-AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n-TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n-ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n-CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n-TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n-TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n-GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n-GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n-GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n-TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n-TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n-GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n-GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n-GGCGCGGTTGTTTAG\n'
b
diff -r e7762a585e3d -r 8bd03ba8510a cpt_gff_split/test-data/Miro_SplitOut.gff3
--- a/cpt_gff_split/test-data/Miro_SplitOut.gff3 Fri May 20 08:53:34 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,825 +0,0 @@\n-##gff-version 3\n-##sequence-region Miro 1 176055\n-Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n-Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n-Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n-Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n-Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206;\n-Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206;\n-Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117;\n-Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117;\n-Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200;\n-Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200;\n-Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201;\n-Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 
gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201;\n-Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202;\n-Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202;\n-Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203;\n-Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203;\n-Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142;\n-Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C 
in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142;\n-Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n-Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n-Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n-Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n-Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n-Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n-Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n-Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name='..b'ro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98;\n-Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99;\n-Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99;\n-Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143;\n-Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C 
in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143;\n-Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114;\n-Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114;\n-Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141;\n-Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141;\n-Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140;\n-Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140;\n-Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147;\n-Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147;\n-Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146;\n-Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical 
conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146;\n-Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145;\n-Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145;\n-Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115;\n-Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C in;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115;\n-Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149;\n-Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149;\n-Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148;\n-Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148;\n-Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116;\n-Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n-Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116;\n'
b
diff -r e7762a585e3d -r 8bd03ba8510a gff3_splitgff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_splitgff.py Fri Apr 28 01:35:25 2023 +0000
[
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""Split a combined GFF3+FASTA file into a GFF3 file and a FASTA file."""
+import sys
+import argparse
+from Bio import SeqIO
+from Bio.Seq import Seq
+from CPT_GFFParser import gffParse, gffWrite
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Split a combined GFF3+FASTA file into separate GFF3 and FASTA files"
+    )
+    parser.add_argument("data", type=argparse.FileType("r"), help="GFF3 File")
+    parser.add_argument(
+        "--gff",
+        type=argparse.FileType("w"),
+        help="Output Annotations",
+        default="data.gff3",
+    )
+    parser.add_argument(
+        "--fasta",
+        type=argparse.FileType("w"),
+        help="Output Sequence",
+        default="data.fa",
+    )
+    args = parser.parse_args()
+
+    # Read every record up front so that a multi-record input is split in
+    # full; stopping after the first record would silently drop the rest
+    # from both outputs.
+    records = list(gffParse(args.data))
+
+    # Nothing parsed: leave both outputs empty.
+    if records:
+        # Annotations are written before the records are modified for FASTA output.
+        gffWrite(records, args.gff)
+
+        for record in records:
+            # Blank the description so the FASTA header carries only the record id.
+            record.description = ""
+
+            # The parser may leave the sequence as a plain str; wrap it in a Seq for SeqIO.
+            if isinstance(record.seq, str):
+                record.seq = Seq(record.seq)
+
+        SeqIO.write(records, args.fasta, "fasta")
b
diff -r e7762a585e3d -r 8bd03ba8510a gff3_splitgff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_splitgff.xml Fri Apr 28 01:35:25 2023 +0000
[
@@ -0,0 +1,38 @@
+<tool id="edu.tamu.cpt2.gff3.splitGff" name="Split GFF3+Fasta into separate parts" version="22.0.0">
+  <description/>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <!-- NOTE(review): gff3_splitgff.py imports Bio and CPT_GFFParser; confirm that the
+       "requirements" macro (rather than "gff_requirements") provides those packages. -->
+  <expand macro="requirements"/>
+  <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/gff3_splitgff.py'
+'$input'
+
+--gff '$gff_out'
+--fasta '$fasta_out'
+]]></command>
+  <inputs>
+    <param label="Combined GFF3/Fasta File" name="input" type="data" format="gff,gff3"/>
+  </inputs>
+  <outputs>
+    <data format="gff3" label="GFF Portion of ${input.name}" name="gff_out"/>
+    <data format="fasta" label="Fasta Portion of ${input.name}" name="fasta_out"/>
+  </outputs>
+  <tests>
+    <!-- Each output name below must match a data element declared under outputs. -->
+    <test>
+      <param name="input" value="Miro_SplitIn.gff3"/>
+      <output name="gff_out" file="Miro_SplitOut.gff3"/>
+      <output name="fasta_out" file="Miro_SplitOut.fa"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+Splits apart the GFF3/Fasta data in a combined file into separate fasta and gff3 files.
+      ]]></help>
+  <expand macro="citations"/>
+</tool>
b
diff -r e7762a585e3d -r 8bd03ba8510a macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Apr 28 01:35:25 2023 +0000
b
@@ -0,0 +1,80 @@
+<macros>
+    <!-- Package requirements; a caller can append more through the yield slot. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <!-- Reference genome chooser: a FASTA dataset from the history, or an entry of the all_fasta data table. -->
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <!-- Expands to the path of the chosen reference genome. The history branch names
+         genomeref.fa, the symlink that the GENOME_SELECTOR_PRE token creates. -->
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Run before the genome path is used: links a history dataset to genomeref.fa. -->
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <!-- Same expansion as the INPUT_FASTA token. -->
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>
b
diff -r e7762a585e3d -r 8bd03ba8510a test-data/Miro_SplitIn.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_SplitIn.gff3 Fri Apr 28 01:35:25 2023 +0000
b
b'@@ -0,0 +1,3763 @@\n+##gff-version 3\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural 
protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD %284-26%29 N out%2C C in\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 
TMD %2812-34%29 N in%2C C out\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name=Miro_160;product=hypothetical conserved\n+Miro\tGenBank\tShine_D'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCG
CACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGATAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+G
CATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTCGGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n'
b
diff -r e7762a585e3d -r 8bd03ba8510a test-data/Miro_SplitOut.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_SplitOut.fa Fri Apr 28 01:35:25 2023 +0000
b
b'@@ -0,0 +1,2936 @@\n+>Miro\n+TTAGTAATGGCTAAAACCATATGTAACATCAATCATGACTTTATAACGGCATACACGCAT\n+TTTTGCGTTATTGTAATCCACTGGGATCGCTACCACGTCAGCAGGATCCACCTTTACCTG\n+AATGACTCTACCAACACCGCCCCCGTAGTGTGGAAGGTATGATTTAGCCGCAACGTGTAG\n+GCCAGTAGAACAGGTGCGCGTTTTATCTTCGTCTACCATGTTTCGAGGCATGGAGACAGT\n+CACACCAGGACTATTATCAAATTTGCCAGTAGCGAGATCTTTATAGTTATCGCGCACACG\n+TTTCCAGGCAAGGAAACAACCATCATCGGTCAGTTCAATGTCATTATGTACAAGGAACCC\n+ATAAAGCTGGTATACAGCATCGCGTGAAGGGTTTCGCATCAACCGTTCAAAGAAGTTCAC\n+CAGATGTTCATACGGACGATCGTTATACATTTCGCGAATGATTCGTTGAGTGATATCAGA\n+ATCAAACACTACATCTTTATATAGAAGCTGATGACCAATGATTTTAATGTTGCCTTTGCT\n+ATAGGTTCGGATCGCTTCTTGAGTATCCAAACAAGTTACAGCACCTTTGACATCACCAGC\n+TTTCAGCATTTCATGCGCTTTCTTAAAGTTCGGATGTGTTTCACCCGCCATGAAAACGCG\n+CCCTTCGTATACAACCGTAATGAACGATTCAGATCCGATCATACGCGGTACAGTGTCTGA\n+TACTTTAGGCTTTTGTTTTACATCCCTTTTCTTAGGGAATTCCGCTAACTTGCGGTTGAT\n+AACGCGCCCAATGGTTCGGGCGCTTACGTTGAACTGTTGGGCCAGTGCTGTTTTACTTGC\n+CCCCGTCAACCATCCATTATAGATAGCTTTCTGTTGTACTTCGTCGAGAATTTTGACCAT\n+TATTTCCACCGTATTAATTTCTTAAACTCACTGAGATTCTTTTCGTTGTTGTAAATCGGA\n+CGAATTGAATAAGAATCGCTATGTTCAACAAGTGAAGCCAGTAACGGGTTTAGTGATTTA\n+AAGATTTCCCATGCCTGATCAACACGTTTTTTCATATTGCTGCGTTTAACGCGCATTGAT\n+GCTACGGACTCACGCAGAATCGGACAACGCACCCTGGAAAGATTTTTACCATCTTTCTCA\n+TACCCTTCCAGACACACTATACGTTCGAGGGTATCAACAATCTTATACAGTTTTTCATTA\n+TATCGGTTTTTCACAATCCGATCAAGCGATACACCGAAACGGCTATGCAATGCGTCTGTT\n+TCTGTTGAGTGATCCTTCCCAATCCATCCAGGCAAGCAATTATCTTTCAATGCCTTTTCA\n+GATTTAACATACTGCTTGCACAGCATATCATCAAAGCATACCAGATTTGAATCCGGGATC\n+CACTTCCAGAGGCTGTTACGTATAGCAAACACAACAGGGATCCCAGTATGGCGCATGATA\n+CGCGATAAAGTAGATTCTTTCATTGCTGAATCCATAGACAACCCGGAATTTTCACCATCT\n+AAGCGGCTATATTCATCAATACCATACAACCGAACACCTGGGGCTTTATCCAGTGATAAA\n+AACTCTGATTTTGTCATAAACAGAGAAGTTTTTGCCAGATTGCCGTTACTATCCAACTCA\n+TAACGATATACGGTCGGGGTTTTTGGGCGCGGTTCTGAATTTTTCGGTGCATATAGCGCT\n+TTTGATTTTTCGCGATCTGCATCATAGATCTCTTTTTCTTTTGTCATTTCACTGGTACGG\n+AGATACACAATTTCCGATTCATCAAAATGACCTTTCCGAACGATATCATTAACAATCTCA\n+CGCTTTGAATCACTATCGTAATATGCAACAAAGCTAACACGGCTTAGGTTATGCATTTTA\n+GCATACCCGACGATAT
ACGGTTTAACCGTGTTGGTATCCACTTTTAACAGAATGAGTTTC\n+TTCTGTTTCCACGGATAATAAATGCGTGTGATGTCCTGGCGTTTGGTTGTTTCTGGCTTA\n+TACTTGCTCCAGCGGCCTCCGCTACCAGTTACCTGATACCATGCATCTTTACCGTCGTAT\n+TCGTTCGCCCAATACCCAGCAACATAATCATCATTATATTTGTTTGGTTTGACTAGTTCG\n+CTATGGATCCAGCCAATAGAATCACCATTGATGCGAAAATTAGCATCTTTACCAACAAAG\n+TTTTGTACCATTGAAGGCAGAGAATGGAACCACGTCAGTTTATCACGCACGGTTTGTAAC\n+TTATCGAATTCTGATTTAACTCGATTGAAATATACCCGGCTGATTTGTTTCAGACGTTCT\n+TTAACAATCCCTACTGTCATTTTATCCATACTCAACTCTTCGCGAGAAGGCATGAAATCA\n+AGTTCACCGATCGGGAAGTCAATAATATACGTATACTGGCTTTCTGTATAGCAATAGAAC\n+ATCGAGGTATCATACAAATCTTTATCCAGAGGATAAATGATGTTACCCATGCGAGCATAT\n+ACACCGCTAGTGTATGCTGATTTATGACGGATCACCCCGCTATCGTTGGTTGCTTCTTTC\n+GGCTGATAGTTGATTTTGAGAATAGAAGCACCAACAAAGTTAGGACGAATATCAGTAAAT\n+GATTCGTATACCCTTGCTGCTTCGTTTTCCCATTCTTTGATATCTTCAACCTTAACCGGA\n+ACAGTGATAGTAACCCCGTTAGGTTCATCGCTTTCAATCTCATACAGAGGATCGCAGAAA\n+GGTTCCCCATCATCCATATAGATTGTGTAACCGCATTTGATACCGTCTTTTACGGATTCC\n+ACCGTGAAAGCATCGGAATAGCAAAGCGGAGATTTGCAACCCAGACCCATAGAACCGATC\n+AGGTCGTTTGAATCATTTTTAGTTGATTCGAAGTAAACGGTAAACGCATCACTAACGAAA\n+TCAGGAGACATACCGATCCCGTAGTCACGAATAACAAAACGAGGATCAACAGCAGTTGGC\n+AACTGGACATCAAACGGGTTCTGATTTCCCGCTTCTTTGTGTCCATCAATCGCATTACAA\n+GACAGTTCGCGAATGATTGCGCGGATCTTGTATTTGTATACTGTCGAAGAAAGGATCTTA\n+TACGCTTTCTTGTTTGCGCGTAGAGATAGTTTGTTTCGTCCCTTGCTGGTATCTGTACCA\n+ACACGGTAGATGGTTTGCGGTGTATCTTCGCGTAATTTCATTGTTTATTTCTCACTTAAC\n+ATTAAAAATAACTTGGTCACAAGAGTACTTCGTTGGCTTTTTGTTCAGACCATATTCTAC\n+TACTTCACAATAGGTGTCAAGGAATTTTACCAATTTTTCTTCCTCGACCTGCTGTTTCTT\n+CATATCAAGGATACCCCACACGATAGCCCCGATAATGACAGAAAAGAACGCACAAAATCC\n+GAATATGGTCAGATATTTTCCCAACTTAGGCGCATTATAACGTGTCATACCTTACCCCTC\n+TTTGCGAATGTATGCAAGTTCTTCATGGGTTACTGGACGGATATACAGACGGCCTTTTGT\n+ATATGCCTTGCGCCCGCTGATCCAAATGTTTTTCATATCCTTAACACCGTTCATCACATC\n+GTTGTAAAACTTCTTATCAGCTTTAGCCTGATAGACTTCACGGCCTTGATAATCTTTCAT\n+GAACAAACAATAAAGGATCTCATTCTTATCAACTAGATTAGCATCCTTTGTAGTTGTTTT\n+ACTTGGTGAAGGTTTCGCACCCAGGCGCAAGGCCATAGCTTGCCACACTTTACCATGTTC\n+ATAACCGCGCCCGACAAGAGCATGAGCGATTTCGTGTAAAAGAGTGTCTAAAATATCCTC\n+
GTAGATATCTTCCGCAACATGACGACCAGACAGTTCGATCAGTTTTTTGGTATAACTGCA\n+ACGG'..b'TCGGGTAATATCG\n+TTTGTGATGGGTGTGAAAACATGGAAACAATTGCTGATAAAAATAATTCTAATAATGGTT\n+ATGTTTCTTATGGTAGTAACTTGGTACAAATGGACTGATATATTCCCGATGATAAAAGGT\n+GCCCTTGTAGTCGATACAAGGGCTATCGAAATGGAAAGAACAGAAAAGTTTAATCAATCC\n+GCGTTGGAACAGTTGAGCATAGTTCATCTTACTTCCAACGCGGATTTTTCGGCGGTACTG\n+GCATTCAGACCAAAGAACATAAACTATTTTGTTGACATTGTAGAATATCAGGGAAAATTA\n+CCATCCCAAATCGATCCTAAAAACCTCGGTGGTTATCCGATCGATAAAACATCCGAAGAA\n+TACACGAATCATATAAATGGCTTGTACTATTCATCAACTACAGCAAGTTCCTACCTACCG\n+ACACGTGATTTTGTGCCAGTAGCTTATACTTTTAGTTGCCCTTATTTCAATCTTGATAAC\n+TACTATTCTGGATCGGTTTTGATGGAATGGTATGCAAAGCGGCCTGATATACCAGATATG\n+AAGATAAACATCATATGTGGACAGGCCGCGCGCATTTTAGGTCGAGCGAGGTGATTAACG\n+TAATGCTGGTGTTAAATTGTGTGATCTTCCAATAGCCCGTTTGATTGCTTTAAAGAAGTT\n+CATCACCGGGCTATTTTTCTCGTAAATATCCCAAACTTTCAATTTGTCCCACGGATCCGG\n+AACATAATCTTCATTCCTTGCCGAAACCCCCAACGTAACCCTCCTGTATCCAGCGCTGTC\n+GTGATAATACACGAAGAATGGCCTACCTATTGGCGCAACCTTACAACCTCTCTTAGCGGC\n+TCTCATAGCCTCATCGAACGTCATGGATTCCCCCAAAAATTTCTATGCATGAATGGTCGA\n+ATTCCGATAGTTTCACTCAAAATGAATATCGGATGATCTAGCTCATCGTGGTTTTCTTCG\n+TCAATGACGATATCCCAATCAGTAGCCTTTTGTTCTTCTACCGTGGCAATGAATACCTGG\n+TTTACTTCACACTTGCGGTGTGTACGTCTGATGATTGTATCCCCCTCACGAAACACAATC\n+ATATCAGGATTGGTAGTGCGGTACGCGGTTTTACCCGCGCACACTTCATTAAGCATATCT\n+TCGTATGTCATTATAAAACCTTTACACGTTGAACGATGGTTTGTTTAACGTCTTTGTATT\n+CTCCGTGCTCTTTAACGGTTGCTTTGAAAGTGATTTCATCACCTTCGTTTGCAATGTTAT\n+TACCGAAGTAAACAACAACATTACCATCAACATTAATTTTGGTCATGAATCTTTCTACAG\n+AAGTGTAGTAAGAAACTTGAGTATATCCCAGTGAAATCACTTTCTCAACGGTTCCGGTCA\n+TTTCCAGACGTTGTTTGATTTCACCGATGTGGTTAGCTTTAGAAATGCGTTCCTGGCGCT\n+CTTGTTCCCACTGTTCGCGGATTTCTTCGCGTTTGGCGATATAATCCTTTTCCAGTGCAA\n+CACCCATGCAGTAAGCGCACACAGCATCGAATACAGGGCTGTTTTTGTTGTCTTCTTTAG\n+ACTGGTCAGCCCACCAAAGAACAGTAAACATTGGCATTTCTGCAATTACTTCGCCTTTAC\n+GCTTGCCAATCGGCATGATCCCTTTTTCCAGCAGTTCCAGTTTTTCAGTATCGAACACTG\n+ACAGTTTACCGCGACGTTCGAACAGATCGAAATCTGCAAAGCCCTGGAATATCATTTTGA\n+AAGTATCAGTTTCAGTTAAACGGGCTGAAACACGGTCGAAATACTCACGCGCTTTCGCTT\n+CCGCTTTCTCCGGATCGGTAGA
TAAGTTGCAGATATAGTTATCAGAAGTATAACCGCCGC\n+CTCTACGCTCAACACGCAAGGTATACATTGCATTTTTACGACCAGAAGAAATGAAGTAAG\n+TGGTAGTAACTACGGTTGCGTTAGTCATGGTATTTCTCCTTAAAGGGTATCTCGTTTCGA\n+TATGGCTAATATAGCAAAAGCCCCTGACCGAAGTCAAGGGCTTTTTCATCATTCATTCGA\n+ATCTTTCATTGTTTTATGAAGATGAATATCAAAAATTTTCCAGTACGCCTTTCCGCGAGG\n+ATAAATTTTTGCTTTGTCAATATCGTTGTTGCTTCCCCATGTGTTGTTTGGGCCACGACA\n+TCGATTTTTTATATAATCTGTATGCCAGAATAAGCGCTGAACCGATGATTCCGTACCTAA\n+TGGATCTTCTTTACTGAACAGAATTTGTATACTCATAAGAAGAACCCAGTGCGAACAATC\n+AGATCGATTTTCTTTTCTGGTTCAAACGGTGATTTGCTATCGATGTTACACTGATAGAAC\n+ATACCAACATACTTTTCAGGAATGTTGGATTCACGCGCCCATTTCAGGTTATCATCGGTA\n+TTCGGCCCCAGCATGAGGTTAACAACATCAACAGCATAATCCTGTTCTTTGGTGTTGCCG\n+GAACCGTTTACATAATGCCCGTGGGCGGTTTTGAGGATCTCAATTACTTCGCTATCGTCA\n+GTTTCGACTTTGTAAAGCGTTGTATTTTCAGGAATTTCTTCAAGAATGATCAAAGCGGTT\n+TTCATCACACTTACCTTTGTGTTTCTGTTTACGTTTTGCTTCTTTAAATGCTCGCTTGCG\n+ATCGCGGTGAGTAGAAGCGCGGTTGAAATCATGTTTCGCTACCAAATTATTCATATAAGC\n+CCCTTAAAGAAAAATATTTAGGGGCTTTCGCCCCTGTATTAATCCAGCAATTTGCGGATC\n+TTGTCTGCGATACGTCCGGCGCGGGTTGCACTTGCAGTATGATCGCTTTCTTTTGAAGCC\n+AGTTCCGCCAGCTTACGCTGATGTTCTTCTTCTGCTGCTTGACGATCTGCTGCAACCTGT\n+GCAACCTGCTCATTATCGTGAGCAATACGCGCTTCCAGTTCAGACAGGGTTTTGTCGAAA\n+GTTGCTACGATTTCATCTACAGAACGAATTTTATTAAACAGTTTCATAATTTATCTCAAT\n+TGGTTAGTTTTAATCAGTATACATCAATATGGTTGAAATTCAAAATCATAAATGTCATTC\n+AGTGCGCGGTTCCACTCGGTGTAGTTTTCACCAGCACCATAACGCATTTGAATAGCACTT\n+TCGAACGTTGATCCGTTGAGGTTCGGGAAACCGAACAGGTTTTTGATTTTGTCATGTGCT\n+ACATAATACAGAGAAGCACTTTCCAGCATCGCAACCATCGCAGACGGTTCGTGTTCGCGG\n+CGCTTGATACGTAACAGAGTACGACTAGCACCAGTTTTACGGCGTTGATTTGGTGCTACG\n+TAGAAACGGAATACTACGCGCCCTGTTTTATCATCAACTACCAGGTAAAACCCGTTTTCT\n+TTCAGATCCACGCCTTCGAATTTCTTGAAGGTTCCGCGTTTCATGTCACCAATTTTAATT\n+GCATATTTGTGAATGTCAAGTCTTGTCAGAATTCTTTTCATATTTTTTAGATACCAGTTT\n+GCCTAATTTTGTAATTTCGCCTGTTTTTACGTTAACAAACAAGGCGATGCTCAGAAATGG\n+GATGCTAATCACTACGCTGATCAATGTAAACAGAAAACGTATCACAAAAAGAACAGCACG\n+TTCAAGATATCGTTGCATCCACGCGATTCCTAAACAACTATACCCTACTATAAAGGTGGT\n+TGCAACATAAAATGCACCAAATCCTTTACGAAATACGTAACCTTTCCCGGATTCTATCCG\n+GTCGTC
GGCCCACATTTCACGGGCAGTTTTCAGAATAGATTCACCACTAGCGCGAGTTTC\n+GTTAGCCGAAGGCATGTTTTTAAATTTCATGATAGTCTCCTATGCGCCCAGAACTCTCCA\n+GGCGCGGTTGTTTAG\n'
b
diff -r e7762a585e3d -r 8bd03ba8510a test-data/Miro_SplitOut.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Miro_SplitOut.gff3 Fri Apr 28 01:35:25 2023 +0000
b
b'@@ -0,0 +1,825 @@\n+##gff-version 3\n+##sequence-region Miro 1 176055\n+Miro\tfeature\tgene\t7454\t7906\t.\t-\t.\tID=Miro_8;\n+Miro\tGenBank\tCDS\t7454\t7894\t.\t-\t1\tID=Miro_8.CDS;Name=Miro_8;Parent=Miro_8;obsolete_name=Miro_156;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t7903\t7906\t.\t-\t1\tAlias=Miro_8;ID=Miro_8.RBS;Name=Miro_8;Parent=Miro_8;\n+Miro\tfeature\tgene\t7917\t8512\t.\t-\t.\tID=Miro_9;\n+Miro\tGenBank\tCDS\t7917\t8501\t.\t-\t1\tID=Miro_9.CDS;Name=Miro_9;Parent=Miro_9;obsolete_name=Miro_155;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t8509\t8512\t.\t-\t1\tAlias=Miro_9;ID=Miro_9.RBS;Name=Miro_9;Parent=Miro_9;\n+Miro\tfeature\tgene\t123276\t124212\t.\t+\t.\tID=Miro_206;\n+Miro\tGenBank\tCDS\t123286\t124212\t.\t+\t1\tAlias=Miro_206;ID=Miro_206.CDS;Name=Miro_206;Parent=Miro_206;obsolete_name=Miro_234;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t123276\t123279\t.\t+\t1\tID=Miro_206.rbs;Name=Miro_206;Parent=Miro_206;\n+Miro\tfeature\tgene\t68490\t70715\t.\t-\t.\tID=Miro_117;\n+Miro\tGenBank\tCDS\t68490\t70706\t.\t-\t1\tID=Miro_117.CDS;Name=Miro_117;Note=contains von Willebrand factor%2C type A;Parent=Miro_117;obsolete_name=Miro_047;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t70713\t70715\t.\t-\t1\tAlias=Miro_117;ID=Miro_117.RBS;Name=Miro_117;Parent=Miro_117;\n+Miro\tfeature\tgene\t115729\t116735\t.\t+\t.\tID=Miro_200;\n+Miro\tGenBank\tCDS\t115743\t116735\t.\t+\t1\tAlias=Miro_200;ID=Miro_200.CDS;Name=Miro_200;Note=T4 gp6-like;Parent=Miro_200;obsolete_name=Miro_240;product=baseplate structural protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t115729\t115732\t.\t+\t1\tID=Miro_200.RBS;Name=Miro_200;Parent=Miro_200;\n+Miro\tfeature\tgene\t116735\t117608\t.\t+\t.\tID=Miro_201;\n+Miro\tGenBank\tCDS\t116745\t117608\t.\t+\t1\tAlias=Miro_201;ID=Miro_201.CDS;Name=Miro_201;Note=T4 
gp9/gp10-like;Parent=Miro_201;obsolete_name=Miro_239;product=baseplate structural protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t116735\t116738\t.\t+\t1\tID=Miro_201.RBS;Name=Miro_201;Parent=Miro_201;\n+Miro\tfeature\tgene\t117595\t119422\t.\t+\t.\tID=Miro_202;\n+Miro\tGenBank\tCDS\t117605\t119422\t.\t+\t1\tAlias=Miro_202;ID=Miro_202.CDS;Name=Miro_202;Note=T4 gp9/gp10-like;Parent=Miro_202;obsolete_name=Miro_238;product=baseplate structural protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t117595\t117597\t.\t+\t1\tID=Miro_202.RBS;Name=Miro_202;Parent=Miro_202;\n+Miro\tfeature\tgene\t119412\t120090\t.\t+\t.\tID=Miro_203;\n+Miro\tGenBank\tCDS\t119422\t120090\t.\t+\t1\tAlias=Miro_203;ID=Miro_203.CDS;Name=Miro_203;Note=T4 gp11-like;Parent=Miro_203;obsolete_name=Miro_237;product=baseplate to short tail fiber connector protein;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t119412\t119415\t.\t+\t1\tID=Miro_203.RBS;Name=Miro_203;Parent=Miro_203;\n+Miro\tfeature\tgene\t81829\t81940\t.\t-\t.\tID=Miro_142;\n+Miro\tGenBank\tCDS\t81829\t81927\t.\t-\t1\tID=Miro_142.CDS;Name=Miro_142;Parent=Miro_142;obsolete_name=Miro_022;product=hypothetical conserved;tmhelix=1 TMD (4-26) N out%2C C 
in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81938\t81940\t.\t-\t1\tAlias=Miro_142;ID=Miro_142.RBS;Name=Miro_142;Parent=Miro_142;\n+Miro\tfeature\tgene\t1\t910\t.\t-\t.\tID=Miro_1;\n+Miro\tGenBank\tCDS\t1\t900\t.\t-\t1\tID=Miro_1.CDS;Name=Miro_1;Parent=Miro_1;obsolete_name=Miro_163;product=rIIb;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t908\t910\t.\t-\t1\tAlias=Miro_1;ID=Miro_1.RBS;Name=Miro_1;Parent=Miro_1;\n+Miro\tfeature\tgene\t900\t3173\t.\t-\t.\tID=Miro_2;\n+Miro\tGenBank\tCDS\t900\t3161\t.\t-\t1\tID=Miro_2.CDS;Name=Miro_2;Parent=Miro_2;obsolete_name=Miro_162;product=rIIa;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3171\t3173\t.\t-\t1\tAlias=Miro_2;ID=Miro_2.RBS;Name=Miro_2;Parent=Miro_2;\n+Miro\tfeature\tgene\t3172\t3417\t.\t-\t.\tID=Miro_3;\n+Miro\tGenBank\tCDS\t3172\t3408\t.\t-\t1\tID=Miro_3.CDS;Name=Miro_3;Parent=Miro_3;obsolete_name=Miro_161;product=hypothetical conserved;tmhelix=1 TMD (12-34) N in%2C C out;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t3414\t3417\t.\t-\t1\tAlias=Miro_3;ID=Miro_3.RBS;Name=Miro_3;Parent=Miro_3;\n+Miro\tfeature\tgene\t3412\t3979\t.\t-\t.\tID=Miro_4;\n+Miro\tGenBank\tCDS\t3412\t3966\t.\t-\t1\tID=Miro_4.CDS;Name=Miro_4;Note=contains SprT domain;Parent=Miro_4;obsolete_name='..b'ro_066;product=hypothetical conserved;tmhelix=2TMDs (7-26%2C 31-53) N in%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57368\t57372\t.\t+\t1\tID=Miro_98.RBS;Name=Miro_98;Parent=Miro_98;\n+Miro\tfeature\tgene\t57613\t57914\t.\t+\t.\tID=Miro_99;\n+Miro\tGenBank\tCDS\t57624\t57914\t.\t+\t1\tAlias=Miro_99;ID=Miro_99.CDS;Name=Miro_99;Parent=Miro_99;obsolete_name=Miro_065;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t57613\t57616\t.\t+\t1\tID=Miro_99.RBS;Name=Miro_99;Parent=Miro_99;\n+Miro\tfeature\tgene\t81924\t82086\t.\t-\t.\tID=Miro_143;\n+Miro\tGenBank\tCDS\t81924\t82079\t.\t-\t1\tID=Miro_143.CDS;Name=Miro_143;Parent=Miro_143;obsolete_name=Miro_021;product=hypothetical conserved;tmhelix=1 TMD (15-37) N out%2C C 
in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t82084\t82086\t.\t-\t1\tAlias=Miro_143;ID=Miro_143.RBS;Name=Miro_143;Parent=Miro_143;\n+Miro\tfeature\tgene\t67179\t67658\t.\t-\t.\tID=Miro_114;\n+Miro\tGenBank\tCDS\t67179\t67649\t.\t-\t1\tID=Miro_114.CDS;Name=Miro_114;Parent=Miro_114;obsolete_name=Miro_050;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67656\t67658\t.\t-\t1\tAlias=Miro_114;ID=Miro_114.RBS;Name=Miro_114;Parent=Miro_114;\n+Miro\tfeature\tgene\t81366\t81851\t.\t-\t.\tID=Miro_141;\n+Miro\tGenBank\tCDS\t81366\t81839\t.\t-\t1\tID=Miro_141.CDS;Name=Miro_141;Note=contains macro domain;Parent=Miro_141;obsolete_name=Miro_023;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81849\t81851\t.\t-\t1\tAlias=Miro_141;ID=Miro_141.RBS;Name=Miro_141;Parent=Miro_141;\n+Miro\tfeature\tgene\t81076\t81376\t.\t-\t.\tID=Miro_140;\n+Miro\tGenBank\tCDS\t81076\t81363\t.\t-\t1\tID=Miro_140.CDS;Name=Miro_140;Parent=Miro_140;obsolete_name=Miro_024;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t81374\t81376\t.\t-\t1\tAlias=Miro_140;ID=Miro_140.RBS;Name=Miro_140;Parent=Miro_140;\n+Miro\tfeature\tgene\t83223\t83630\t.\t-\t.\tID=Miro_147;\n+Miro\tGenBank\tCDS\t83223\t83618\t.\t-\t1\tID=Miro_147.CDS;Name=Miro_147;Parent=Miro_147;obsolete_name=Miro_017;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83627\t83630\t.\t-\t1\tAlias=Miro_147;ID=Miro_147.RBS;Name=Miro_147;Parent=Miro_147;\n+Miro\tfeature\tgene\t83066\t83224\t.\t-\t.\tID=Miro_146;\n+Miro\tGenBank\tCDS\t83066\t83212\t.\t-\t1\tID=Miro_146.CDS;Name=Miro_146;Parent=Miro_146;obsolete_name=Miro_018;product=hypothetical 
conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83221\t83224\t.\t-\t1\tAlias=Miro_146;ID=Miro_146.RBS;Name=Miro_146;Parent=Miro_146;\n+Miro\tfeature\tgene\t82479\t83083\t.\t-\t.\tID=Miro_145;\n+Miro\tGenBank\tCDS\t82479\t83069\t.\t-\t1\tID=Miro_145.CDS;Name=Miro_145;Parent=Miro_145;obsolete_name=Miro_019;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t83080\t83083\t.\t-\t1\tAlias=Miro_145;ID=Miro_145.RBS;Name=Miro_145;Parent=Miro_145;\n+Miro\tfeature\tgene\t67646\t67816\t.\t-\t.\tID=Miro_115;\n+Miro\tGenBank\tCDS\t67646\t67804\t.\t-\t1\tID=Miro_115.CDS;Name=Miro_115;Parent=Miro_115;obsolete_name=Miro_049;product=hypothetical conserved;tmhelix=2TMDs (2-21%2C 31-50) N in%2C C in;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t67814\t67816\t.\t-\t1\tAlias=Miro_115;ID=Miro_115.RBS;Name=Miro_115;Parent=Miro_115;\n+Miro\tfeature\tgene\t84392\t84959\t.\t-\t.\tID=Miro_149;\n+Miro\tGenBank\tCDS\t84392\t84946\t.\t-\t1\tID=Miro_149.CDS;Name=Miro_149;Parent=Miro_149;obsolete_name=Miro_015;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84955\t84959\t.\t-\t1\tAlias=Miro_149;ID=Miro_149.RBS;Name=Miro_149;Parent=Miro_149;\n+Miro\tfeature\tgene\t83686\t84337\t.\t-\t.\tID=Miro_148;\n+Miro\tGenBank\tCDS\t83686\t84327\t.\t-\t1\tID=Miro_148.CDS;Name=Miro_148;Note=T4 RegB-like;Parent=Miro_148;obsolete_name=Miro_016;product=endoribonuclease;signal=signal peptidase II cleavage site 12-13;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t84334\t84337\t.\t-\t1\tAlias=Miro_148;ID=Miro_148.RBS;Name=Miro_148;Parent=Miro_148;\n+Miro\tfeature\tgene\t67801\t68461\t.\t-\t.\tID=Miro_116;\n+Miro\tGenBank\tCDS\t67801\t68451\t.\t-\t1\tID=Miro_116.CDS;Name=Miro_116;Parent=Miro_116;obsolete_name=Miro_048;product=hypothetical conserved;\n+Miro\tGenBank\tShine_Dalgarno_sequence\t68457\t68461\t.\t-\t1\tAlias=Miro_116;ID=Miro_116.RBS;Name=Miro_116;Parent=Miro_116;\n'