changeset 1:9d689f8c9ce4 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics commit 9dff0476530d65342db00896f3108edb899e3fd2
author ecology
date Thu, 08 Aug 2024 11:58:48 +0000
parents 12870a79d56b
children
files sanntis.xml test-data/BGC0001472.fna test-data/Regex_Find_And_Replace_on_data_21.fasta test-data/Sanntis_output_data.genbank
diffstat 4 files changed, 637 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/sanntis.xml	Fri Jul 26 14:31:32 2024 +0000
+++ b/sanntis.xml	Thu Aug 08 11:58:48 2024 +0000
@@ -2,7 +2,7 @@
     <description>in genomic and metagenomic data</description>
     <macros>
         <token name="@TOOL_VERSION@">0.9.3.5</token>
-        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
     </macros>
     <edam_topics>
         <edam_topic>topic_3387</edam_topic>
@@ -11,21 +11,49 @@
         <requirement type="package" version="@TOOL_VERSION@">sanntis</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank'
+    #if $selection.which_sanntis == 'sanntis': 
+        sanntis --ip-file '$selection.input_interpro' --outfile 'output_sanntis.gff' '$selection.input_genbank'
+    #else:
+        sanntis_build_gb  -n '$selection.input_nuc' -a '$selection.input_prot' -o 'output_sanntis_gb.gb'
+    #end if 
     ]]></command>
     <inputs>
-        <param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
-        <param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+        <conditional name="selection">
+            <param name="which_sanntis" type="select" label="Do you want to build a genbank or to make a SMBGC Annotation?" help="If you decide to build a genbank you can then use this genbank to then conduct the annotation.">
+                <option value="sanntis">Run sanntis</option>
+                <option value="genbank">Build genbank</option>
+            </param>
+            <when value="sanntis">
+                <param name="input_interpro" type="data" format="tabular" label="Input the tabular file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
+                <param name="input_genbank" type="data" format="genbank" label="Input a Genbank file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+            </when>
+            <when value="genbank">
+                <param name="input_nuc" type="data" format="fasta" label="Input a nucleotide fasta file"/>
+                <param name="input_prot" type="data" format="fasta" label="Input a protein fasta file" help="Before using this tool you can get the right protein data by using the Prodigal tool"/>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/>
+        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data">
+            <filter>selection['which_sanntis'] == 'sanntis'</filter>
+        </data>
+        <data name="output_sanntis_gb" from_work_dir="output_sanntis_gb.gb" format="genbank" label="Sanntis output data genbank">
+            <filter>selection['which_sanntis'] == 'genbank'</filter>
+        </data>
     </outputs>
     <tests>
         <test expect_num_outputs="1">
+            <param name="which_sanntis" value="sanntis"/>
             <param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/>
             <param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/>
             <output name="output_sanntis" value="Sanntis_output_data.gff3"/>
         </test>
+        <test expect_num_outputs="1">
+            <param name="which_sanntis" value="genbank"/>
+            <param name="input_nuc" value="BGC0001472.fna"/>
+            <param name="input_prot" value="Regex_Find_And_Replace_on_data_21.fasta"/>
+            <output name="output_sanntis_gb" value="Sanntis_output_data.genbank"/>
+        </test>
     </tests>
     <help><![CDATA[
 
@@ -36,6 +64,10 @@
 SMBGC Annotation using Neural Networks Trained on Interpro Signatures
 Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
 
+**Or**
+
+This tool can also build a GenBank file suitable for use as input to SanntiS.
+
 .....
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BGC0001472.fna	Thu Aug 08 11:58:48 2024 +0000
@@ -0,0 +1,2 @@
+>BGC0001472
+GCCCCGGGGGCCGTCGCTCCGGGGGTCGGTCCTGCCCGGTGGCGCAGGACCACGGGGGCCGGGGCCCGGGGGTGGACGGCATTTGTTTTGACCCAGCTCCGTGAGGTAGGTACGCTCAAGCCTTGTGCCTGGGGTGTGCCTGGGCTCGGGTGCGTGTCCTCAACCGCATGGCGAGTCCGTAAGTGGCCACCGCAATCTGTGTTCCGTCTGCCTTCCAGCAGGGGCGTGCAGTATTCGACACACCCGACCGCGTGGGTCGGTGACTGTTCCAGGTTAGTTTCACCGAACGGCACACAGAAACCGGAGAAGTAGTGCCTACGATCCAGCAGCTGGTCCGGAAGGGCCGGCAGGACAAGGTCGAGAAGAACAAGACGCCCGCGCTCGAGGGTTCGCCCCAGCGTCGTGGTGTCTGCACGCGTGTGTTCACGACCACCCCGAAGAAGCCGAACTCGGCGCTCCGTAAGGTCGCGCGTGTGCGTCTGACCTCCGGTATCGAGGTCACGGCCTACATCCCGGGTGAGGGGCACAACCTGCAGGAGCACTCCATCGTGCTCGTGCGTGGTGGCCGTGTGAAGGACCTGCCGGGTGTTCGTTACAAGATCATCCGCGGTTCGCTCGACACCCAGGGTGTCAAGAACCGCAAGCAGGCCCGCAGCCGCTACGGCGCCAAGAAGGAGAAGTAAGAATGCCTCGTAAGGGCCCCGCCCCGAAGCGCCCGGTCATCATCGACCCGGTCTACAGCTCTCCTCTTGTCACCTCGCTGATCAACAAGATCCTGCTCGACGGCAAGCGTTCCACCGCCGAGCGGATCGTGTACGGCGCCATGGAAGGCCTCCGCGAGAAGACCGGCGCTGACCCGGTCATCACGCTGAAGCGCGCGCTTGAGAACGTCAAGCCCTCGCTCGAGGTCAAGTCCCGCCGTGTCGGTGGCGCCACCTACCAGGTGCCGATCGAGGTCAAGCCCGGTCGCGCCGCCACCCTCGCTCTGCGCTGGGTCGTGGGTTACTCCCGCGCCCGTCGCGAGAAGACCATGACCGAGCGCCTCATGAACGAGCTGCTCGACGCCTCCAACGGTCTTGGCGCTGCCGTCAAGAAGCGCGAGGACACCCACAAGATGGCCGAGTCGAACAAGGCCTTCGCGCACTACCGCTGGTAGTCGCTCACCCCATCGAGACCGAGAGAAGATTGAGCCTTATGGCCACCACTTCGCTTGACCTGGCCAAGGTCCGCAACATCGGGATCATGGCCCACATCGACGCGGGCAAGACGACCACCACCGAGCGGATCCTCTTCTACACCGGCGTTTCGTACAAGATCGGTGAAGTCCACGACGGCGCAGCCACGATGGACTGGATGGAGCAGGAGCAGGAGCGCGGCATCACGATCACGTCCGCCGCGACGACCTGTCACTGGCCGCTCAATGATGTTGACCACACCATCAACATCATCGACACCCCGGGTCACGTCGACTTCACCGTCGAGGTGGAGCGTTCGCTCCGCGTCCTCGACGGTGCCGTCACCGTGTTCGACGGTGTGGCCGGCGTCGAGCCCCAGTCCGAGACCGTCTGGCGTCAGGCGGACCGCTACGGCGTGCCGCGTATCTGCTTCGTCAACAAGCTCGACCGCACGGGCGCCGACTTCCTCCGTTGCGTCGACATGATCGTCCAGCGCCTCGGCGCTGTCCCGATCGTCATGCAGCTCCCCATCGGTGCGGAGGCTGACTTCCGCGGCGTCGTCGACCTCGTGTCGATGAAGGCCTTCGTTTACCCCGAAGAGGCCGTCAAGGGCGAGATGTACGACACCGTCGAGATCCCGGACAACCTCAAGGAGGCCGCCGAGGAATGGCGCGGCAAGCTCCTCGAGGCCGTCTCGGAGAACGACGACCAGATGATGGAGCTGTACCTCGAGGGCGAAGAGCCCACCGAGGAGCAGCTGCACGAGGCGATCCGTCGGATCACCCTCGCGTCGAAGGGCTCGGCCGACTCCGTCACCGTGACCC
CCGTCTTCTGTGGCACGGCGTTCAAGAACAAGGGCGTCCAGCCCCTGCTCGACGCCGTCGTCCGCTACCTGCCTTCCCCCCTGGACGTCGAGGCCATCGAGGGCCACGACGTCAAGGACCCGGAGAAGGTCGTCCAGCGGAAGCCCTCGGACGACGAGCCGTTCTCCGGCCTGGCGTTCAAGATCGCGAGCGACCCGCACCTCGGCAAGCTCACCTTCGTCCGGATCTACTCCGGTCGCCTCGAGGCCGGCACCGCGGTGCTGAACTCGGTCAAGGGCAAGAAGGAGCGCATCGGCAAGATCTACCGCATGCACGCGAACAAGCGTGAGGAGATCCCGTCGGTGGGCGCCGGTGACATCGTCGCCGTCATGGGCCTGAAGCAGACCACCACCGGTGAGACGCTGTGTGACGACAAGAACCCGGTGATCCTGGAGTCCATGGACTTCCCGGCGCCGGTCATCCAGGTCGCCATCGAGCCCAAGTCCAAGGGTGACCAGGAGAAGCTGGGTGTCGCCATCCAGCGCCTCTCGGAGGAGGACCCCTCCTTCCAGGTGCACTCCGACGAGGAGACCGGCCAGACCATCATCGGTGGTATGGGCGAGCTTCACCTCGAGGTGCTCGTCGACCGCATGAAGCGCGAGTTCCGCGTCGAGGCGAACGTCGGCAAGCCGCAGGTCGCGTACCGTGAGACGATCCGCAAGGCCGTCGAGCGTATCGACTACACGCACAAGAAGCAGACTGGTGGTACCGGCCAGTTCGCGAAGGTGCAGATCGCCATCGAGCCCATCGAGGGTGGCGACGCGTCCTACGAGTTCGTCAACAAGGTCACCGGTGGCCGCATCCCCCGTGAGTACATTCCCTCGGTGGACGCGGGTGCCCAGGAAGCCATGCAGTTCGGCATCCTGGCCGGCTACGAGATGGTGGGCGTCCGCGTCACCCTTCTCGACGGTGGTTACCACGAGGTCGACTCCTCGGAGCTCGCCTTCAAGATCGCTGGTTCGCAGGCGTTCAAGGAGGGTGCCCGCAAGGCGTCCCCCGTGCTCCTCGAGCCGATGATGGCCGTCGAGGTCACCACACCCGAGGACTACATGGGTGAAGTGGTCGGCGACATCAACTCCCGCCGTGGCCAGATCCAGGCCATGGAGGAGCGCCACGGCGCTCGCGTCGTGAAGGGCCTCGTGCCCCTCTCGGAGATGTTCGGCTACGTCGGAGACCTCCGCAGCAAGACCTCGGGTCGCGCAAGCTACTCGATGCAGTTCGACTCCTACGCCGAGGTTCCGCGGAACGTCGCCGAGGAGATCATCGCGAAGGCCAAGGGCGAGTAACTCTTCCGAGCTCACGCTTTAGGCTTGTCACCGGAGCCCGGTCGGGCATGCGTCGCAGTGCGGCGGATGCCCCCGGCACCGGCATTCCAGCAAAGATCACCTGGCGCCGATGAAGCAAGGCGTACAGAACCACTCAGGAGGACCCCAGTGGCGAAGGCAAAGTTCGAGCGGACTAAGCCGCACGTCAACATCGGCACCATCGGTCACATCGACCACGGTAAGACGACCCTCACGGCCGCCATTACCAAGGTGCTGCACGACGCGTACCCGGACCTGAACGAGGCCTCGGCCTTCGACCAGATCGACAAGGCTCCTGAGGAGCGTCAGCGCGGTATCACGATCTCGATCGCGCACGTCGAGTACCAGACGGAGTCGCGTCACTACGCGCACGTCGACTGCCCGGGTCACGCTGACTACATCAAGAACATGATCACGGGTGCGGCGCAGATGGACGGCGCCATCCTCGTGGTCGCGGCCACCGACGGCCCGATGCCGCAGACCAAGGAGCACGTGCTCCTGGCCCGCCAGGTAGGCGTGCCGTACATCGTCGTCGCGCTGAACAAGGCCGACATGGTGGACGACGAGGAGATCCTGGAGCTCGTCGAGCTCGAGGTCCGTGAGCTCCTCTCCGAGTACGAGTTCCCGGGCGACGACCTTCCGGTCGTCAAGGTCTCG
GCGCTCAAGGCCCTCGAGGGCGACGCCGAGTGGGGCCAGACCGTTCTCGACCTGATGAAGGCCGTCGACGAGTCCATCCCGCAGCCCGAGCGTGACGTCGAGAAGCCGTTCCTCATGCCCATCGAGGACGTCTTCACGATCACCGGTCGCGGTACGGTCGTCACCGGCCGCATCGAGCGTGGTGTCCTGAAGGTCAACGAGACCGTCGACATCGTCGGTATCAAGACCGAGAAGACCACCACCACGGTCACCGGCATCGAGATGTTCCGCAAGCTGCTCGACGAGGGCCAGGCCGGTGAGAACGTCGGTCTGCTGCTTCGTGGCATCAAGCGCGAGGACGTCGAGCGCGGCCAGGTCATCATCAAGCCGGGTTCGGTCACGCCGCACACCGAGTTCCAGGCCCAGGCCTACATCCTGTCGAAGGACGAGGGTGGCCGTCACACCCCCTTCTTCAACAACTACCGCCCGCAGTTCTACTTCCGTACCACGGACGTGACGGGCGTTGTGACCCTTCCCGAGGGCACCGAGATGGTCATGCCGGGTGACAACACCCTCATGGACGTCGCGCTGATCCAGCCGGTCGCCATGGAAGAGGGCCTGAAGTTCGCCATCCGTGAGGGTGGTCGTACGGTGGGCGCCGGCCAGGTCACCAAGATCACCAAGTAATTCCGATTACTTGTGGGTCGGGGTAACCCGGTTGCTCTGAACTGAGCGCACAGCACCAAGCAGGGCCCGCACGGCATCACGCCGTGCGGGCCCTGTGCTGTCTCCGCGGGGCTCGCGGCGGACAGGACGACGGGCGCGGCCCGCACACCCTGAGGGTGTGCGGGCCGTCGTGACGCGGGCGCCGTTCGCGTCGGGAGGTGCGGTCAGCTCTCCGGTGGCCGGGCGTATGTGGTGGCCAGCAGGGAATGGCGGGTCGCCCCGTCGCTGTGGACCAGATGGTGCACGGACGACGGGACGAAGCCGCCGAACAGTGTGCCGACGAGCCCCAGCGCCTGGCACCTGAGATTGAGGTCGTAGGTGGCCATCGAGGCCCGCAGCGCACTGATCCGGTAGCCGTGGGAACCCGCCCAGGTGTCCGCCCGGTCCAGACTCGCGTAGAGCGCGACGATCCCCGCTCCGGTGGAGAACTCGCGCTGGACGCCGAGGTTCTCGGCCGGTCCGATCTCGTCGAGCCCGGCCAGGTAGCACGTCTCCTCGGCGGTCACCCGGTACAGCCCTGGTTCGGCGCCCTCGCTGCGGAACGCGAAGACGAAGCCCTCCAGCGCGCCCGCCGAGGCGTCGAGCCCCCAGTCGTCACGGTCCCGGCGCAGCACGTCACGCAGCAGCGACAGGATCACGTCCGTGCGTACCGGCAGTGGCGCGTAGTGCAGCGAGGAGCGGCGGCGTTCGAGCGTGCCCCGCAGGTCGTGGACCGGCCGCAGGTCCTCGGCAGACGGTTGCGGTTCGACGGGCACGGAGTCCGCGAACGGCACCGGCCGGCGTTCGGCCGCCAGGCCGTCCGACGTGGGCGTCCGGGAGGTGAAGCCCTCCAGCACGGCCGTCATGGATGTCACGTCGTTCCTCATGCGGTACCTCCCAGGGACGCGAAGGCCGCCACCGGCTCGTCGGCAGGCGATGTTCCGAGCAGCCGGGCCAACGCGTCGTCGTCCCAGTCCGACCTGGGTGTGAAGCCCAGGCCCAGGTGCTGTGCGAGCTCCCGCAGACTGGCCAGGTTGCAGCCCGCGTCCAGAAAGACCAGCCTGAAGCCGAACGTGCCGTACTTGGTCATGACCTTCTTGAGATCGCCGGTGATGATGATGTCGCACGGCGAGTCACCCGGAGGTACCTCGCCGGAGACGGTCACGAGTGTGTGGCTGCCCTGGGCGTACGCGTAGACCCCAGGAGGCATGATCCGGTCGTCGCGCACCACCGCGTAGGCCGTCGTGCTGCCGATGTTTCCCGACGCCGCGGTCCATCGTTTGACGCGCTCCGGAGTGGTCTCGTCCTCCTTCACC
CCGAAGGCGACCTTCAGCAGCAGTCCCAGGGAGCTCAGGGTGAGCGGGGTGTCGCCGTGGGACGGGTCATGCCGTTCCGACCCGGCGAGGACGGAGATGTCCAGGGCGGGGAGCGGTGTGTGCGGGCGGCTCGGCCAGTCCTTGAACTGCGACTGCAGGCGCAGGTTGGACGCGTAGTAGTGCGCCTGGTGGTCCTTCGGAGCGAGGAACGCGCGTGGAGGCATCGCGACCGAGGCCTCGTAGACGGCACCGGCGGGTGCCTGGGGTGCGACCGGCCCCCGCGCGTAGGAGCAGCGAGGGCACCCCGGGCGGACGGCAACCGGCCGGTAGACCGTGCTCAGGGTCGCGGTGTCGATGACGGTGAAGTCGCCGGGGAGGTGCGAGATCGTGGCCCGTGCGAGCAACGCGGTGACGTGGTGGGAGGCCAGGCCGACGACCAGGTCGTGCAGGTATTCCGGCGGCTCGCCGGAGAGGTCCGCCTCACCGTGACGGCCGCAGTCCAGGCACGGTGTGATCGAGAGGTCGGCGTACGGGCCGATCGTGATCGTCCGCGCGTCGGCGCGCACCCGCAGCAGCGGACGCCCGTCCTGCCGGCACCGTTCCTCGGTGGCGGCGAGCAGGGGTGCCGACGCGGGCGTCTCGAAGAACACGGTGAGCTCGTCACCGGGGCCCGGCGGCCCGGCCGGTTCCGTGACCACCGGGCACACCTCCCGCAGCGACCGGCGGGCCCCGGCGACCAGGGCGGCGTCCCCCTCGAGGCGCACCGAGCGCGACACCAGCCGCGCCGCGGCATCCGCCCAGGACGGGTTCGATCCGGTGGAGTTGCCCAGCCGGGAGAGGAAGACGGCCCATTCGGGCGTGACGTCCGGTTCCTCGCCGGACATGGCCTCCTCGACCGCCCCGGCCGTCGACAGCAAGGCCAGGCACTTGTAGACCGTGGCCTCGTCGAATCCGGTCTTGAGCGCCAGTTCAGTGTGGTCGCGGGTTCCGTCGCACGCCTCGGTCAGGGGCACCAGCCCTTCTCGTGCGAACGCTCCGGAGAACACCTGTGCCCGGTCGGCCCCGTCGAGCACGACGGATTCCCCTGCCCTGCGCAGCCTCACCCCGCGGCGCAGGACGGGCCGGGCAGGGACGGTCAACTGCATGTCCGTACGTGCGGCCCGCCCGATCTGCTCGGCCTTCATCTCGGCAACCATGATCGATCTCCCTCTTACCGCAGCAGTTCCGGAACAGGCATGGCGCTGTCCGCGCGCGCCTGCGAGAACCGGCCCCCGCACCGGGGGCACCGGTTCACCGAGACCGTCACCGCGGACGAGACGGCTCCGGAGACGAGATTGAACGTACGTACGGTGCCCCCGATCCCCGTCACACCCGTGGCGATCTCGGTCAGGGCGAGGTCCAGGAGGCCGGAGGCGACGGACAGGTGCTGCCGGCCGAACCCCTCGGGCAGGCCGGACAGGGCGGCGTCCATGTCGTACGGGCGGGCGGTGCCCGCGTGTTGGGCGGCGCGCTTCCTGTAGCACGCGTAGCAGGCCGTCCGGCCCGGAACCACGACGGGTCCGCAGAGGACCTTGGTCGGCAGCAGCTGGAGCCCGACGGACGGTGTGCTCCGTTCCGCGCAGATCCGGTCCGTCTCGTCGCGGAGCCCGGGGTCGCCGCCGGCATGCACGGGGACGACGACATCCGCGTGCGGTGCCGTGTCGTTCAGGAAGTCCTCGAAGGACACCTGTAGCTCCGGGGGGACACCACGGTGCTCCGTGAGCCGCCGGCTGAAGGTGTCACCCACGAGGTACAGGCACTGCGGACGCGGATCGTGCGACATGCTGCTCTGCTCCTTCTTCTCGCTCATGCGAACGGCTGCTGCACGGGGTTGACCGCGTCCTCGGCATGGCTCGTGTGCCCCATCGCGCGCGGCGCGTCATAGAGGCGCGGGGTGCCCAGGTACCGCTCGCCGTGTACGAACGACACGGGCATGGCCTCGGGCACGAGCACCTTGACCG
CCCGCATGCCCACCTGGCGCGCCTCGTCCGTGGTGATGTCCGTGACGAGCACCTCGGCACCCCGTGCCGCGAGCCGGGCGACGACGGTGTCCAGCGGGTCCGCTCCCGCCGGAAGCCCCGGCATGCCTTCGAGGCCGTACGCCGGCCGTTCACCGTCCAGCAGGAAGCCGAACACGTCCCGCCGGTCGCGCGTCGCGTTGTGGACCGCGCCGCCGACGACGCTCACCTTGGCCGGATCGGGCTCGCGCCCGGCGTACGCCGAGAGGTATCCGCGCAACGCCACCCGCAGGGAGGCCAGTTCGCGGTAGATCTTGCCCAGCGCCTGTTCCGGATGGACGTCGCAGGTGGCGGCGACGATCTGGGCGAGCGCGGGGTCGGCGTCGGAGAGCTGCACGGCGTAGATCACCGGGACGCCGAAGTCCGTCGTCGCGTCGAACAGCCTCACCCGCAGGTCCGTGGAGGTGCCGACGCGGTGCAGTTCCCGGACGCCGGCGTCGAGCCGCGCCGGATCCACGACCAGCTCGGGCAGCCGCAGCTGCTGGAGCCAGACCAGCGCGATGGCGTCCCGCTCGACGACTTCCAGCAACCCGCCGAGCACGGCGCTGCGTACGTCGGAGTGGACGGCGGCACCCGTGGTGATCCCGCGGATGAACTCTTCGGACTTCGACTGGTAGGGCATGTGCAGATACACGGAGATCGCCGGCACGAGCACCGGTATCCGCCGGGTGAGCGACCATGCGCGCACCCATCGGATCGGGACCGAGGGGTCGTACGCGGACAGGCTGCAGTCGTCGCGGGCCAGCTCGGTGGGCGAGCAGCTGGGCCACCGGGACGGCGACACGAACTCCTCTGTGAGGTCGTTCTCGGCGGCGACCACCATCTCGTCGTCGTCCCAGGCACACGTGGAGTACCGTTCGAGCGCTTCCGCGATGGAGACCAGCTTGGCGCGCTCGGGCGTGAGCCCGGTGCCCGCGCCGTCGGAATTTCCGGTGTCCTCGTCGTGGGCCCACGTCCGCAGGTTGGGAAGAGCACGGGACGGGACACCGAGATAGGCCAGCTGGACGGCGAAGGGCGGTTCGCCCTCGCGCACCGGCAGCGGCGCGGTGCGTGACACCAGTCCGTAGGGCGAGACGAGCTCCTCGAGACCGCGCAGCTCGGCCGAGATGTGCTCCTGAGGCCTGTTCAGATGCATGTGGTTCTTCTTCCCGCGTCCGGATGTTCCCCGTGTCCTTGCCGCGCTACTCGGCGGTCTTCGTGGATCCCGACGCCATGCTCCTGGCCAGGCCGTCGAGCTCGTCCTGGAGCTCCTTCATGGACTTTCCGCAGACGTCTTCGTTCGCACGTGCGACGACGTAGCAGAAGACGTACCTCTCCGCCAGGCTGAAACCGAGGGTGTACAGGCAGCTGTACAGCAGGCTGGTCTGCAGCCGGAAGGCCAGGAAGTCGGCGTCGCGGTGCATCAGGCGGTCCAGCTCGGGGCTCGGTGGCGTATGGAAGCGCGTCGGCTCGACCGGGGCGCCTCGCTTGCGAACGAGGTCTTCGAGCGTGTGTCCGGCGTTGACGACGGATCCGTCCGAGAAATTTTCGGCCAGGTGGCTTGATTCAGACGTGATGGATTTCGTCCAAAGCCGAACCATCTCGTCGGCCGGATCGTCGTCCGGATCGCCGCACGCCGTGATGAATTCCCGGACGCCGGCCCCTACCTGTTCGTAGAACCGGGCGCAGGCGGCGTCGAAGGATTGCGGATCCTTGGTGCGCAGATAAATCGCCTCGAAATGCGAACGGTAACTCAGCAGGCGTAGGGAAAGCAGCTCACGGAATTCGTATCCGTCGATTTCTCTCTGCGGTGATCTCAGAAGAGTGGCTCTGGTGTGCGCGGCCATGAGGCGGATGGCACCGAGTGCCGGCGGAGGCGATTGAGGTGTCTCTGCCGCACGATTCAAGAATGAGACGAGAGTCGGTGCGGCCTCCTGGAAGAGCCGCTTGGAGGAAGCGGAGAA
TCCGGTTCCGGTAAACCCCGCGTTCCACAACGGTGAGGGAACCGAATCAAGGGGCGTGGCGTCCACCTGTGCGGCGCACCCATATCGTTCGGCCGTTCTATGCAGGTCAGCGCGGGTGGAGTCCTCGTCGGACACCCCTTCGATGCTGATCTGCAGGGTGTCCGTGCCGAGCTGCGCGTCCTGGGTGCGGACGAAGTAGTAAGGGCCGTGACCGCCTGCTCCACCTACCCGCTCCGGCGCGAACAATTCGCGAATCAGGGGCGCGAACGTGTCGTTGTATTTTGAACCAACTGGCTGCGGGATGGTGGCAAACAAGTGCACGAATGAAACCCCCGAGATTTCGGCCGGCGCCAGACGGGCCGTCACGTGATCTTGTAATCGACCGTAACTCAGAGAGATGAGAAGAAGGAACCCCGGCTCCAGTGTGATCTGCGTCACTCCGGAAACTGCTTGCGTTGTGATCGTCCGAATGCCTAGATTCGAATCACATTGACGAAAGGGGGTGTAATCAATGGAGCAGCAGATCGAACTCGATGTGCTCGAGATTTCGGACCTCATTGCAGGTGCCGGGGAGAACGATGACCTGGCGCAGGTGATGGCCGCCTCGTGCACGACCACCAGTGTTTCGACGAGTTCTTCGTCGTCCTCGTCCTGAATCTAGGGACCGGGAAACAGTTGAGCCACCGTCGGGGTGTTCCTCGGCGGTGGCCTTCTGCAGTCCTGGCGTGCCTTTTTCGATTCGAGGAACGCGCCGTCCCTGGAGGTCGACCAGAGGTGCGGGCGTCGTGCCCCGGACGACGACGGGCTCGTGCCAAGCCGGCCGGCCGAGAGCCGGAACACATGTTGTCAAGCGCGAACTGACCGGTGCGGTGGAGTGACAGGCGGGCTCCGTGCGGCGGGGCGCCTCTCCGCGGCCCGGCCCCGACGTCCGCACGCCGTGGTGAACCGGGCCGGCGTGACGAAGTTGGGGGATTCCTATGGGTGTGAACATCAGTCCGTACGTCGTCTATCGGCGCAGCAGACTCCCACTGGGCGAGCTCGGAGGGATGTCCTTCACCACCGCCTGGTCGCGCATCGATGAACTGCACGCCCTGCGGGACGAGATCGGCAAGAACGCCGTCGGCCTGGCCGACCGCCTCGGCGAGCTCGTGCCTACGCTGGGGGACGACGTCCGGGCCGACCTGATCAGGCTGCGGCGCGACGTGCACAATCTGCGGCACGACCGGGCGGTGGCGCGACTGGAGCCACTGCGTCCGCATCTCGGCCGCGAGGTGGTCGACGAGGTCGAGACCTGGTGCGCGCTCGGCGTGCGGGCCGAACAGTGCGAGCGAGCAGGGCGCGAGGAGCTCGAGAGTGAGAAGGCCCGGGCCGCCGACGGCTTCGGCGCCCTCTTCGAGCACGATGCGATGGCGCGCAGCATCCAACTCTCCGGCGACCGGCTGTACCGGGGCCTGCGCGACCTCGTCGCGGGCGACGAGGCGAGCGCCCTCAAGCCGAGCAAGGCCCGGCTGCGGGAGTCTTCCCTCGTCAACTTCGCCTACCGGGCGAGCTTGAAGCCGTCCCCCTTCGGACGGTTCACCGAGATCGGCGCGTTCCCTCCGGACGACCCGCGCCCCGCGGATCCCGGTGGCCGGCACGGCGGGACGCAGGAGTCGGTCACGACGCTGAACCGTCTCCTCGTGAACTGGGGGCCCCCCGGCCTGCCGCTCGTACCGGGCGGGATGGAGCCGGGGCACCTCGTGCTGAACTCCACGCTGCGGGCCGGCACCGAGTACGTCGAGTACGTCGGTGTCGCTCCCGGCTCCCGTGAGGACGGCCGGATGGCCACCGAGAGGGTGCTGCGCGTACGCCGGGAGGGACTCTTCGACGCACTGCTCGCGGCGATGCCCGAAGGATCGGCTCCGGCGGCCACGGTGCTGCGCGACCTCACCGCCGTCACCGGGAAGGCGGAGACGAGCCGGAAGGTCGTGCAGGGGCTGATCCGGGCCGGCATCCTCTTC
TTCCGGCCGGAGATCGACGATCACGACCCCGACTACTCCATGAAGCTCGACCGCGTACTCGCGGCCGGCGGGACGCCGGAGACGGCCGCGCTACGCGGACACTTCTCCGAACTCAGGCGGTTGGAGACGGACTTCTCCGAGGCGGCGGCCGACGAGAGGCAGAAGCTGCTCGACTCGGCGTACGCGGCGATCGGCGGCATCGCCGAGCTGTGCAAGGTGTCCCCGCCCCCCGAGGAGGTCCTGAAGTCACCGGTCTTCGAGGACACTCCGGCATCCACGGCGCCCCAGGCCTGGAACCTGCCGACGGTGGAGGGGAGCATCCCCGCCCTGACGGGCCTCTGGCGTCTGGCCTCGATGATGGACAACGGCCAGGTGAAGCGACTGGGTCTCTACTCCTTCGCCACCCGCGTGCTCGGCGACCGCAGCACGATGCCCTTCCTCGAGTTCTTCCAGGCCTTCTCGTCGCTGACGGACCAGGAACAGGTCGACGTGTTCATGGGGCGCGACGTGGAGGAGGCCGAGAGGTACACGAGGCAGCGGGCGGAGGCTCTGCGCACGATCCGGCAGCGGCTGGTGCCCGGGGACGGCACCGTGCACCTGGACCCCTCGGTCATCGAGAAGGCCTGCGAGGGCGTGGAGGACCTCCTCGACACGGAATCGGTGACGTTCCGCGCGCAGTTCGCCCAGGGAGTGCTGCCCGACCGGGACCGGACGTTGGTCGTGAACGGCCTGCTCACCGGCTACGGCGTCTACTTCTCACGGTTCGGCTCGTTCGTCGAGGGCACCGACGAATGGTCCCTGCCGGCCGCCCAGCGGGAGCACCTCGCACGCAGGTTCCCCGGCCAGGTCGACCTCAACTCCGTGCTCGGATTCAACTTCAACCTGCACCCCTCGGTGACCCGGCGGGTCGTCAACTACCCCGGCGCGGTGTCGCTCGGCGCCGAGCGGACGGTCTACGGACTGGCGCGTCTGGAGGTCCGCGCGGATCAGGCCACCAGGTCGCTGCGCCTCTGGGACCCTGAGGCGCAGGAAACCCTCGACCTCGTGCCCATGAACTTCATGACCCCGATCGGGGTCCCGCTGCTCTACCGTCTGCTCGAGGCGCTGTCCCCGTCCAACCGCTACCTGTGGAAGCCCCTGGACGACATCAGGGACGCGGGAGGGCCCACGGTGTACGGCGAGACGGCACCCCGGCTGGTCGTGGGTGACGTCGTGGCCGACCGCAGGTCCTGGAACGTGGCCGCGGCCGAGATCCCCATGCTCCAGGATCTGAGCCGGGACGTGCCCGAAGCGCTCGTGGCCTTCGACGCGTGGCGCCTGACGCGGGGCCTTCCCCGCCACGCCTTCGTGCTGTGCCAGACGCCCGAGGAGCGAGACGTCATGGCCGGGCGCAGCCGGAAGGTGACCCGCCAGTGGGCGGACTACGCGCACCTGCGGCGCGCCAGCGTGCACAAGCCGATGTACGTCGACTTCCGGAACCCCTTCCTGGTCCGGAGCTTCGCGAAGTCGGCCCTGTCACGCGGCGATGTCGTCGCGTCGATCCGCGAGTGCCTTCCTTCGGTGGACGACTACGGCCCGGACACGGGCTGGACCGCAGCAGAGGAGTTCTTCGTTGAACTGTGTACCGACAACTAGTGGGCAGACCGGAACGCGTGAGTGGAGGACGGTCCACATCCACGTTCCGCACTCGCTGCACACCCCCTTCCTGTGCGACGTGGTCGAGCCGCTGCTCCGGTCCGAGGGACTCCAGGACCACTTCTTCTTCCTCCGGTACTGGCAGGGCGGCCCCCATCTGCGGCTGCGGATGCTCTGCGGCCCCGGGGCCGGTTCGGCCGAGGCGGCCGAACGGGTCGTCGCGGGTCTGGCACGTGCGATGCCGGAGTTCGGTGCGCAGGCGCGGGAGGAATACGCGCTCGGGCTGACCTTGCAGGACGAGCTCGCCCGCCTGGAGAAGGAGACCTCGGAGGAGGGCCGGCCCATCGGGGCCCTCGACCGGGTGG
CGTACGAGCCGGAGTACCGCAAGTACGGGGGAACGGAGGGGCTGCAGATCGCCGAGACCGTATTCCGCAAGTCGTCGGTGGCGGTCCTCGGCCTGCTGGGCGGGCAACCGCGGGCGTGGGTGGACGAGCGCCGGGCACCGATCGGGGAAGCCGCGAGGATCATGGCGATGTTCCTCCACGGCGCAGGCCTCGACCCGCGGGCCGCAGGGCTGTTCCTGCGGGAGTACGAGGACTGGTGGCGTACGTACGCGCCGGATGACATGCAGCGTGCCTGGCCGAAACTGTTCGGCGGCGTCTCGGCACAGATGACGAATCTGTGCGCGGCGGTCTGGCGTGACGGCGCCACGGACGTGTTCCACGACATCAGCGCGGAGGCCGCCGCCCGCGCCCGTTCCGTGTGCGGGGCGGAGCCCGGCGGCGATGTCCGCGACCTCCGGCTCGACGGCACGCCTTACCCGGGCTGTCTCTCGAACTACGTGCACACCACCAACAACCGTCTCGGCCTGGTCCCCGCCGCCGAGGGGCTCGTCGCGTACCTCGTGCGCCGGGGCCTGGAAGCGATGGACGGGTAGGGCCTGTCGTTGTCGGGATCATGCGGGGCCCCGATGCCCCGGTGCCGGAAGCCCGGAGCCTCCGTCCCGCGCAGGCCCTTCCCTGTGGGGCCTGCGCCGGACGGAGAGCTCACCGGTGCGTCACCAGCTCACCGGAAGCTTGTGCAGGCCGTACACCCCCATGCTGTCCTTGAACGGCAGTTCCGCGACCGGGGTGGCCAGCTTCAGTCCGGGAATCCTCGCGAGGAGGGCGTTGAAGACGACCTCCAGCTCCAGTTTCGCGAGGTTCTGGCCGATGCACTGATGGATGCCGTGGCCGAACGCCAGGTGGTGCCTGCCGCCGCGCTCGATGTCCAGCCTGTCCGGGTCGGGGAACACCGCCTCGTCGTGGTTGCCGGAGGCGCTGAGCCCGATGACGCCCTCACCCGCGCGGATCAGCACCCCGCCGATCTCGAGGTCGGCCGTCGCCACCCGGGAGGTCACCTGGTCGGCGATGCTGAAGTAGCGCAGCAGCTCGTCGACGGCCTGCGGGGCCAGACCCGGGTCGGCCCTGAGCTTGGCCAGCTGGTCGGGGTTCTCCAGCAGGCCCACGACGCCGAGGGAGATCATGTTCGCGGTGGTCTCGTGACCGCCGACCAGGAGGACGTTCGCCATCCCGACCAGTTCCCCGTGGTCGAAGGTGCCGGTCTCCCGGTTCTTCACGACGAGCCTGCCGAGCAGATCGTCCCCCGGGTCCGCTTCCTGAGCGGTGACCAGCTCGGAGAAGTACGCGTGGAGCTCTTGATGCGCGCTGTTGCGCTCCTGTGGGTCGGCGTCGACCGACACCAGCTTGTTGGTGCGGTCCTGGAAGAACGCGCGGTCGCTGCGGGGCACCCCGAGCAGCTCGCAGATCACCAGGGACGGCACCGGGAGGGCGAGCGCCTCGACCAGATCGGCCGGCCCCTCGCCGGCGAGCATGGAGGAGATGCACTCGTCCACGATCTCCTCGACGCGGGGACGCAGTTGGAGCACCCGCTTGACGGTGAACTCCGGGATCACCATCTTGCGCTGCGCCGTGTGTACGGGCGGGTCCATGGCGAGCAGCACGGGCCGCATCTGCTCCATGACCTCCGGCGGCGCATCGAAGTGGAGTGGATAGCCCGGGTGGGCCAGGTTCGAGCTCACATGAGGGTCGGCGAGCAACTGTCGTATGTGCTCGTGCCGGGTGAGCAGCCACGCGGTCCGCCCGGAGGCCAGAGTGACCTTGGTGATCGGCTGCTCGGCACGCAGGGACGCGTACTCCTTCGGCGGGTGGAGCGGGCAGGTCCTGGGGTAGGGATACGCGCTGTCCTGTCGGTCCGTCACGGTCTTCTCCGCGGGTAGTCCAGGGAAATCTCCTTACGGACTCCATTCAAGCGGAAGATGATCCACATCGTGGCGTTATCGGTGAATGAGGCCGAACTCACGTGG
ACCGCAGGGAGGAAAGTCGGCCGCCTTCCCGGCATGGCCCGTCCCGTGCTTTCGGTGGGAGGGGTCGGTGCAGTGCGACATTGCAGTGATCGCGTAATCCGGAATGACCCCTTCCGTGCGGGGATGCGTCGATAGTACGTTGGATTTCATGTGCTCCACATCGTGGAGAGTTCGCTTGCGCCGTCGAAGTCACAGTGTGTGCCGAGGGGGAGTTGGGGCTATGTACCTTTCGATCGTCATGTGGGACCTGAAGAAGTCGGAAGCCACGGTGGAGAGCCTCAGGGAATACCTGCGGGACTATGCCGTGGACGCCTACTCCGCGCTGGACGGAATGCGGCTCAAGGCGTGGTTCTCCGATTCCGCACGTCAGCTGTGGGGTGCGGTCTATCTGTGGGACAGCCCCGAGCAGATGCCCGGCCTGTACAAAGTCAGCCGCGTGATCGATCTCATCGGGTATCCGCCGACTTCGGTCGGTGGTTTCACGCTCGAGGCGACCGCCGAAGGGAAGAGCGTTCACGAGACACTGGCCGGCCTGGGGATCGCCCTGGAGGGCGGAACGCAGTAAGGGCAGGCGTGCGACCGGATGGGCGGGAGGTTTGCCGGGCCGGTCGGTTCGAGGGGCGGTCGAGGGCCGGAGCCGCGCGGGTGGCTTCGCCGCTCCGGCTGGCGGCCGGGTGTGGAGTGCCGCGCGGCGCCGGTGTCGCCCGCGCCGGGCATCACCGTGCTCGCGGAGGGTTCAGGTGTGTCGGACCGGCGCTTTCGCGTGTGTTCGGCACCGTCCGGTGGGCCGGGTGCTGTGCGCGGGCCTCGAGCTCCCGGGCGGGCGCGGTGCGCAGGCCCTCCCCGCCCGGACACGCGTATTCCGCACCGCTTCACGAAGATCATTCGGTGAAGGAGGCGGGGGCGCTCGTGCTAACGTCGTGATCGTGGCCAGCCTTGACATTATTACCGAACGCTCTGATTCTGCCGTACAACGCATCATCGATGTGACAAAGCATTCGAGGTCCGTTGTCCGCACGGTGCTGATCGAGGACATCGAGCCTCTTCTGCAGAGCATCCGTGCCGGAGTGGAATTCATCGAGATCTACGGACTCGACACCGTGCCTGTTCCGGACAGTCTGCTCGCCGAATGTGAACGGCGCAGAATTCCGGTCCGGCTGCTCGCCGCTTCGGTCGCCAATCAGGTCTTCAAGACCGAGAAGAAGCCCAAGGTATTCGGTATCGCCAAGGTCCCGCGGCCTCGTCGCCTGTCGGACCTGTCCGACATGACCGGTGACCTCATCCTGCTCGACGGAGTGAAGATCGTCGGCAATATCGGAGCCATCGTGCGGACCTCGTTCGCGCTCGGGGCCTCGGGAATCGTGCTCGTGGACAGCGATCTCGGCAGTATTGCGGACCGCCGTCTGATCAGGGCGAGCCGGGGCTATGTGTTCTCCCTTCCCATCGTTCTCGCGTCCCGGGCCGAGGCGCTCCAGTACTTCCAGGACAATGCGATGCGCCCGGTGGTGTTCGAGGCCGACGGGGATCTCGGCGTCGCTGATCTCGACGGTATGGACGAGCGACTTGTGCTCATGTTCGGCAGCGAGAGGATCGGCCCGTCGGGCGAGTTCTCCGACATCGCCGCCAAGTCGGTCTCCATTCCGATGAATCCCGCGGCCGAGTCCCTCAACGTATCGGTGTCGGCCGGAATCGCGCTGCACGCGAGGGCCCGCCGTAACCTCTCCCGGTAGTCCCGGCCGCAGAGCCCCGTCAAGGGCCCCGTCCCTCCCCTCCGGGAGGGGCGGGGCCCTTGACGTGCGCCGCCGGGAGCCCTCGCTACCGCGGCCGCCACATCCAGGGCGCCGCGTCCGGACCCAGGCCGGCGACCGAAGGCCGGCCGTCCGGGCCCAGGCGGAGCGAGGCCCCGTCCAGGGCGGCCGGCCCGTCGGTCCGCACGTGGAGCCCGCCGCCCATGAGCAGCTGGACGAGGCCCTCGGCGGTGCGGCCCAGCAGCACGGGCC
CCCGCGGCGACGGGGCGGCGTCCACCGNNCCGTACCCGTCGAACCGTGCTCCGGGCACGGCCGTCCCCGCCCGAGCCGTGGTCAGCCCGCTCCCGGCCGCTGCCCGGTAGTAGAGGGACACGGACCCGTCGGGAGCCGGGAGGGCGGCCGGCGCGTGGGCCGGGACGGGCGCCGCCGTGAGCTGTGTGCGAGCCGTGAGCCCGGCGGACGGGGTGTCCTGGGTCCAGTGGTGTACGGCGTGGTGGCCGGCGCCGAAGACGTGGACGCGTCCACCCTCGTCCACGGCGGTGTGCAGGCCGTCCTGCACCTCTCCGCCGCCCATGTCCCGCCACGCGCTCCACCGCCCCGCCCCGTCCCGCACCCGGGTGCTGACGCCCTTCTCGGCGTCGCGTACGAAGAGATGGATCTGCCCGTCCGGAGCGGCGACGGCCACCGGTACGCCCGTGCGACGTACTTCGTCACGTCCGGGAGAAGGAGAGCCCAGGCCGCGCCAGGGGCGGAAGCCCCGGCCCGGGGCGCTCTGCTCCAGCACCACGATCTCGCGCTCGTTGTCGGCGCCGTGCCCGCCGAGTGCGGCGAAGCGGAGCCCGAACAGCAGCAGGCGCCCGTCCCGTGTGGTGGCCGAGCCCAGTGCGGGGGCGAGCGGGCCGCCGCCTAGGTCGTGCGGAGGCCCCCAGGCACCGCTGCCCGGCCCGGTCTCCTGCCACCGCACCACCCGCAGCCCCAGCACGGCATAGGCCGCGAGCCTGCCGTCCGGCTCGGCGGTGAGGACCGTGCGCGTGCCCGGGTAGCGGTGGTGGGTGGAGCGGACCCAGCCCTTGCGGTTGGTGAGCGGGCGGTCGCCGCCGACGTTGTAGTCACCGCAGCCGGACGGATTGCCGCAGTCCCAGTCGGGCGAGCCCCCGTAGGGGACGAGGTGGGCGGCCTTCCTCGCCAGTACCCCCTGCGGCAGGTTCTTCGGCCAGTGCCGGTTGTAGTAACCGCGGTAGGCGACCGTGACGAAGCCCGGTATCCGGCCACCGTCCGCGGTCGCCCGGGCCACCCAGCGGATCATCGCGGCCCACGCGAAGCAGGCCGCGGCCGTGTGGTCGGCGTGGTCGGAGTAGCCGGGCTGTTCGCTGTCCCTGCGGCGGACGGCCTCCGTGCTGTGCTGGATGTCCGGGTCGGGGTCCAGGGTGTGGACGACGGTGGGCCGGTAGCTCTCCATCAGGCCGGTGAGGACCCCGACCAGCCCGTCGTACGTGTACGAGCCGGCGCGCCGCAGCGGCGATCCGTCGGCCACGACCGTACGCAGCACGAGCCGGCGATCCTGCCAGAGACTGGGCAGCCCGAGCCGGTGCCGGCCGGTGTGCATGGCCGTGTTGAGGAAGATCAACTCCACTCTGCGGCCGTCGGCCGCCAGCACGTTCACCTCGGCCCTGTGGTCCGGGCCGAGCTCGGCGACGGAGACCTCCCAGGGGGTGAAGGGCCGCAGCCCGAGCAGGGCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Regex_Find_And_Replace_on_data_21.fasta	Thu Aug 08 11:58:48 2024 +0000
@@ -0,0 +1,119 @@
+>BGC0001472_1 # 312 # 683 # 1 # ID=1_1;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.642
+MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKPNSALRKVARVRLTSG
+IEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDTQGVKNRKQARSRYGAK
+KEK
+>BGC0001472_2 # 686 # 1156 # 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.660
+MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYGAMEGLREKTGADPVI
+TLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWVVGYSRARREKTMTERL
+MNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW
+>BGC0001472_3 # 1195 # 3324 # 1 # ID=1_3;partial=00;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.652
+MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVHDGAATMDWMEQEQER
+GITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVLDGAVTVFDGVAGVEPQ
+SETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAVPIVMQLPIGAEADFRGV
+VDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLLEAVSENDDQMMELYLEGE
+EPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGVQPLLDAVVRYLPSPLDVEA
+IEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLTFVRIYSGRLEAGTAVLNSVK
+GKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTTGETLCDDKNPVILESMDFPAP
+VIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEETGQTIIGGMGELHLEVLVDRMK
+REFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGGTGQFAKVQIAIEPIEGGDASYEF
+VNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMVGVRVTLLDGGYHEVDSSELAFKIA
+GSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEVVGDINSRRGQIQAMEERHGARVVKG
+LVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEVPRNVAEEIIAKAKGE
+>BGC0001472_4 # 3472 # 4665 # 1 # ID=1_4;partial=00;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.642
+MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNEASAFDQIDKAPEERQ
+RGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGAILVVAATDGPMPQTKE
+HVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEYEFPGDDLPVVKVSALKA
+LEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFTITGRGTVVTGRIERGVLK
+VNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLLLRGIKREDVERGQVIIKPG
+SVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTDVTGVVTLPEGTEMVMPGDNT
+LMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK
+>BGC0001472_5 # 4869 # 5570 # -1 # ID=1_5;partial=00;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.712
+MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPSAEDLRPVHDLRGTLE
+RRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEGFVFAFRSEGAEPGLYR
+VTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRADTWAGSHGYRISALRAS
+MATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSLLATTYARPPES
+>BGC0001472_6 # 5567 # 7195 # -1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.709
+MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDGADRAQVFSGAFAREG
+LVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEAMSGEEPDVTPEWAVFL
+SRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLREVCPVVTEPAGPPGPGDE
+LTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGPYADLSITPCLDCGRHGEA
+DLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVIDTATLSTVYRPVAVRPGCP
+RCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHYYASNLRLQSQFKDWPSRPHT
+PLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFGVKEDETTPERVKRWTAASGNI
+GSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPPGDSPCDIIITGDLKKVMTKYGT
+FGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDDALARLLGTSPADEPVAAFASLGG
+TA
+>BGC0001472_7 # 7210 # 7821 # -1 # ID=1_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.712
+MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPHADVVVPVHAGGDPGL
+RDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRKRAAQHAGTARPYDMDA
+ALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFNLVSGAVSSAVTVSVNRC
+PRCGGRFSQARADSAMPVPELLR
+>BGC0001472_8 # 7845 # 9191 # -1 # ID=1_8;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.709
+MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQLAYLGVPSRALPNLR
+TWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDEMVVAAENDLTEEFVSP
+SRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLVPAISVYLHMPYQSKSEE
+FIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRLPELVVDPARLDAGVRELH
+RVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVAATCDVHPEQALGKIYRELA
+SLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDVFGFLLDGERPAYGLEGMPGL
+PAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAVKVLVPEAMPVSFVHGERYLGT
+PRLYDAPRAMGHTSHAEDAVNPVQQPFA
+>BGC0001472_9 # 9238 # 10437 # -1 # ID=1_9;partial=00;start_type=GTG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.621
+MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFATIPQPVGSKYNDTFAP
+LIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSDEDSTRADLHRTAERYG
+CAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLVSFLNRAAETPQSPPPAL
+GAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHFEAIYLRTKDPQSFDAACA
+RFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESSHLAENFSDGSVVNAGHTLE
+DLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQTSLLYSCLYTLGFSLAERYVF
+CYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
+>BGC0001472_10 # 10511 # 10654 # 1 # ID=1_10;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.590
+MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSSSS
+>BGC0001472_11 # 10977 # 13634 # 1 # ID=1_11;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.701
+MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGKNAVGLADRLGELVPT
+LGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVETWCALGVRAEQCERAG
+REELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLVAGDEASALKPSKARLRE
+SSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGGTQESVTTLNRLLVNWGPP
+GLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGRMATERVLRVRREGLFDALL
+AAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGILFFRPEIDDHDPDYSMKLDRV
+LAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLDSAYAAIGGIAELCKVSPPPEE
+VLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLASMMDNGQVKRLGLYSFATRVLG
+DRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERYTRQRAEALRTIRQRLVPGDGTVH
+LDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDRDRTLVVNGLLTGYGVYFSRFGSFV
+EGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFNLHPSVTRRVVNYPGAVSLGAERTVY
+GLARLEVRADQATRSLRLWDPEAQETLDLVPMNFMTPIGVPLLYRLLEALSPSNRYLWKP
+LDDIRDAGGPTVYGETAPRLVVGDVVADRRSWNVAAAEIPMLQDLSRDVPEALVAFDAWR
+LTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQWADYAHLRRASVHKPMYVDFRNPFLVRS
+FAKSALSRGDVVASIRECLPSVDDYGPDTGWTAAEEFFVELCTDN
+>BGC0001472_12 # 13612 # 14571 # 1 # ID=1_12;partial=00;start_type=TTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.706
+MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGLQDHFFFLRYWQGGPH
+LRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLTLQDELARLEKETSEEG
+RPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLGGQPRAWVDERRAPIGEA
+ARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWPKLFGGVSAQMTNLCAAVW
+RDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPYPGCLSNYVHTTNNRLGLVP
+AAEGLVAYLVRRGLEAMDG
+>BGC0001472_13 # 14692 # 15894 # -1 # ID=1_13;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.685
+MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWLLTRHEHIRQLLADPH
+VSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVIPEFTVKRVLQLRPRVE
+EIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDRAFFQDRTNKLVSVDADP
+QERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGTFDHGELVGMANVLLVGGH
+ETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRYFSIADQVTSRVATADLEIG
+GVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHLAFGHGIHQCIGQNLAKLELE
+VVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVSW
+>BGC0001472_14 # 16220 # 16564 # 1 # ID=1_14;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.626
+MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFSDSARQLWGAVYLWDS
+PEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAGLGIALEGGTQ
+>BGC0001472_15 # 17019 # 17729 # 1 # ID=1_15;partial=00;start_type=GTG;rbs_motif=3Base/5BMM;rbs_spacer=13-15bp;gc_cont=0.636
+MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPVRLLAASVANQVFKTE
+KKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVRTSFALGASGIVLVDSD
+LGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVVFEADGDLGVADLDGMDE
+RLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSAGIALHARARRNLSR
+>BGC0001472_16 # 17815 # 19485 # -1 # ID=1_16;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.756
+ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAMHTGRHRLGLPSLWQD
+RRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHTLDPDPDIQHSTEAVRR
+RDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFVTVAYRGYYNRHWPKNLP
+QGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNRKGWVRSTHHRYPGTRTVL
+TAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGPLAPALGSATTRDGRLLLFG
+LRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSPGRDEVRRTGVPVAVAAPDGQ
+IHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLHTAVDEGGRVHVFGAGHHAVHH
+WTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSLYYRAAAGSGLTTARAGTAVPGA
+RFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLHVRTDGPAALDGASLRLGPDGRPS
+VAGLGPDAAPWMWRPR
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Sanntis_output_data.genbank	Thu Aug 08 11:58:48 2024 +0000
@@ -0,0 +1,479 @@
+LOCUS       BGC0001472             19486 bp    DNA              UNK 01-JAN-1980
+DEFINITION  BGC0001472.
+ACCESSION   BGC0001472
+VERSION     BGC0001472
+KEYWORDS    .
+SOURCE      .
+  ORGANISM  .
+            .
+FEATURES             Location/Qualifiers
+     CDS             312..683
+                     /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP
+                     NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT
+                     QGVKNRKQARSRYGAKKEK"
+                     /protein_id="BGC0001472_1"
+     CDS             686..1156
+                     /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG
+                     AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV
+                     VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW"
+                     /protein_id="BGC0001472_2"
+     CDS             1195..3324
+                     /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH
+                     DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL
+                     DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV
+                     PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL
+                     EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV
+                     QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT
+                     FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT
+                     GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE
+                     TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG
+                     TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV
+                     GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV
+                     VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV
+                     PRNVAEEIIAKAKGE"
+                     /protein_id="BGC0001472_3"
+     CDS             3472..4665
+                     /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE
+                     ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA
+                     ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY
+                     EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT
+                     ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL
+                     LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD
+                     VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK"
+                     /protein_id="BGC0001472_4"
+     CDS             4869..5570
+                     /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS
+                     AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG
+                     FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA
+                     DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL
+                     LATTYARPPES"
+                     /protein_id="BGC0001472_5"
+     CDS             5567..7195
+                     /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG
+                     ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA
+                     MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE
+                     VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP
+                     YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI
+                     DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY
+                     YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG
+                     VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP
+                     GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD
+                     ALARLLGTSPADEPVAAFASLGGTA"
+                     /protein_id="BGC0001472_6"
+     CDS             7210..7821
+                     /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH
+                     ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK
+                     RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN
+                     LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR"
+                     /protein_id="BGC0001472_7"
+     CDS             7845..9191
+                     /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ
+                     LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE
+                     MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV
+                     PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL
+                     PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA
+                     ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV
+                     FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV
+                     KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA"
+                     /protein_id="BGC0001472_8"
+     CDS             9238..10437
+                     /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT
+                     IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD
+                     EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV
+                     SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF
+                     EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS
+                     HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT
+                     SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
+                     "
+                     /protein_id="BGC0001472_9"
+     CDS             10511..10654
+                     /translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS
+                     SS"
+                     /protein_id="BGC0001472_10"
+     CDS             10977..13634
+                     /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK
+                     NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE
+                     TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV
+                     AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG
+                     TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR
+                     MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL
+                     FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD
+                     SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA
+                     SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY
+                     TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR
+                     DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN
+                     LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF
+                     MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN
+                     VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ
+                     WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW
+                     TAAEEFFVELCTDN"
+                     /protein_id="BGC0001472_11"
+     CDS             13612..14571
+                     /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL
+                     QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT
+                     LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG
+                     GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP
+                     KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY
+                     PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG"
+                     /protein_id="BGC0001472_12"
+     CDS             14692..15894
+                     /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL
+                     LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI
+                     PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR
+                     AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT
+                     FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY
+                     FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL
+                     AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS
+                     W"
+                     /protein_id="BGC0001472_13"
+     CDS             16220..16564
+                     /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS
+                     DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG
+                     LGIALEGGTQ"
+                     /protein_id="BGC0001472_14"
+     CDS             17019..17729
+                     /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV
+                     RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR
+                     TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV
+                     FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA
+                     GIALHARARRNLSR"
+                     /protein_id="BGC0001472_15"
+     CDS             17815..19485
+                     /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM
+                     HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT
+                     LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV
+                     TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR
+                     KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP
+                     LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP
+                     GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH
+                     TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL
+                     YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH
+                     VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR"
+                     /protein_id="BGC0001472_16"
+ORIGIN
+        1 gccccggggg ccgtcgctcc gggggtcggt cctgcccggt ggcgcaggac cacgggggcc
+       61 ggggcccggg ggtggacggc atttgttttg acccagctcc gtgaggtagg tacgctcaag
+      121 ccttgtgcct ggggtgtgcc tgggctcggg tgcgtgtcct caaccgcatg gcgagtccgt
+      181 aagtggccac cgcaatctgt gttccgtctg ccttccagca ggggcgtgca gtattcgaca
+      241 cacccgaccg cgtgggtcgg tgactgttcc aggttagttt caccgaacgg cacacagaaa
+      301 ccggagaagt agtgcctacg atccagcagc tggtccggaa gggccggcag gacaaggtcg
+      361 agaagaacaa gacgcccgcg ctcgagggtt cgccccagcg tcgtggtgtc tgcacgcgtg
+      421 tgttcacgac caccccgaag aagccgaact cggcgctccg taaggtcgcg cgtgtgcgtc
+      481 tgacctccgg tatcgaggtc acggcctaca tcccgggtga ggggcacaac ctgcaggagc
+      541 actccatcgt gctcgtgcgt ggtggccgtg tgaaggacct gccgggtgtt cgttacaaga
+      601 tcatccgcgg ttcgctcgac acccagggtg tcaagaaccg caagcaggcc cgcagccgct
+      661 acggcgccaa gaaggagaag taagaatgcc tcgtaagggc cccgccccga agcgcccggt
+      721 catcatcgac ccggtctaca gctctcctct tgtcacctcg ctgatcaaca agatcctgct
+      781 cgacggcaag cgttccaccg ccgagcggat cgtgtacggc gccatggaag gcctccgcga
+      841 gaagaccggc gctgacccgg tcatcacgct gaagcgcgcg cttgagaacg tcaagccctc
+      901 gctcgaggtc aagtcccgcc gtgtcggtgg cgccacctac caggtgccga tcgaggtcaa
+      961 gcccggtcgc gccgccaccc tcgctctgcg ctgggtcgtg ggttactccc gcgcccgtcg
+     1021 cgagaagacc atgaccgagc gcctcatgaa cgagctgctc gacgcctcca acggtcttgg
+     1081 cgctgccgtc aagaagcgcg aggacaccca caagatggcc gagtcgaaca aggccttcgc
+     1141 gcactaccgc tggtagtcgc tcaccccatc gagaccgaga gaagattgag ccttatggcc
+     1201 accacttcgc ttgacctggc caaggtccgc aacatcggga tcatggccca catcgacgcg
+     1261 ggcaagacga ccaccaccga gcggatcctc ttctacaccg gcgtttcgta caagatcggt
+     1321 gaagtccacg acggcgcagc cacgatggac tggatggagc aggagcagga gcgcggcatc
+     1381 acgatcacgt ccgccgcgac gacctgtcac tggccgctca atgatgttga ccacaccatc
+     1441 aacatcatcg acaccccggg tcacgtcgac ttcaccgtcg aggtggagcg ttcgctccgc
+     1501 gtcctcgacg gtgccgtcac cgtgttcgac ggtgtggccg gcgtcgagcc ccagtccgag
+     1561 accgtctggc gtcaggcgga ccgctacggc gtgccgcgta tctgcttcgt caacaagctc
+     1621 gaccgcacgg gcgccgactt cctccgttgc gtcgacatga tcgtccagcg cctcggcgct
+     1681 gtcccgatcg tcatgcagct ccccatcggt gcggaggctg acttccgcgg cgtcgtcgac
+     1741 ctcgtgtcga tgaaggcctt cgtttacccc gaagaggccg tcaagggcga gatgtacgac
+     1801 accgtcgaga tcccggacaa cctcaaggag gccgccgagg aatggcgcgg caagctcctc
+     1861 gaggccgtct cggagaacga cgaccagatg atggagctgt acctcgaggg cgaagagccc
+     1921 accgaggagc agctgcacga ggcgatccgt cggatcaccc tcgcgtcgaa gggctcggcc
+     1981 gactccgtca ccgtgacccc cgtcttctgt ggcacggcgt tcaagaacaa gggcgtccag
+     2041 cccctgctcg acgccgtcgt ccgctacctg ccttcccccc tggacgtcga ggccatcgag
+     2101 ggccacgacg tcaaggaccc ggagaaggtc gtccagcgga agccctcgga cgacgagccg
+     2161 ttctccggcc tggcgttcaa gatcgcgagc gacccgcacc tcggcaagct caccttcgtc
+     2221 cggatctact ccggtcgcct cgaggccggc accgcggtgc tgaactcggt caagggcaag
+     2281 aaggagcgca tcggcaagat ctaccgcatg cacgcgaaca agcgtgagga gatcccgtcg
+     2341 gtgggcgccg gtgacatcgt cgccgtcatg ggcctgaagc agaccaccac cggtgagacg
+     2401 ctgtgtgacg acaagaaccc ggtgatcctg gagtccatgg acttcccggc gccggtcatc
+     2461 caggtcgcca tcgagcccaa gtccaagggt gaccaggaga agctgggtgt cgccatccag
+     2521 cgcctctcgg aggaggaccc ctccttccag gtgcactccg acgaggagac cggccagacc
+     2581 atcatcggtg gtatgggcga gcttcacctc gaggtgctcg tcgaccgcat gaagcgcgag
+     2641 ttccgcgtcg aggcgaacgt cggcaagccg caggtcgcgt accgtgagac gatccgcaag
+     2701 gccgtcgagc gtatcgacta cacgcacaag aagcagactg gtggtaccgg ccagttcgcg
+     2761 aaggtgcaga tcgccatcga gcccatcgag ggtggcgacg cgtcctacga gttcgtcaac
+     2821 aaggtcaccg gtggccgcat cccccgtgag tacattccct cggtggacgc gggtgcccag
+     2881 gaagccatgc agttcggcat cctggccggc tacgagatgg tgggcgtccg cgtcaccctt
+     2941 ctcgacggtg gttaccacga ggtcgactcc tcggagctcg ccttcaagat cgctggttcg
+     3001 caggcgttca aggagggtgc ccgcaaggcg tcccccgtgc tcctcgagcc gatgatggcc
+     3061 gtcgaggtca ccacacccga ggactacatg ggtgaagtgg tcggcgacat caactcccgc
+     3121 cgtggccaga tccaggccat ggaggagcgc cacggcgctc gcgtcgtgaa gggcctcgtg
+     3181 cccctctcgg agatgttcgg ctacgtcgga gacctccgca gcaagacctc gggtcgcgca
+     3241 agctactcga tgcagttcga ctcctacgcc gaggttccgc ggaacgtcgc cgaggagatc
+     3301 atcgcgaagg ccaagggcga gtaactcttc cgagctcacg ctttaggctt gtcaccggag
+     3361 cccggtcggg catgcgtcgc agtgcggcgg atgcccccgg caccggcatt ccagcaaaga
+     3421 tcacctggcg ccgatgaagc aaggcgtaca gaaccactca ggaggacccc agtggcgaag
+     3481 gcaaagttcg agcggactaa gccgcacgtc aacatcggca ccatcggtca catcgaccac
+     3541 ggtaagacga ccctcacggc cgccattacc aaggtgctgc acgacgcgta cccggacctg
+     3601 aacgaggcct cggccttcga ccagatcgac aaggctcctg aggagcgtca gcgcggtatc
+     3661 acgatctcga tcgcgcacgt cgagtaccag acggagtcgc gtcactacgc gcacgtcgac
+     3721 tgcccgggtc acgctgacta catcaagaac atgatcacgg gtgcggcgca gatggacggc
+     3781 gccatcctcg tggtcgcggc caccgacggc ccgatgccgc agaccaagga gcacgtgctc
+     3841 ctggcccgcc aggtaggcgt gccgtacatc gtcgtcgcgc tgaacaaggc cgacatggtg
+     3901 gacgacgagg agatcctgga gctcgtcgag ctcgaggtcc gtgagctcct ctccgagtac
+     3961 gagttcccgg gcgacgacct tccggtcgtc aaggtctcgg cgctcaaggc cctcgagggc
+     4021 gacgccgagt ggggccagac cgttctcgac ctgatgaagg ccgtcgacga gtccatcccg
+     4081 cagcccgagc gtgacgtcga gaagccgttc ctcatgccca tcgaggacgt cttcacgatc
+     4141 accggtcgcg gtacggtcgt caccggccgc atcgagcgtg gtgtcctgaa ggtcaacgag
+     4201 accgtcgaca tcgtcggtat caagaccgag aagaccacca ccacggtcac cggcatcgag
+     4261 atgttccgca agctgctcga cgagggccag gccggtgaga acgtcggtct gctgcttcgt
+     4321 ggcatcaagc gcgaggacgt cgagcgcggc caggtcatca tcaagccggg ttcggtcacg
+     4381 ccgcacaccg agttccaggc ccaggcctac atcctgtcga aggacgaggg tggccgtcac
+     4441 acccccttct tcaacaacta ccgcccgcag ttctacttcc gtaccacgga cgtgacgggc
+     4501 gttgtgaccc ttcccgaggg caccgagatg gtcatgccgg gtgacaacac cctcatggac
+     4561 gtcgcgctga tccagccggt cgccatggaa gagggcctga agttcgccat ccgtgagggt
+     4621 ggtcgtacgg tgggcgccgg ccaggtcacc aagatcacca agtaattccg attacttgtg
+     4681 ggtcggggta acccggttgc tctgaactga gcgcacagca ccaagcaggg cccgcacggc
+     4741 atcacgccgt gcgggccctg tgctgtctcc gcggggctcg cggcggacag gacgacgggc
+     4801 gcggcccgca caccctgagg gtgtgcgggc cgtcgtgacg cgggcgccgt tcgcgtcggg
+     4861 aggtgcggtc agctctccgg tggccgggcg tatgtggtgg ccagcaggga atggcgggtc
+     4921 gccccgtcgc tgtggaccag atggtgcacg gacgacggga cgaagccgcc gaacagtgtg
+     4981 ccgacgagcc ccagcgcctg gcacctgaga ttgaggtcgt aggtggccat cgaggcccgc
+     5041 agcgcactga tccggtagcc gtgggaaccc gcccaggtgt ccgcccggtc cagactcgcg
+     5101 tagagcgcga cgatccccgc tccggtggag aactcgcgct ggacgccgag gttctcggcc
+     5161 ggtccgatct cgtcgagccc ggccaggtag cacgtctcct cggcggtcac ccggtacagc
+     5221 cctggttcgg cgccctcgct gcggaacgcg aagacgaagc cctccagcgc gcccgccgag
+     5281 gcgtcgagcc cccagtcgtc acggtcccgg cgcagcacgt cacgcagcag cgacaggatc
+     5341 acgtccgtgc gtaccggcag tggcgcgtag tgcagcgagg agcggcggcg ttcgagcgtg
+     5401 ccccgcaggt cgtggaccgg ccgcaggtcc tcggcagacg gttgcggttc gacgggcacg
+     5461 gagtccgcga acggcaccgg ccggcgttcg gccgccaggc cgtccgacgt gggcgtccgg
+     5521 gaggtgaagc cctccagcac ggccgtcatg gatgtcacgt cgttcctcat gcggtacctc
+     5581 ccagggacgc gaaggccgcc accggctcgt cggcaggcga tgttccgagc agccgggcca
+     5641 acgcgtcgtc gtcccagtcc gacctgggtg tgaagcccag gcccaggtgc tgtgcgagct
+     5701 cccgcagact ggccaggttg cagcccgcgt ccagaaagac cagcctgaag ccgaacgtgc
+     5761 cgtacttggt catgaccttc ttgagatcgc cggtgatgat gatgtcgcac ggcgagtcac
+     5821 ccggaggtac ctcgccggag acggtcacga gtgtgtggct gccctgggcg tacgcgtaga
+     5881 ccccaggagg catgatccgg tcgtcgcgca ccaccgcgta ggccgtcgtg ctgccgatgt
+     5941 ttcccgacgc cgcggtccat cgtttgacgc gctccggagt ggtctcgtcc tccttcaccc
+     6001 cgaaggcgac cttcagcagc agtcccaggg agctcagggt gagcggggtg tcgccgtggg
+     6061 acgggtcatg ccgttccgac ccggcgagga cggagatgtc cagggcgggg agcggtgtgt
+     6121 gcgggcggct cggccagtcc ttgaactgcg actgcaggcg caggttggac gcgtagtagt
+     6181 gcgcctggtg gtccttcgga gcgaggaacg cgcgtggagg catcgcgacc gaggcctcgt
+     6241 agacggcacc ggcgggtgcc tggggtgcga ccggcccccg cgcgtaggag cagcgagggc
+     6301 accccgggcg gacggcaacc ggccggtaga ccgtgctcag ggtcgcggtg tcgatgacgg
+     6361 tgaagtcgcc ggggaggtgc gagatcgtgg cccgtgcgag caacgcggtg acgtggtggg
+     6421 aggccaggcc gacgaccagg tcgtgcaggt attccggcgg ctcgccggag aggtccgcct
+     6481 caccgtgacg gccgcagtcc aggcacggtg tgatcgagag gtcggcgtac gggccgatcg
+     6541 tgatcgtccg cgcgtcggcg cgcacccgca gcagcggacg cccgtcctgc cggcaccgtt
+     6601 cctcggtggc ggcgagcagg ggtgccgacg cgggcgtctc gaagaacacg gtgagctcgt
+     6661 caccggggcc cggcggcccg gccggttccg tgaccaccgg gcacacctcc cgcagcgacc
+     6721 ggcgggcccc ggcgaccagg gcggcgtccc cctcgaggcg caccgagcgc gacaccagcc
+     6781 gcgccgcggc atccgcccag gacgggttcg atccggtgga gttgcccagc cgggagagga
+     6841 agacggccca ttcgggcgtg acgtccggtt cctcgccgga catggcctcc tcgaccgccc
+     6901 cggccgtcga cagcaaggcc aggcacttgt agaccgtggc ctcgtcgaat ccggtcttga
+     6961 gcgccagttc agtgtggtcg cgggttccgt cgcacgcctc ggtcaggggc accagccctt
+     7021 ctcgtgcgaa cgctccggag aacacctgtg cccggtcggc cccgtcgagc acgacggatt
+     7081 cccctgccct gcgcagcctc accccgcggc gcaggacggg ccgggcaggg acggtcaact
+     7141 gcatgtccgt acgtgcggcc cgcccgatct gctcggcctt catctcggca accatgatcg
+     7201 atctccctct taccgcagca gttccggaac aggcatggcg ctgtccgcgc gcgcctgcga
+     7261 gaaccggccc ccgcaccggg ggcaccggtt caccgagacc gtcaccgcgg acgagacggc
+     7321 tccggagacg agattgaacg tacgtacggt gcccccgatc cccgtcacac ccgtggcgat
+     7381 ctcggtcagg gcgaggtcca ggaggccgga ggcgacggac aggtgctgcc ggccgaaccc
+     7441 ctcgggcagg ccggacaggg cggcgtccat gtcgtacggg cgggcggtgc ccgcgtgttg
+     7501 ggcggcgcgc ttcctgtagc acgcgtagca ggccgtccgg cccggaacca cgacgggtcc
+     7561 gcagaggacc ttggtcggca gcagctggag cccgacggac ggtgtgctcc gttccgcgca
+     7621 gatccggtcc gtctcgtcgc ggagcccggg gtcgccgccg gcatgcacgg ggacgacgac
+     7681 atccgcgtgc ggtgccgtgt cgttcaggaa gtcctcgaag gacacctgta gctccggggg
+     7741 gacaccacgg tgctccgtga gccgccggct gaaggtgtca cccacgaggt acaggcactg
+     7801 cggacgcgga tcgtgcgaca tgctgctctg ctccttcttc tcgctcatgc gaacggctgc
+     7861 tgcacggggt tgaccgcgtc ctcggcatgg ctcgtgtgcc ccatcgcgcg cggcgcgtca
+     7921 tagaggcgcg gggtgcccag gtaccgctcg ccgtgtacga acgacacggg catggcctcg
+     7981 ggcacgagca ccttgaccgc ccgcatgccc acctggcgcg cctcgtccgt ggtgatgtcc
+     8041 gtgacgagca cctcggcacc ccgtgccgcg agccgggcga cgacggtgtc cagcgggtcc
+     8101 gctcccgccg gaagccccgg catgccttcg aggccgtacg ccggccgttc accgtccagc
+     8161 aggaagccga acacgtcccg ccggtcgcgc gtcgcgttgt ggaccgcgcc gccgacgacg
+     8221 ctcaccttgg ccggatcggg ctcgcgcccg gcgtacgccg agaggtatcc gcgcaacgcc
+     8281 acccgcaggg aggccagttc gcggtagatc ttgcccagcg cctgttccgg atggacgtcg
+     8341 caggtggcgg cgacgatctg ggcgagcgcg gggtcggcgt cggagagctg cacggcgtag
+     8401 atcaccggga cgccgaagtc cgtcgtcgcg tcgaacagcc tcacccgcag gtccgtggag
+     8461 gtgccgacgc ggtgcagttc ccggacgccg gcgtcgagcc gcgccggatc cacgaccagc
+     8521 tcgggcagcc gcagctgctg gagccagacc agcgcgatgg cgtcccgctc gacgacttcc
+     8581 agcaacccgc cgagcacggc gctgcgtacg tcggagtgga cggcggcacc cgtggtgatc
+     8641 ccgcggatga actcttcgga cttcgactgg tagggcatgt gcagatacac ggagatcgcc
+     8701 ggcacgagca ccggtatccg ccgggtgagc gaccatgcgc gcacccatcg gatcgggacc
+     8761 gaggggtcgt acgcggacag gctgcagtcg tcgcgggcca gctcggtggg cgagcagctg
+     8821 ggccaccggg acggcgacac gaactcctct gtgaggtcgt tctcggcggc gaccaccatc
+     8881 tcgtcgtcgt cccaggcaca cgtggagtac cgttcgagcg cttccgcgat ggagaccagc
+     8941 ttggcgcgct cgggcgtgag cccggtgccc gcgccgtcgg aatttccggt gtcctcgtcg
+     9001 tgggcccacg tccgcaggtt gggaagagca cgggacggga caccgagata ggccagctgg
+     9061 acggcgaagg gcggttcgcc ctcgcgcacc ggcagcggcg cggtgcgtga caccagtccg
+     9121 tagggcgaga cgagctcctc gagaccgcgc agctcggccg agatgtgctc ctgaggcctg
+     9181 ttcagatgca tgtggttctt cttcccgcgt ccggatgttc cccgtgtcct tgccgcgcta
+     9241 ctcggcggtc ttcgtggatc ccgacgccat gctcctggcc aggccgtcga gctcgtcctg
+     9301 gagctccttc atggactttc cgcagacgtc ttcgttcgca cgtgcgacga cgtagcagaa
+     9361 gacgtacctc tccgccaggc tgaaaccgag ggtgtacagg cagctgtaca gcaggctggt
+     9421 ctgcagccgg aaggccagga agtcggcgtc gcggtgcatc aggcggtcca gctcggggct
+     9481 cggtggcgta tggaagcgcg tcggctcgac cggggcgcct cgcttgcgaa cgaggtcttc
+     9541 gagcgtgtgt ccggcgttga cgacggatcc gtccgagaaa ttttcggcca ggtggcttga
+     9601 ttcagacgtg atggatttcg tccaaagccg aaccatctcg tcggccggat cgtcgtccgg
+     9661 atcgccgcac gccgtgatga attcccggac gccggcccct acctgttcgt agaaccgggc
+     9721 gcaggcggcg tcgaaggatt gcggatcctt ggtgcgcaga taaatcgcct cgaaatgcga
+     9781 acggtaactc agcaggcgta gggaaagcag ctcacggaat tcgtatccgt cgatttctct
+     9841 ctgcggtgat ctcagaagag tggctctggt gtgcgcggcc atgaggcgga tggcaccgag
+     9901 tgccggcgga ggcgattgag gtgtctctgc cgcacgattc aagaatgaga cgagagtcgg
+     9961 tgcggcctcc tggaagagcc gcttggagga agcggagaat ccggttccgg taaaccccgc
+    10021 gttccacaac ggtgagggaa ccgaatcaag gggcgtggcg tccacctgtg cggcgcaccc
+    10081 atatcgttcg gccgttctat gcaggtcagc gcgggtggag tcctcgtcgg acaccccttc
+    10141 gatgctgatc tgcagggtgt ccgtgccgag ctgcgcgtcc tgggtgcgga cgaagtagta
+    10201 agggccgtga ccgcctgctc cacctacccg ctccggcgcg aacaattcgc gaatcagggg
+    10261 cgcgaacgtg tcgttgtatt ttgaaccaac tggctgcggg atggtggcaa acaagtgcac
+    10321 gaatgaaacc cccgagattt cggccggcgc cagacgggcc gtcacgtgat cttgtaatcg
+    10381 accgtaactc agagagatga gaagaaggaa ccccggctcc agtgtgatct gcgtcactcc
+    10441 ggaaactgct tgcgttgtga tcgtccgaat gcctagattc gaatcacatt gacgaaaggg
+    10501 ggtgtaatca atggagcagc agatcgaact cgatgtgctc gagatttcgg acctcattgc
+    10561 aggtgccggg gagaacgatg acctggcgca ggtgatggcc gcctcgtgca cgaccaccag
+    10621 tgtttcgacg agttcttcgt cgtcctcgtc ctgaatctag ggaccgggaa acagttgagc
+    10681 caccgtcggg gtgttcctcg gcggtggcct tctgcagtcc tggcgtgcct ttttcgattc
+    10741 gaggaacgcg ccgtccctgg aggtcgacca gaggtgcggg cgtcgtgccc cggacgacga
+    10801 cgggctcgtg ccaagccggc cggccgagag ccggaacaca tgttgtcaag cgcgaactga
+    10861 ccggtgcggt ggagtgacag gcgggctccg tgcggcgggg cgcctctccg cggcccggcc
+    10921 ccgacgtccg cacgccgtgg tgaaccgggc cggcgtgacg aagttggggg attcctatgg
+    10981 gtgtgaacat cagtccgtac gtcgtctatc ggcgcagcag actcccactg ggcgagctcg
+    11041 gagggatgtc cttcaccacc gcctggtcgc gcatcgatga actgcacgcc ctgcgggacg
+    11101 agatcggcaa gaacgccgtc ggcctggccg accgcctcgg cgagctcgtg cctacgctgg
+    11161 gggacgacgt ccgggccgac ctgatcaggc tgcggcgcga cgtgcacaat ctgcggcacg
+    11221 accgggcggt ggcgcgactg gagccactgc gtccgcatct cggccgcgag gtggtcgacg
+    11281 aggtcgagac ctggtgcgcg ctcggcgtgc gggccgaaca gtgcgagcga gcagggcgcg
+    11341 aggagctcga gagtgagaag gcccgggccg ccgacggctt cggcgccctc ttcgagcacg
+    11401 atgcgatggc gcgcagcatc caactctccg gcgaccggct gtaccggggc ctgcgcgacc
+    11461 tcgtcgcggg cgacgaggcg agcgccctca agccgagcaa ggcccggctg cgggagtctt
+    11521 ccctcgtcaa cttcgcctac cgggcgagct tgaagccgtc ccccttcgga cggttcaccg
+    11581 agatcggcgc gttccctccg gacgacccgc gccccgcgga tcccggtggc cggcacggcg
+    11641 ggacgcagga gtcggtcacg acgctgaacc gtctcctcgt gaactggggg ccccccggcc
+    11701 tgccgctcgt accgggcggg atggagccgg ggcacctcgt gctgaactcc acgctgcggg
+    11761 ccggcaccga gtacgtcgag tacgtcggtg tcgctcccgg ctcccgtgag gacggccgga
+    11821 tggccaccga gagggtgctg cgcgtacgcc gggagggact cttcgacgca ctgctcgcgg
+    11881 cgatgcccga aggatcggct ccggcggcca cggtgctgcg cgacctcacc gccgtcaccg
+    11941 ggaaggcgga gacgagccgg aaggtcgtgc aggggctgat ccgggccggc atcctcttct
+    12001 tccggccgga gatcgacgat cacgaccccg actactccat gaagctcgac cgcgtactcg
+    12061 cggccggcgg gacgccggag acggccgcgc tacgcggaca cttctccgaa ctcaggcggt
+    12121 tggagacgga cttctccgag gcggcggccg acgagaggca gaagctgctc gactcggcgt
+    12181 acgcggcgat cggcggcatc gccgagctgt gcaaggtgtc cccgcccccc gaggaggtcc
+    12241 tgaagtcacc ggtcttcgag gacactccgg catccacggc gccccaggcc tggaacctgc
+    12301 cgacggtgga ggggagcatc cccgccctga cgggcctctg gcgtctggcc tcgatgatgg
+    12361 acaacggcca ggtgaagcga ctgggtctct actccttcgc cacccgcgtg ctcggcgacc
+    12421 gcagcacgat gcccttcctc gagttcttcc aggccttctc gtcgctgacg gaccaggaac
+    12481 aggtcgacgt gttcatgggg cgcgacgtgg aggaggccga gaggtacacg aggcagcggg
+    12541 cggaggctct gcgcacgatc cggcagcggc tggtgcccgg ggacggcacc gtgcacctgg
+    12601 acccctcggt catcgagaag gcctgcgagg gcgtggagga cctcctcgac acggaatcgg
+    12661 tgacgttccg cgcgcagttc gcccagggag tgctgcccga ccgggaccgg acgttggtcg
+    12721 tgaacggcct gctcaccggc tacggcgtct acttctcacg gttcggctcg ttcgtcgagg
+    12781 gcaccgacga atggtccctg ccggccgccc agcgggagca cctcgcacgc aggttccccg
+    12841 gccaggtcga cctcaactcc gtgctcggat tcaacttcaa cctgcacccc tcggtgaccc
+    12901 ggcgggtcgt caactacccc ggcgcggtgt cgctcggcgc cgagcggacg gtctacggac
+    12961 tggcgcgtct ggaggtccgc gcggatcagg ccaccaggtc gctgcgcctc tgggaccctg
+    13021 aggcgcagga aaccctcgac ctcgtgccca tgaacttcat gaccccgatc ggggtcccgc
+    13081 tgctctaccg tctgctcgag gcgctgtccc cgtccaaccg ctacctgtgg aagcccctgg
+    13141 acgacatcag ggacgcggga gggcccacgg tgtacggcga gacggcaccc cggctggtcg
+    13201 tgggtgacgt cgtggccgac cgcaggtcct ggaacgtggc cgcggccgag atccccatgc
+    13261 tccaggatct gagccgggac gtgcccgaag cgctcgtggc cttcgacgcg tggcgcctga
+    13321 cgcggggcct tccccgccac gccttcgtgc tgtgccagac gcccgaggag cgagacgtca
+    13381 tggccgggcg cagccggaag gtgacccgcc agtgggcgga ctacgcgcac ctgcggcgcg
+    13441 ccagcgtgca caagccgatg tacgtcgact tccggaaccc cttcctggtc cggagcttcg
+    13501 cgaagtcggc cctgtcacgc ggcgatgtcg tcgcgtcgat ccgcgagtgc cttccttcgg
+    13561 tggacgacta cggcccggac acgggctgga ccgcagcaga ggagttcttc gttgaactgt
+    13621 gtaccgacaa ctagtgggca gaccggaacg cgtgagtgga ggacggtcca catccacgtt
+    13681 ccgcactcgc tgcacacccc cttcctgtgc gacgtggtcg agccgctgct ccggtccgag
+    13741 ggactccagg accacttctt cttcctccgg tactggcagg gcggccccca tctgcggctg
+    13801 cggatgctct gcggccccgg ggccggttcg gccgaggcgg ccgaacgggt cgtcgcgggt
+    13861 ctggcacgtg cgatgccgga gttcggtgcg caggcgcggg aggaatacgc gctcgggctg
+    13921 accttgcagg acgagctcgc ccgcctggag aaggagacct cggaggaggg ccggcccatc
+    13981 ggggccctcg accgggtggc gtacgagccg gagtaccgca agtacggggg aacggagggg
+    14041 ctgcagatcg ccgagaccgt attccgcaag tcgtcggtgg cggtcctcgg cctgctgggc
+    14101 gggcaaccgc gggcgtgggt ggacgagcgc cgggcaccga tcggggaagc cgcgaggatc
+    14161 atggcgatgt tcctccacgg cgcaggcctc gacccgcggg ccgcagggct gttcctgcgg
+    14221 gagtacgagg actggtggcg tacgtacgcg ccggatgaca tgcagcgtgc ctggccgaaa
+    14281 ctgttcggcg gcgtctcggc acagatgacg aatctgtgcg cggcggtctg gcgtgacggc
+    14341 gccacggacg tgttccacga catcagcgcg gaggccgccg cccgcgcccg ttccgtgtgc
+    14401 ggggcggagc ccggcggcga tgtccgcgac ctccggctcg acggcacgcc ttacccgggc
+    14461 tgtctctcga actacgtgca caccaccaac aaccgtctcg gcctggtccc cgccgccgag
+    14521 gggctcgtcg cgtacctcgt gcgccggggc ctggaagcga tggacgggta gggcctgtcg
+    14581 ttgtcgggat catgcggggc cccgatgccc cggtgccgga agcccggagc ctccgtcccg
+    14641 cgcaggccct tccctgtggg gcctgcgccg gacggagagc tcaccggtgc gtcaccagct
+    14701 caccggaagc ttgtgcaggc cgtacacccc catgctgtcc ttgaacggca gttccgcgac
+    14761 cggggtggcc agcttcagtc cgggaatcct cgcgaggagg gcgttgaaga cgacctccag
+    14821 ctccagtttc gcgaggttct ggccgatgca ctgatggatg ccgtggccga acgccaggtg
+    14881 gtgcctgccg ccgcgctcga tgtccagcct gtccgggtcg gggaacaccg cctcgtcgtg
+    14941 gttgccggag gcgctgagcc cgatgacgcc ctcacccgcg cggatcagca ccccgccgat
+    15001 ctcgaggtcg gccgtcgcca cccgggaggt cacctggtcg gcgatgctga agtagcgcag
+    15061 cagctcgtcg acggcctgcg gggccagacc cgggtcggcc ctgagcttgg ccagctggtc
+    15121 ggggttctcc agcaggccca cgacgccgag ggagatcatg ttcgcggtgg tctcgtgacc
+    15181 gccgaccagg aggacgttcg ccatcccgac cagttccccg tggtcgaagg tgccggtctc
+    15241 ccggttcttc acgacgagcc tgccgagcag atcgtccccc gggtccgctt cctgagcggt
+    15301 gaccagctcg gagaagtacg cgtggagctc ttgatgcgcg ctgttgcgct cctgtgggtc
+    15361 ggcgtcgacc gacaccagct tgttggtgcg gtcctggaag aacgcgcggt cgctgcgggg
+    15421 caccccgagc agctcgcaga tcaccaggga cggcaccggg agggcgagcg cctcgaccag
+    15481 atcggccggc ccctcgccgg cgagcatgga ggagatgcac tcgtccacga tctcctcgac
+    15541 gcggggacgc agttggagca cccgcttgac ggtgaactcc gggatcacca tcttgcgctg
+    15601 cgccgtgtgt acgggcgggt ccatggcgag cagcacgggc cgcatctgct ccatgacctc
+    15661 cggcggcgca tcgaagtgga gtggatagcc cgggtgggcc aggttcgagc tcacatgagg
+    15721 gtcggcgagc aactgtcgta tgtgctcgtg ccgggtgagc agccacgcgg tccgcccgga
+    15781 ggccagagtg accttggtga tcggctgctc ggcacgcagg gacgcgtact ccttcggcgg
+    15841 gtggagcggg caggtcctgg ggtagggata cgcgctgtcc tgtcggtccg tcacggtctt
+    15901 ctccgcgggt agtccaggga aatctcctta cggactccat tcaagcggaa gatgatccac
+    15961 atcgtggcgt tatcggtgaa tgaggccgaa ctcacgtgga ccgcagggag gaaagtcggc
+    16021 cgccttcccg gcatggcccg tcccgtgctt tcggtgggag gggtcggtgc agtgcgacat
+    16081 tgcagtgatc gcgtaatccg gaatgacccc ttccgtgcgg ggatgcgtcg atagtacgtt
+    16141 ggatttcatg tgctccacat cgtggagagt tcgcttgcgc cgtcgaagtc acagtgtgtg
+    16201 ccgaggggga gttggggcta tgtacctttc gatcgtcatg tgggacctga agaagtcgga
+    16261 agccacggtg gagagcctca gggaatacct gcgggactat gccgtggacg cctactccgc
+    16321 gctggacgga atgcggctca aggcgtggtt ctccgattcc gcacgtcagc tgtggggtgc
+    16381 ggtctatctg tgggacagcc ccgagcagat gcccggcctg tacaaagtca gccgcgtgat
+    16441 cgatctcatc gggtatccgc cgacttcggt cggtggtttc acgctcgagg cgaccgccga
+    16501 agggaagagc gttcacgaga cactggccgg cctggggatc gccctggagg gcggaacgca
+    16561 gtaagggcag gcgtgcgacc ggatgggcgg gaggtttgcc gggccggtcg gttcgagggg
+    16621 cggtcgaggg ccggagccgc gcgggtggct tcgccgctcc ggctggcggc cgggtgtgga
+    16681 gtgccgcgcg gcgccggtgt cgcccgcgcc gggcatcacc gtgctcgcgg agggttcagg
+    16741 tgtgtcggac cggcgctttc gcgtgtgttc ggcaccgtcc ggtgggccgg gtgctgtgcg
+    16801 cgggcctcga gctcccgggc gggcgcggtg cgcaggccct ccccgcccgg acacgcgtat
+    16861 tccgcaccgc ttcacgaaga tcattcggtg aaggaggcgg gggcgctcgt gctaacgtcg
+    16921 tgatcgtggc cagccttgac attattaccg aacgctctga ttctgccgta caacgcatca
+    16981 tcgatgtgac aaagcattcg aggtccgttg tccgcacggt gctgatcgag gacatcgagc
+    17041 ctcttctgca gagcatccgt gccggagtgg aattcatcga gatctacgga ctcgacaccg
+    17101 tgcctgttcc ggacagtctg ctcgccgaat gtgaacggcg cagaattccg gtccggctgc
+    17161 tcgccgcttc ggtcgccaat caggtcttca agaccgagaa gaagcccaag gtattcggta
+    17221 tcgccaaggt cccgcggcct cgtcgcctgt cggacctgtc cgacatgacc ggtgacctca
+    17281 tcctgctcga cggagtgaag atcgtcggca atatcggagc catcgtgcgg acctcgttcg
+    17341 cgctcggggc ctcgggaatc gtgctcgtgg acagcgatct cggcagtatt gcggaccgcc
+    17401 gtctgatcag ggcgagccgg ggctatgtgt tctcccttcc catcgttctc gcgtcccggg
+    17461 ccgaggcgct ccagtacttc caggacaatg cgatgcgccc ggtggtgttc gaggccgacg
+    17521 gggatctcgg cgtcgctgat ctcgacggta tggacgagcg acttgtgctc atgttcggca
+    17581 gcgagaggat cggcccgtcg ggcgagttct ccgacatcgc cgccaagtcg gtctccattc
+    17641 cgatgaatcc cgcggccgag tccctcaacg tatcggtgtc ggccggaatc gcgctgcacg
+    17701 cgagggcccg ccgtaacctc tcccggtagt cccggccgca gagccccgtc aagggccccg
+    17761 tccctcccct ccgggagggg cggggccctt gacgtgcgcc gccgggagcc ctcgctaccg
+    17821 cggccgccac atccagggcg ccgcgtccgg acccaggccg gcgaccgaag gccggccgtc
+    17881 cgggcccagg cggagcgagg ccccgtccag ggcggccggc ccgtcggtcc gcacgtggag
+    17941 cccgccgccc atgagcagct ggacgaggcc ctcggcggtg cggcccagca gcacgggccc
+    18001 ccgcggcgac ggggcggcgt ccaccgnncc gtacccgtcg aaccgtgctc cgggcacggc
+    18061 cgtccccgcc cgagccgtgg tcagcccgct cccggccgct gcccggtagt agagggacac
+    18121 ggacccgtcg ggagccggga gggcggccgg cgcgtgggcc gggacgggcg ccgccgtgag
+    18181 ctgtgtgcga gccgtgagcc cggcggacgg ggtgtcctgg gtccagtggt gtacggcgtg
+    18241 gtggccggcg ccgaagacgt ggacgcgtcc accctcgtcc acggcggtgt gcaggccgtc
+    18301 ctgcacctct ccgccgccca tgtcccgcca cgcgctccac cgccccgccc cgtcccgcac
+    18361 ccgggtgctg acgcccttct cggcgtcgcg tacgaagaga tggatctgcc cgtccggagc
+    18421 ggcgacggcc accggtacgc ccgtgcgacg tacttcgtca cgtccgggag aaggagagcc
+    18481 caggccgcgc caggggcgga agccccggcc cggggcgctc tgctccagca ccacgatctc
+    18541 gcgctcgttg tcggcgccgt gcccgccgag tgcggcgaag cggagcccga acagcagcag
+    18601 gcgcccgtcc cgtgtggtgg ccgagcccag tgcgggggcg agcgggccgc cgcctaggtc
+    18661 gtgcggaggc ccccaggcac cgctgcccgg cccggtctcc tgccaccgca ccacccgcag
+    18721 ccccagcacg gcataggccg cgagcctgcc gtccggctcg gcggtgagga ccgtgcgcgt
+    18781 gcccgggtag cggtggtggg tggagcggac ccagcccttg cggttggtga gcgggcggtc
+    18841 gccgccgacg ttgtagtcac cgcagccgga cggattgccg cagtcccagt cgggcgagcc
+    18901 cccgtagggg acgaggtggg cggccttcct cgccagtacc ccctgcggca ggttcttcgg
+    18961 ccagtgccgg ttgtagtaac cgcggtaggc gaccgtgacg aagcccggta tccggccacc
+    19021 gtccgcggtc gcccgggcca cccagcggat catcgcggcc cacgcgaagc aggccgcggc
+    19081 cgtgtggtcg gcgtggtcgg agtagccggg ctgttcgctg tccctgcggc ggacggcctc
+    19141 cgtgctgtgc tggatgtccg ggtcggggtc cagggtgtgg acgacggtgg gccggtagct
+    19201 ctccatcagg ccggtgagga ccccgaccag cccgtcgtac gtgtacgagc cggcgcgccg
+    19261 cagcggcgat ccgtcggcca cgaccgtacg cagcacgagc cggcgatcct gccagagact
+    19321 gggcagcccg agccggtgcc ggccggtgtg catggccgtg ttgaggaaga tcaactccac
+    19381 tctgcggccg tcggccgcca gcacgttcac ctcggccctg tggtccgggc cgagctcggc
+    19441 gacggagacc tcccaggggg tgaagggccg cagcccgagc agggcg
+//