Mercurial > repos > ecology > sanntis_marine
changeset 1:9d689f8c9ce4 draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics commit 9dff0476530d65342db00896f3108edb899e3fd2
author | ecology |
---|---|
date | Thu, 08 Aug 2024 11:58:48 +0000 |
parents | 12870a79d56b |
children | |
files | sanntis.xml test-data/BGC0001472.fna test-data/Regex_Find_And_Replace_on_data_21.fasta test-data/Sanntis_output_data.genbank |
diffstat | 4 files changed, 637 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/sanntis.xml Fri Jul 26 14:31:32 2024 +0000 +++ b/sanntis.xml Thu Aug 08 11:58:48 2024 +0000 @@ -2,7 +2,7 @@ <description>in genomic and metagenomic data</description> <macros> <token name="@TOOL_VERSION@">0.9.3.5</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> </macros> <edam_topics> <edam_topic>topic_3387</edam_topic> @@ -11,21 +11,49 @@ <requirement type="package" version="@TOOL_VERSION@">sanntis</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank' + #if $selection.which_sanntis == 'sanntis': + sanntis --ip-file '$selection.input_interpro' --outfile 'output_sanntis.gff' '$selection.input_genbank' + #else: + sanntis_build_gb -n '$selection.input_nuc' -a '$selection.input_prot' -o 'output_sanntis_gb.gb' + #end if ]]></command> <inputs> - <param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/> - <param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/> + <conditional name="selection"> + <param name="which_sanntis" type="select" label="Do you want to build a genbank or to make a SMBGC Annotation?" help="If you decide to build a genbank you can then use this genbank to then conduct the annotation."> + <option value="sanntis">Run sanntis</option> + <option value="genbank">Build genbank</option> + </param> + <when value="sanntis"> + <param name="input_interpro" type="data" format="tabular" label="Input the tabular file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/> + <param name="input_genbank" type="data" format="genbank" label="Input a Genbank file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/> + </when> + <when value="genbank"> + <param name="input_nuc" type="data" format="fasta" label="Input a nucleotide fasta file"/> + <param name="input_prot" type="data" format="fasta" label="Input a protein fasta file" help="Before using this tool you can get the right protein data by using the Prodigal tool"/> + </when> + </conditional> </inputs> <outputs> - <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/> + <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"> + <filter>selection['which_sanntis'] == 'sanntis'</filter> + </data> + <data name="output_sanntis_gb" from_work_dir="output_sanntis_gb.gb" format="genbank" label="Sanntis output data genbank"> + <filter>selection['which_sanntis'] == 'genbank'</filter> + </data> </outputs> <tests> <test expect_num_outputs="1"> + <param name="which_sanntis" value="sanntis"/> <param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/> <param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/> <output name="output_sanntis" value="Sanntis_output_data.gff3"/> </test> + <test expect_num_outputs="1"> + <param name="which_sanntis" value="genbank"/> + <param name="input_nuc" value="BGC0001472.fna"/> + <param name="input_prot" value="Regex_Find_And_Replace_on_data_21.fasta"/> + <output name="output_sanntis_gb" value="Sanntis_output_data.genbank"/> + </test> </tests> <help><![CDATA[ @@ -36,6 +64,10 @@ SMBGC Annotation using Neural Networks Trained on Interpro Signatures Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data +**Or** + +This tool can also create a Genbank adapted to be used in sanntis + .....
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BGC0001472.fna Thu Aug 08 11:58:48 2024 +0000 @@ -0,0 +1,2 @@ +>BGC0001472 +GCCCCGGGGGCCGTCGCTCCGGGGGTCGGTCCTGCCCGGTGGCGCAGGACCACGGGGGCCGGGGCCCGGGGGTGGACGGCATTTGTTTTGACCCAGCTCCGTGAGGTAGGTACGCTCAAGCCTTGTGCCTGGGGTGTGCCTGGGCTCGGGTGCGTGTCCTCAACCGCATGGCGAGTCCGTAAGTGGCCACCGCAATCTGTGTTCCGTCTGCCTTCCAGCAGGGGCGTGCAGTATTCGACACACCCGACCGCGTGGGTCGGTGACTGTTCCAGGTTAGTTTCACCGAACGGCACACAGAAACCGGAGAAGTAGTGCCTACGATCCAGCAGCTGGTCCGGAAGGGCCGGCAGGACAAGGTCGAGAAGAACAAGACGCCCGCGCTCGAGGGTTCGCCCCAGCGTCGTGGTGTCTGCACGCGTGTGTTCACGACCACCCCGAAGAAGCCGAACTCGGCGCTCCGTAAGGTCGCGCGTGTGCGTCTGACCTCCGGTATCGAGGTCACGGCCTACATCCCGGGTGAGGGGCACAACCTGCAGGAGCACTCCATCGTGCTCGTGCGTGGTGGCCGTGTGAAGGACCTGCCGGGTGTTCGTTACAAGATCATCCGCGGTTCGCTCGACACCCAGGGTGTCAAGAACCGCAAGCAGGCCCGCAGCCGCTACGGCGCCAAGAAGGAGAAGTAAGAATGCCTCGTAAGGGCCCCGCCCCGAAGCGCCCGGTCATCATCGACCCGGTCTACAGCTCTCCTCTTGTCACCTCGCTGATCAACAAGATCCTGCTCGACGGCAAGCGTTCCACCGCCGAGCGGATCGTGTACGGCGCCATGGAAGGCCTCCGCGAGAAGACCGGCGCTGACCCGGTCATCACGCTGAAGCGCGCGCTTGAGAACGTCAAGCCCTCGCTCGAGGTCAAGTCCCGCCGTGTCGGTGGCGCCACCTACCAGGTGCCGATCGAGGTCAAGCCCGGTCGCGCCGCCACCCTCGCTCTGCGCTGGGTCGTGGGTTACTCCCGCGCCCGTCGCGAGAAGACCATGACCGAGCGCCTCATGAACGAGCTGCTCGACGCCTCCAACGGTCTTGGCGCTGCCGTCAAGAAGCGCGAGGACACCCACAAGATGGCCGAGTCGAACAAGGCCTTCGCGCACTACCGCTGGTAGTCGCTCACCCCATCGAGACCGAGAGAAGATTGAGCCTTATGGCCACCACTTCGCTTGACCTGGCCAAGGTCCGCAACATCGGGATCATGGCCCACATCGACGCGGGCAAGACGACCACCACCGAGCGGATCCTCTTCTACACCGGCGTTTCGTACAAGATCGGTGAAGTCCACGACGGCGCAGCCACGATGGACTGGATGGAGCAGGAGCAGGAGCGCGGCATCACGATCACGTCCGCCGCGACGACCTGTCACTGGCCGCTCAATGATGTTGACCACACCATCAACATCATCGACACCCCGGGTCACGTCGACTTCACCGTCGAGGTGGAGCGTTCGCTCCGCGTCCTCGACGGTGCCGTCACCGTGTTCGACGGTGTGGCCGGCGTCGAGCCCCAGTCCGAGACCGTCTGGCGTCAGGCGGACCGCTACGGCGTGCCGCGTATCTGCTTCGTCAACAAGCTCGACCGCACGGGCGCCGACTTCCTCCGTTGCGTCGACATGATCGTCCAGCGCCTCGGCGCTGTCCCGATCGTCATGCAGCTCCCCATCGGTGCGGAGGCTGACTTCCGCGGCGTCGTCGACCTCGTGTCGATGAAGGCCTTCGTTTACCCCGAAGAGGCCGTCAAGGGCGAGATGTACGACACCGTCGAGATCCCGGACAACCTCAAGGAGGCCGCCGAGGAATGGCGCGGCAAGCTCCTCGAGGCCGTCTCGGAGAACGACGACCAGATGATGGAGCTGTACCTCGAGGGCGAAGAGCCCACCGAGGAGCAGCTGCACGAGGCGATCCGTCGGATCACCCTCGCGTCGAAGGGCTCGGCCGACTCCGTCACCGTGACCCCCGTCTTCTGTGGCACGGCGTTCAAGAACAAGGGCGTCCAGCCCCTGCTCGACGCCGTCGTCCGCTACCTGCCTTCCCCCCTGGACGTCGAGGCCATCGAGGGCCACGACGTCAAGGACCCGGAGAAGGTCGTCCAGCGGAAGCCCTCGGACGACGAGCCGTTCTCCGGCCTGGCGTTCAAGATCGCGAGCGACCCGCACCTCGGCAAGCTCACCTTCGTCCGGATCTACTCCGGTCGCCTCGAGGCCGGCACCGCGGTGCTGAACTCGGTCAAGGGCAAGAAGGAGCGCATCGGCAAGATCTACCGCATGCACGCGAACAAGCGTGAGGAGATCCCGTCGGTGGGCGCCGGTGACATCGTCGCCGTCATGGGCCTGAAGCAGACCACCACCGGTGAGACGCTGTGTGACGACAAGAACCCGGTGATCCTGGAGTCCATGGACTTCCCGGCGCCGGTCATCCAGGTCGCCATCGAGCCCAAGTCCAAGGGTGACCAGGAGAAGCTGGGTGTCGCCATCCAGCGCCTCTCGGAGGAGGACCCCTCCTTCCAGGTGCACTCCGACGAGGAGACCGGCCAGACCATCATCGGTGGTATGGGCGAGCTTCACCTCGAGGTGCTCGTCGACCGCATGAAGCGCGAGTTCCGCGTCGAGGCGAACGTCGGCAAGCCGCAGGTCGCGTACCGTGAGACGATCCGCAAGGCCGTCGAGCGTATCGACTACACGCACAAGAAGCAGACTGGTGGTACCGGCCAGTTCGCGAAGGTGCAGATCGCCATCGAGCCCATCGAGGGTGGCGACGCGTCCTACGAGTTCGTCAACAAGGTCACCGGTGGCCGCATCCCCCGTGAGTACATTCCCTCGGTGGACGCGGGTGCCCAGGAAGCCATGCAGTTCGGCATCCTGGCCGGCTACGAGATGGTGGGCGTCCGCGTCACCCTTCTCGACGGTGGTTACCACGAGGTCGACTCCTCGGAGCTCGCCTTCAAGATCGCTGGTTCGCAGGCGTTCAAGGAGGGTGCCCGCAAGGCGTCCCCCGTGCTCCTCGAGCCGATGATGGCCGTCGAGGTCACCACACCCGAGGACTACATGGGTGAAGTGGTCGGCGACATCAACTCCCGCCGTGGCCAGATCCAGGCCATGGAGGAGCGCCACGGCGCTCGCGTCGTGAAGGGCCTCGTGCCCCTCTCGGAGATGTTCGGCTACGTCGGAGACCTCCGCAGCAAGACCTCGGGTCGCGCAAGCTACTCGATGCAGTTCGACTCCTACGCCGAGGTTCCGCGGAACGTCGCCGAGGAGATCATCGCGAAGGCCAAGGGCGAGTAACTCTTCCGAGCTCACGCTTTAGGCTTGTCACCGGAGCCCGGTCGGGCATGCGTCGCAGTGCGGCGGATGCCCCCGGCACCGGCATTCCAGCAAAGATCACCTGGCGCCGATGAAGCAAGGCGTACAGAACCACTCAGGAGGACCCCAGTGGCGAAGGCAAAGTTCGAGCGGACTAAGCCGCACGTCAACATCGGCACCATCGGTCACATCGACCACGGTAAGACGACCCTCACGGCCGCCATTACCAAGGTGCTGCACGACGCGTACCCGGACCTGAACGAGGCCTCGGCCTTCGACCAGATCGACAAGGCTCCTGAGGAGCGTCAGCGCGGTATCACGATCTCGATCGCGCACGTCGAGTACCAGACGGAGTCGCGTCACTACGCGCACGTCGACTGCCCGGGTCACGCTGACTACATCAAGAACATGATCACGGGTGCGGCGCAGATGGACGGCGCCATCCTCGTGGTCGCGGCCACCGACGGCCCGATGCCGCAGACCAAGGAGCACGTGCTCCTGGCCCGCCAGGTAGGCGTGCCGTACATCGTCGTCGCGCTGAACAAGGCCGACATGGTGGACGACGAGGAGATCCTGGAGCTCGTCGAGCTCGAGGTCCGTGAGCTCCTCTCCGAGTACGAGTTCCCGGGCGACGACCTTCCGGTCGTCAAGGTCTCGGCGCTCAAGGCCCTCGAGGGCGACGCCGAGTGGGGCCAGACCGTTCTCGACCTGATGAAGGCCGTCGACGAGTCCATCCCGCAGCCCGAGCGTGACGTCGAGAAGCCGTTCCTCATGCCCATCGAGGACGTCTTCACGATCACCGGTCGCGGTACGGTCGTCACCGGCCGCATCGAGCGTGGTGTCCTGAAGGTCAACGAGACCGTCGACATCGTCGGTATCAAGACCGAGAAGACCACCACCACGGTCACCGGCATCGAGATGTTCCGCAAGCTGCTCGACGAGGGCCAGGCCGGTGAGAACGTCGGTCTGCTGCTTCGTGGCATCAAGCGCGAGGACGTCGAGCGCGGCCAGGTCATCATCAAGCCGGGTTCGGTCACGCCGCACACCGAGTTCCAGGCCCAGGCCTACATCCTGTCGAAGGACGAGGGTGGCCGTCACACCCCCTTCTTCAACAACTACCGCCCGCAGTTCTACTTCCGTACCACGGACGTGACGGGCGTTGTGACCCTTCCCGAGGGCACCGAGATGGTCATGCCGGGTGACAACACCCTCATGGACGTCGCGCTGATCCAGCCGGTCGCCATGGAAGAGGGCCTGAAGTTCGCCATCCGTGAGGGTGGTCGTACGGTGGGCGCCGGCCAGGTCACCAAGATCACCAAGTAATTCCGATTACTTGTGGGTCGGGGTAACCCGGTTGCTCTGAACTGAGCGCACAGCACCAAGCAGGGCCCGCACGGCATCACGCCGTGCGGGCCCTGTGCTGTCTCCGCGGGGCTCGCGGCGGACAGGACGACGGGCGCGGCCCGCACACCCTGAGGGTGTGCGGGCCGTCGTGACGCGGGCGCCGTTCGCGTCGGGAGGTGCGGTCAGCTCTCCGGTGGCCGGGCGTATGTGGTGGCCAGCAGGGAATGGCGGGTCGCCCCGTCGCTGTGGACCAGATGGTGCACGGACGACGGGACGAAGCCGCCGAACAGTGTGCCGACGAGCCCCAGCGCCTGGCACCTGAGATTGAGGTCGTAGGTGGCCATCGAGGCCCGCAGCGCACTGATCCGGTAGCCGTGGGAACCCGCCCAGGTGTCCGCCCGGTCCAGACTCGCGTAGAGCGCGACGATCCCCGCTCCGGTGGAGAACTCGCGCTGGACGCCGAGGTTCTCGGCCGGTCCGATCTCGTCGAGCCCGGCCAGGTAGCACGTCTCCTCGGCGGTCACCCGGTACAGCCCTGGTTCGGCGCCCTCGCTGCGGAACGCGAAGACGAAGCCCTCCAGCGCGCCCGCCGAGGCGTCGAGCCCCCAGTCGTCACGGTCCCGGCGCAGCACGTCACGCAGCAGCGACAGGATCACGTCCGTGCGTACCGGCAGTGGCGCGTAGTGCAGCGAGGAGCGGCGGCGTTCGAGCGTGCCCCGCAGGTCGTGGACCGGCCGCAGGTCCTCGGCAGACGGTTGCGGTTCGACGGGCACGGAGTCCGCGAACGGCACCGGCCGGCGTTCGGCCGCCAGGCCGTCCGACGTGGGCGTCCGGGAGGTGAAGCCCTCCAGCACGGCCGTCATGGATGTCACGTCGTTCCTCATGCGGTACCTCCCAGGGACGCGAAGGCCGCCACCGGCTCGTCGGCAGGCGATGTTCCGAGCAGCCGGGCCAACGCGTCGTCGTCCCAGTCCGACCTGGGTGTGAAGCCCAGGCCCAGGTGCTGTGCGAGCTCCCGCAGACTGGCCAGGTTGCAGCCCGCGTCCAGAAAGACCAGCCTGAAGCCGAACGTGCCGTACTTGGTCATGACCTTCTTGAGATCGCCGGTGATGATGATGTCGCACGGCGAGTCACCCGGAGGTACCTCGCCGGAGACGGTCACGAGTGTGTGGCTGCCCTGGGCGTACGCGTAGACCCCAGGAGGCATGATCCGGTCGTCGCGCACCACCGCGTAGGCCGTCGTGCTGCCGATGTTTCCCGACGCCGCGGTCCATCGTTTGACGCGCTCCGGAGTGGTCTCGTCCTCCTTCACCCCGAAGGCGACCTTCAGCAGCAGTCCCAGGGAGCTCAGGGTGAGCGGGGTGTCGCCGTGGGACGGGTCATGCCGTTCCGACCCGGCGAGGACGGAGATGTCCAGGGCGGGGAGCGGTGTGTGCGGGCGGCTCGGCCAGTCCTTGAACTGCGACTGCAGGCGCAGGTTGGACGCGTAGTAGTGCGCCTGGTGGTCCTTCGGAGCGAGGAACGCGCGTGGAGGCATCGCGACCGAGGCCTCGTAGACGGCACCGGCGGGTGCCTGGGGTGCGACCGGCCCCCGCGCGTAGGAGCAGCGAGGGCACCCCGGGCGGACGGCAACCGGCCGGTAGACCGTGCTCAGGGTCGCGGTGTCGATGACGGTGAAGTCGCCGGGGAGGTGCGAGATCGTGGCCCGTGCGAGCAACGCGGTGACGTGGTGGGAGGCCAGGCCGACGACCAGGTCGTGCAGGTATTCCGGCGGCTCGCCGGAGAGGTCCGCCTCACCGTGACGGCCGCAGTCCAGGCACGGTGTGATCGAGAGGTCGGCGTACGGGCCGATCGTGATCGTCCGCGCGTCGGCGCGCACCCGCAGCAGCGGACGCCCGTCCTGCCGGCACCGTTCCTCGGTGGCGGCGAGCAGGGGTGCCGACGCGGGCGTCTCGAAGAACACGGTGAGCTCGTCACCGGGGCCCGGCGGCCCGGCCGGTTCCGTGACCACCGGGCACACCTCCCGCAGCGACCGGCGGGCCCCGGCGACCAGGGCGGCGTCCCCCTCGAGGCGCACCGAGCGCGACACCAGCCGCGCCGCGGCATCCGCCCAGGACGGGTTCGATCCGGTGGAGTTGCCCAGCCGGGAGAGGAAGACGGCCCATTCGGGCGTGACGTCCGGTTCCTCGCCGGACATGGCCTCCTCGACCGCCCCGGCCGTCGACAGCAAGGCCAGGCACTTGTAGACCGTGGCCTCGTCGAATCCGGTCTTGAGCGCCAGTTCAGTGTGGTCGCGGGTTCCGTCGCACGCCTCGGTCAGGGGCACCAGCCCTTCTCGTGCGAACGCTCCGGAGAACACCTGTGCCCGGTCGGCCCCGTCGAGCACGACGGATTCCCCTGCCCTGCGCAGCCTCACCCCGCGGCGCAGGACGGGCCGGGCAGGGACGGTCAACTGCATGTCCGTACGTGCGGCCCGCCCGATCTGCTCGGCCTTCATCTCGGCAACCATGATCGATCTCCCTCTTACCGCAGCAGTTCCGGAACAGGCATGGCGCTGTCCGCGCGCGCCTGCGAGAACCGGCCCCCGCACCGGGGGCACCGGTTCACCGAGACCGTCACCGCGGACGAGACGGCTCCGGAGACGAGATTGAACGTACGTACGGTGCCCCCGATCCCCGTCACACCCGTGGCGATCTCGGTCAGGGCGAGGTCCAGGAGGCCGGAGGCGACGGACAGGTGCTGCCGGCCGAACCCCTCGGGCAGGCCGGACAGGGCGGCGTCCATGTCGTACGGGCGGGCGGTGCCCGCGTGTTGGGCGGCGCGCTTCCTGTAGCACGCGTAGCAGGCCGTCCGGCCCGGAACCACGACGGGTCCGCAGAGGACCTTGGTCGGCAGCAGCTGGAGCCCGACGGACGGTGTGCTCCGTTCCGCGCAGATCCGGTCCGTCTCGTCGCGGAGCCCGGGGTCGCCGCCGGCATGCACGGGGACGACGACATCCGCGTGCGGTGCCGTGTCGTTCAGGAAGTCCTCGAAGGACACCTGTAGCTCCGGGGGGACACCACGGTGCTCCGTGAGCCGCCGGCTGAAGGTGTCACCCACGAGGTACAGGCACTGCGGACGCGGATCGTGCGACATGCTGCTCTGCTCCTTCTTCTCGCTCATGCGAACGGCTGCTGCACGGGGTTGACCGCGTCCTCGGCATGGCTCGTGTGCCCCATCGCGCGCGGCGCGTCATAGAGGCGCGGGGTGCCCAGGTACCGCTCGCCGTGTACGAACGACACGGGCATGGCCTCGGGCACGAGCACCTTGACCGCCCGCATGCCCACCTGGCGCGCCTCGTCCGTGGTGATGTCCGTGACGAGCACCTCGGCACCCCGTGCCGCGAGCCGGGCGACGACGGTGTCCAGCGGGTCCGCTCCCGCCGGAAGCCCCGGCATGCCTTCGAGGCCGTACGCCGGCCGTTCACCGTCCAGCAGGAAGCCGAACACGTCCCGCCGGTCGCGCGTCGCGTTGTGGACCGCGCCGCCGACGACGCTCACCTTGGCCGGATCGGGCTCGCGCCCGGCGTACGCCGAGAGGTATCCGCGCAACGCCACCCGCAGGGAGGCCAGTTCGCGGTAGATCTTGCCCAGCGCCTGTTCCGGATGGACGTCGCAGGTGGCGGCGACGATCTGGGCGAGCGCGGGGTCGGCGTCGGAGAGCTGCACGGCGTAGATCACCGGGACGCCGAAGTCCGTCGTCGCGTCGAACAGCCTCACCCGCAGGTCCGTGGAGGTGCCGACGCGGTGCAGTTCCCGGACGCCGGCGTCGAGCCGCGCCGGATCCACGACCAGCTCGGGCAGCCGCAGCTGCTGGAGCCAGACCAGCGCGATGGCGTCCCGCTCGACGACTTCCAGCAACCCGCCGAGCACGGCGCTGCGTACGTCGGAGTGGACGGCGGCACCCGTGGTGATCCCGCGGATGAACTCTTCGGACTTCGACTGGTAGGGCATGTGCAGATACACGGAGATCGCCGGCACGAGCACCGGTATCCGCCGGGTGAGCGACCATGCGCGCACCCATCGGATCGGGACCGAGGGGTCGTACGCGGACAGGCTGCAGTCGTCGCGGGCCAGCTCGGTGGGCGAGCAGCTGGGCCACCGGGACGGCGACACGAACTCCTCTGTGAGGTCGTTCTCGGCGGCGACCACCATCTCGTCGTCGTCCCAGGCACACGTGGAGTACCGTTCGAGCGCTTCCGCGATGGAGACCAGCTTGGCGCGCTCGGGCGTGAGCCCGGTGCCCGCGCCGTCGGAATTTCCGGTGTCCTCGTCGTGGGCCCACGTCCGCAGGTTGGGAAGAGCACGGGACGGGACACCGAGATAGGCCAGCTGGACGGCGAAGGGCGGTTCGCCCTCGCGCACCGGCAGCGGCGCGGTGCGTGACACCAGTCCGTAGGGCGAGACGAGCTCCTCGAGACCGCGCAGCTCGGCCGAGATGTGCTCCTGAGGCCTGTTCAGATGCATGTGGTTCTTCTTCCCGCGTCCGGATGTTCCCCGTGTCCTTGCCGCGCTACTCGGCGGTCTTCGTGGATCCCGACGCCATGCTCCTGGCCAGGCCGTCGAGCTCGTCCTGGAGCTCCTTCATGGACTTTCCGCAGACGTCTTCGTTCGCACGTGCGACGACGTAGCAGAAGACGTACCTCTCCGCCAGGCTGAAACCGAGGGTGTACAGGCAGCTGTACAGCAGGCTGGTCTGCAGCCGGAAGGCCAGGAAGTCGGCGTCGCGGTGCATCAGGCGGTCCAGCTCGGGGCTCGGTGGCGTATGGAAGCGCGTCGGCTCGACCGGGGCGCCTCGCTTGCGAACGAGGTCTTCGAGCGTGTGTCCGGCGTTGACGACGGATCCGTCCGAGAAATTTTCGGCCAGGTGGCTTGATTCAGACGTGATGGATTTCGTCCAAAGCCGAACCATCTCGTCGGCCGGATCGTCGTCCGGATCGCCGCACGCCGTGATGAATTCCCGGACGCCGGCCCCTACCTGTTCGTAGAACCGGGCGCAGGCGGCGTCGAAGGATTGCGGATCCTTGGTGCGCAGATAAATCGCCTCGAAATGCGAACGGTAACTCAGCAGGCGTAGGGAAAGCAGCTCACGGAATTCGTATCCGTCGATTTCTCTCTGCGGTGATCTCAGAAGAGTGGCTCTGGTGTGCGCGGCCATGAGGCGGATGGCACCGAGTGCCGGCGGAGGCGATTGAGGTGTCTCTGCCGCACGATTCAAGAATGAGACGAGAGTCGGTGCGGCCTCCTGGAAGAGCCGCTTGGAGGAAGCGGAGAATCCGGTTCCGGTAAACCCCGCGTTCCACAACGGTGAGGGAACCGAATCAAGGGGCGTGGCGTCCACCTGTGCGGCGCACCCATATCGTTCGGCCGTTCTATGCAGGTCAGCGCGGGTGGAGTCCTCGTCGGACACCCCTTCGATGCTGATCTGCAGGGTGTCCGTGCCGAGCTGCGCGTCCTGGGTGCGGACGAAGTAGTAAGGGCCGTGACCGCCTGCTCCACCTACCCGCTCCGGCGCGAACAATTCGCGAATCAGGGGCGCGAACGTGTCGTTGTATTTTGAACCAACTGGCTGCGGGATGGTGGCAAACAAGTGCACGAATGAAACCCCCGAGATTTCGGCCGGCGCCAGACGGGCCGTCACGTGATCTTGTAATCGACCGTAACTCAGAGAGATGAGAAGAAGGAACCCCGGCTCCAGTGTGATCTGCGTCACTCCGGAAACTGCTTGCGTTGTGATCGTCCGAATGCCTAGATTCGAATCACATTGACGAAAGGGGGTGTAATCAATGGAGCAGCAGATCGAACTCGATGTGCTCGAGATTTCGGACCTCATTGCAGGTGCCGGGGAGAACGATGACCTGGCGCAGGTGATGGCCGCCTCGTGCACGACCACCAGTGTTTCGACGAGTTCTTCGTCGTCCTCGTCCTGAATCTAGGGACCGGGAAACAGTTGAGCCACCGTCGGGGTGTTCCTCGGCGGTGGCCTTCTGCAGTCCTGGCGTGCCTTTTTCGATTCGAGGAACGCGCCGTCCCTGGAGGTCGACCAGAGGTGCGGGCGTCGTGCCCCGGACGACGACGGGCTCGTGCCAAGCCGGCCGGCCGAGAGCCGGAACACATGTTGTCAAGCGCGAACTGACCGGTGCGGTGGAGTGACAGGCGGGCTCCGTGCGGCGGGGCGCCTCTCCGCGGCCCGGCCCCGACGTCCGCACGCCGTGGTGAACCGGGCCGGCGTGACGAAGTTGGGGGATTCCTATGGGTGTGAACATCAGTCCGTACGTCGTCTATCGGCGCAGCAGACTCCCACTGGGCGAGCTCGGAGGGATGTCCTTCACCACCGCCTGGTCGCGCATCGATGAACTGCACGCCCTGCGGGACGAGATCGGCAAGAACGCCGTCGGCCTGGCCGACCGCCTCGGCGAGCTCGTGCCTACGCTGGGGGACGACGTCCGGGCCGACCTGATCAGGCTGCGGCGCGACGTGCACAATCTGCGGCACGACCGGGCGGTGGCGCGACTGGAGCCACTGCGTCCGCATCTCGGCCGCGAGGTGGTCGACGAGGTCGAGACCTGGTGCGCGCTCGGCGTGCGGGCCGAACAGTGCGAGCGAGCAGGGCGCGAGGAGCTCGAGAGTGAGAAGGCCCGGGCCGCCGACGGCTTCGGCGCCCTCTTCGAGCACGATGCGATGGCGCGCAGCATCCAACTCTCCGGCGACCGGCTGTACCGGGGCCTGCGCGACCTCGTCGCGGGCGACGAGGCGAGCGCCCTCAAGCCGAGCAAGGCCCGGCTGCGGGAGTCTTCCCTCGTCAACTTCGCCTACCGGGCGAGCTTGAAGCCGTCCCCCTTCGGACGGTTCACCGAGATCGGCGCGTTCCCTCCGGACGACCCGCGCCCCGCGGATCCCGGTGGCCGGCACGGCGGGACGCAGGAGTCGGTCACGACGCTGAACCGTCTCCTCGTGAACTGGGGGCCCCCCGGCCTGCCGCTCGTACCGGGCGGGATGGAGCCGGGGCACCTCGTGCTGAACTCCACGCTGCGGGCCGGCACCGAGTACGTCGAGTACGTCGGTGTCGCTCCCGGCTCCCGTGAGGACGGCCGGATGGCCACCGAGAGGGTGCTGCGCGTACGCCGGGAGGGACTCTTCGACGCACTGCTCGCGGCGATGCCCGAAGGATCGGCTCCGGCGGCCACGGTGCTGCGCGACCTCACCGCCGTCACCGGGAAGGCGGAGACGAGCCGGAAGGTCGTGCAGGGGCTGATCCGGGCCGGCATCCTCTTCTTCCGGCCGGAGATCGACGATCACGACCCCGACTACTCCATGAAGCTCGACCGCGTACTCGCGGCCGGCGGGACGCCGGAGACGGCCGCGCTACGCGGACACTTCTCCGAACTCAGGCGGTTGGAGACGGACTTCTCCGAGGCGGCGGCCGACGAGAGGCAGAAGCTGCTCGACTCGGCGTACGCGGCGATCGGCGGCATCGCCGAGCTGTGCAAGGTGTCCCCGCCCCCCGAGGAGGTCCTGAAGTCACCGGTCTTCGAGGACACTCCGGCATCCACGGCGCCCCAGGCCTGGAACCTGCCGACGGTGGAGGGGAGCATCCCCGCCCTGACGGGCCTCTGGCGTCTGGCCTCGATGATGGACAACGGCCAGGTGAAGCGACTGGGTCTCTACTCCTTCGCCACCCGCGTGCTCGGCGACCGCAGCACGATGCCCTTCCTCGAGTTCTTCCAGGCCTTCTCGTCGCTGACGGACCAGGAACAGGTCGACGTGTTCATGGGGCGCGACGTGGAGGAGGCCGAGAGGTACACGAGGCAGCGGGCGGAGGCTCTGCGCACGATCCGGCAGCGGCTGGTGCCCGGGGACGGCACCGTGCACCTGGACCCCTCGGTCATCGAGAAGGCCTGCGAGGGCGTGGAGGACCTCCTCGACACGGAATCGGTGACGTTCCGCGCGCAGTTCGCCCAGGGAGTGCTGCCCGACCGGGACCGGACGTTGGTCGTGAACGGCCTGCTCACCGGCTACGGCGTCTACTTCTCACGGTTCGGCTCGTTCGTCGAGGGCACCGACGAATGGTCCCTGCCGGCCGCCCAGCGGGAGCACCTCGCACGCAGGTTCCCCGGCCAGGTCGACCTCAACTCCGTGCTCGGATTCAACTTCAACCTGCACCCCTCGGTGACCCGGCGGGTCGTCAACTACCCCGGCGCGGTGTCGCTCGGCGCCGAGCGGACGGTCTACGGACTGGCGCGTCTGGAGGTCCGCGCGGATCAGGCCACCAGGTCGCTGCGCCTCTGGGACCCTGAGGCGCAGGAAACCCTCGACCTCGTGCCCATGAACTTCATGACCCCGATCGGGGTCCCGCTGCTCTACCGTCTGCTCGAGGCGCTGTCCCCGTCCAACCGCTACCTGTGGAAGCCCCTGGACGACATCAGGGACGCGGGAGGGCCCACGGTGTACGGCGAGACGGCACCCCGGCTGGTCGTGGGTGACGTCGTGGCCGACCGCAGGTCCTGGAACGTGGCCGCGGCCGAGATCCCCATGCTCCAGGATCTGAGCCGGGACGTGCCCGAAGCGCTCGTGGCCTTCGACGCGTGGCGCCTGACGCGGGGCCTTCCCCGCCACGCCTTCGTGCTGTGCCAGACGCCCGAGGAGCGAGACGTCATGGCCGGGCGCAGCCGGAAGGTGACCCGCCAGTGGGCGGACTACGCGCACCTGCGGCGCGCCAGCGTGCACAAGCCGATGTACGTCGACTTCCGGAACCCCTTCCTGGTCCGGAGCTTCGCGAAGTCGGCCCTGTCACGCGGCGATGTCGTCGCGTCGATCCGCGAGTGCCTTCCTTCGGTGGACGACTACGGCCCGGACACGGGCTGGACCGCAGCAGAGGAGTTCTTCGTTGAACTGTGTACCGACAACTAGTGGGCAGACCGGAACGCGTGAGTGGAGGACGGTCCACATCCACGTTCCGCACTCGCTGCACACCCCCTTCCTGTGCGACGTGGTCGAGCCGCTGCTCCGGTCCGAGGGACTCCAGGACCACTTCTTCTTCCTCCGGTACTGGCAGGGCGGCCCCCATCTGCGGCTGCGGATGCTCTGCGGCCCCGGGGCCGGTTCGGCCGAGGCGGCCGAACGGGTCGTCGCGGGTCTGGCACGTGCGATGCCGGAGTTCGGTGCGCAGGCGCGGGAGGAATACGCGCTCGGGCTGACCTTGCAGGACGAGCTCGCCCGCCTGGAGAAGGAGACCTCGGAGGAGGGCCGGCCCATCGGGGCCCTCGACCGGGTGGCGTACGAGCCGGAGTACCGCAAGTACGGGGGAACGGAGGGGCTGCAGATCGCCGAGACCGTATTCCGCAAGTCGTCGGTGGCGGTCCTCGGCCTGCTGGGCGGGCAACCGCGGGCGTGGGTGGACGAGCGCCGGGCACCGATCGGGGAAGCCGCGAGGATCATGGCGATGTTCCTCCACGGCGCAGGCCTCGACCCGCGGGCCGCAGGGCTGTTCCTGCGGGAGTACGAGGACTGGTGGCGTACGTACGCGCCGGATGACATGCAGCGTGCCTGGCCGAAACTGTTCGGCGGCGTCTCGGCACAGATGACGAATCTGTGCGCGGCGGTCTGGCGTGACGGCGCCACGGACGTGTTCCACGACATCAGCGCGGAGGCCGCCGCCCGCGCCCGTTCCGTGTGCGGGGCGGAGCCCGGCGGCGATGTCCGCGACCTCCGGCTCGACGGCACGCCTTACCCGGGCTGTCTCTCGAACTACGTGCACACCACCAACAACCGTCTCGGCCTGGTCCCCGCCGCCGAGGGGCTCGTCGCGTACCTCGTGCGCCGGGGCCTGGAAGCGATGGACGGGTAGGGCCTGTCGTTGTCGGGATCATGCGGGGCCCCGATGCCCCGGTGCCGGAAGCCCGGAGCCTCCGTCCCGCGCAGGCCCTTCCCTGTGGGGCCTGCGCCGGACGGAGAGCTCACCGGTGCGTCACCAGCTCACCGGAAGCTTGTGCAGGCCGTACACCCCCATGCTGTCCTTGAACGGCAGTTCCGCGACCGGGGTGGCCAGCTTCAGTCCGGGAATCCTCGCGAGGAGGGCGTTGAAGACGACCTCCAGCTCCAGTTTCGCGAGGTTCTGGCCGATGCACTGATGGATGCCGTGGCCGAACGCCAGGTGGTGCCTGCCGCCGCGCTCGATGTCCAGCCTGTCCGGGTCGGGGAACACCGCCTCGTCGTGGTTGCCGGAGGCGCTGAGCCCGATGACGCCCTCACCCGCGCGGATCAGCACCCCGCCGATCTCGAGGTCGGCCGTCGCCACCCGGGAGGTCACCTGGTCGGCGATGCTGAAGTAGCGCAGCAGCTCGTCGACGGCCTGCGGGGCCAGACCCGGGTCGGCCCTGAGCTTGGCCAGCTGGTCGGGGTTCTCCAGCAGGCCCACGACGCCGAGGGAGATCATGTTCGCGGTGGTCTCGTGACCGCCGACCAGGAGGACGTTCGCCATCCCGACCAGTTCCCCGTGGTCGAAGGTGCCGGTCTCCCGGTTCTTCACGACGAGCCTGCCGAGCAGATCGTCCCCCGGGTCCGCTTCCTGAGCGGTGACCAGCTCGGAGAAGTACGCGTGGAGCTCTTGATGCGCGCTGTTGCGCTCCTGTGGGTCGGCGTCGACCGACACCAGCTTGTTGGTGCGGTCCTGGAAGAACGCGCGGTCGCTGCGGGGCACCCCGAGCAGCTCGCAGATCACCAGGGACGGCACCGGGAGGGCGAGCGCCTCGACCAGATCGGCCGGCCCCTCGCCGGCGAGCATGGAGGAGATGCACTCGTCCACGATCTCCTCGACGCGGGGACGCAGTTGGAGCACCCGCTTGACGGTGAACTCCGGGATCACCATCTTGCGCTGCGCCGTGTGTACGGGCGGGTCCATGGCGAGCAGCACGGGCCGCATCTGCTCCATGACCTCCGGCGGCGCATCGAAGTGGAGTGGATAGCCCGGGTGGGCCAGGTTCGAGCTCACATGAGGGTCGGCGAGCAACTGTCGTATGTGCTCGTGCCGGGTGAGCAGCCACGCGGTCCGCCCGGAGGCCAGAGTGACCTTGGTGATCGGCTGCTCGGCACGCAGGGACGCGTACTCCTTCGGCGGGTGGAGCGGGCAGGTCCTGGGGTAGGGATACGCGCTGTCCTGTCGGTCCGTCACGGTCTTCTCCGCGGGTAGTCCAGGGAAATCTCCTTACGGACTCCATTCAAGCGGAAGATGATCCACATCGTGGCGTTATCGGTGAATGAGGCCGAACTCACGTGGACCGCAGGGAGGAAAGTCGGCCGCCTTCCCGGCATGGCCCGTCCCGTGCTTTCGGTGGGAGGGGTCGGTGCAGTGCGACATTGCAGTGATCGCGTAATCCGGAATGACCCCTTCCGTGCGGGGATGCGTCGATAGTACGTTGGATTTCATGTGCTCCACATCGTGGAGAGTTCGCTTGCGCCGTCGAAGTCACAGTGTGTGCCGAGGGGGAGTTGGGGCTATGTACCTTTCGATCGTCATGTGGGACCTGAAGAAGTCGGAAGCCACGGTGGAGAGCCTCAGGGAATACCTGCGGGACTATGCCGTGGACGCCTACTCCGCGCTGGACGGAATGCGGCTCAAGGCGTGGTTCTCCGATTCCGCACGTCAGCTGTGGGGTGCGGTCTATCTGTGGGACAGCCCCGAGCAGATGCCCGGCCTGTACAAAGTCAGCCGCGTGATCGATCTCATCGGGTATCCGCCGACTTCGGTCGGTGGTTTCACGCTCGAGGCGACCGCCGAAGGGAAGAGCGTTCACGAGACACTGGCCGGCCTGGGGATCGCCCTGGAGGGCGGAACGCAGTAAGGGCAGGCGTGCGACCGGATGGGCGGGAGGTTTGCCGGGCCGGTCGGTTCGAGGGGCGGTCGAGGGCCGGAGCCGCGCGGGTGGCTTCGCCGCTCCGGCTGGCGGCCGGGTGTGGAGTGCCGCGCGGCGCCGGTGTCGCCCGCGCCGGGCATCACCGTGCTCGCGGAGGGTTCAGGTGTGTCGGACCGGCGCTTTCGCGTGTGTTCGGCACCGTCCGGTGGGCCGGGTGCTGTGCGCGGGCCTCGAGCTCCCGGGCGGGCGCGGTGCGCAGGCCCTCCCCGCCCGGACACGCGTATTCCGCACCGCTTCACGAAGATCATTCGGTGAAGGAGGCGGGGGCGCTCGTGCTAACGTCGTGATCGTGGCCAGCCTTGACATTATTACCGAACGCTCTGATTCTGCCGTACAACGCATCATCGATGTGACAAAGCATTCGAGGTCCGTTGTCCGCACGGTGCTGATCGAGGACATCGAGCCTCTTCTGCAGAGCATCCGTGCCGGAGTGGAATTCATCGAGATCTACGGACTCGACACCGTGCCTGTTCCGGACAGTCTGCTCGCCGAATGTGAACGGCGCAGAATTCCGGTCCGGCTGCTCGCCGCTTCGGTCGCCAATCAGGTCTTCAAGACCGAGAAGAAGCCCAAGGTATTCGGTATCGCCAAGGTCCCGCGGCCTCGTCGCCTGTCGGACCTGTCCGACATGACCGGTGACCTCATCCTGCTCGACGGAGTGAAGATCGTCGGCAATATCGGAGCCATCGTGCGGACCTCGTTCGCGCTCGGGGCCTCGGGAATCGTGCTCGTGGACAGCGATCTCGGCAGTATTGCGGACCGCCGTCTGATCAGGGCGAGCCGGGGCTATGTGTTCTCCCTTCCCATCGTTCTCGCGTCCCGGGCCGAGGCGCTCCAGTACTTCCAGGACAATGCGATGCGCCCGGTGGTGTTCGAGGCCGACGGGGATCTCGGCGTCGCTGATCTCGACGGTATGGACGAGCGACTTGTGCTCATGTTCGGCAGCGAGAGGATCGGCCCGTCGGGCGAGTTCTCCGACATCGCCGCCAAGTCGGTCTCCATTCCGATGAATCCCGCGGCCGAGTCCCTCAACGTATCGGTGTCGGCCGGAATCGCGCTGCACGCGAGGGCCCGCCGTAACCTCTCCCGGTAGTCCCGGCCGCAGAGCCCCGTCAAGGGCCCCGTCCCTCCCCTCCGGGAGGGGCGGGGCCCTTGACGTGCGCCGCCGGGAGCCCTCGCTACCGCGGCCGCCACATCCAGGGCGCCGCGTCCGGACCCAGGCCGGCGACCGAAGGCCGGCCGTCCGGGCCCAGGCGGAGCGAGGCCCCGTCCAGGGCGGCCGGCCCGTCGGTCCGCACGTGGAGCCCGCCGCCCATGAGCAGCTGGACGAGGCCCTCGGCGGTGCGGCCCAGCAGCACGGGCCCCCGCGGCGACGGGGCGGCGTCCACCGNNCCGTACCCGTCGAACCGTGCTCCGGGCACGGCCGTCCCCGCCCGAGCCGTGGTCAGCCCGCTCCCGGCCGCTGCCCGGTAGTAGAGGGACACGGACCCGTCGGGAGCCGGGAGGGCGGCCGGCGCGTGGGCCGGGACGGGCGCCGCCGTGAGCTGTGTGCGAGCCGTGAGCCCGGCGGACGGGGTGTCCTGGGTCCAGTGGTGTACGGCGTGGTGGCCGGCGCCGAAGACGTGGACGCGTCCACCCTCGTCCACGGCGGTGTGCAGGCCGTCCTGCACCTCTCCGCCGCCCATGTCCCGCCACGCGCTCCACCGCCCCGCCCCGTCCCGCACCCGGGTGCTGACGCCCTTCTCGGCGTCGCGTACGAAGAGATGGATCTGCCCGTCCGGAGCGGCGACGGCCACCGGTACGCCCGTGCGACGTACTTCGTCACGTCCGGGAGAAGGAGAGCCCAGGCCGCGCCAGGGGCGGAAGCCCCGGCCCGGGGCGCTCTGCTCCAGCACCACGATCTCGCGCTCGTTGTCGGCGCCGTGCCCGCCGAGTGCGGCGAAGCGGAGCCCGAACAGCAGCAGGCGCCCGTCCCGTGTGGTGGCCGAGCCCAGTGCGGGGGCGAGCGGGCCGCCGCCTAGGTCGTGCGGAGGCCCCCAGGCACCGCTGCCCGGCCCGGTCTCCTGCCACCGCACCACCCGCAGCCCCAGCACGGCATAGGCCGCGAGCCTGCCGTCCGGCTCGGCGGTGAGGACCGTGCGCGTGCCCGGGTAGCGGTGGTGGGTGGAGCGGACCCAGCCCTTGCGGTTGGTGAGCGGGCGGTCGCCGCCGACGTTGTAGTCACCGCAGCCGGACGGATTGCCGCAGTCCCAGTCGGGCGAGCCCCCGTAGGGGACGAGGTGGGCGGCCTTCCTCGCCAGTACCCCCTGCGGCAGGTTCTTCGGCCAGTGCCGGTTGTAGTAACCGCGGTAGGCGACCGTGACGAAGCCCGGTATCCGGCCACCGTCCGCGGTCGCCCGGGCCACCCAGCGGATCATCGCGGCCCACGCGAAGCAGGCCGCGGCCGTGTGGTCGGCGTGGTCGGAGTAGCCGGGCTGTTCGCTGTCCCTGCGGCGGACGGCCTCCGTGCTGTGCTGGATGTCCGGGTCGGGGTCCAGGGTGTGGACGACGGTGGGCCGGTAGCTCTCCATCAGGCCGGTGAGGACCCCGACCAGCCCGTCGTACGTGTACGAGCCGGCGCGCCGCAGCGGCGATCCGTCGGCCACGACCGTACGCAGCACGAGCCGGCGATCCTGCCAGAGACTGGGCAGCCCGAGCCGGTGCCGGCCGGTGTGCATGGCCGTGTTGAGGAAGATCAACTCCACTCTGCGGCCGTCGGCCGCCAGCACGTTCACCTCGGCCCTGTGGTCCGGGCCGAGCTCGGCGACGGAGACCTCCCAGGGGGTGAAGGGCCGCAGCCCGAGCAGGGCG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Regex_Find_And_Replace_on_data_21.fasta Thu Aug 08 11:58:48 2024 +0000 @@ -0,0 +1,119 @@ +>BGC0001472_1 # 312 # 683 # 1 # ID=1_1;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.642 +MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKPNSALRKVARVRLTSG +IEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDTQGVKNRKQARSRYGAK +KEK +>BGC0001472_2 # 686 # 1156 # 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.660 +MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYGAMEGLREKTGADPVI +TLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWVVGYSRARREKTMTERL +MNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW +>BGC0001472_3 # 1195 # 3324 # 1 # ID=1_3;partial=00;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.652 +MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVHDGAATMDWMEQEQER +GITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVLDGAVTVFDGVAGVEPQ +SETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAVPIVMQLPIGAEADFRGV +VDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLLEAVSENDDQMMELYLEGE +EPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGVQPLLDAVVRYLPSPLDVEA +IEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLTFVRIYSGRLEAGTAVLNSVK +GKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTTGETLCDDKNPVILESMDFPAP +VIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEETGQTIIGGMGELHLEVLVDRMK +REFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGGTGQFAKVQIAIEPIEGGDASYEF +VNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMVGVRVTLLDGGYHEVDSSELAFKIA +GSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEVVGDINSRRGQIQAMEERHGARVVKG +LVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEVPRNVAEEIIAKAKGE +>BGC0001472_4 # 3472 # 4665 # 1 # ID=1_4;partial=00;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.642 +MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNEASAFDQIDKAPEERQ +RGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGAILVVAATDGPMPQTKE +HVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEYEFPGDDLPVVKVSALKA +LEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFTITGRGTVVTGRIERGVLK +VNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLLLRGIKREDVERGQVIIKPG +SVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTDVTGVVTLPEGTEMVMPGDNT +LMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK +>BGC0001472_5 # 4869 # 5570 # -1 # ID=1_5;partial=00;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.712 +MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPSAEDLRPVHDLRGTLE +RRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEGFVFAFRSEGAEPGLYR +VTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRADTWAGSHGYRISALRAS +MATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSLLATTYARPPES +>BGC0001472_6 # 5567 # 7195 # -1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.709 +MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDGADRAQVFSGAFAREG +LVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEAMSGEEPDVTPEWAVFL +SRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLREVCPVVTEPAGPPGPGDE +LTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGPYADLSITPCLDCGRHGEA +DLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVIDTATLSTVYRPVAVRPGCP +RCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHYYASNLRLQSQFKDWPSRPHT +PLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFGVKEDETTPERVKRWTAASGNI +GSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPPGDSPCDIIITGDLKKVMTKYGT +FGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDDALARLLGTSPADEPVAAFASLGG +TA +>BGC0001472_7 # 7210 # 7821 # -1 # ID=1_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.712 +MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPHADVVVPVHAGGDPGL +RDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRKRAAQHAGTARPYDMDA +ALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFNLVSGAVSSAVTVSVNRC +PRCGGRFSQARADSAMPVPELLR +>BGC0001472_8 # 7845 # 9191 # -1 # ID=1_8;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.709 +MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQLAYLGVPSRALPNLR +TWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDEMVVAAENDLTEEFVSP +SRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLVPAISVYLHMPYQSKSEE +FIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRLPELVVDPARLDAGVRELH +RVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVAATCDVHPEQALGKIYRELA +SLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDVFGFLLDGERPAYGLEGMPGL +PAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAVKVLVPEAMPVSFVHGERYLGT +PRLYDAPRAMGHTSHAEDAVNPVQQPFA +>BGC0001472_9 # 9238 # 10437 # -1 # ID=1_9;partial=00;start_type=GTG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.621 +MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFATIPQPVGSKYNDTFAP +LIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSDEDSTRADLHRTAERYG +CAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLVSFLNRAAETPQSPPPAL +GAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHFEAIYLRTKDPQSFDAACA +RFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESSHLAENFSDGSVVNAGHTLE +DLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQTSLLYSCLYTLGFSLAERYVF +CYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE +>BGC0001472_10 # 10511 # 10654 # 1 # ID=1_10;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.590 +MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSSSS +>BGC0001472_11 # 10977 # 13634 # 1 # ID=1_11;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.701 +MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGKNAVGLADRLGELVPT +LGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVETWCALGVRAEQCERAG +REELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLVAGDEASALKPSKARLRE +SSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGGTQESVTTLNRLLVNWGPP +GLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGRMATERVLRVRREGLFDALL +AAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGILFFRPEIDDHDPDYSMKLDRV +LAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLDSAYAAIGGIAELCKVSPPPEE +VLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLASMMDNGQVKRLGLYSFATRVLG +DRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERYTRQRAEALRTIRQRLVPGDGTVH +LDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDRDRTLVVNGLLTGYGVYFSRFGSFV +EGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFNLHPSVTRRVVNYPGAVSLGAERTVY +GLARLEVRADQATRSLRLWDPEAQETLDLVPMNFMTPIGVPLLYRLLEALSPSNRYLWKP +LDDIRDAGGPTVYGETAPRLVVGDVVADRRSWNVAAAEIPMLQDLSRDVPEALVAFDAWR +LTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQWADYAHLRRASVHKPMYVDFRNPFLVRS +FAKSALSRGDVVASIRECLPSVDDYGPDTGWTAAEEFFVELCTDN +>BGC0001472_12 # 13612 # 14571 # 1 # ID=1_12;partial=00;start_type=TTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.706 +MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGLQDHFFFLRYWQGGPH +LRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLTLQDELARLEKETSEEG +RPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLGGQPRAWVDERRAPIGEA +ARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWPKLFGGVSAQMTNLCAAVW +RDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPYPGCLSNYVHTTNNRLGLVP +AAEGLVAYLVRRGLEAMDG +>BGC0001472_13 # 14692 # 15894 # -1 # ID=1_13;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.685 +MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWLLTRHEHIRQLLADPH +VSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVIPEFTVKRVLQLRPRVE +EIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDRAFFQDRTNKLVSVDADP +QERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGTFDHGELVGMANVLLVGGH +ETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRYFSIADQVTSRVATADLEIG +GVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHLAFGHGIHQCIGQNLAKLELE +VVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVSW +>BGC0001472_14 # 16220 # 16564 # 1 # ID=1_14;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.626 +MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFSDSARQLWGAVYLWDS +PEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAGLGIALEGGTQ +>BGC0001472_15 # 17019 # 17729 # 1 # ID=1_15;partial=00;start_type=GTG;rbs_motif=3Base/5BMM;rbs_spacer=13-15bp;gc_cont=0.636 +MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPVRLLAASVANQVFKTE +KKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVRTSFALGASGIVLVDSD +LGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVVFEADGDLGVADLDGMDE +RLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSAGIALHARARRNLSR +>BGC0001472_16 # 17815 # 19485 # -1 # ID=1_16;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.756 +ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAMHTGRHRLGLPSLWQD +RRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHTLDPDPDIQHSTEAVRR +RDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFVTVAYRGYYNRHWPKNLP +QGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNRKGWVRSTHHRYPGTRTVL +TAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGPLAPALGSATTRDGRLLLFG +LRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSPGRDEVRRTGVPVAVAAPDGQ +IHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLHTAVDEGGRVHVFGAGHHAVHH +WTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSLYYRAAAGSGLTTARAGTAVPGA +RFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLHVRTDGPAALDGASLRLGPDGRPS +VAGLGPDAAPWMWRPR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Sanntis_output_data.genbank Thu Aug 08 11:58:48 2024 +0000 @@ -0,0 +1,479 @@ +LOCUS BGC0001472 19486 bp DNA UNK 01-JAN-1980 +DEFINITION BGC0001472. +ACCESSION BGC0001472 +VERSION BGC0001472 +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + CDS 312..683 + /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP + NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT + QGVKNRKQARSRYGAKKEK" + /protein_id="BGC0001472_1" + CDS 686..1156 + /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG + AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV + VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW" + /protein_id="BGC0001472_2" + CDS 1195..3324 + /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH + DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL + DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV + PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL + EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV + QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT + FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT + GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE + TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG + TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV + GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV + VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV + PRNVAEEIIAKAKGE" + /protein_id="BGC0001472_3" + CDS 3472..4665 + /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE + ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA + ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY + EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT + ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL + LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD + VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK" + /protein_id="BGC0001472_4" + CDS 4869..5570 + /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS + AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG + FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA + DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL + LATTYARPPES" + /protein_id="BGC0001472_5" + CDS 5567..7195 + /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG + ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA + MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE + VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP + YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI + DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY + YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG + VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP + GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD + ALARLLGTSPADEPVAAFASLGGTA" + /protein_id="BGC0001472_6" + CDS 7210..7821 + /translation="MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPH + ADVVVPVHAGGDPGLRDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRK + RAAQHAGTARPYDMDAALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFN + LVSGAVSSAVTVSVNRCPRCGGRFSQARADSAMPVPELLR" + /protein_id="BGC0001472_7" + CDS 7845..9191 + /translation="MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQ + LAYLGVPSRALPNLRTWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDE + MVVAAENDLTEEFVSPSRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLV + PAISVYLHMPYQSKSEEFIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRL + PELVVDPARLDAGVRELHRVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVA + ATCDVHPEQALGKIYRELASLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDV + FGFLLDGERPAYGLEGMPGLPAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAV + KVLVPEAMPVSFVHGERYLGTPRLYDAPRAMGHTSHAEDAVNPVQQPFA" + /protein_id="BGC0001472_8" + CDS 9238..10437 + /translation="MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFAT + IPQPVGSKYNDTFAPLIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSD + EDSTRADLHRTAERYGCAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLV + SFLNRAAETPQSPPPALGAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHF + EAIYLRTKDPQSFDAACARFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESS + HLAENFSDGSVVNAGHTLEDLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQT + SLLYSCLYTLGFSLAERYVFCYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE + " + /protein_id="BGC0001472_9" + CDS 10511..10654 + /translation="MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSS + SS" + /protein_id="BGC0001472_10" + CDS 10977..13634 + /translation="MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGK + NAVGLADRLGELVPTLGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVE + TWCALGVRAEQCERAGREELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLV + AGDEASALKPSKARLRESSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGG + TQESVTTLNRLLVNWGPPGLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGR + MATERVLRVRREGLFDALLAAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGIL + FFRPEIDDHDPDYSMKLDRVLAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLD + SAYAAIGGIAELCKVSPPPEEVLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLA + SMMDNGQVKRLGLYSFATRVLGDRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERY + TRQRAEALRTIRQRLVPGDGTVHLDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDR + DRTLVVNGLLTGYGVYFSRFGSFVEGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFN + LHPSVTRRVVNYPGAVSLGAERTVYGLARLEVRADQATRSLRLWDPEAQETLDLVPMNF + MTPIGVPLLYRLLEALSPSNRYLWKPLDDIRDAGGPTVYGETAPRLVVGDVVADRRSWN + VAAAEIPMLQDLSRDVPEALVAFDAWRLTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQ + WADYAHLRRASVHKPMYVDFRNPFLVRSFAKSALSRGDVVASIRECLPSVDDYGPDTGW + TAAEEFFVELCTDN" + /protein_id="BGC0001472_11" + CDS 13612..14571 + /translation="MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGL + QDHFFFLRYWQGGPHLRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLT + LQDELARLEKETSEEGRPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLG + GQPRAWVDERRAPIGEAARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWP + KLFGGVSAQMTNLCAAVWRDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPY + PGCLSNYVHTTNNRLGLVPAAEGLVAYLVRRGLEAMDG" + /protein_id="BGC0001472_12" + CDS 14692..15894 + /translation="MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWL + LTRHEHIRQLLADPHVSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVI + PEFTVKRVLQLRPRVEEIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDR + AFFQDRTNKLVSVDADPQERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGT + FDHGELVGMANVLLVGGHETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRY + FSIADQVTSRVATADLEIGGVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHL + AFGHGIHQCIGQNLAKLELEVVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVS + W" + /protein_id="BGC0001472_13" + CDS 16220..16564 + /translation="MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFS + DSARQLWGAVYLWDSPEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAG + LGIALEGGTQ" + /protein_id="BGC0001472_14" + CDS 17019..17729 + /translation="MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPV + RLLAASVANQVFKTEKKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVR + TSFALGASGIVLVDSDLGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVV + FEADGDLGVADLDGMDERLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSA + GIALHARARRNLSR" + /protein_id="BGC0001472_15" + CDS 17815..19485 + /translation="ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAM + HTGRHRLGLPSLWQDRRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHT + LDPDPDIQHSTEAVRRRDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFV + TVAYRGYYNRHWPKNLPQGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNR + KGWVRSTHHRYPGTRTVLTAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGP + LAPALGSATTRDGRLLLFGLRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSP + GRDEVRRTGVPVAVAAPDGQIHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLH + TAVDEGGRVHVFGAGHHAVHHWTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSL + YYRAAAGSGLTTARAGTAVPGARFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLH + VRTDGPAALDGASLRLGPDGRPSVAGLGPDAAPWMWRPR" + /protein_id="BGC0001472_16" +ORIGIN + 1 gccccggggg ccgtcgctcc gggggtcggt cctgcccggt ggcgcaggac cacgggggcc + 61 ggggcccggg ggtggacggc atttgttttg acccagctcc gtgaggtagg tacgctcaag + 121 ccttgtgcct ggggtgtgcc tgggctcggg tgcgtgtcct caaccgcatg gcgagtccgt + 181 aagtggccac cgcaatctgt gttccgtctg ccttccagca ggggcgtgca gtattcgaca + 241 cacccgaccg cgtgggtcgg tgactgttcc aggttagttt caccgaacgg cacacagaaa + 301 ccggagaagt agtgcctacg atccagcagc tggtccggaa gggccggcag gacaaggtcg + 361 agaagaacaa gacgcccgcg ctcgagggtt cgccccagcg tcgtggtgtc tgcacgcgtg + 421 tgttcacgac caccccgaag aagccgaact cggcgctccg taaggtcgcg cgtgtgcgtc + 481 tgacctccgg tatcgaggtc acggcctaca tcccgggtga ggggcacaac ctgcaggagc + 541 actccatcgt gctcgtgcgt ggtggccgtg tgaaggacct gccgggtgtt cgttacaaga + 601 tcatccgcgg ttcgctcgac acccagggtg tcaagaaccg caagcaggcc cgcagccgct + 661 acggcgccaa gaaggagaag taagaatgcc tcgtaagggc cccgccccga agcgcccggt + 721 catcatcgac ccggtctaca gctctcctct tgtcacctcg ctgatcaaca agatcctgct + 781 cgacggcaag cgttccaccg ccgagcggat cgtgtacggc gccatggaag gcctccgcga + 841 gaagaccggc gctgacccgg tcatcacgct gaagcgcgcg cttgagaacg tcaagccctc + 901 gctcgaggtc aagtcccgcc gtgtcggtgg cgccacctac caggtgccga tcgaggtcaa + 961 gcccggtcgc gccgccaccc tcgctctgcg ctgggtcgtg ggttactccc gcgcccgtcg + 1021 cgagaagacc atgaccgagc gcctcatgaa cgagctgctc gacgcctcca acggtcttgg + 1081 cgctgccgtc aagaagcgcg aggacaccca caagatggcc gagtcgaaca aggccttcgc + 1141 gcactaccgc tggtagtcgc tcaccccatc gagaccgaga gaagattgag ccttatggcc + 1201 accacttcgc ttgacctggc caaggtccgc aacatcggga tcatggccca catcgacgcg + 1261 ggcaagacga ccaccaccga gcggatcctc ttctacaccg gcgtttcgta caagatcggt + 1321 gaagtccacg acggcgcagc cacgatggac tggatggagc aggagcagga gcgcggcatc + 1381 acgatcacgt ccgccgcgac gacctgtcac tggccgctca atgatgttga ccacaccatc + 1441 aacatcatcg acaccccggg tcacgtcgac ttcaccgtcg aggtggagcg ttcgctccgc + 1501 gtcctcgacg gtgccgtcac cgtgttcgac ggtgtggccg gcgtcgagcc ccagtccgag + 1561 accgtctggc gtcaggcgga ccgctacggc gtgccgcgta tctgcttcgt caacaagctc + 1621 gaccgcacgg gcgccgactt cctccgttgc gtcgacatga tcgtccagcg cctcggcgct + 1681 gtcccgatcg tcatgcagct ccccatcggt gcggaggctg acttccgcgg cgtcgtcgac + 1741 ctcgtgtcga tgaaggcctt cgtttacccc gaagaggccg tcaagggcga gatgtacgac + 1801 accgtcgaga tcccggacaa cctcaaggag gccgccgagg aatggcgcgg caagctcctc + 1861 gaggccgtct cggagaacga cgaccagatg atggagctgt acctcgaggg cgaagagccc + 1921 accgaggagc agctgcacga ggcgatccgt cggatcaccc tcgcgtcgaa gggctcggcc + 1981 gactccgtca ccgtgacccc cgtcttctgt ggcacggcgt tcaagaacaa gggcgtccag + 2041 cccctgctcg acgccgtcgt ccgctacctg ccttcccccc tggacgtcga ggccatcgag + 2101 ggccacgacg tcaaggaccc ggagaaggtc gtccagcgga agccctcgga cgacgagccg + 2161 ttctccggcc tggcgttcaa gatcgcgagc gacccgcacc tcggcaagct caccttcgtc + 2221 cggatctact ccggtcgcct cgaggccggc accgcggtgc tgaactcggt caagggcaag + 2281 aaggagcgca tcggcaagat ctaccgcatg cacgcgaaca agcgtgagga gatcccgtcg + 2341 gtgggcgccg gtgacatcgt cgccgtcatg ggcctgaagc agaccaccac cggtgagacg + 2401 ctgtgtgacg acaagaaccc ggtgatcctg gagtccatgg acttcccggc gccggtcatc + 2461 caggtcgcca tcgagcccaa gtccaagggt gaccaggaga agctgggtgt cgccatccag + 2521 cgcctctcgg aggaggaccc ctccttccag gtgcactccg acgaggagac cggccagacc + 2581 atcatcggtg gtatgggcga gcttcacctc gaggtgctcg tcgaccgcat gaagcgcgag + 2641 ttccgcgtcg aggcgaacgt cggcaagccg caggtcgcgt accgtgagac gatccgcaag + 2701 gccgtcgagc gtatcgacta cacgcacaag aagcagactg gtggtaccgg ccagttcgcg + 2761 aaggtgcaga tcgccatcga gcccatcgag ggtggcgacg cgtcctacga gttcgtcaac + 2821 aaggtcaccg gtggccgcat cccccgtgag tacattccct cggtggacgc gggtgcccag + 2881 gaagccatgc agttcggcat cctggccggc tacgagatgg tgggcgtccg cgtcaccctt + 2941 ctcgacggtg gttaccacga ggtcgactcc tcggagctcg ccttcaagat cgctggttcg + 3001 caggcgttca aggagggtgc ccgcaaggcg tcccccgtgc tcctcgagcc gatgatggcc + 3061 gtcgaggtca ccacacccga ggactacatg ggtgaagtgg tcggcgacat caactcccgc + 3121 cgtggccaga tccaggccat ggaggagcgc cacggcgctc gcgtcgtgaa gggcctcgtg + 3181 cccctctcgg agatgttcgg ctacgtcgga gacctccgca gcaagacctc gggtcgcgca + 3241 agctactcga tgcagttcga ctcctacgcc gaggttccgc ggaacgtcgc cgaggagatc + 3301 atcgcgaagg ccaagggcga gtaactcttc cgagctcacg ctttaggctt gtcaccggag + 3361 cccggtcggg catgcgtcgc agtgcggcgg atgcccccgg caccggcatt ccagcaaaga + 3421 tcacctggcg ccgatgaagc aaggcgtaca gaaccactca ggaggacccc agtggcgaag + 3481 gcaaagttcg agcggactaa gccgcacgtc aacatcggca ccatcggtca catcgaccac + 3541 ggtaagacga ccctcacggc cgccattacc aaggtgctgc acgacgcgta cccggacctg + 3601 aacgaggcct cggccttcga ccagatcgac aaggctcctg aggagcgtca gcgcggtatc + 3661 acgatctcga tcgcgcacgt cgagtaccag acggagtcgc gtcactacgc gcacgtcgac + 3721 tgcccgggtc acgctgacta catcaagaac atgatcacgg gtgcggcgca gatggacggc + 3781 gccatcctcg tggtcgcggc caccgacggc ccgatgccgc agaccaagga gcacgtgctc + 3841 ctggcccgcc aggtaggcgt gccgtacatc gtcgtcgcgc tgaacaaggc cgacatggtg + 3901 gacgacgagg agatcctgga gctcgtcgag ctcgaggtcc gtgagctcct ctccgagtac + 3961 gagttcccgg gcgacgacct tccggtcgtc aaggtctcgg cgctcaaggc cctcgagggc + 4021 gacgccgagt ggggccagac cgttctcgac ctgatgaagg ccgtcgacga gtccatcccg + 4081 cagcccgagc gtgacgtcga gaagccgttc ctcatgccca tcgaggacgt cttcacgatc + 4141 accggtcgcg gtacggtcgt caccggccgc atcgagcgtg gtgtcctgaa ggtcaacgag + 4201 accgtcgaca tcgtcggtat caagaccgag aagaccacca ccacggtcac cggcatcgag + 4261 atgttccgca agctgctcga cgagggccag gccggtgaga acgtcggtct gctgcttcgt + 4321 ggcatcaagc gcgaggacgt cgagcgcggc caggtcatca tcaagccggg ttcggtcacg + 4381 ccgcacaccg agttccaggc ccaggcctac atcctgtcga aggacgaggg tggccgtcac + 4441 acccccttct tcaacaacta ccgcccgcag ttctacttcc gtaccacgga cgtgacgggc + 4501 gttgtgaccc ttcccgaggg caccgagatg gtcatgccgg gtgacaacac cctcatggac + 4561 gtcgcgctga tccagccggt cgccatggaa gagggcctga agttcgccat ccgtgagggt + 4621 ggtcgtacgg tgggcgccgg ccaggtcacc aagatcacca agtaattccg attacttgtg + 4681 ggtcggggta acccggttgc tctgaactga gcgcacagca ccaagcaggg cccgcacggc + 4741 atcacgccgt gcgggccctg tgctgtctcc gcggggctcg cggcggacag gacgacgggc + 4801 gcggcccgca caccctgagg gtgtgcgggc cgtcgtgacg cgggcgccgt tcgcgtcggg + 4861 aggtgcggtc agctctccgg tggccgggcg tatgtggtgg ccagcaggga atggcgggtc + 4921 gccccgtcgc tgtggaccag atggtgcacg gacgacggga cgaagccgcc gaacagtgtg + 4981 ccgacgagcc ccagcgcctg gcacctgaga ttgaggtcgt aggtggccat cgaggcccgc + 5041 agcgcactga tccggtagcc gtgggaaccc gcccaggtgt ccgcccggtc cagactcgcg + 5101 tagagcgcga cgatccccgc tccggtggag aactcgcgct ggacgccgag gttctcggcc + 5161 ggtccgatct cgtcgagccc ggccaggtag cacgtctcct cggcggtcac ccggtacagc + 5221 cctggttcgg cgccctcgct gcggaacgcg aagacgaagc cctccagcgc gcccgccgag + 5281 gcgtcgagcc cccagtcgtc acggtcccgg cgcagcacgt cacgcagcag cgacaggatc + 5341 acgtccgtgc gtaccggcag tggcgcgtag tgcagcgagg agcggcggcg ttcgagcgtg + 5401 ccccgcaggt cgtggaccgg ccgcaggtcc tcggcagacg gttgcggttc gacgggcacg + 5461 gagtccgcga acggcaccgg ccggcgttcg gccgccaggc cgtccgacgt gggcgtccgg + 5521 gaggtgaagc cctccagcac ggccgtcatg gatgtcacgt cgttcctcat gcggtacctc + 5581 ccagggacgc gaaggccgcc accggctcgt cggcaggcga tgttccgagc agccgggcca + 5641 acgcgtcgtc gtcccagtcc gacctgggtg tgaagcccag gcccaggtgc tgtgcgagct + 5701 cccgcagact ggccaggttg cagcccgcgt ccagaaagac cagcctgaag ccgaacgtgc + 5761 cgtacttggt catgaccttc ttgagatcgc cggtgatgat gatgtcgcac ggcgagtcac + 5821 ccggaggtac ctcgccggag acggtcacga gtgtgtggct gccctgggcg tacgcgtaga + 5881 ccccaggagg catgatccgg tcgtcgcgca ccaccgcgta ggccgtcgtg ctgccgatgt + 5941 ttcccgacgc cgcggtccat cgtttgacgc gctccggagt ggtctcgtcc tccttcaccc + 6001 cgaaggcgac cttcagcagc agtcccaggg agctcagggt gagcggggtg tcgccgtggg + 6061 acgggtcatg ccgttccgac ccggcgagga cggagatgtc cagggcgggg agcggtgtgt + 6121 gcgggcggct cggccagtcc ttgaactgcg actgcaggcg caggttggac gcgtagtagt + 6181 gcgcctggtg gtccttcgga gcgaggaacg cgcgtggagg catcgcgacc gaggcctcgt + 6241 agacggcacc ggcgggtgcc tggggtgcga ccggcccccg cgcgtaggag cagcgagggc + 6301 accccgggcg gacggcaacc ggccggtaga ccgtgctcag ggtcgcggtg tcgatgacgg + 6361 tgaagtcgcc ggggaggtgc gagatcgtgg cccgtgcgag caacgcggtg acgtggtggg + 6421 aggccaggcc gacgaccagg tcgtgcaggt attccggcgg ctcgccggag aggtccgcct + 6481 caccgtgacg gccgcagtcc aggcacggtg tgatcgagag gtcggcgtac gggccgatcg + 6541 tgatcgtccg cgcgtcggcg cgcacccgca gcagcggacg cccgtcctgc cggcaccgtt + 6601 cctcggtggc ggcgagcagg ggtgccgacg cgggcgtctc gaagaacacg gtgagctcgt + 6661 caccggggcc cggcggcccg gccggttccg tgaccaccgg gcacacctcc cgcagcgacc + 6721 ggcgggcccc ggcgaccagg gcggcgtccc cctcgaggcg caccgagcgc gacaccagcc + 6781 gcgccgcggc atccgcccag gacgggttcg atccggtgga gttgcccagc cgggagagga + 6841 agacggccca ttcgggcgtg acgtccggtt cctcgccgga catggcctcc tcgaccgccc + 6901 cggccgtcga cagcaaggcc aggcacttgt agaccgtggc ctcgtcgaat ccggtcttga + 6961 gcgccagttc agtgtggtcg cgggttccgt cgcacgcctc ggtcaggggc accagccctt + 7021 ctcgtgcgaa cgctccggag aacacctgtg cccggtcggc cccgtcgagc acgacggatt + 7081 cccctgccct gcgcagcctc accccgcggc gcaggacggg ccgggcaggg acggtcaact + 7141 gcatgtccgt acgtgcggcc cgcccgatct gctcggcctt catctcggca accatgatcg + 7201 atctccctct taccgcagca gttccggaac aggcatggcg ctgtccgcgc gcgcctgcga + 7261 gaaccggccc ccgcaccggg ggcaccggtt caccgagacc gtcaccgcgg acgagacggc + 7321 tccggagacg agattgaacg tacgtacggt gcccccgatc cccgtcacac ccgtggcgat + 7381 ctcggtcagg gcgaggtcca ggaggccgga ggcgacggac aggtgctgcc ggccgaaccc + 7441 ctcgggcagg ccggacaggg cggcgtccat gtcgtacggg cgggcggtgc ccgcgtgttg + 7501 ggcggcgcgc ttcctgtagc acgcgtagca ggccgtccgg cccggaacca cgacgggtcc + 7561 gcagaggacc ttggtcggca gcagctggag cccgacggac ggtgtgctcc gttccgcgca + 7621 gatccggtcc gtctcgtcgc ggagcccggg gtcgccgccg gcatgcacgg ggacgacgac + 7681 atccgcgtgc ggtgccgtgt cgttcaggaa gtcctcgaag gacacctgta gctccggggg + 7741 gacaccacgg tgctccgtga gccgccggct gaaggtgtca cccacgaggt acaggcactg + 7801 cggacgcgga tcgtgcgaca tgctgctctg ctccttcttc tcgctcatgc gaacggctgc + 7861 tgcacggggt tgaccgcgtc ctcggcatgg ctcgtgtgcc ccatcgcgcg cggcgcgtca + 7921 tagaggcgcg gggtgcccag gtaccgctcg ccgtgtacga acgacacggg catggcctcg + 7981 ggcacgagca ccttgaccgc ccgcatgccc acctggcgcg cctcgtccgt ggtgatgtcc + 8041 gtgacgagca cctcggcacc ccgtgccgcg agccgggcga cgacggtgtc cagcgggtcc + 8101 gctcccgccg gaagccccgg catgccttcg aggccgtacg ccggccgttc accgtccagc + 8161 aggaagccga acacgtcccg ccggtcgcgc gtcgcgttgt ggaccgcgcc gccgacgacg + 8221 ctcaccttgg ccggatcggg ctcgcgcccg gcgtacgccg agaggtatcc gcgcaacgcc + 8281 acccgcaggg aggccagttc gcggtagatc ttgcccagcg cctgttccgg atggacgtcg + 8341 caggtggcgg cgacgatctg ggcgagcgcg gggtcggcgt cggagagctg cacggcgtag + 8401 atcaccggga cgccgaagtc cgtcgtcgcg tcgaacagcc tcacccgcag gtccgtggag + 8461 gtgccgacgc ggtgcagttc ccggacgccg gcgtcgagcc gcgccggatc cacgaccagc + 8521 tcgggcagcc gcagctgctg gagccagacc agcgcgatgg cgtcccgctc gacgacttcc + 8581 agcaacccgc cgagcacggc gctgcgtacg tcggagtgga cggcggcacc cgtggtgatc + 8641 ccgcggatga actcttcgga cttcgactgg tagggcatgt gcagatacac ggagatcgcc + 8701 ggcacgagca ccggtatccg ccgggtgagc gaccatgcgc gcacccatcg gatcgggacc + 8761 gaggggtcgt acgcggacag gctgcagtcg tcgcgggcca gctcggtggg cgagcagctg + 8821 ggccaccggg acggcgacac gaactcctct gtgaggtcgt tctcggcggc gaccaccatc + 8881 tcgtcgtcgt cccaggcaca cgtggagtac cgttcgagcg cttccgcgat ggagaccagc + 8941 ttggcgcgct cgggcgtgag cccggtgccc gcgccgtcgg aatttccggt gtcctcgtcg + 9001 tgggcccacg tccgcaggtt gggaagagca cgggacggga caccgagata ggccagctgg + 9061 acggcgaagg gcggttcgcc ctcgcgcacc ggcagcggcg cggtgcgtga caccagtccg + 9121 tagggcgaga cgagctcctc gagaccgcgc agctcggccg agatgtgctc ctgaggcctg + 9181 ttcagatgca tgtggttctt cttcccgcgt ccggatgttc cccgtgtcct tgccgcgcta + 9241 ctcggcggtc ttcgtggatc ccgacgccat gctcctggcc aggccgtcga gctcgtcctg + 9301 gagctccttc atggactttc cgcagacgtc ttcgttcgca cgtgcgacga cgtagcagaa + 9361 gacgtacctc tccgccaggc tgaaaccgag ggtgtacagg cagctgtaca gcaggctggt + 9421 ctgcagccgg aaggccagga agtcggcgtc gcggtgcatc aggcggtcca gctcggggct + 9481 cggtggcgta tggaagcgcg tcggctcgac cggggcgcct cgcttgcgaa cgaggtcttc + 9541 gagcgtgtgt ccggcgttga cgacggatcc gtccgagaaa ttttcggcca ggtggcttga + 9601 ttcagacgtg atggatttcg tccaaagccg aaccatctcg tcggccggat cgtcgtccgg + 9661 atcgccgcac gccgtgatga attcccggac gccggcccct acctgttcgt agaaccgggc + 9721 gcaggcggcg tcgaaggatt gcggatcctt ggtgcgcaga taaatcgcct cgaaatgcga + 9781 acggtaactc agcaggcgta gggaaagcag ctcacggaat tcgtatccgt cgatttctct + 9841 ctgcggtgat ctcagaagag tggctctggt gtgcgcggcc atgaggcgga tggcaccgag + 9901 tgccggcgga ggcgattgag gtgtctctgc cgcacgattc aagaatgaga cgagagtcgg + 9961 tgcggcctcc tggaagagcc gcttggagga agcggagaat ccggttccgg taaaccccgc + 10021 gttccacaac ggtgagggaa ccgaatcaag gggcgtggcg tccacctgtg cggcgcaccc + 10081 atatcgttcg gccgttctat gcaggtcagc gcgggtggag tcctcgtcgg acaccccttc + 10141 gatgctgatc tgcagggtgt ccgtgccgag ctgcgcgtcc tgggtgcgga cgaagtagta + 10201 agggccgtga ccgcctgctc cacctacccg ctccggcgcg aacaattcgc gaatcagggg + 10261 cgcgaacgtg tcgttgtatt ttgaaccaac tggctgcggg atggtggcaa acaagtgcac + 10321 gaatgaaacc cccgagattt cggccggcgc cagacgggcc gtcacgtgat cttgtaatcg + 10381 accgtaactc agagagatga gaagaaggaa ccccggctcc agtgtgatct gcgtcactcc + 10441 ggaaactgct tgcgttgtga tcgtccgaat gcctagattc gaatcacatt gacgaaaggg + 10501 ggtgtaatca atggagcagc agatcgaact cgatgtgctc gagatttcgg acctcattgc + 10561 aggtgccggg gagaacgatg acctggcgca ggtgatggcc gcctcgtgca cgaccaccag + 10621 tgtttcgacg agttcttcgt cgtcctcgtc ctgaatctag ggaccgggaa acagttgagc + 10681 caccgtcggg gtgttcctcg gcggtggcct tctgcagtcc tggcgtgcct ttttcgattc + 10741 gaggaacgcg ccgtccctgg aggtcgacca gaggtgcggg cgtcgtgccc cggacgacga + 10801 cgggctcgtg ccaagccggc cggccgagag ccggaacaca tgttgtcaag cgcgaactga + 10861 ccggtgcggt ggagtgacag gcgggctccg tgcggcgggg cgcctctccg cggcccggcc + 10921 ccgacgtccg cacgccgtgg tgaaccgggc cggcgtgacg aagttggggg attcctatgg + 10981 gtgtgaacat cagtccgtac gtcgtctatc ggcgcagcag actcccactg ggcgagctcg + 11041 gagggatgtc cttcaccacc gcctggtcgc gcatcgatga actgcacgcc ctgcgggacg + 11101 agatcggcaa gaacgccgtc ggcctggccg accgcctcgg cgagctcgtg cctacgctgg + 11161 gggacgacgt ccgggccgac ctgatcaggc tgcggcgcga cgtgcacaat ctgcggcacg + 11221 accgggcggt ggcgcgactg gagccactgc gtccgcatct cggccgcgag gtggtcgacg + 11281 aggtcgagac ctggtgcgcg ctcggcgtgc gggccgaaca gtgcgagcga gcagggcgcg + 11341 aggagctcga gagtgagaag gcccgggccg ccgacggctt cggcgccctc ttcgagcacg + 11401 atgcgatggc gcgcagcatc caactctccg gcgaccggct gtaccggggc ctgcgcgacc + 11461 tcgtcgcggg cgacgaggcg agcgccctca agccgagcaa ggcccggctg cgggagtctt + 11521 ccctcgtcaa cttcgcctac cgggcgagct tgaagccgtc ccccttcgga cggttcaccg + 11581 agatcggcgc gttccctccg gacgacccgc gccccgcgga tcccggtggc cggcacggcg + 11641 ggacgcagga gtcggtcacg acgctgaacc gtctcctcgt gaactggggg ccccccggcc + 11701 tgccgctcgt accgggcggg atggagccgg ggcacctcgt gctgaactcc acgctgcggg + 11761 ccggcaccga gtacgtcgag tacgtcggtg tcgctcccgg ctcccgtgag gacggccgga + 11821 tggccaccga gagggtgctg cgcgtacgcc gggagggact cttcgacgca ctgctcgcgg + 11881 cgatgcccga aggatcggct ccggcggcca cggtgctgcg cgacctcacc gccgtcaccg + 11941 ggaaggcgga gacgagccgg aaggtcgtgc aggggctgat ccgggccggc atcctcttct + 12001 tccggccgga gatcgacgat cacgaccccg actactccat gaagctcgac cgcgtactcg + 12061 cggccggcgg gacgccggag acggccgcgc tacgcggaca cttctccgaa ctcaggcggt + 12121 tggagacgga cttctccgag gcggcggccg acgagaggca gaagctgctc gactcggcgt + 12181 acgcggcgat cggcggcatc gccgagctgt gcaaggtgtc cccgcccccc gaggaggtcc + 12241 tgaagtcacc ggtcttcgag gacactccgg catccacggc gccccaggcc tggaacctgc + 12301 cgacggtgga ggggagcatc cccgccctga cgggcctctg gcgtctggcc tcgatgatgg + 12361 acaacggcca ggtgaagcga ctgggtctct actccttcgc cacccgcgtg ctcggcgacc + 12421 gcagcacgat gcccttcctc gagttcttcc aggccttctc gtcgctgacg gaccaggaac + 12481 aggtcgacgt gttcatgggg cgcgacgtgg aggaggccga gaggtacacg aggcagcggg + 12541 cggaggctct gcgcacgatc cggcagcggc tggtgcccgg ggacggcacc gtgcacctgg + 12601 acccctcggt catcgagaag gcctgcgagg gcgtggagga cctcctcgac acggaatcgg + 12661 tgacgttccg cgcgcagttc gcccagggag tgctgcccga ccgggaccgg acgttggtcg + 12721 tgaacggcct gctcaccggc tacggcgtct acttctcacg gttcggctcg ttcgtcgagg + 12781 gcaccgacga atggtccctg ccggccgccc agcgggagca cctcgcacgc aggttccccg + 12841 gccaggtcga cctcaactcc gtgctcggat tcaacttcaa cctgcacccc tcggtgaccc + 12901 ggcgggtcgt caactacccc ggcgcggtgt cgctcggcgc cgagcggacg gtctacggac + 12961 tggcgcgtct ggaggtccgc gcggatcagg ccaccaggtc gctgcgcctc tgggaccctg + 13021 aggcgcagga aaccctcgac ctcgtgccca tgaacttcat gaccccgatc ggggtcccgc + 13081 tgctctaccg tctgctcgag gcgctgtccc cgtccaaccg ctacctgtgg aagcccctgg + 13141 acgacatcag ggacgcggga gggcccacgg tgtacggcga gacggcaccc cggctggtcg + 13201 tgggtgacgt cgtggccgac cgcaggtcct ggaacgtggc cgcggccgag atccccatgc + 13261 tccaggatct gagccgggac gtgcccgaag cgctcgtggc cttcgacgcg tggcgcctga + 13321 cgcggggcct tccccgccac gccttcgtgc tgtgccagac gcccgaggag cgagacgtca + 13381 tggccgggcg cagccggaag gtgacccgcc agtgggcgga ctacgcgcac ctgcggcgcg + 13441 ccagcgtgca caagccgatg tacgtcgact tccggaaccc cttcctggtc cggagcttcg + 13501 cgaagtcggc cctgtcacgc ggcgatgtcg tcgcgtcgat ccgcgagtgc cttccttcgg + 13561 tggacgacta cggcccggac acgggctgga ccgcagcaga ggagttcttc gttgaactgt + 13621 gtaccgacaa ctagtgggca gaccggaacg cgtgagtgga ggacggtcca catccacgtt + 13681 ccgcactcgc tgcacacccc cttcctgtgc gacgtggtcg agccgctgct ccggtccgag + 13741 ggactccagg accacttctt cttcctccgg tactggcagg gcggccccca tctgcggctg + 13801 cggatgctct gcggccccgg ggccggttcg gccgaggcgg ccgaacgggt cgtcgcgggt + 13861 ctggcacgtg cgatgccgga gttcggtgcg caggcgcggg aggaatacgc gctcgggctg + 13921 accttgcagg acgagctcgc ccgcctggag aaggagacct cggaggaggg ccggcccatc + 13981 ggggccctcg accgggtggc gtacgagccg gagtaccgca agtacggggg aacggagggg + 14041 ctgcagatcg ccgagaccgt attccgcaag tcgtcggtgg cggtcctcgg cctgctgggc + 14101 gggcaaccgc gggcgtgggt ggacgagcgc cgggcaccga tcggggaagc cgcgaggatc + 14161 atggcgatgt tcctccacgg cgcaggcctc gacccgcggg ccgcagggct gttcctgcgg + 14221 gagtacgagg actggtggcg tacgtacgcg ccggatgaca tgcagcgtgc ctggccgaaa + 14281 ctgttcggcg gcgtctcggc acagatgacg aatctgtgcg cggcggtctg gcgtgacggc + 14341 gccacggacg tgttccacga catcagcgcg gaggccgccg cccgcgcccg ttccgtgtgc + 14401 ggggcggagc ccggcggcga tgtccgcgac ctccggctcg acggcacgcc ttacccgggc + 14461 tgtctctcga actacgtgca caccaccaac aaccgtctcg gcctggtccc cgccgccgag + 14521 gggctcgtcg cgtacctcgt gcgccggggc ctggaagcga tggacgggta gggcctgtcg + 14581 ttgtcgggat catgcggggc cccgatgccc cggtgccgga agcccggagc ctccgtcccg + 14641 cgcaggccct tccctgtggg gcctgcgccg gacggagagc tcaccggtgc gtcaccagct + 14701 caccggaagc ttgtgcaggc cgtacacccc catgctgtcc ttgaacggca gttccgcgac + 14761 cggggtggcc agcttcagtc cgggaatcct cgcgaggagg gcgttgaaga cgacctccag + 14821 ctccagtttc gcgaggttct ggccgatgca ctgatggatg ccgtggccga acgccaggtg + 14881 gtgcctgccg ccgcgctcga tgtccagcct gtccgggtcg gggaacaccg cctcgtcgtg + 14941 gttgccggag gcgctgagcc cgatgacgcc ctcacccgcg cggatcagca ccccgccgat + 15001 ctcgaggtcg gccgtcgcca cccgggaggt cacctggtcg gcgatgctga agtagcgcag + 15061 cagctcgtcg acggcctgcg gggccagacc cgggtcggcc ctgagcttgg ccagctggtc + 15121 ggggttctcc agcaggccca cgacgccgag ggagatcatg ttcgcggtgg tctcgtgacc + 15181 gccgaccagg aggacgttcg ccatcccgac cagttccccg tggtcgaagg tgccggtctc + 15241 ccggttcttc acgacgagcc tgccgagcag atcgtccccc gggtccgctt cctgagcggt + 15301 gaccagctcg gagaagtacg cgtggagctc ttgatgcgcg ctgttgcgct cctgtgggtc + 15361 ggcgtcgacc gacaccagct tgttggtgcg gtcctggaag aacgcgcggt cgctgcgggg + 15421 caccccgagc agctcgcaga tcaccaggga cggcaccggg agggcgagcg cctcgaccag + 15481 atcggccggc ccctcgccgg cgagcatgga ggagatgcac tcgtccacga tctcctcgac + 15541 gcggggacgc agttggagca cccgcttgac ggtgaactcc gggatcacca tcttgcgctg + 15601 cgccgtgtgt acgggcgggt ccatggcgag cagcacgggc cgcatctgct ccatgacctc + 15661 cggcggcgca tcgaagtgga gtggatagcc cgggtgggcc aggttcgagc tcacatgagg + 15721 gtcggcgagc aactgtcgta tgtgctcgtg ccgggtgagc agccacgcgg tccgcccgga + 15781 ggccagagtg accttggtga tcggctgctc ggcacgcagg gacgcgtact ccttcggcgg + 15841 gtggagcggg caggtcctgg ggtagggata cgcgctgtcc tgtcggtccg tcacggtctt + 15901 ctccgcgggt agtccaggga aatctcctta cggactccat tcaagcggaa gatgatccac + 15961 atcgtggcgt tatcggtgaa tgaggccgaa ctcacgtgga ccgcagggag gaaagtcggc + 16021 cgccttcccg gcatggcccg tcccgtgctt tcggtgggag gggtcggtgc agtgcgacat + 16081 tgcagtgatc gcgtaatccg gaatgacccc ttccgtgcgg ggatgcgtcg atagtacgtt + 16141 ggatttcatg tgctccacat cgtggagagt tcgcttgcgc cgtcgaagtc acagtgtgtg + 16201 ccgaggggga gttggggcta tgtacctttc gatcgtcatg tgggacctga agaagtcgga + 16261 agccacggtg gagagcctca gggaatacct gcgggactat gccgtggacg cctactccgc + 16321 gctggacgga atgcggctca aggcgtggtt ctccgattcc gcacgtcagc tgtggggtgc + 16381 ggtctatctg tgggacagcc ccgagcagat gcccggcctg tacaaagtca gccgcgtgat + 16441 cgatctcatc gggtatccgc cgacttcggt cggtggtttc acgctcgagg cgaccgccga + 16501 agggaagagc gttcacgaga cactggccgg cctggggatc gccctggagg gcggaacgca + 16561 gtaagggcag gcgtgcgacc ggatgggcgg gaggtttgcc gggccggtcg gttcgagggg + 16621 cggtcgaggg ccggagccgc gcgggtggct tcgccgctcc ggctggcggc cgggtgtgga + 16681 gtgccgcgcg gcgccggtgt cgcccgcgcc gggcatcacc gtgctcgcgg agggttcagg + 16741 tgtgtcggac cggcgctttc gcgtgtgttc ggcaccgtcc ggtgggccgg gtgctgtgcg + 16801 cgggcctcga gctcccgggc gggcgcggtg cgcaggccct ccccgcccgg acacgcgtat + 16861 tccgcaccgc ttcacgaaga tcattcggtg aaggaggcgg gggcgctcgt gctaacgtcg + 16921 tgatcgtggc cagccttgac attattaccg aacgctctga ttctgccgta caacgcatca + 16981 tcgatgtgac aaagcattcg aggtccgttg tccgcacggt gctgatcgag gacatcgagc + 17041 ctcttctgca gagcatccgt gccggagtgg aattcatcga gatctacgga ctcgacaccg + 17101 tgcctgttcc ggacagtctg ctcgccgaat gtgaacggcg cagaattccg gtccggctgc + 17161 tcgccgcttc ggtcgccaat caggtcttca agaccgagaa gaagcccaag gtattcggta + 17221 tcgccaaggt cccgcggcct cgtcgcctgt cggacctgtc cgacatgacc ggtgacctca + 17281 tcctgctcga cggagtgaag atcgtcggca atatcggagc catcgtgcgg acctcgttcg + 17341 cgctcggggc ctcgggaatc gtgctcgtgg acagcgatct cggcagtatt gcggaccgcc + 17401 gtctgatcag ggcgagccgg ggctatgtgt tctcccttcc catcgttctc gcgtcccggg + 17461 ccgaggcgct ccagtacttc caggacaatg cgatgcgccc ggtggtgttc gaggccgacg + 17521 gggatctcgg cgtcgctgat ctcgacggta tggacgagcg acttgtgctc atgttcggca + 17581 gcgagaggat cggcccgtcg ggcgagttct ccgacatcgc cgccaagtcg gtctccattc + 17641 cgatgaatcc cgcggccgag tccctcaacg tatcggtgtc ggccggaatc gcgctgcacg + 17701 cgagggcccg ccgtaacctc tcccggtagt cccggccgca gagccccgtc aagggccccg + 17761 tccctcccct ccgggagggg cggggccctt gacgtgcgcc gccgggagcc ctcgctaccg + 17821 cggccgccac atccagggcg ccgcgtccgg acccaggccg gcgaccgaag gccggccgtc + 17881 cgggcccagg cggagcgagg ccccgtccag ggcggccggc ccgtcggtcc gcacgtggag + 17941 cccgccgccc atgagcagct ggacgaggcc ctcggcggtg cggcccagca gcacgggccc + 18001 ccgcggcgac ggggcggcgt ccaccgnncc gtacccgtcg aaccgtgctc cgggcacggc + 18061 cgtccccgcc cgagccgtgg tcagcccgct cccggccgct gcccggtagt agagggacac + 18121 ggacccgtcg ggagccggga gggcggccgg cgcgtgggcc gggacgggcg ccgccgtgag + 18181 ctgtgtgcga gccgtgagcc cggcggacgg ggtgtcctgg gtccagtggt gtacggcgtg + 18241 gtggccggcg ccgaagacgt ggacgcgtcc accctcgtcc acggcggtgt gcaggccgtc + 18301 ctgcacctct ccgccgccca tgtcccgcca cgcgctccac cgccccgccc cgtcccgcac + 18361 ccgggtgctg acgcccttct cggcgtcgcg tacgaagaga tggatctgcc cgtccggagc + 18421 ggcgacggcc accggtacgc ccgtgcgacg tacttcgtca cgtccgggag aaggagagcc + 18481 caggccgcgc caggggcgga agccccggcc cggggcgctc tgctccagca ccacgatctc + 18541 gcgctcgttg tcggcgccgt gcccgccgag tgcggcgaag cggagcccga acagcagcag + 18601 gcgcccgtcc cgtgtggtgg ccgagcccag tgcgggggcg agcgggccgc cgcctaggtc + 18661 gtgcggaggc ccccaggcac cgctgcccgg cccggtctcc tgccaccgca ccacccgcag + 18721 ccccagcacg gcataggccg cgagcctgcc gtccggctcg gcggtgagga ccgtgcgcgt + 18781 gcccgggtag cggtggtggg tggagcggac ccagcccttg cggttggtga gcgggcggtc + 18841 gccgccgacg ttgtagtcac cgcagccgga cggattgccg cagtcccagt cgggcgagcc + 18901 cccgtagggg acgaggtggg cggccttcct cgccagtacc ccctgcggca ggttcttcgg + 18961 ccagtgccgg ttgtagtaac cgcggtaggc gaccgtgacg aagcccggta tccggccacc + 19021 gtccgcggtc gcccgggcca cccagcggat catcgcggcc cacgcgaagc aggccgcggc + 19081 cgtgtggtcg gcgtggtcgg agtagccggg ctgttcgctg tccctgcggc ggacggcctc + 19141 cgtgctgtgc tggatgtccg ggtcggggtc cagggtgtgg acgacggtgg gccggtagct + 19201 ctccatcagg ccggtgagga ccccgaccag cccgtcgtac gtgtacgagc cggcgcgccg + 19261 cagcggcgat ccgtcggcca cgaccgtacg cagcacgagc cggcgatcct gccagagact + 19321 gggcagcccg agccggtgcc ggccggtgtg catggccgtg ttgaggaaga tcaactccac + 19381 tctgcggccg tcggccgcca gcacgttcac ctcggccctg tggtccgggc cgagctcggc + 19441 gacggagacc tcccaggggg tgaagggccg cagcccgagc agggcg +//