Repository 'sanntis_marine'
hg clone https://toolshed.g2.bx.psu.edu/repos/ecology/sanntis_marine

Changeset 1:9d689f8c9ce4 (2024-08-08)
Previous changeset 0:12870a79d56b (2024-07-26)
Commit message:
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/marine_omics commit 9dff0476530d65342db00896f3108edb899e3fd2
modified:
sanntis.xml
added:
test-data/BGC0001472.fna
test-data/Regex_Find_And_Replace_on_data_21.fasta
test-data/Sanntis_output_data.genbank
b
diff -r 12870a79d56b -r 9d689f8c9ce4 sanntis.xml
--- a/sanntis.xml Fri Jul 26 14:31:32 2024 +0000
+++ b/sanntis.xml Thu Aug 08 11:58:48 2024 +0000
[
@@ -2,7 +2,7 @@
     <description>in genomic and metagenomic data</description>
     <macros>
         <token name="@TOOL_VERSION@">0.9.3.5</token>
-        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@VERSION_SUFFIX@">1</token>
     </macros>
     <edam_topics>
         <edam_topic>topic_3387</edam_topic>
@@ -11,21 +11,49 @@
         <requirement type="package" version="@TOOL_VERSION@">sanntis</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        sanntis --ip-file '$input_interpro' --outfile 'output_sanntis.gff' '$input_genbank'
+    #if $selection.which_sanntis == 'sanntis':
+        sanntis --ip-file '$selection.input_interpro' --outfile 'output_sanntis.gff' '$selection.input_genbank'
+    #else:
+        sanntis_build_gb -n '$selection.input_nuc' -a '$selection.input_prot' -o 'output_sanntis_gb.gb'
+    #end if
     ]]></command>
     <inputs>
-        <param name="input_interpro" type="data" format="tabular" label="Input the TSV file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
-        <param name="input_genbank" type="data" format="genbank" label="Input a Genbank .gb file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+        <conditional name="selection">
+            <param name="which_sanntis" type="select" label="Do you want to build a GenBank file or run an SMBGC annotation?" help="If you build a GenBank file, you can then use it as the GenBank input for the annotation.">
+                <option value="sanntis">Run sanntis</option>
+                <option value="genbank">Build genbank</option>
+            </param>
+            <when value="sanntis"> <!-- annotation mode: these inputs are passed to the sanntis command -->
+                <param name="input_interpro" type="data" format="tabular" label="Input the tabular file from InterProScan" help="Before using this tool you need to retrieve the right data by using the InterProScan tool"/>
+                <param name="input_genbank" type="data" format="genbank" label="Input a Genbank file" help="It needs to have the right structure and fit the protein fasta file used in InterProScan"/>
+            </when>
+            <when value="genbank"> <!-- build mode: these inputs are passed to sanntis_build_gb -->
+                <param name="input_nuc" type="data" format="fasta" label="Input a nucleotide fasta file" help="It should be the nucleotide sequence file that the protein fasta file below was predicted from"/>
+                <param name="input_prot" type="data" format="fasta" label="Input a protein fasta file" help="Before using this tool you can get the right protein data by using the Prodigal tool"/>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"/>
+        <data name="output_sanntis" from_work_dir="output_sanntis.gff" format="gff3" label="Sanntis output data"> <!-- GFF3 file written by the sanntis command branch (outfile output_sanntis.gff) -->
+            <filter>selection['which_sanntis'] == 'sanntis'</filter> <!-- output kept only when "Run sanntis" is selected -->
+        </data>
+        <data name="output_sanntis_gb" from_work_dir="output_sanntis_gb.gb" format="genbank" label="Sanntis output data genbank"> <!-- GenBank file written by sanntis_build_gb (-o output_sanntis_gb.gb) -->
+            <filter>selection['which_sanntis'] == 'genbank'</filter> <!-- output kept only when "Build genbank" is selected -->
+        </data>
     </outputs>
     <tests>
         <test expect_num_outputs="1">
+            <param name="which_sanntis" value="sanntis"/>
             <param name="input_interpro" value="BGC0001472.fna.prodigal.faa.ip.tsv"/>
             <param name="input_genbank" value="BGC0001472.fna.prodigal.faa.gb"/>
             <output name="output_sanntis" value="Sanntis_output_data.gff3"/>
         </test>
+        <test expect_num_outputs="1"> <!-- "Build genbank" mode: nucleotide and protein fasta in, one GenBank file out -->
+            <param name="which_sanntis" value="genbank"/> <!-- NOTE(review): addressed by bare name rather than nested under the "selection" conditional; confirm it resolves unambiguously -->
+            <param name="input_nuc" value="BGC0001472.fna"/>
+            <param name="input_prot" value="Regex_Find_And_Replace_on_data_21.fasta"/>
+            <output name="output_sanntis_gb" value="Sanntis_output_data.genbank"/> <!-- compared against test-data/Sanntis_output_data.genbank -->
+        </test>
     </tests>
     <help><![CDATA[
 
@@ -36,6 +64,10 @@
 SMBGC Annotation using Neural Networks Trained on Interpro Signatures
 Tool for identifying biosynthetic gene clusters (BGCs) in genomic & metagenomic data
 
+**Or**
+
+This tool can also build a GenBank file suitable for use as sanntis input.
+
 .....
 
 
b
diff -r 12870a79d56b -r 9d689f8c9ce4 test-data/BGC0001472.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BGC0001472.fna Thu Aug 08 11:58:48 2024 +0000
b
b'@@ -0,0 +1,2 @@\n+>BGC0001472\n+GCCCCGGGGGCCGTCGCTCCGGGGGTCGGTCCTGCCCGGTGGCGCAGGACCACGGGGGCCGGGGCCCGGGGGTGGACGGCATTTGTTTTGACCCAGCTCCGTGAGGTAGGTACGCTCAAGCCTTGTGCCTGGGGTGTGCCTGGGCTCGGGTGCGTGTCCTCAACCGCATGGCGAGTCCGTAAGTGGCCACCGCAATCTGTGTTCCGTCTGCCTTCCAGCAGGGGCGTGCAGTATTCGACACACCCGACCGCGTGGGTCGGTGACTGTTCCAGGTTAGTTTCACCGAACGGCACACAGAAACCGGAGAAGTAGTGCCTACGATCCAGCAGCTGGTCCGGAAGGGCCGGCAGGACAAGGTCGAGAAGAACAAGACGCCCGCGCTCGAGGGTTCGCCCCAGCGTCGTGGTGTCTGCACGCGTGTGTTCACGACCACCCCGAAGAAGCCGAACTCGGCGCTCCGTAAGGTCGCGCGTGTGCGTCTGACCTCCGGTATCGAGGTCACGGCCTACATCCCGGGTGAGGGGCACAACCTGCAGGAGCACTCCATCGTGCTCGTGCGTGGTGGCCGTGTGAAGGACCTGCCGGGTGTTCGTTACAAGATCATCCGCGGTTCGCTCGACACCCAGGGTGTCAAGAACCGCAAGCAGGCCCGCAGCCGCTACGGCGCCAAGAAGGAGAAGTAAGAATGCCTCGTAAGGGCCCCGCCCCGAAGCGCCCGGTCATCATCGACCCGGTCTACAGCTCTCCTCTTGTCACCTCGCTGATCAACAAGATCCTGCTCGACGGCAAGCGTTCCACCGCCGAGCGGATCGTGTACGGCGCCATGGAAGGCCTCCGCGAGAAGACCGGCGCTGACCCGGTCATCACGCTGAAGCGCGCGCTTGAGAACGTCAAGCCCTCGCTCGAGGTCAAGTCCCGCCGTGTCGGTGGCGCCACCTACCAGGTGCCGATCGAGGTCAAGCCCGGTCGCGCCGCCACCCTCGCTCTGCGCTGGGTCGTGGGTTACTCCCGCGCCCGTCGCGAGAAGACCATGACCGAGCGCCTCATGAACGAGCTGCTCGACGCCTCCAACGGTCTTGGCGCTGCCGTCAAGAAGCGCGAGGACACCCACAAGATGGCCGAGTCGAACAAGGCCTTCGCGCACTACCGCTGGTAGTCGCTCACCCCATCGAGACCGAGAGAAGATTGAGCCTTATGGCCACCACTTCGCTTGACCTGGCCAAGGTCCGCAACATCGGGATCATGGCCCACATCGACGCGGGCAAGACGACCACCACCGAGCGGATCCTCTTCTACACCGGCGTTTCGTACAAGATCGGTGAAGTCCACGACGGCGCAGCCACGATGGACTGGATGGAGCAGGAGCAGGAGCGCGGCATCACGATCACGTCCGCCGCGACGACCTGTCACTGGCCGCTCAATGATGTTGACCACACCATCAACATCATCGACACCCCGGGTCACGTCGACTTCACCGTCGAGGTGGAGCGTTCGCTCCGCGTCCTCGACGGTGCCGTCACCGTGTTCGACGGTGTGGCCGGCGTCGAGCCCCAGTCCGAGACCGTCTGGCGTCAGGCGGACCGCTACGGCGTGCCGCGTATCTGCTTCGTCAACAAGCTCGACCGCACGGGCGCCGACTTCCTCCGTTGCGTCGACATGATCGTCCAGCGCCTCGGCGCTGTCCCGATCGTCATGCAGCTCCCCATCGGTGCGGAGGCTGACTTCCGCGGCGTCGTCGACCTCGTGTCGATGAAGGCCTTCGTTTACCCCGAAGAGGCCGTCAAGGGCGAGATGTACGACACCGTCGAGATCCCGGACAACCTCAAGGAGGCCGCCGAGGAATGGCGCGGCAAGCTCCTCGAGGCCGTCTCGGAGAACGACGACCAGATGATGGAGCTGTACCTCGAGGGCGAAGAGCCCACCGAGGAGCAGCTGCACGAGGCGATCCGTCGGATCACCCTCGCGT
CGAAGGGCTCGGCCGACTCCGTCACCGTGACCCCCGTCTTCTGTGGCACGGCGTTCAAGAACAAGGGCGTCCAGCCCCTGCTCGACGCCGTCGTCCGCTACCTGCCTTCCCCCCTGGACGTCGAGGCCATCGAGGGCCACGACGTCAAGGACCCGGAGAAGGTCGTCCAGCGGAAGCCCTCGGACGACGAGCCGTTCTCCGGCCTGGCGTTCAAGATCGCGAGCGACCCGCACCTCGGCAAGCTCACCTTCGTCCGGATCTACTCCGGTCGCCTCGAGGCCGGCACCGCGGTGCTGAACTCGGTCAAGGGCAAGAAGGAGCGCATCGGCAAGATCTACCGCATGCACGCGAACAAGCGTGAGGAGATCCCGTCGGTGGGCGCCGGTGACATCGTCGCCGTCATGGGCCTGAAGCAGACCACCACCGGTGAGACGCTGTGTGACGACAAGAACCCGGTGATCCTGGAGTCCATGGACTTCCCGGCGCCGGTCATCCAGGTCGCCATCGAGCCCAAGTCCAAGGGTGACCAGGAGAAGCTGGGTGTCGCCATCCAGCGCCTCTCGGAGGAGGACCCCTCCTTCCAGGTGCACTCCGACGAGGAGACCGGCCAGACCATCATCGGTGGTATGGGCGAGCTTCACCTCGAGGTGCTCGTCGACCGCATGAAGCGCGAGTTCCGCGTCGAGGCGAACGTCGGCAAGCCGCAGGTCGCGTACCGTGAGACGATCCGCAAGGCCGTCGAGCGTATCGACTACACGCACAAGAAGCAGACTGGTGGTACCGGCCAGTTCGCGAAGGTGCAGATCGCCATCGAGCCCATCGAGGGTGGCGACGCGTCCTACGAGTTCGTCAACAAGGTCACCGGTGGCCGCATCCCCCGTGAGTACATTCCCTCGGTGGACGCGGGTGCCCAGGAAGCCATGCAGTTCGGCATCCTGGCCGGCTACGAGATGGTGGGCGTCCGCGTCACCCTTCTCGACGGTGGTTACCACGAGGTCGACTCCTCGGAGCTCGCCTTCAAGATCGCTGGTTCGCAGGCGTTCAAGGAGGGTGCCCGCAAGGCGTCCCCCGTGCTCCTCGAGCCGATGATGGCCGTCGAGGTCACCACACCCGAGGACTACATGGGTGAAGTGGTCGGCGACATCAACTCCCGCCGTGGCCAGATCCAGGCCATGGAGGAGCGCCACGGCGCTCGCGTCGTGAAGGGCCTCGTGCCCCTCTCGGAGATGTTCGGCTACGTCGGAGACCTCCGCAGCAAGACCTCGGGTCGCGCAAGCTACTCGATGCAGTTCGACTCCTACGCCGAGGTTCCGCGGAACGTCGCCGAGGAGATCATCGCGAAGGCCAAGGGCGAGTAACTCTTCCGAGCTCACGCTTTAGGCTTGTCACCGGAGCCCGGTCGGGCATGCGTCGCAGTGCGGCGGATGCCCCCGGCACCGGCATTCCAGCAAAGATCACCTGGCGCCGATGAAGCAAGGCGTACAGAACCACTCAGGAGGACCCCAGTGGCGAAGGCAAAGTTCGAGCGGACTAAGCCGCACGTCAACATCGGCACCATCGGTCACATCGACCACGGTAAGACGACCCTCACGGCCGCCATTACCAAGGTGCTGCACGACGCGTACCCGGACCTGAACGAGGCCTCGGCCTTCGACCAGATCGACAAGGCTCCTGAGGAGCGTCAGCGCGGTATCACGATCTCGATCGCGCACGTCGAGTACCAGACGGAGTCGCGTCACTACGCGCACGTCGACTGCCCGGGTCACGCTGACTACATCAAGAACATGATCACGGGTGCGGCGCAGATGGACGGCGCCATCCTCGTGGTCGCGGCCACCGACGGCCCGATGCCGCAGACCAAGGAGCACGTGCTCCTGGCCCGCCAGGTAGGCGTGCCGTACATCGTCGTCGCGCTGAACAAGGCCGACATGGTGGACGACGAGGAGATCCTGGAGCTCGTCGAGCTCGAGGTCCGTGAGCTCCTCTCCGAGTACGAGTTC
CCG'..b'GCCCCTCGCCGGCGAGCATGGAGGAGATGCACTCGTCCACGATCTCCTCGACGCGGGGACGCAGTTGGAGCACCCGCTTGACGGTGAACTCCGGGATCACCATCTTGCGCTGCGCCGTGTGTACGGGCGGGTCCATGGCGAGCAGCACGGGCCGCATCTGCTCCATGACCTCCGGCGGCGCATCGAAGTGGAGTGGATAGCCCGGGTGGGCCAGGTTCGAGCTCACATGAGGGTCGGCGAGCAACTGTCGTATGTGCTCGTGCCGGGTGAGCAGCCACGCGGTCCGCCCGGAGGCCAGAGTGACCTTGGTGATCGGCTGCTCGGCACGCAGGGACGCGTACTCCTTCGGCGGGTGGAGCGGGCAGGTCCTGGGGTAGGGATACGCGCTGTCCTGTCGGTCCGTCACGGTCTTCTCCGCGGGTAGTCCAGGGAAATCTCCTTACGGACTCCATTCAAGCGGAAGATGATCCACATCGTGGCGTTATCGGTGAATGAGGCCGAACTCACGTGGACCGCAGGGAGGAAAGTCGGCCGCCTTCCCGGCATGGCCCGTCCCGTGCTTTCGGTGGGAGGGGTCGGTGCAGTGCGACATTGCAGTGATCGCGTAATCCGGAATGACCCCTTCCGTGCGGGGATGCGTCGATAGTACGTTGGATTTCATGTGCTCCACATCGTGGAGAGTTCGCTTGCGCCGTCGAAGTCACAGTGTGTGCCGAGGGGGAGTTGGGGCTATGTACCTTTCGATCGTCATGTGGGACCTGAAGAAGTCGGAAGCCACGGTGGAGAGCCTCAGGGAATACCTGCGGGACTATGCCGTGGACGCCTACTCCGCGCTGGACGGAATGCGGCTCAAGGCGTGGTTCTCCGATTCCGCACGTCAGCTGTGGGGTGCGGTCTATCTGTGGGACAGCCCCGAGCAGATGCCCGGCCTGTACAAAGTCAGCCGCGTGATCGATCTCATCGGGTATCCGCCGACTTCGGTCGGTGGTTTCACGCTCGAGGCGACCGCCGAAGGGAAGAGCGTTCACGAGACACTGGCCGGCCTGGGGATCGCCCTGGAGGGCGGAACGCAGTAAGGGCAGGCGTGCGACCGGATGGGCGGGAGGTTTGCCGGGCCGGTCGGTTCGAGGGGCGGTCGAGGGCCGGAGCCGCGCGGGTGGCTTCGCCGCTCCGGCTGGCGGCCGGGTGTGGAGTGCCGCGCGGCGCCGGTGTCGCCCGCGCCGGGCATCACCGTGCTCGCGGAGGGTTCAGGTGTGTCGGACCGGCGCTTTCGCGTGTGTTCGGCACCGTCCGGTGGGCCGGGTGCTGTGCGCGGGCCTCGAGCTCCCGGGCGGGCGCGGTGCGCAGGCCCTCCCCGCCCGGACACGCGTATTCCGCACCGCTTCACGAAGATCATTCGGTGAAGGAGGCGGGGGCGCTCGTGCTAACGTCGTGATCGTGGCCAGCCTTGACATTATTACCGAACGCTCTGATTCTGCCGTACAACGCATCATCGATGTGACAAAGCATTCGAGGTCCGTTGTCCGCACGGTGCTGATCGAGGACATCGAGCCTCTTCTGCAGAGCATCCGTGCCGGAGTGGAATTCATCGAGATCTACGGACTCGACACCGTGCCTGTTCCGGACAGTCTGCTCGCCGAATGTGAACGGCGCAGAATTCCGGTCCGGCTGCTCGCCGCTTCGGTCGCCAATCAGGTCTTCAAGACCGAGAAGAAGCCCAAGGTATTCGGTATCGCCAAGGTCCCGCGGCCTCGTCGCCTGTCGGACCTGTCCGACATGACCGGTGACCTCATCCTGCTCGACGGAGTGAAGATCGTCGGCAATATCGGAGCCATCGTGCGGACCTCGTTCGCGCTCGGGGCCTCGGGAATCGTGCTCGTGGACAGCGATCTCGGCAGTATTGCGGACCGCCGTCTGATCAGGGCGAGCCGGGGCTATGTGTTCTCCCTTCCCATCGTTCTCGCGTCCCGGGCCGAGGCGCTCCAGTACTTC
CAGGACAATGCGATGCGCCCGGTGGTGTTCGAGGCCGACGGGGATCTCGGCGTCGCTGATCTCGACGGTATGGACGAGCGACTTGTGCTCATGTTCGGCAGCGAGAGGATCGGCCCGTCGGGCGAGTTCTCCGACATCGCCGCCAAGTCGGTCTCCATTCCGATGAATCCCGCGGCCGAGTCCCTCAACGTATCGGTGTCGGCCGGAATCGCGCTGCACGCGAGGGCCCGCCGTAACCTCTCCCGGTAGTCCCGGCCGCAGAGCCCCGTCAAGGGCCCCGTCCCTCCCCTCCGGGAGGGGCGGGGCCCTTGACGTGCGCCGCCGGGAGCCCTCGCTACCGCGGCCGCCACATCCAGGGCGCCGCGTCCGGACCCAGGCCGGCGACCGAAGGCCGGCCGTCCGGGCCCAGGCGGAGCGAGGCCCCGTCCAGGGCGGCCGGCCCGTCGGTCCGCACGTGGAGCCCGCCGCCCATGAGCAGCTGGACGAGGCCCTCGGCGGTGCGGCCCAGCAGCACGGGCCCCCGCGGCGACGGGGCGGCGTCCACCGNNCCGTACCCGTCGAACCGTGCTCCGGGCACGGCCGTCCCCGCCCGAGCCGTGGTCAGCCCGCTCCCGGCCGCTGCCCGGTAGTAGAGGGACACGGACCCGTCGGGAGCCGGGAGGGCGGCCGGCGCGTGGGCCGGGACGGGCGCCGCCGTGAGCTGTGTGCGAGCCGTGAGCCCGGCGGACGGGGTGTCCTGGGTCCAGTGGTGTACGGCGTGGTGGCCGGCGCCGAAGACGTGGACGCGTCCACCCTCGTCCACGGCGGTGTGCAGGCCGTCCTGCACCTCTCCGCCGCCCATGTCCCGCCACGCGCTCCACCGCCCCGCCCCGTCCCGCACCCGGGTGCTGACGCCCTTCTCGGCGTCGCGTACGAAGAGATGGATCTGCCCGTCCGGAGCGGCGACGGCCACCGGTACGCCCGTGCGACGTACTTCGTCACGTCCGGGAGAAGGAGAGCCCAGGCCGCGCCAGGGGCGGAAGCCCCGGCCCGGGGCGCTCTGCTCCAGCACCACGATCTCGCGCTCGTTGTCGGCGCCGTGCCCGCCGAGTGCGGCGAAGCGGAGCCCGAACAGCAGCAGGCGCCCGTCCCGTGTGGTGGCCGAGCCCAGTGCGGGGGCGAGCGGGCCGCCGCCTAGGTCGTGCGGAGGCCCCCAGGCACCGCTGCCCGGCCCGGTCTCCTGCCACCGCACCACCCGCAGCCCCAGCACGGCATAGGCCGCGAGCCTGCCGTCCGGCTCGGCGGTGAGGACCGTGCGCGTGCCCGGGTAGCGGTGGTGGGTGGAGCGGACCCAGCCCTTGCGGTTGGTGAGCGGGCGGTCGCCGCCGACGTTGTAGTCACCGCAGCCGGACGGATTGCCGCAGTCCCAGTCGGGCGAGCCCCCGTAGGGGACGAGGTGGGCGGCCTTCCTCGCCAGTACCCCCTGCGGCAGGTTCTTCGGCCAGTGCCGGTTGTAGTAACCGCGGTAGGCGACCGTGACGAAGCCCGGTATCCGGCCACCGTCCGCGGTCGCCCGGGCCACCCAGCGGATCATCGCGGCCCACGCGAAGCAGGCCGCGGCCGTGTGGTCGGCGTGGTCGGAGTAGCCGGGCTGTTCGCTGTCCCTGCGGCGGACGGCCTCCGTGCTGTGCTGGATGTCCGGGTCGGGGTCCAGGGTGTGGACGACGGTGGGCCGGTAGCTCTCCATCAGGCCGGTGAGGACCCCGACCAGCCCGTCGTACGTGTACGAGCCGGCGCGCCGCAGCGGCGATCCGTCGGCCACGACCGTACGCAGCACGAGCCGGCGATCCTGCCAGAGACTGGGCAGCCCGAGCCGGTGCCGGCCGGTGTGCATGGCCGTGTTGAGGAAGATCAACTCCACTCTGCGGCCGTCGGCCGCCAGCACGTTCACCTCGGCCCTGTGGTCCGGGCCGAGCTCGGCGACGGAGACCTCCCAGGGGGTGAAGGGCCGCAGCCCGAGC
AGGGCG\n'
b
diff -r 12870a79d56b -r 9d689f8c9ce4 test-data/Regex_Find_And_Replace_on_data_21.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Regex_Find_And_Replace_on_data_21.fasta Thu Aug 08 11:58:48 2024 +0000
b
@@ -0,0 +1,119 @@
+>BGC0001472_1 # 312 # 683 # 1 # ID=1_1;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.642
+MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKPNSALRKVARVRLTSG
+IEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDTQGVKNRKQARSRYGAK
+KEK
+>BGC0001472_2 # 686 # 1156 # 1 # ID=1_2;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.660
+MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYGAMEGLREKTGADPVI
+TLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWVVGYSRARREKTMTERL
+MNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW
+>BGC0001472_3 # 1195 # 3324 # 1 # ID=1_3;partial=00;start_type=ATG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.652
+MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVHDGAATMDWMEQEQER
+GITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVLDGAVTVFDGVAGVEPQ
+SETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAVPIVMQLPIGAEADFRGV
+VDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLLEAVSENDDQMMELYLEGE
+EPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGVQPLLDAVVRYLPSPLDVEA
+IEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLTFVRIYSGRLEAGTAVLNSVK
+GKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTTGETLCDDKNPVILESMDFPAP
+VIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEETGQTIIGGMGELHLEVLVDRMK
+REFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGGTGQFAKVQIAIEPIEGGDASYEF
+VNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMVGVRVTLLDGGYHEVDSSELAFKIA
+GSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEVVGDINSRRGQIQAMEERHGARVVKG
+LVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEVPRNVAEEIIAKAKGE
+>BGC0001472_4 # 3472 # 4665 # 1 # ID=1_4;partial=00;start_type=GTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.642
+MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNEASAFDQIDKAPEERQ
+RGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGAILVVAATDGPMPQTKE
+HVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEYEFPGDDLPVVKVSALKA
+LEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFTITGRGTVVTGRIERGVLK
+VNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLLLRGIKREDVERGQVIIKPG
+SVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTDVTGVVTLPEGTEMVMPGDNT
+LMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK
+>BGC0001472_5 # 4869 # 5570 # -1 # ID=1_5;partial=00;start_type=ATG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.712
+MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPSAEDLRPVHDLRGTLE
+RRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEGFVFAFRSEGAEPGLYR
+VTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRADTWAGSHGYRISALRAS
+MATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSLLATTYARPPES
+>BGC0001472_6 # 5567 # 7195 # -1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.709
+MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDGADRAQVFSGAFAREG
+LVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEAMSGEEPDVTPEWAVFL
+SRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLREVCPVVTEPAGPPGPGDE
+LTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGPYADLSITPCLDCGRHGEA
+DLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVIDTATLSTVYRPVAVRPGCP
+RCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHYYASNLRLQSQFKDWPSRPHT
+PLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFGVKEDETTPERVKRWTAASGNI
+GSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPPGDSPCDIIITGDLKKVMTKYGT
+FGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDDALARLLGTSPADEPVAAFASLGG
+TA
+>BGC0001472_7 # 7210 # 7821 # -1 # ID=1_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.712
+MSHDPRPQCLYLVGDTFSRRLTEHRGVPPELQVSFEDFLNDTAPHADVVVPVHAGGDPGL
+RDETDRICAERSTPSVGLQLLPTKVLCGPVVVPGRTACYACYRKRAAQHAGTARPYDMDA
+ALSGLPEGFGRQHLSVASGLLDLALTEIATGVTGIGGTVRTFNLVSGAVSSAVTVSVNRC
+PRCGGRFSQARADSAMPVPELLR
+>BGC0001472_8 # 7845 # 9191 # -1 # ID=1_8;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=11-12bp;gc_cont=0.709
+MHLNRPQEHISAELRGLEELVSPYGLVSRTAPLPVREGEPPFAVQLAYLGVPSRALPNLR
+TWAHDEDTGNSDGAGTGLTPERAKLVSIAEALERYSTCAWDDDEMVVAAENDLTEEFVSP
+SRWPSCSPTELARDDCSLSAYDPSVPIRWVRAWSLTRRIPVLVPAISVYLHMPYQSKSEE
+FIRGITTGAAVHSDVRSAVLGGLLEVVERDAIALVWLQQLRLPELVVDPARLDAGVRELH
+RVGTSTDLRVRLFDATTDFGVPVIYAVQLSDADPALAQIVAATCDVHPEQALGKIYRELA
+SLRVALRGYLSAYAGREPDPAKVSVVGGAVHNATRDRRDVFGFLLDGERPAYGLEGMPGL
+PAGADPLDTVVARLAARGAEVLVTDITTDEARQVGMRAVKVLVPEAMPVSFVHGERYLGT
+PRLYDAPRAMGHTSHAEDAVNPVQQPFA
+>BGC0001472_9 # 9238 # 10437 # -1 # ID=1_9;partial=00;start_type=GTG;rbs_motif=AGxAG;rbs_spacer=5-10bp;gc_cont=0.621
+MTQITLEPGFLLLISLSYGRLQDHVTARLAPAEISGVSFVHLFATIPQPVGSKYNDTFAP
+LIRELFAPERVGGAGGHGPYYFVRTQDAQLGTDTLQISIEGVSDEDSTRADLHRTAERYG
+CAAQVDATPLDSVPSPLWNAGFTGTGFSASSKRLFQEAAPTLVSFLNRAAETPQSPPPAL
+GAIRLMAAHTRATLLRSPQREIDGYEFRELLSLRLLSYRSHFEAIYLRTKDPQSFDAACA
+RFYEQVGAGVREFITACGDPDDDPADEMVRLWTKSITSESSHLAENFSDGSVVNAGHTLE
+DLVRKRGAPVEPTRFHTPPSPELDRLMHRDADFLAFRLQTSLLYSCLYTLGFSLAERYVF
+CYVVARANEDVCGKSMKELQDELDGLARSMASGSTKTAE
+>BGC0001472_10 # 10511 # 10654 # 1 # ID=1_10;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.590
+MEQQIELDVLEISDLIAGAGENDDLAQVMAASCTTTSVSTSSSSSSS
+>BGC0001472_11 # 10977 # 13634 # 1 # ID=1_11;partial=00;start_type=ATG;rbs_motif=GGA/GAG/AGG;rbs_spacer=5-10bp;gc_cont=0.701
+MGVNISPYVVYRRSRLPLGELGGMSFTTAWSRIDELHALRDEIGKNAVGLADRLGELVPT
+LGDDVRADLIRLRRDVHNLRHDRAVARLEPLRPHLGREVVDEVETWCALGVRAEQCERAG
+REELESEKARAADGFGALFEHDAMARSIQLSGDRLYRGLRDLVAGDEASALKPSKARLRE
+SSLVNFAYRASLKPSPFGRFTEIGAFPPDDPRPADPGGRHGGTQESVTTLNRLLVNWGPP
+GLPLVPGGMEPGHLVLNSTLRAGTEYVEYVGVAPGSREDGRMATERVLRVRREGLFDALL
+AAMPEGSAPAATVLRDLTAVTGKAETSRKVVQGLIRAGILFFRPEIDDHDPDYSMKLDRV
+LAAGGTPETAALRGHFSELRRLETDFSEAAADERQKLLDSAYAAIGGIAELCKVSPPPEE
+VLKSPVFEDTPASTAPQAWNLPTVEGSIPALTGLWRLASMMDNGQVKRLGLYSFATRVLG
+DRSTMPFLEFFQAFSSLTDQEQVDVFMGRDVEEAERYTRQRAEALRTIRQRLVPGDGTVH
+LDPSVIEKACEGVEDLLDTESVTFRAQFAQGVLPDRDRTLVVNGLLTGYGVYFSRFGSFV
+EGTDEWSLPAAQREHLARRFPGQVDLNSVLGFNFNLHPSVTRRVVNYPGAVSLGAERTVY
+GLARLEVRADQATRSLRLWDPEAQETLDLVPMNFMTPIGVPLLYRLLEALSPSNRYLWKP
+LDDIRDAGGPTVYGETAPRLVVGDVVADRRSWNVAAAEIPMLQDLSRDVPEALVAFDAWR
+LTRGLPRHAFVLCQTPEERDVMAGRSRKVTRQWADYAHLRRASVHKPMYVDFRNPFLVRS
+FAKSALSRGDVVASIRECLPSVDDYGPDTGWTAAEEFFVELCTDN
+>BGC0001472_12 # 13612 # 14571 # 1 # ID=1_12;partial=00;start_type=TTG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.706
+MNCVPTTSGQTGTREWRTVHIHVPHSLHTPFLCDVVEPLLRSEGLQDHFFFLRYWQGGPH
+LRLRMLCGPGAGSAEAAERVVAGLARAMPEFGAQAREEYALGLTLQDELARLEKETSEEG
+RPIGALDRVAYEPEYRKYGGTEGLQIAETVFRKSSVAVLGLLGGQPRAWVDERRAPIGEA
+ARIMAMFLHGAGLDPRAAGLFLREYEDWWRTYAPDDMQRAWPKLFGGVSAQMTNLCAAVW
+RDGATDVFHDISAEAAARARSVCGAEPGGDVRDLRLDGTPYPGCLSNYVHTTNNRLGLVP
+AAEGLVAYLVRRGLEAMDG
+>BGC0001472_13 # 14692 # 15894 # -1 # ID=1_13;partial=00;start_type=GTG;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;gc_cont=0.685
+MTDRQDSAYPYPRTCPLHPPKEYASLRAEQPITKVTLASGRTAWLLTRHEHIRQLLADPH
+VSSNLAHPGYPLHFDAPPEVMEQMRPVLLAMDPPVHTAQRKMVIPEFTVKRVLQLRPRVE
+EIVDECISSMLAGEGPADLVEALALPVPSLVICELLGVPRSDRAFFQDRTNKLVSVDADP
+QERNSAHQELHAYFSELVTAQEADPGDDLLGRLVVKNRETGTFDHGELVGMANVLLVGGH
+ETTANMISLGVVGLLENPDQLAKLRADPGLAPQAVDELLRYFSIADQVTSRVATADLEIG
+GVLIRAGEGVIGLSASGNHDEAVFPDPDRLDIERGGRHHLAFGHGIHQCIGQNLAKLELE
+VVFNALLARIPGLKLATPVAELPFKDSMGVYGLHKLPVSW
+>BGC0001472_14 # 16220 # 16564 # 1 # ID=1_14;partial=00;start_type=ATG;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;gc_cont=0.626
+MYLSIVMWDLKKSEATVESLREYLRDYAVDAYSALDGMRLKAWFSDSARQLWGAVYLWDS
+PEQMPGLYKVSRVIDLIGYPPTSVGGFTLEATAEGKSVHETLAGLGIALEGGTQ
+>BGC0001472_15 # 17019 # 17729 # 1 # ID=1_15;partial=00;start_type=GTG;rbs_motif=3Base/5BMM;rbs_spacer=13-15bp;gc_cont=0.636
+MLIEDIEPLLQSIRAGVEFIEIYGLDTVPVPDSLLAECERRRIPVRLLAASVANQVFKTE
+KKPKVFGIAKVPRPRRLSDLSDMTGDLILLDGVKIVGNIGAIVRTSFALGASGIVLVDSD
+LGSIADRRLIRASRGYVFSLPIVLASRAEALQYFQDNAMRPVVFEADGDLGVADLDGMDE
+RLVLMFGSERIGPSGEFSDIAAKSVSIPMNPAAESLNVSVSAGIALHARARRNLSR
+>BGC0001472_16 # 17815 # 19485 # -1 # ID=1_16;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.756
+ALLGLRPFTPWEVSVAELGPDHRAEVNVLAADGRRVELIFLNTAMHTGRHRLGLPSLWQD
+RRLVLRTVVADGSPLRRAGSYTYDGLVGVLTGLMESYRPTVVHTLDPDPDIQHSTEAVRR
+RDSEQPGYSDHADHTAAACFAWAAMIRWVARATADGGRIPGFVTVAYRGYYNRHWPKNLP
+QGVLARKAAHLVPYGGSPDWDCGNPSGCGDYNVGGDRPLTNRKGWVRSTHHRYPGTRTVL
+TAEPDGRLAAYAVLGLRVVRWQETGPGSGAWGPPHDLGGGPLAPALGSATTRDGRLLLFG
+LRFAALGGHGADNEREIVVLEQSAPGRGFRPWRGLGSPSPGRDEVRRTGVPVAVAAPDGQ
+IHLFVRDAEKGVSTRVRDGAGRWSAWRDMGGGEVQDGLHTAVDEGGRVHVFGAGHHAVHH
+WTQDTPSAGLTARTQLTAAPVPAHAPAALPAPDGSVSLYYRAAAGSGLTTARAGTAVPGA
+RFDGYXXVDAAPSPRGPVLLGRTAEGLVQLLMGGGLHVRTDGPAALDGASLRLGPDGRPS
+VAGLGPDAAPWMWRPR
b
diff -r 12870a79d56b -r 9d689f8c9ce4 test-data/Sanntis_output_data.genbank
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Sanntis_output_data.genbank Thu Aug 08 11:58:48 2024 +0000
b
b'@@ -0,0 +1,479 @@\n+LOCUS       BGC0001472             19486 bp    DNA              UNK 01-JAN-1980\n+DEFINITION  BGC0001472.\n+ACCESSION   BGC0001472\n+VERSION     BGC0001472\n+KEYWORDS    .\n+SOURCE      .\n+  ORGANISM  .\n+            .\n+FEATURES             Location/Qualifiers\n+     CDS             312..683\n+                     /translation="MPTIQQLVRKGRQDKVEKNKTPALEGSPQRRGVCTRVFTTTPKKP\n+                     NSALRKVARVRLTSGIEVTAYIPGEGHNLQEHSIVLVRGGRVKDLPGVRYKIIRGSLDT\n+                     QGVKNRKQARSRYGAKKEK"\n+                     /protein_id="BGC0001472_1"\n+     CDS             686..1156\n+                     /translation="MPRKGPAPKRPVIIDPVYSSPLVTSLINKILLDGKRSTAERIVYG\n+                     AMEGLREKTGADPVITLKRALENVKPSLEVKSRRVGGATYQVPIEVKPGRAATLALRWV\n+                     VGYSRARREKTMTERLMNELLDASNGLGAAVKKREDTHKMAESNKAFAHYRW"\n+                     /protein_id="BGC0001472_2"\n+     CDS             1195..3324\n+                     /translation="MATTSLDLAKVRNIGIMAHIDAGKTTTTERILFYTGVSYKIGEVH\n+                     DGAATMDWMEQEQERGITITSAATTCHWPLNDVDHTINIIDTPGHVDFTVEVERSLRVL\n+                     DGAVTVFDGVAGVEPQSETVWRQADRYGVPRICFVNKLDRTGADFLRCVDMIVQRLGAV\n+                     PIVMQLPIGAEADFRGVVDLVSMKAFVYPEEAVKGEMYDTVEIPDNLKEAAEEWRGKLL\n+                     EAVSENDDQMMELYLEGEEPTEEQLHEAIRRITLASKGSADSVTVTPVFCGTAFKNKGV\n+                     QPLLDAVVRYLPSPLDVEAIEGHDVKDPEKVVQRKPSDDEPFSGLAFKIASDPHLGKLT\n+                     FVRIYSGRLEAGTAVLNSVKGKKERIGKIYRMHANKREEIPSVGAGDIVAVMGLKQTTT\n+                     GETLCDDKNPVILESMDFPAPVIQVAIEPKSKGDQEKLGVAIQRLSEEDPSFQVHSDEE\n+                     TGQTIIGGMGELHLEVLVDRMKREFRVEANVGKPQVAYRETIRKAVERIDYTHKKQTGG\n+                     TGQFAKVQIAIEPIEGGDASYEFVNKVTGGRIPREYIPSVDAGAQEAMQFGILAGYEMV\n+                     GVRVTLLDGGYHEVDSSELAFKIAGSQAFKEGARKASPVLLEPMMAVEVTTPEDYMGEV\n+                     VGDINSRRGQIQAMEERHGARVVKGLVPLSEMFGYVGDLRSKTSGRASYSMQFDSYAEV\n+                     PRNVAEEIIAKAKGE"\n+                     
/protein_id="BGC0001472_3"\n+     CDS             3472..4665\n+                     /translation="MAKAKFERTKPHVNIGTIGHIDHGKTTLTAAITKVLHDAYPDLNE\n+                     ASAFDQIDKAPEERQRGITISIAHVEYQTESRHYAHVDCPGHADYIKNMITGAAQMDGA\n+                     ILVVAATDGPMPQTKEHVLLARQVGVPYIVVALNKADMVDDEEILELVELEVRELLSEY\n+                     EFPGDDLPVVKVSALKALEGDAEWGQTVLDLMKAVDESIPQPERDVEKPFLMPIEDVFT\n+                     ITGRGTVVTGRIERGVLKVNETVDIVGIKTEKTTTTVTGIEMFRKLLDEGQAGENVGLL\n+                     LRGIKREDVERGQVIIKPGSVTPHTEFQAQAYILSKDEGGRHTPFFNNYRPQFYFRTTD\n+                     VTGVVTLPEGTEMVMPGDNTLMDVALIQPVAMEEGLKFAIREGGRTVGAGQVTKITK"\n+                     /protein_id="BGC0001472_4"\n+     CDS             4869..5570\n+                     /translation="MRNDVTSMTAVLEGFTSRTPTSDGLAAERRPVPFADSVPVEPQPS\n+                     AEDLRPVHDLRGTLERRRSSLHYAPLPVRTDVILSLLRDVLRRDRDDWGLDASAGALEG\n+                     FVFAFRSEGAEPGLYRVTAEETCYLAGLDEIGPAENLGVQREFSTGAGIVALYASLDRA\n+                     DTWAGSHGYRISALRASMATYDLNLRCQALGLVGTLFGGFVPSSVHHLVHSDGATRHSL\n+                     LATTYARPPES"\n+                     /protein_id="BGC0001472_5"\n+     CDS             5567..7195\n+                     /translation="MVAEMKAEQIGRAARTDMQLTVPARPVLRRGVRLRRAGESVVLDG\n+                     ADRAQVFSGAFAREGLVPLTEACDGTRDHTELALKTGFDEATVYKCLALLSTAGAVEEA\n+                     MSGEEPDVTPEWAVFLSRLGNSTGSNPSWADAAARLVSRSVRLEGDAALVAGARRSLRE\n+                     VCPVVTEPAGPPGPGDELTVFFETPASAPLLAATEERCRQDGRPLLRVRADARTITIGP\n+                     YADLSITPCLDCGRHGEADLSGEPPEYLHDLVVGLASHHVTALLARATISHLPGDFTVI\n+                     DTATLSTVYRPVAVRPGCPRCSYARGPVAPQAPAGAVYEASVAMPPRAFLAPKDHQAHY\n+                     YASNLRLQSQFKDWPSRPHTPLPALDISVLAGSERHDPSHGDTPLTLSSLGLLLKVAFG\n+                     VKEDETTPERVKRWTAASGNIGSTTAYAVVRDDRIMPPGVYAYAQGSHTLVTVSGEVPP\n+                     GDSPCDIIITGDLKKVMTKYGTFGFRLVFLDAGCNLASLRELAQHLGLGFTPRSDWDDD\n+                     ALARLLGTSPADEPVAAFASLGGTA"\n+                     
/protein_id="BGC0001472_6"\n+     CDS             7210..7821\n+         '..b'ggtgc\n+    16381 ggtctatctg tgggacagcc ccgagcagat gcccggcctg tacaaagtca gccgcgtgat\n+    16441 cgatctcatc gggtatccgc cgacttcggt cggtggtttc acgctcgagg cgaccgccga\n+    16501 agggaagagc gttcacgaga cactggccgg cctggggatc gccctggagg gcggaacgca\n+    16561 gtaagggcag gcgtgcgacc ggatgggcgg gaggtttgcc gggccggtcg gttcgagggg\n+    16621 cggtcgaggg ccggagccgc gcgggtggct tcgccgctcc ggctggcggc cgggtgtgga\n+    16681 gtgccgcgcg gcgccggtgt cgcccgcgcc gggcatcacc gtgctcgcgg agggttcagg\n+    16741 tgtgtcggac cggcgctttc gcgtgtgttc ggcaccgtcc ggtgggccgg gtgctgtgcg\n+    16801 cgggcctcga gctcccgggc gggcgcggtg cgcaggccct ccccgcccgg acacgcgtat\n+    16861 tccgcaccgc ttcacgaaga tcattcggtg aaggaggcgg gggcgctcgt gctaacgtcg\n+    16921 tgatcgtggc cagccttgac attattaccg aacgctctga ttctgccgta caacgcatca\n+    16981 tcgatgtgac aaagcattcg aggtccgttg tccgcacggt gctgatcgag gacatcgagc\n+    17041 ctcttctgca gagcatccgt gccggagtgg aattcatcga gatctacgga ctcgacaccg\n+    17101 tgcctgttcc ggacagtctg ctcgccgaat gtgaacggcg cagaattccg gtccggctgc\n+    17161 tcgccgcttc ggtcgccaat caggtcttca agaccgagaa gaagcccaag gtattcggta\n+    17221 tcgccaaggt cccgcggcct cgtcgcctgt cggacctgtc cgacatgacc ggtgacctca\n+    17281 tcctgctcga cggagtgaag atcgtcggca atatcggagc catcgtgcgg acctcgttcg\n+    17341 cgctcggggc ctcgggaatc gtgctcgtgg acagcgatct cggcagtatt gcggaccgcc\n+    17401 gtctgatcag ggcgagccgg ggctatgtgt tctcccttcc catcgttctc gcgtcccggg\n+    17461 ccgaggcgct ccagtacttc caggacaatg cgatgcgccc ggtggtgttc gaggccgacg\n+    17521 gggatctcgg cgtcgctgat ctcgacggta tggacgagcg acttgtgctc atgttcggca\n+    17581 gcgagaggat cggcccgtcg ggcgagttct ccgacatcgc cgccaagtcg gtctccattc\n+    17641 cgatgaatcc cgcggccgag tccctcaacg tatcggtgtc ggccggaatc gcgctgcacg\n+    17701 cgagggcccg ccgtaacctc tcccggtagt cccggccgca gagccccgtc aagggccccg\n+    17761 tccctcccct ccgggagggg cggggccctt gacgtgcgcc gccgggagcc ctcgctaccg\n+    17821 cggccgccac atccagggcg ccgcgtccgg 
acccaggccg gcgaccgaag gccggccgtc\n+    17881 cgggcccagg cggagcgagg ccccgtccag ggcggccggc ccgtcggtcc gcacgtggag\n+    17941 cccgccgccc atgagcagct ggacgaggcc ctcggcggtg cggcccagca gcacgggccc\n+    18001 ccgcggcgac ggggcggcgt ccaccgnncc gtacccgtcg aaccgtgctc cgggcacggc\n+    18061 cgtccccgcc cgagccgtgg tcagcccgct cccggccgct gcccggtagt agagggacac\n+    18121 ggacccgtcg ggagccggga gggcggccgg cgcgtgggcc gggacgggcg ccgccgtgag\n+    18181 ctgtgtgcga gccgtgagcc cggcggacgg ggtgtcctgg gtccagtggt gtacggcgtg\n+    18241 gtggccggcg ccgaagacgt ggacgcgtcc accctcgtcc acggcggtgt gcaggccgtc\n+    18301 ctgcacctct ccgccgccca tgtcccgcca cgcgctccac cgccccgccc cgtcccgcac\n+    18361 ccgggtgctg acgcccttct cggcgtcgcg tacgaagaga tggatctgcc cgtccggagc\n+    18421 ggcgacggcc accggtacgc ccgtgcgacg tacttcgtca cgtccgggag aaggagagcc\n+    18481 caggccgcgc caggggcgga agccccggcc cggggcgctc tgctccagca ccacgatctc\n+    18541 gcgctcgttg tcggcgccgt gcccgccgag tgcggcgaag cggagcccga acagcagcag\n+    18601 gcgcccgtcc cgtgtggtgg ccgagcccag tgcgggggcg agcgggccgc cgcctaggtc\n+    18661 gtgcggaggc ccccaggcac cgctgcccgg cccggtctcc tgccaccgca ccacccgcag\n+    18721 ccccagcacg gcataggccg cgagcctgcc gtccggctcg gcggtgagga ccgtgcgcgt\n+    18781 gcccgggtag cggtggtggg tggagcggac ccagcccttg cggttggtga gcgggcggtc\n+    18841 gccgccgacg ttgtagtcac cgcagccgga cggattgccg cagtcccagt cgggcgagcc\n+    18901 cccgtagggg acgaggtggg cggccttcct cgccagtacc ccctgcggca ggttcttcgg\n+    18961 ccagtgccgg ttgtagtaac cgcggtaggc gaccgtgacg aagcccggta tccggccacc\n+    19021 gtccgcggtc gcccgggcca cccagcggat catcgcggcc cacgcgaagc aggccgcggc\n+    19081 cgtgtggtcg gcgtggtcgg agtagccggg ctgttcgctg tccctgcggc ggacggcctc\n+    19141 cgtgctgtgc tggatgtccg ggtcggggtc cagggtgtgg acgacggtgg gccggtagct\n+    19201 ctccatcagg ccggtgagga ccccgaccag cccgtcgtac gtgtacgagc cggcgcgccg\n+    19261 cagcggcgat ccgtcggcca cgaccgtacg cagcacgagc cggcgatcct gccagagact\n+    19321 gggcagcccg agccggtgcc ggccggtgtg catggccgtg ttgaggaaga tcaactccac\n+    19381 
tctgcggccg tcggccgcca gcacgttcac ctcggccctg tggtccgggc cgagctcggc\n+    19441 gacggagacc tcccaggggg tgaagggccg cagcccgagc agggcg\n+//\n'