Repository 'dram_merge_annotations'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/dram_merge_annotations

Changeset 0:2675f8d7b2a5 (2022-12-10)
Next changeset 1:96c0067106ec (2023-07-11)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dram commit df10ba86507266a6a6f83c9bbefb7191a41b46f5
added:
dram_merge_annotations.xml
macros.xml
test-data/annotate_custom.fasta
test-data/annotate_custom.hmm
test-data/annotated1.genbank
test-data/annotated1.gff
test-data/annotated1.tabular
test-data/annotated1_genes_faa.fasta
test-data/annotated1_genes_fna.fasta
test-data/annotated1_rrnas.tabular
test-data/annotated1_scaffold.fasta
test-data/annotated1_trnas.tabular
test-data/annotated2.genbank
test-data/annotated2.gff
test-data/annotated2.tabular
test-data/annotated2_genes_faa.fasta
test-data/annotated2_genes_fna.fasta
test-data/annotated2_rrnas.tabular
test-data/annotated2_scaffold.fasta
test-data/annotated2_trnas.tabular
test-data/distill_custom.tabular
test-data/input_annotate1.fasta.gz
test-data/input_distill_rrna1.tabular
test-data/input_distill_trna1.tabular
test-data/neighborhoods_genes_loc1.fna
test-data/neighborhoods_output1.fasta
test-data/neighborhoods_scaffolds_loc1.fasta
test-data/strainer_input_fasta1.fasta.gz
test-data/strainer_output1.fasta
b
diff -r 000000000000 -r 2675f8d7b2a5 dram_merge_annotations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dram_merge_annotations.xml Sat Dec 10 21:14:28 2022 +0000
[
b'@@ -0,0 +1,195 @@\n+<tool id="dram_merge_annotations" name="DRAM merge multiple annotations" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>into a single set</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+## DRAM expects a specific file name for each file in the set.\n+#set annotations_file_name = \'annotations.tsv\'\n+#set genbank_file_name = \'genbank\'\n+#set trnas_file_name = \'trnas.tsv\'\n+#set rrnas_file_name = \'rrnas.tsv\'\n+#set scaffolds_file_name = \'scaffolds.fna\'\n+#set genes_gff_file_name = \'genes.gff\'\n+#set genes_fna_file_name = \'genes.fna\'\n+#set genes_faa_file_name = \'genes.faa\'\n+\n+## DRAM expects each annotation set to be in a different directory.\n+#set input_dirs = list()\n+#for $index, $item in enumerate($annotations_collection):\n+    #set dir_name = \'input_dir_\' + str($index)\n+    mkdir \'$dir_name\' &&\n+    ln -s \'$item\' \'$dir_name/$annotations_file_name\' &&\n+    ln -s \'$genbank_collection[$index]\' \'$dir_name/$genbank_file_name\' &&\n+    #if $trnas_collection:\n+        ln -s \'$trnas_collection[$index]\' \'$dir_name/$trnas_file_name\' &&\n+    #end if\n+    #if $rrnas_collection\n+        ln -s \'$rrnas_collection[$index]\' \'$dir_name/$rrnas_file_name\' &&\n+    #end if\n+    ln -s \'$scaffolds_collection[$index]\' \'$dir_name/$scaffolds_file_name\' &&\n+    ln -s \'$genes_gff_collection[$index]\' \'$dir_name/$genes_gff_file_name\' &&\n+    ln -s \'$genes_fna_collection[$index]\' \'$dir_name/$genes_fna_file_name\' &&\n+    ln -s \'$genes_faa_collection[$index]\' \'$dir_name/$genes_faa_file_name\' &&\n+    $input_dirs.append($dir_name)\n+#end for\n+\n+DRAM.py merge_annotations\n+--input_dirs \'input_dir*\'\n+--output_dir \'output_dir\'\n+&& test -f \'output_dir/genes.faa\' && mv \'output_dir/genes.faa\' \'$output_genes_faa\' || echo \'No genes.faa 
output produced\'\n+&& test -f \'output_dir/genes.fna\' && mv \'output_dir/genes.fna\' \'$output_genes_fna\' || echo \'No genes.fna output produced\'\n+&& test -f \'output_dir/genes.gff\' && mv \'output_dir/genes.gff\' \'$output_genes_gff\' || echo \'No genes.gff output produced\'\n+&& test -f \'output_dir/scaffolds.fna\' && mv \'output_dir/scaffolds.fna\' \'$output_scaffolds_fna\' || echo \'No scaffolds.fna output produced\'\n+#if $rrnas_collection:\n+    && test -f \'output_dir/rrnas.tsv\' && mv \'output_dir/rrnas.tsv\' \'$output_rrnas\' || echo \'No rrnas.tsv output produced\'\n+#end if\n+#if $trnas_collection:\n+    && test -f \'output_dir/trnas.tsv\' && mv \'output_dir/trnas.tsv\' \'$output_trnas\' || echo \'No trnas.tsv output produced\'\n+#end if\n+&& test -f \'output_dir/annotations.tsv\' && mv \'output_dir/annotations.tsv\' \'$output_annotations\' || echo \'No annotations.tsv output produced\'\n+    ]]></command>\n+    <inputs>\n+        <param name="annotations_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of annotation files"/>\n+        <param name="genbank_collection" type="data_collection" format="genbank" collection_type="list" label="Collection of genbank files"/>\n+        <param name="trnas_collection" type="data_collection" format="tabular" collection_type="list" optional="true" label="Collection of trna files"/>\n+        <param name="rrnas_collection" type="data_collection" format="tabular" collection_type="list" optional="true" label="Collection of rrna files"/>\n+        <param name="scaffolds_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of scaffolds files"/>\n+        <param name="genes_gff_collection" type="data_collection" format="gff3" collection_type="list" label="Collection of genes.gff files"/>\n+        <param name="genes_fna_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of genes.fna files"/>\n+     
   <param name="genes_faa_collection" type="data_collection" format="fasta" collection_type="list" label="Collection of genes.faa files"/>\n+    </inputs>\n+    <outputs>\n+        '..b'                 <element name="annotated2" value="annotated2_trnas.tabular"/>\n+                </collection>\n+            </param>\n+            <param name="rrnas_collection">\n+                <collection type="list">\n+                    <element name="annotated1" value="annotated1_rrnas.tabular"/>\n+                    <element name="annotated2" value="annotated2_rrnas.tabular"/>\n+                </collection>\n+            </param>\n+            <param name="scaffolds_collection">\n+                <collection type="list">\n+                    <element name="annotated1" value="annotated1_scaffold.fasta"/>\n+                    <element name="annotated2" value="annotated2_scaffold.fasta"/>\n+                </collection>\n+            </param>\n+            <param name="genes_gff_collection">\n+                <collection type="list">\n+                    <element name="annotated1" value="annotated1.gff"/>\n+                    <element name="annotated2" value="annotated2.gff"/>\n+                </collection>\n+            </param>\n+            <param name="genes_fna_collection">\n+                <collection type="list">\n+                    <element name="annotated1" value="annotated1_genes_fna.fasta"/>\n+                    <element name="annotated2" value="annotated2_genes_fna.fasta"/>\n+                </collection>\n+            </param>\n+            <param name="genes_faa_collection">\n+                <collection type="list">\n+                    <element name="annotated1" value="annotated1_genes_faa.fasta"/>\n+                    <element name="annotated2" value="annotated2_genes_faa.fasta"/>\n+                </collection>\n+            </param>\n+            <output name="output_annotations">\n+                <assert_contents>\n+                    
<has_n_lines n="530" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output_collection name="output_genbank" type="list" count="2">\n+                <element name="annotation_0.gbk" ftype="genbank">\n+                    <assert_contents>\n+                        <has_text text="LOCUS"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="annotation_1.gbk" ftype="genbank">\n+                    <assert_contents>\n+                        <has_text text="LOCUS"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+            <output name="output_trnas">\n+                <assert_contents>\n+                    <has_n_lines n="8" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_rrnas">\n+                <assert_contents>\n+                    <has_n_lines n="3" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_scaffolds_fna">\n+                <assert_contents>\n+                    <has_n_lines n="4" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_genes_gff">\n+                <assert_contents>\n+                    <has_n_lines n="23" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_genes_fna">\n+                <assert_contents>\n+                    <has_n_lines n="26" delta="1"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_genes_faa">\n+                <assert_contents>\n+                    <has_n_lines n="26" delta="1"/>\n+                </assert_contents>\n+            </output>\n+       </test>\n+    </tests>\n+    <help>\n+**What it does**\n+ \n+@WHATITDOESHEADER@\n+\n+This tool accepts collections of the components of a DRAM 
annotation set (i.e., annotations, genbank files, GFF files, tRNAs,\n+rRNAs, etc.) and, except for the genbank files which remain a collection, merges the set of files for each component into\n+a single file.\n+\n+@WHATITDOESFOOTER@\n+    </help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,72 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.3.5</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.09</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">dram</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <regex match="FileNotFoundError" source="stderr" level="fatal"/>
+            <regex match="returned non-zero exit status" source="stdout" level="fatal"/>
+            <regex match="returned non-zero exit status" source="stderr" level="fatal"/>
+            <regex match="Invalid file path or buffer object type" source="stderr" level="fatal"/>
+            <exit_code range="1:" level="fatal"/>
+        </stdio>
+    </xml>
+    <xml name="categories_param">
+        <param argument="--categories" type="text" value="" label="Distillate categories" help="Optional, leave blank to ignore">
+            <expand macro="sanitizer"/>
+        </param>
+    </xml>
+    <xml name="custom_distillate_param">
+        <param argument="--custom_distillate" type="data" format="tabular" optional="true" label="File containing a custom distillate form" help="Optional, leave blank to ignore"/>
+    </xml>
+    <xml name="genes_param">
+        <param argument="--genes" type="text" value="" label="Space-separated list of genes to keep" help="Optional, leave blank to ignore">
+            <expand macro="sanitizer"/>
+        </param>
+    </xml>
+    <xml name="identifiers_param">
+        <param argument="--identifiers" type="text" value="" label="Database identifiers" help="Optional, leave blank to ignore">
+            <expand macro="sanitizer"/>
+        </param>
+    </xml>
+    <xml name="input_file_param">
+        <param argument="--input_file" type="data" format="tabular" label="Annotations file" help="Produced by the DRAM annotate tool"/>
+    </xml>
+    <xml name="sanitizer">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="__sq__"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+    <token name="@WHATITDOESHEADER@">DRAM (Distilled and Refined Annotation of Metabolism) is a tool for annotating metagenomic assembled genomes and VirSorter identified viral contigs. DRAM annotates MAGs and viral contigs using UniRef90, PFAM, dbCAN, RefSeq viral, VOGDB and the MEROPS peptidase database.</token>
+    <token name="@CUSTOMDISTILLATEFILES@">**Using Custom Distillate files**
+
+The custom distillate sheet must be a tabular file with all the columns specified below.  This sheet is an extension
+of the genome_summary_form.tsv file that is installed with the dram databases and which is one of several key data files
+that characterize the distillate.  The genome_summary_form.tsv file is available for viewing here
+https://github.com/WrightonLabCSU/DRAM/blob/master/data/genome_summary_form.tsv.  The custom distillate sheet must
+contain the following columns.
+
+ * gene_id: the KO ids of the genes in which you are interested
+ * gene_description: descriptions of the genes
+ * module: the name of your module that you are adding
+ * sheet: the name you would like on the Excel sheet in which your results appear
+ * header: the header that will appear in the dram metabolism summary
+ * subheader: the sub-header that will appear in the metabolism summary</token>
+    <token name="@WHATITDOESFOOTER@">More information about DRAM can be found here: https://github.com/shafferm/DRAM/wiki</token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/s13104-016-1900-2</citation>
+        </citations>
+    </xml>
+</macros>
+
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotate_custom.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_custom.fasta Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,83 @@
+>sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-001R PE=4 SV=1
+MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS
+EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD
+AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL
+EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD
+SFRKIYTDLGWKFTPL
+>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-002L PE=4 SV=1
+MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR
+IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL
+AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC
+KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML
+DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK
+VMFFVAGAVLVAILISTVRW
+>sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-002R PE=4 SV=1
+MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL
+QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT
+FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD
+LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET
+YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY
+STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS
+GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI
+QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC
+>sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-003L PE=4 SV=1
+MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT
+PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS
+TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI
+>sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-003R PE=3 SV=1
+MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD
+RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI
+FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ
+PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD
+AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR
+TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA
+LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR
+KAKIQEMFDNMVSRMVTS
+>sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-004R PE=4 SV=1
+MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY
+>sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 OX=345201 GN=IIV3-005L PE=3 SV=1
+MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL
+CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC
+KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH
+QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY
+>sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) OX=654924 GN=FV3-005R PE=4 SV=1
+MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS
+NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED
+QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT
+REFVDKDAQEFQDFLNSLDASLLS
+>sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 OX=176652 GN=IIV6-006L PE=3 SV=1
+MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL
+IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII
+>fcresfdr
+MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL
+>BUSCOaEOG7B0HST
+MAADQAQFQQLLVSLLSTDNEVRKQAEEAYNNLPVESKVTFLLGAIANGQLSEEVRQLAA
+VLLRRLFSSEFLEFYKKLPAEAQAQLKEQILLAVQQEVSEQLRRKVCEVVAEVARNLIDE
+DGNNQWPEFLQFLFQCANSPSPQLKESALRIFTSVPGIFGNQEAQYLDLIKQMLAKSLED
+TEDAEVRLQAVRAVGAFILLHDKEKEIQKHFADLLPALLQVVAESIEKQDDDALLKVLID
+LAEATPKFLRPQLETILELCLKVLSEEDVEDSWRHLALEVLVTLAETAPAMVRKRAEKYI
+VALVPLVLKMMTDLEEDEDWSVADEITEDDNDSNNVVAESALDRLACGLGGKVVLPLVVE
+AIPAMLSSSDWKKRHAALMAISAIGEGCHKQMEALLDQVLDGVLKYLQDPHPRVRYAACN
+AIGQMSTDFAPIFEKKFHDKVIPGLLLLLDDEANPRVQAHAGAALVNFSEDCPKNILTRY
+LDAIMAKLEAILTSKFKELVEKGTKLVLEQVVTTIASVADTAEEEFVAYYDRLMPCLKYI
+IQNANSEELKLLRGKTIECVSLIGLAVGREKFIADASEVMDLLLKTHTEGAELPDDDPQT
+SYLISAWARICKILGKQFEQYLPLVMGPVLRTASLKPEVALLDNEDLEDIEGDVDWQFVS
+LGEQQNFGIRTAGLEDKASACEMLVCYARELKEGFAEYAEEVVRLMVPLLKFYFHDGVRT
+AAAESLPYLLDCAKIKGPQYLEGMWAYICPELLKAIDTEPEKEVLSELLSSLAKCIETLG
+AGCLSEEALKELLRILDKLLKEHFERAEKRLEKRKDEDYDEVVEEELAEEDDEDVYILSK
+VADILHALFATYKEAFLPAFDQVVPHFVKLLEPERPLADRQWALCVFDDVIEFGGPACVK
+>FBpp0306926
+MSSFLLVIFILLALRTSESSETGNPLANEPDPLYMKLVDPMVAG
+ESPKRMIKDQKDVGLKSTSSSEELRKLPKTRGRQKRFIRNPNYVKANEFYDKMLSSEY
+VSKRYKDLPPPHPGFGADQPPA
+>FBpp0078508
+MPPTINNSAVNSAAEKRPQRQTERKSEIICRVKYGNNLPDIPFD
+LKFLQYPFDSHRFVQYNPTSLERNFKYDVLTEHDLGVTVDLINRELYQADSMTLLDPA
+DEKLLEEETLTPTDSVRSRQHSRTVSWLRKSEYISTEQTRFQPQNLENIEAKVGYNVK
+KSLREETLYLDREAQIKAIEKTFSDTKSEITKHYSKPNVVPVEVLPIFPDFTNWKFPC
+AQVIFDSDPAPAGKNVPAQLEEMSQAMIRGVMDESGEQFVAYFLPTEQTLEKRRTDFI
+NGELYKEEEEYEYKIAREYNWNVKTKASKGYEENYFFVMRQDGIYYNELETRVRLNKR
+RVKVGQQPNNTKLVVKHRPLDSMEHRMQRYRERQLEVPGEEEEIVEEVREEEQMQIIG
+ETEKTSEDAAVGAQAASGADSPAQVARDRQSRSRSRTRSGSSSGSGSGSGSRASSRSK
+SGSRSGSGSRSRTNSPAGSQKSGSRSRSVSRSRSRSKSGSRSRSRSRSKSGSRSRSGS
+RSGSGSRSPSRSRSGSPSGSGSSSGSASDE
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotate_custom.hmm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotate_custom.hmm Sat Dec 10 21:14:28 2022 +0000
[
b'@@ -0,0 +1,104 @@\n+HMMER3/b [3.0 | March 2010]\n+NAME  CBM10\n+LENG  28\n+ALPH  amino\n+RF    no\n+CS    no\n+MAP   yes\n+DATE  Thu Apr 21 15:04:19 2011\n+NSEQ  84\n+EFFN  8.697876\n+CKSUM 1939305542\n+STATS LOCAL MSV       -7.3395  0.71998\n+STATS LOCAL VITERBI   -7.4498  0.71998\n+STATS LOCAL FORWARD   -3.9737  0.71998\n+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   \n+            m->m     m->i     m->d     i->m     i->i     d->m     d->d\n+  COMPO   3.06033  2.42663  2.85747  2.77283  4.58046  2.24214  4.56942  3.29197  3.49682  3.34028  4.33944  2.39813  3.09771  2.95262  3.60586  2.44839  2.84569  3.24117  2.72301  3.28669\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.59367  5.66823  0.81137  0.61958  0.77255  0.00000        *\n+      1   2.74340  3.47729  3.44643  2.64352  4.17985  3.79353  4.02282  3.57300  2.87097  3.22136  2.03109  3.38316  4.18747  1.79120  3.30841  2.03216  2.98385  3.28775  5.51287  4.21451      1 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00925  5.08381  5.80616  0.61958  0.77255  0.72961  0.65797\n+      2   3.21366  0.33490  5.23672  5.17809  5.69922  3.92522  5.89105  5.22956  5.14397  4.94899  5.73770  3.11988  4.78715  5.32569  5.20447  2.46009  3.79312  4.43668  7.07761  5.95722      2 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00690  5.37599  6.09834  0.61958  0.77255  0.73167  0.65606\n+      3   3.46517 
 6.03524  2.03286  2.67508  5.36194  2.25828  4.36563  4.86858  2.77875  4.33291  5.09631  1.07839  4.52524  2.62719  3.73072  2.75509  3.71412  4.41890  6.46315  5.00841      3 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.04091  5.49877  3.32435  0.61958  0.77255  0.52775  0.89143\n+      4   2.51597  5.24536  3.21395  3.79383  4.81190  3.30725  4.89534  4.21717  3.89295  3.90352  4.76570  4.13904  4.73005  4.17210  4.26805  3.51068  1.58681  3.86875  0.87084  4.95297      4 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00570  5.56595  6.28829  0.61958  0.77255  0.68571  0.70065\n+      5   1.88947  5.00491  4.00042  3.04471  4.13869  4.17114  3.84034  3.52228  3.40249  3.22374  4.10809  3.86994  4.55537  3.71667  3.77844  2.67165  2.19530  3.28758  3.58338  1.27239      5 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00552  5.59725  6.31960  0.61958  0.77255  0.60266  0.79264\n+      6   3.27926  5.74099  2.79040  2.94678  5.08062  1.27592  4.24831  3.41153  3.01359  4.05572  4.81107  3.21498  2.15563  2.32440  3.50951  2.31938  3.51838  4.13470  6.20511  4.79969      6 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00538  5.62391  6.34625  0.61958  0.77255  0.69965  0.68669\n+      7   2.98451  5.68149  2.03356  2.71108  5.01893  3.09684  4.15179  4.49988  2.89056  3.98355  4.72438  1.82442  4.35665  2.68802  3.38251  
2.74393  1.71463  4.06807  3.52699  3.90983 '..b' 7.77288     27 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00434  5.83749  6.55983  0.61958  0.77255  0.48576  0.95510\n+     21   2.85558  4.93320  5.01904  4.44670  3.96129  4.55274  4.87439  3.39985  4.30306  3.14653  4.07295  3.28033  4.94659  4.51322  4.45054  3.85882  2.48037  2.52459  0.81497  2.55247     28 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00434  5.83749  6.55983  0.61958  0.77255  0.48576  0.95510\n+     22   4.41613  7.05734  3.41290  0.33211  6.45410  4.55267  5.18133  6.03259  4.21333  5.45749  6.34879  4.02373  5.23058  2.45539  4.78815  2.76925  4.73557  5.53995  7.57078  6.01524     29 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00434  5.83749  6.55983  0.61958  0.77255  0.48576  0.95510\n+     23   4.65182  7.42009  2.82915  3.46315  6.72740  4.60645  5.35687  6.43956  4.62773  5.83877  6.82459  0.20801  5.34677  3.75994  5.38642  4.44973  5.00947  5.90632  7.92820  6.25143     30 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00434  5.83749  6.55983  0.61958  0.77255  0.48576  0.95510\n+     24   3.03871  6.18494  3.21297  2.61231  5.52195  2.08712  4.55336  5.02325  3.36798  4.49202  5.24999  1.02698  4.71928  2.57491  3.89135  1.97384  3.88199  4.57575  6.62318  5.17899     31 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  
2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00504  5.68941  6.41175  0.61958  0.77255  0.48576  0.95510\n+     25   1.99175  5.71724  3.48758  2.45228  5.05473  4.00588  4.19152  4.53331  2.79074  4.01804  4.28019  3.47693  4.39887  1.29371  2.11828  2.43019  3.30733  4.10393  6.15338  3.96473     32 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00504  5.68941  6.41175  0.61958  0.77255  0.48576  0.95510\n+     26   3.43388  5.57346  3.69941  2.95794  4.91426  4.17833  4.53372  3.14649  3.35913  3.94178  4.77104  3.49497  4.65004  3.34410  3.80156  0.56810  2.88463  3.99116  6.19270  4.86921     33 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.00504  5.68941  6.41175  0.61958  0.77255  0.48576  0.95510\n+     27   4.26126  0.16067  6.38751  6.27473  5.43175  4.92822  6.57062  4.62334  5.97406  4.19855  3.09118  5.80642  5.70066  6.20961  5.93464  4.54901  4.79228  4.45835  7.10686  5.93986     34 - -\n+          2.68618  4.42225  2.77519  2.73123  3.46354  2.40513  3.72494  3.29354  2.67741  2.69355  4.24690  2.90347  2.73739  3.18146  2.89801  2.37887  2.77519  2.98518  4.58477  3.61503\n+          0.05109  5.66823  3.07142  0.61958  0.77255  0.48576  0.95510\n+     28   4.30379  3.20827  6.48590  6.05149  4.97840  6.03933  6.92759  0.73764  6.03790  2.64441  4.67131  6.21290  6.22644  6.33190  6.22989  5.50520  4.56636  1.06340  7.18287  5.92811     35 - -\n+          2.68632  4.41952  2.77533  2.73137  3.46368  2.40527  3.72508  3.29292  2.67755  2.69369  4.24704  2.90360  2.73753  3.18160  2.89814  2.37901  2.77533  2.98380  4.58491  
3.61517\n+          0.20465  1.68705        *  0.08250  2.53598  0.00000        *\n+//\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.genbank
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1.genbank Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,24 @@
+LOCUS       dataset_5327_scaffold_1510   11453 bp   DNA   linear   ENV   05-AUG-2022
+FEATURES             Location/Qualifiers
+     CDS             478..1935
+                     /conf=100.00
+                     /cscore=116.40
+                     /gc_cont=0.437
+                     /gene=dataset_5327_scaffold_1510_1
+                     /partial=00
+                     /codon_start=1
+                     /rbs_motif=AGxAGG/AGGxGG
+                     /rbs_spacer=11-12bp
+                     /rscore=4.78
+                     /score=128.32
+                     /inference=Prodigal_v2.6.3
+                     /sscore=11.92
+                     /start_type=ATG
+                     /translation=MGQDRQNSQELLNELNYLRQRLAELEEMNRDYLGMIENSYDAMSIADCDGRLLLINPAFERIMGITKSETLSRTIQDLTNDGITDASAALKAFETGKQESVIINTRAGRQVLSTGVPFYDQTGKIVRVYCNIRDVTELNHLRQKFEQSQKLASRYLFELLEFKRGKTFKFVAHSNKIKQMLETVHRIAVVDSTVLILGESGVGKDLVARIIHEASSRNDSGSFLKINCAAIPAELLESELFGYEGGAFTGAKKDGKAGYFEIADKGTLFLDEIGELPQKLQVKLLAVIQDQKITRIGGVKEKDVDVRIIAATNRDLEEMVKQGNFREDLFYRLNVIPITIPPLRERKEDIPFLIVHYTELFNKKYNRAVKFSKEAIEMLCKYNWPGNVRELANLVERVIVIGQESILNPEHIPGKYHTAAQNMAETVSDFKSLSDAVEKYELKLVKNTLELCKTREEAASKLGISLSGLSRRIRRLKQLENEGFI*
+ORIGIN
+        1 attgcctctt gtggccgggc catatcgaaa ggtatgtgcc gtagcaatac ggaaatctct
+       61 gggatgctga aaaaagctct cggctctaag ttgactatct tgggttaact tggatgcccc
+      121 tcactttagt gtcactttag tgaggggagg aggtcacttt atccagtgag ggcataaaaa
+      181 atacggtagt tctttgcctg tttaatctgc ttaatcattc ttgcactgct gaattagagg
+      241 acatccggcg gttggatgac ctattggaca gttggcagct cggtcccgcg ccaggcgatc
+      301 tatgggatac tataatgacc ggggttagcc agtgtcatgc gcaaagtacc tgctttggta
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1.gff Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,11 @@
+##gff-version 3
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 478 1935 128.32 + 0 ID=dataset_5327_scaffold_1510_1;conf=100.00;cscore=116.40;gc_cont=0.437;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=11-12bp;rscore=4.78;sscore=11.92;start_type=ATG;tscore=2.70;uscore=4.44
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 2271 3857 216.97 + 0 ID=dataset_5327_scaffold_1510_2;conf=99.99;cscore=204.79;gc_cont=0.522;partial=00;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=12.18;start_type=ATG;tscore=2.70;uscore=3.87
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 4217 5395 100.23 + 0 ID=dataset_5327_scaffold_1510_3;conf=100.00;cscore=107.40;gc_cont=0.520;partial=00;rbs_motif=None;rbs_spacer=None;rscore=-11.04;sscore=-7.17;start_type=ATG;tscore=2.70;uscore=1.83
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 5385 6587 141.67 + 0 ID=dataset_5327_scaffold_1510_4;conf=100.00;cscore=125.51;gc_cont=0.526;partial=00;rbs_motif=GGAGG;rbs_spacer=5-10bp;rscore=12.15;sscore=16.15;start_type=ATG;tscore=2.70;uscore=0.08
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 6684 8315 209.17 + 0 ID=dataset_5327_scaffold_1510_5;conf=99.99;cscore=189.80;Dbxref="ko:K00179";gc_cont=0.559;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;rscore=8.35;sscore=19.37;start_type=ATG;tscore=2.70;uscore=8.32
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 8453 9043 86.60 + 0 ID=dataset_5327_scaffold_1510_6;conf=100.00;cscore=72.66;Dbxref="ko:K00180";gc_cont=0.584;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=5-10bp;rscore=8.35;sscore=13.94;start_type=ATG;tscore=2.70;uscore=2.89
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 9217 9381 10.26 + 0 ID=dataset_5327_scaffold_1510_7;conf=91.36;cscore=6.45;gc_cont=0.491;partial=00;rbs_motif=AGxAGG/AGGxGG;rbs_spacer=11-12bp;rscore=3.10;sscore=3.81;start_type=GTG;tscore=-4.98;uscore=5.69
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 9671 10636 92.34 - 0 ID=dataset_5327_scaffold_1510_8;conf=100.00;cscore=90.86;gc_cont=0.475;partial=00;rbs_motif=AGGA;rbs_spacer=5-10bp;rscore=0.52;sscore=1.48;start_type=TTG;tscore=-1.88;uscore=3.50
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 10802 11086 32.01 + 0 ID=dataset_5327_scaffold_1510_9;conf=99.94;cscore=15.94;gc_cont=0.474;partial=00;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=16.08;start_type=ATG;tscore=2.70;uscore=7.11
+dataset_5327_scaffold_1510 Prodigal_v2.6.3 CDS 11386 11451 6.71 + 0 ID=dataset_5327_scaffold_1510_10;conf=82.39;cscore=-2.06;Dbxref="ko:K03666";gc_cont=0.318;partial=01;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp;rscore=6.26;sscore=8.77;start_type=ATG;tscore=2.70;uscore=4.81
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1.tabular Sat Dec 10 21:14:28 2022 +0000
[
b'@@ -0,0 +1,520 @@\n+\tfasta\tscaffold\tgene_position\tstart_position\tend_position\tstrandedness\trank\tko_id\tkegg_hit\tpeptidase_id\tpeptidase_family\tpeptidase_hit\tpeptidase_RBH\tpeptidase_identity\tpeptidase_bitScore\tpeptidase_eVal\tpfam_hits\tcazy_id\tcazy_hits\theme_regulatory_motif_count\n+dataset_4924_scaffold_1140_1\tdataset_4924\tscaffold_1140\t1\t233\t793\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_2\tdataset_4924\tscaffold_1140\t2\t910\t1617\t-1\tD\t\t\t\t\t\t\t\t\t\tVitamin B12 dependent methionine synthase, activation domain [PF02965.20]\t\t\t0\n+dataset_4924_scaffold_1140_3\tdataset_4924\tscaffold_1140\t3\t1614\t3491\t-1\tD\t\t\t\t\t\t\t\t\t\tC-terminal domain of RACo the ASKHA domain [PF14574.9]; RACo middle region [PF17651.4]; RACo linker region [PF17650.4]; 2Fe-2S iron-sulfur cluster binding domain [PF00111.30]\t\t\t0\n+dataset_4924_scaffold_1140_4\tdataset_4924\tscaffold_1140\t4\t3670\t4368\t-1\tD\t\t\t\t\t\t\t\t\t\tUTRA domain [PF07702.16]; Bacterial regulatory proteins, gntR family [PF00392.24]\t\t\t0\n+dataset_4924_scaffold_1140_5\tdataset_4924\tscaffold_1140\t5\t4361\t5023\t-1\tD\t\t\t\t\t\t\t\t\t\tB12 binding domain [PF02310.22]; B12 binding domain [PF02607.20]\t\t\t0\n+dataset_4924_scaffold_1140_6\tdataset_4924\tscaffold_1140\t6\t5287\t5979\t-1\tD\t\t\t\t\t\t\t\t\t\tProtein of unknown function (DUF1638) [PF07796.14]\t\t\t0\n+dataset_4924_scaffold_1140_7\tdataset_4924\tscaffold_1140\t7\t6682\t7914\t-1\tD\t\t\t\t\t\t\t\t\t\tMajor Facilitator Superfamily [PF07690.19]; Transmembrane secretion effector [PF05977.16]; Sugar (and other) transporter [PF00083.27]\t\t\t0\n+dataset_4924_scaffold_1140_8\tdataset_4924\tscaffold_1140\t8\t8299\t8454\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_9\tdataset_4924\tscaffold_1140\t9\t8469\t9626\t-1\tD\t\t\t\t\t\t\t\t\t\tUroporphyrinogen decarboxylase (URO-D) 
[PF01208.20]\t\t\t0\n+dataset_4924_scaffold_1140_10\tdataset_4924\tscaffold_1140\t10\t9919\t10062\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_11\tdataset_4924\tscaffold_1140\t11\t10076\t11230\t-1\tD\t\t\t\t\t\t\t\t\t\tUroporphyrinogen decarboxylase (URO-D) [PF01208.20]\t\t\t0\n+dataset_4924_scaffold_1140_12\tdataset_4924\tscaffold_1140\t12\t11232\t11870\t-1\tD\t\t\t\t\t\t\t\t\t\tB12 binding domain [PF02310.22]; B12 binding domain [PF02607.20]\t\t\t0\n+dataset_4924_scaffold_1140_13\tdataset_4924\tscaffold_1140\t13\t11978\t12793\t-1\tC\tK15023\t5-methyltetrahydrofolate corrinoid/iron sulfur protein methyltransferase [EC:2.1.1.258]\t\t\t\t\t\t\t\tPterin binding enzyme [PF00809.25]\t\t\t0\n+dataset_4924_scaffold_1140_14\tdataset_4924\tscaffold_1140\t14\t12970\t14265\t-1\tD\t\t\t\t\t\t\t\t\t\tAcetyl-CoA hydrolase/transferase C-terminal domain [PF13336.9]; Acetyl-CoA hydrolase/transferase N-terminal domain [PF02550.18]\t\t\t0\n+dataset_4924_scaffold_1140_15\tdataset_4924\tscaffold_1140\t15\t14665\t16461\t-1\tD\t\t\t\t\t\t\t\t\t\tSigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; Helix-turn-helix domain [PF18024.4]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28]\t\t\t0\n+dataset_4924_scaffold_1140_16\tdataset_4924\tscaffold_1140\t16\t16623\t16886\t-1\tE\t\t\t\t\t\t\t\t\t\t\t\t\t0\n+dataset_4924_scaffold_1140_17\tdataset_4924\tscaffold_1140\t17\t16949\t17716\t-1\tC\tK07546\t(E)-benzylidenesuccinyl-CoA hydratase [EC:4.2.1.180]\tMER1073240\tS49C\tMER1073240 - subfamily S49C non-peptidase homologues (Acinetobacter bohemicus) [S49.UNC]#S49C#{peptidase unit: 84-212}~source N8Q6E4~\tFalse\t0.498\t111.0\t1.871e-25\tEnoyl-CoA hydratase/isomerase [PF00378.23]; Enoyl-CoA hydratase/isomerase [PF16113.8]\t\t\t0\n+dataset_4924_scaffold_1140_18\tdataset_4924\tscaffold_1140\t18\t17764\t18030\t-1\tD\t\t\t\t\t\t\t\t\t\tAcyl-CoA dehydrogenase, C-terminal domain 
[PF00441.27]\t\t\t0\n+dataset_4924_scaffold_1172_1\tdataset_4924\tscaffold_1172\t1\t3\t233\t1\tD\t\t\t\t\t\t\t\t\t\tACS/CODH beta subunit C-terminal [PF19436.2]\t\t\t0\n+dataset_4924_scaffold_1172_2\tdataset_4924\tscaffold_1172\t2\t366\t1763\t1\tC\tK00197\tacetyl-CoA decarbonylase/synthase, CODH/ACS complex subunit gamma [EC:2.1.1.245]\t\t\t\t\t\t\t\tCO dehydrogenase/acetyl-CoA synthase delta subunit [PF03599.19]; Putative Fe-S cluster [PF04060.16]\t\t\t0\n+dataset_4924_scaffold_1172_3\tdataset_4924\tscaffold_1172\t3\t1862\t2620\t1\tC\tK07321\tCO dehydrogenase maturation factor\t\t\t\t\t\t\t\tAAA domain [PF13614.9]\t\t\t0\n+datase'..b'rotein, Fis family [PF02954.22]\t\t\t0\n+dataset_4924_scaffold_988_4\tdataset_4924\tscaffold_988\t4\t4895\t5542\t1\tD\t\t\t\t\t\t\t\t\t\tBacterial regulatory proteins, tetR family [PF00440.26]\t\t\t0\n+dataset_4924_scaffold_988_5\tdataset_4924\tscaffold_988\t5\t6103\t7443\t1\tC\tK01845\tglutamate-1-semialdehyde 2,1-aminomutase [EC:5.4.3.8]\t\t\t\t\t\t\t\tAminotransferase class-III [PF00202.24]\t\t\t0\n+dataset_4924_scaffold_988_6\tdataset_4924\tscaffold_988\t6\t8230\t8502\t1\tD\t\t\t\t\t\t\t\t\t\tAntitoxin Phd_YefM, type II toxin-antitoxin system [PF02604.22]\t\t\t0\n+dataset_4924_scaffold_988_7\tdataset_4924\tscaffold_988\t7\t9340\t10764\t1\tD\t\t\t\t\t\t\t\t\t\tAmino acid permease [PF13520.9]; Amino acid permease [PF00324.24]\t\t\t0\n+dataset_4924_scaffold_988_8\tdataset_4924\tscaffold_988\t8\t10793\t11359\t1\tD\t\t\t\t\t\t\t\t\t\tProtein of unknown function (DUF3156) [PF11354.11]\t\t\t0\n+dataset_4924_scaffold_988_9\tdataset_4924\tscaffold_988\t9\t11462\t12565\t1\tD\t\t\tMER0158050\tS09X\tMER0158050 - family S9 non-peptidase homologues (Dipodomys ordii) [S09.UNW]#S09X#{peptidase unit: 54-305}~source ENSDORP00000014784~\tFalse\t0.269\t87.0\t1.587e-16\tNeurobeachin beta propeller domain [PF20426.1]; Eukaryotic translation initiation factor eIF2A [PF08662.14]; Anaphase-promoting complex subunit 4 WD40 domain [PF12894.10]; Cytochrome 
D1 heme domain [PF02239.19]; WD domain, G-beta repeat [PF00400.35]; WD40-like domain [PF17005.8]; Lactonase, 7-bladed beta-propeller [PF10282.12]; WD40 region of Ge1, enhancer of mRNA-decapping protein [PF16529.8]\t\t\t0\n+dataset_4924_scaffold_988_10\tdataset_4924\tscaffold_988\t10\t12655\t14052\t1\tC\tK01915\tglutamine synthetase [EC:6.3.1.2]\t\t\t\t\t\t\t\tGlutamine synthetase, catalytic domain [PF00120.27]; Glutamine synthetase N-terminal domain [PF16952.8]; Glutamine synthetase, beta-Grasp domain [PF03951.22]\t\t\t0\n+dataset_4924_scaffold_988_11\tdataset_4924\tscaffold_988\t11\t14086\t14982\t1\tC\tK22081\tmethylamine---glutamate N-methyltransferase subunit A [EC:2.1.1.21]\tMER0459985\tC44\tMER0459985 - family C44 unassigned peptidases (Desulfotomaculum gibsoniae) [C44.UPW]#C44#{peptidase unit: 2-218}~source ZP_09099530~\tTrue\t0.968\t445.0\t6.218e-140\tGlutamine amidotransferase domain [PF13522.9]; Glutamine amidotransferase domain [PF13537.9]; Glutamine amidotransferases class-II [PF13230.9]\t\t\t0\n+dataset_4924_scaffold_988_12\tdataset_4924\tscaffold_988\t12\t14970\t15671\t1\tC\tK22082\tmethylamine---glutamate N-methyltransferase subunit B [EC:2.1.1.21]\t\t\t\t\t\t\t\tGXGXG motif [PF01493.22]\t\t\t0\n+dataset_4924_scaffold_988_13\tdataset_4924\tscaffold_988\t13\t15701\t16999\t1\tC\tK22083\tmethylamine---glutamate N-methyltransferase subunit C [EC:2.1.1.21]\t\t\t\t\t\t\t\tConserved region in glutamate synthase [PF01645.20]; FMN-dependent dehydrogenase [PF01070.21]; Nitronate monooxygenase [PF03060.18]\t\t\t1\n+dataset_4924_scaffold_988_14\tdataset_4924\tscaffold_988\t14\t17019\t17342\t1\tD\t\t\t\t\t\t\t\t\t\t2Fe-2S iron-sulfur cluster binding domain [PF13510.9]\t\t\t0\n+dataset_4924_scaffold_988_15\tdataset_4924\tscaffold_988\t15\t17339\t18931\t1\tD\t\t\t\t\t\t\t\t\t\tPyridine nucleotide-disulphide oxidoreductase [PF07992.17]; HI0933-like protein [PF03486.17]; NAD(P)-binding Rossmann-like domain [PF13450.9]; FAD dependent oxidoreductase [PF12831.10]; FAD 
binding domain [PF00890.27]; FAD binding domain [PF01494.22]; 4Fe-4S dicluster domain [PF13187.9]; 4Fe-4S dicluster domain [PF14697.9]; 4Fe-4S dicluster domain [PF13247.9]; Flavin-binding monooxygenase-like [PF00743.22]; FAD dependent oxidoreductase [PF01266.27]\t\t\t0\n+dataset_4924_scaffold_988_16\tdataset_4924\tscaffold_988\t16\t19221\t19571\t1\tD\t\t\t\t\t\t\t\t\t\tSarcosine oxidase A3 domain [PF17806.4]; BFD-like [2Fe-2S] binding domain [PF04324.18]\t\t\t0\n+dataset_4924_scaffold_988_17\tdataset_4924\tscaffold_988\t17\t19564\t20685\t1\tD\t\t\t\t\t\t\t\t\t\tFAD dependent oxidoreductase [PF01266.27]\t\t\t0\n+dataset_4924_scaffold_988_18\tdataset_4924\tscaffold_988\t18\t20716\t21117\t1\tC\tK09022\t2-iminobutanoate/2-iminopropanoate deaminase [EC:3.5.99.10]\t\t\t\t\t\t\t\tEndoribonuclease L-PSP [PF01042.24]\t\t\t0\n+dataset_4924_scaffold_988_19\tdataset_4924\tscaffold_988\t19\t21469\t21915\t-1\tD\t\t\t\t\t\t\t\t\t\tFamily of unknown function (DUF6144) [PF19641.2]\t\t\t0\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_genes_faa.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1_genes_faa.fasta Sat Dec 10 21:14:28 2022 +0000
[
@@ -0,0 +1,6 @@
+>dataset_5327_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam)
+MGQDRQNSQELLNELNYLRQRLAELEEMNRDYLGMIENSYDAMSIADCDGRLLLINPAFERIMGITKSETLSRTIQDLTNDGITDASAALKAFETGKQESVIINTRAGRQVLSTGVPFYDQTGKIVRVYCNIRDVTELNHLRQKFEQSQKLASRYLFELLEFKRGKTFKFVAHSNKIKQMLETVHRIAVVDSTVLILGESGVGKDLVARIIHEASSRNDSGSFLKINCAAIPAELLESELFGYEGGAFTGAKKDGKAGYFEIADKGTLFLDEIGELPQKLQVKLLAVIQDQKITRIGGVKEKDVDVRIIAATNRDLEEMVKQGNFREDLFYRLNVIPITIPPLRERKEDIPFLIVHYTELFNKKYNRAVKFSKEAIEMLCKYNWPGNVRELANLVERVIVIGQESILNPEHIPGKYHTAAQNMAETVSDFKSLSDAVEKYELKLVKNTLELCKTREEAASKLGISLSGLSRRIRRLKQLENEGFI*
+>dataset_5327_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] (db=pfam)
+MTVSKWMHVGVALKMNARNYPDKLGCQDKRKSYTFKEWNERSCRLASALKDMGVGYGERVAVIAYNRVEWMEIYAACAKGGQIVVPVMFRLTPHEFEYIVNHSGCKAFIVEEPFVKGVDSVRDILTTIPEGNYIYLGDGEAPEGYIHYESVMAQGDPSEPDISVDAADPWTIMYTSGTTGRPKGVVRTHENYLGQYLINNINMGVRPNDKPLLVMPMCHVNSIYYSFCYTYISAPVMVYNMVSFDPEDLLKTIVDYRVTFTSLVPTHYIMILALPDEIKQKYDTSCIRQLLISSAPARRDLKLAIMKYFKSAELWEAYGSTETSLVTYLRPEDQLTKLGSIGKEVFGCDEIKLLDENGEEVPVGEVGELYSRSPGMFKEYWKDPGKTSEVFRGKWCTAGDMGRRDEDGYYYLVDRKANMIISGGENVYPSEVENVVGAHPAVKDGAVIGVPDQKWGEIVLAFIILHEGYQAGDELAGEIINFCKDRVAGYKRPKSIRFISEEEMPRTGNGKIMHRVLREKYGKWSDSV*
+>dataset_5327_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] (db=pfam)
+MTDRKTIKEICAQFKEIIAEPGLKIQRLQAEKPAPVIGFLPTDVPEELIHASGAYPFGLVAYDGLWVNRADAHLQTWACSLARCSFGMSLAGKFDYLNGLIIPHICDTTRMISDIWKQNRPYDFMENFILPRQVDRPSARSYLTGELGRLKARLEQFTGRSINGEKLNRSINLYNKHRALLRKLYQLHGHHPDLITNLDLFNAIKSSMLIPKGLHNTMVSELISAVEQQAREKQAEDNHGRVRVVVTGKVWEPPDIMEILDQSKVVCVADDLCTGYRYIANDAAEDGDPLETLAVRQINRPPSPCFVNREQDRLEYLTRKVNECGAKGVIFLHLKFCETENYDYPLLRDALSAANIPSVRVETEIGNMSQGQISTRIQAFAEMLGGGDIYGS*
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_genes_fna.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1_genes_fna.fasta Sat Dec 10 21:14:28 2022 +0000
[
@@ -0,0 +1,6 @@
+>dataset_5327_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam)
+ATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAA
+>dataset_5327_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] (db=pfam)
+ATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAA
+>dataset_5327_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] (db=pfam)
+ATGACTGACCGTAAAACCATTAAAGAAATCTGTGCACAATTTAAAGAAATTATTGCCGAGCCTGGTCTAAAGATTCAGCGGCTGCAAGCCGAAAAACCTGCTCCGGTAATCGGGTTTTTGCCCACTGACGTGCCTGAAGAATTAATCCATGCCTCGGGCGCTTACCCCTTCGGGCTGGTGGCTTATGATGGATTATGGGTCAACCGGGCCGACGCCCACTTGCAGACCTGGGCATGCTCTCTGGCACGATGTTCCTTCGGGATGTCTCTGGCCGGGAAGTTCGATTACCTGAACGGGCTGATCATCCCTCATATTTGCGACACCACCCGGATGATTTCAGACATTTGGAAACAAAACCGGCCTTACGATTTCATGGAGAACTTTATACTGCCCCGGCAGGTTGATCGTCCCAGTGCCAGGAGTTATCTCACCGGTGAATTGGGCCGGTTGAAGGCGCGTTTGGAGCAGTTTACGGGCAGGTCTATTAACGGTGAAAAATTAAACCGGAGCATCAACCTTTACAACAAGCATCGTGCTTTATTAAGAAAACTCTACCAGCTCCATGGCCACCACCCGGATCTGATCACCAACCTGGACTTATTCAATGCCATCAAATCTTCCATGCTGATCCCGAAAGGACTGCACAACACCATGGTCAGTGAACTGATCAGCGCTGTTGAGCAGCAGGCCCGGGAAAAACAGGCGGAAGATAACCACGGCCGGGTTAGGGTGGTAGTTACGGGTAAAGTCTGGGAACCCCCGGACATTATGGAGATCCTGGACCAGTCGAAAGTTGTGTGCGTGGCCGACGACCTGTGCACCGGCTACCGCTATATTGCCAACGATGCAGCAGAGGATGGCGATCCGCTGGAAACGCTGGCCGTTCGCCAAATAAATCGCCCACCGTCCCCCTGTTTTGTAAACCGCGAGCAGGACCGCCTAGAATACTTGACCCGTAAGGTAAATGAGTGCGGGGCAAAGGGTGTGATTTTCCTGCATTTGAAATTCTGCGAGACGGAAAATTATGACTACCCGCTGCTGCGTGATGCTCTGTCAGCCGCCAATATACCAAGCGTTCGGGTTGAAACGGAGATTGGAAACATGTCCCAGGGGCAGATTAGCACGCGCATTCAGGCTTTTGCGGAAATGTTGGGGGGAGGCGATATCTATGGCAGTTAG
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_rrnas.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1_rrnas.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,2 @@
+scaffold fasta begin end strand type e-value note
+dataset_5327_scaffold_361 dataset_5327 61952 62058 - 5S rRNA 5.2e-16
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_scaffold.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1_scaffold.fasta Sat Dec 10 21:14:28 2022 +0000
b
b'@@ -0,0 +1,2 @@\n+>dataset_5327_scaffold_1510\n+TACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATATACCTGTAACAATCTTTTTGGTTAATGGATTTCAATTAAAAGGTATGGTTAAGGGTTTTGATAATTTTACAGTAATTATGGAAAGTGACGGCAAACAAATGATGGTATATAAACACGCTATTTCTACAGTTAGTCCTATGAAACCGGTAAATACTTCTTTTTCGGAAGTTAAGCCTGGCTAAATAAAAGGTGTAGGTCATGTCTTTTAGTATATTCCACTTAGAAAACATTCCAAATTCAATCATTCCGCCTGGCTTTTCATTGAAAAGCAGGCGGTTTCTATTTTATAGCGCAACTTTGGTATTGATTGGGATGACTATAATAGTCCATAATTACACATATACATAATCATTTAATCAAGCGTGGTTATGTGACTGTAATTCCACTGTTCGTTTGGAATTGAATATACTCTTTGGGATTGAGAAGGGAAGTGAATAAATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAAATGTTTTTTCAAGTT
TAAGTATAAATTAAAAAACTAGGTCAAAGTTGAATGTCATGTTTAACCTTGTCAAAAGTCTTTATTTTAGTACAATAACAGTTTCATATTTGGTTTTGTATGTCAAATTTGACACCCTCAAAATCTGACAATTCTGAAGATCGGGCTGAAAAGATGGTCTAAAAGTTGGCACGTATTTTGCGTAAAATACCATTATCATACATGAATAGAACATTAATGGTTAAGGAGGGAACATCTATGACAGCTTGAGGGAATGCTTTGCCTGTTCTATGGTGGATGTTTTCAGAACCTTCGGACTTGTCAAAAGAGAGGGATATGAAATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAAAATATAAGTCAGAATATAATGGAATATAGGAAAGATAATGGATTATTTAATAAGTTAATCCAAAGCAGCAATGGTGCCAGGTGTTTAAAGGTT
GAA'..b'GCATGGTATATCCGCTCCCTGACAAGCTAATCCGGGAACTGGCCGCGCGGGTGAAGCAAGTGGTGGTCATTGAGGAACTCGACCCCTTCATTGAAGAACAGGTACGGCTGGTGGGCATCCCGGCCCGAGGGAAAGATATTTTCCCCAATATTGGAGAATTTAACCCCGGGCGCGTTCGCCGGTGTGCCCAAGAAGCCGGGCTGATTCCCGGGCCGGCCCAACCGTCCCCCGCCGTGTCGGTACCCCAGCTGCCCGGACGGCCGCCCATGCTCTGTCCGGGTTGCGGTCACCGGGGCCTGTTTTACGCTCTTCAGCGTTTAAATGCGACTGTATTTGGAGACATCGGCTGTTACACCCTGGGTGCGGCGCCCCCTCTGAACGCCATGCATACCACCGGCTGTATGGGCGCCAGCATCGGCGTCGTACACGGGGTTGACCGGATAGGCGTGAAGGACCGCACCGTTGCCGTAATCGGTGACTCCACCTTTTTCCACAGCGGTGTGGCTCCATTGATTAACCTTCATTATAACAACGGTACCAGTACGGTAATTGTTGTTGACAACCGGGTGACGGCCATGACCGGGCACCAGACAAATCCCGGCACGGGTAATACATTGCTGGGTATAGAGTCCCCCGTGGTTAAAATTGACGAGCTGGCCCGCGGCATCGGTTTTAAAAAGGTGGATGTGGTGAACCCGTATGACTTTAAGACCGTGGTGGCCACTATCAAGGACCACCTACAATCCGAAGAACCTTCATTAATTGTGGCCCAGTACCCCTGTGTTTTGTATAAACGGGAACGGAAGCCGGCCCTGGTGGTGGATGTGGAGAAATGCAACGAGTGCGGCAACTGTTTGTGAATCGGTTGTTCTCCCATCACCAAAGTAGAAGGGGGCGTCAGCATCAACGCCGCGCTGTGCATTGGCTGCGGTTTTTGTGCCACCATTTGCAATCAGGGCGCCATTAGCCTGCCAAATAAAGAAAGGGGGGCTGAATAATGAAAAAGAAATTGGATTTCCTGTTTGCCGGAGTGGGGGGCCAGGGTACCATACTGGCCAGCAATATCGTCTCCGAAGTGGGTATGCGCTGCGGGTATGACATCAAGAAATCGGAAGTACATGGCATGTCCCAGCGGGGGGGTGCTGTGGAAAGCCATGTGCGCTGGGCGGAAAAAGTCTATTCCCCGTTGATTGAAGAGGGCAGTGCCGATTTCCTGCTGGCCTTTGAAATGCTGGAGGCGGCCCGCTGGCCCCAATACATGGCACCCGGTTCGGTGGTTCTGGTCAACAACCACCGGGTGATGCCGCCGTCGGTAAACCTGGGCCAGGCGCAATATCCGGAGGTAAAGGAAATTGAATCCATCATGACCGCGGCCGGCAGTCAGGTAATCTGGGTCGAGGGCGCAGCCAAGGCGGAAGAATTGGGGAACCCCGCCCTGGCCGGGGTGGTGCTGCTGGGCGTGCTGTCCGCCCGCCTGGACGAGCCGGTGGAAACCTGGCTGCAGGCCGTGCAGGATCTAGTTCCTGCAAAATTCAAGGAACTTAACGTAAAAGCTTTTCTGGCCGGGCGAGAAATGGCGGCGAAATAGCCTATAATTGAATTAGCACTACAGCAAAAAGTATAAGCGGATTTACCGTGGTGCCAGGTGTTGAAAAGTTGAAAGTTGAAAATGAAGACGATCAGGAGTAATGGCTTAATAAGTAATTTTTGCTGTAGTATTAGTAATTGAATTGAATAAATAAGAAGGGGGTTACATCGCCCGTGGTGGAAGCAGTGTTCAATGCGATGTTTATGACCGGCTACCGGGGATACTTTCAGAGCACGTTCAATGGCAGTGCAGGCGGCCATAATTTGCTTTTCCTCAATGGCGTGGAGGACTGCTTGATATCCTGCCTGATGTATCTGCAGGACCATTTCATGAAATAGGACGCTGCGAGCGCACAGTCTGCCTCGATGGACAGCAGCTTTTTCGGGCCCACAATAAATATCAGG
CACGGGGCAATATAGAAAACGTTGAAATGCTGATTGCGCAGTATATTCTTATATTTGCTACTCATAAAGTCGGGATTCTGCTCGAGATGATGTAGAAGATTACGTTTGCTTTCGTCCGAAAGCCTTTTGATGACCTCCCTATTTTCAAGCTAGGCTATAATGGGTGTTGTCACAAGCTCTTTAATGATTAACTACCTCCGCACAAATGAACGTGCTTCTAACTTTAGTTTTCCAGTTCTTTCTTAAGTTCCAATACGGTCTCCACGGATAGTCCGGTTATTTCTGCCACGTCATTCACCGTGAAGCCCTTTTTCAAAGCAATCTTGGCAACCTCAACTTTGCCTTCAAGCTTGCCTGCAATTAGGGTCTGTTTTTTCATATCATCCAGGGTCCGCTCAATGTTGGTAATCATTTTTTCCACCTCCCGGGGTTCGGTTTCATCCAGTACACGTTCAATTTCCTTTTGCAGTGTACCTGGTAATTTGCGCTTGATCACGTTCCTCATCCAGACTGTTATTTGCCGGAATTGTTCCTGGTCTAGTTCCTTTAATATGTCGGCTAGTTTCCTGAGCCGGACCCCCAGTTCTTGCGGCTCAAGGGTCTGGTCCAGGTAGAATATGCTGGACACCACGTTAGCTGCACGGTGAAGGTCTTCCTCATTGTAACGAACGACGTCGAAAAGAATGTAGGAAAAGTCCAGTAAACGTCCAGGGAATTGTTCGTGCCCCGACTGGTACTCTCTGAAGCTTTGGTGGGCTGTCCAGCCTTTCTTGCCGTTGTAGAGAACTGCTGGTACGATAGCCGGCAGTTTGAAGTTCTTGCGTCTTCGTTCCTTTTCCGGGGCATTTTTGTAAATGTCCCGCCATATCTCAACCATGTATTGCAGCAAGCGAAAGGGCATAGTGTGGTCTACAGTCGACTGTAGTTCTAGAAGTACATAGAAGATTATTTCGGACCCGCCCAACCGCATTCTGTATACTATGTCGGCCTCTTTATCGCTAAAGTCCTGCAGTACATAGGATTTGTTGATCAACGTCAGGTCGCCCTCTTTTATTTCACGCACCCAACCCTCAGCCACAAATGTCTGCAGCAGTTCCAGGAAGGTCTTTTTGTTAGTCAGAAGCTGCCTGTATCCTTTGTCGTGAGGGTGGTGAGGCGGATTTTGTGCTGATTTCCCTCGGTTTGGCAATTTGTTCACTTCCTTACTGGTATTATATCATGGGGTTAGCTAGGGTGACAGAGGTTATGAAACTTATATTCCCTAACAAGCGATGCATCAGATATGGGCTTTGGCGATATGGCTACTGCTGGGCAAATGAGTCTATTAAAAGCTAAGAATGGTGGAGAAAAATAAATGGCCAAACCATCTTTTGACAAGTTCGCTGCCATGCTTAATCGAGCCGTAGACAGCATTCCACCACATTTCCTACGAGGCTTGACCGGCGGTTTTAACTTGCAGGAGGACGAAAAGTGTGAAGGCGAATACTATATTTTGGGTGAGTACATTGAGGATAGTATCCTGGGCTGCTTTATTGTGTTTTATTATGGTTCCTTCGTGGGACTGCTGAAAAACGAGCCCGACGATTGCTGGGAAGCAGAGATTGTTGATACGGTGCTGTATTTGTGCGCGCATCCCTGACCCCGGTTGTTGAACAGGCTATTACCTCTGCCGGCACTCATCATACCCTGGTTTCCATTTGTGGGTTATCTACATCACGAGTATTCTTTGGACGAAGCCGTTAATTTATTGAAAAGAAATACCAGAAGGTTTGCCAAGCGACAGTTGACTTGGTTTAGACGATATAGTAGTATCAAATGGATAGATATGGAGAAGTATGATATAATAAATAATGTTGCGGAAGAGATTAAATGTTTTATAGCTGAACTTATCCGGTGTCCATAGAAGGAAAAGTATAAAATACCAACAGAGGGAATTATATGACAAAGCCTCAGATAAATTTACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATAT
ACCTGT\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated1_trnas.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated1_trnas.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,7 @@
+fasta Name tRNA # Begin End Type Codon Score Note
+dataset_5327 dataset_5327_scaffold_1814  1 1368 1292 His GTG 73.0
+dataset_5327 dataset_5327_scaffold_1814  2 1285 1214 Gln TTG 60.8
+dataset_5327 dataset_5327_scaffold_1814  3 1182 1110 Lys TTT 77.5
+dataset_5327 dataset_5327_scaffold_361   1 29420 29495 Met CAT 73.6
+dataset_5327 dataset_5327_scaffold_361   2 56462 56380 Leu CAA 63.7
+dataset_5327 dataset_5327_scaffold_361   3 12681 12586 Cys GCA 26.9 pseudo
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.genbank
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2.genbank Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,26 @@
+LOCUS       dataset_5328_scaffold_2027   6089 bp   DNA   linear   ENV   05-AUG-2022
+FEATURES             Location/Qualifiers
+     CDS             3..500
+                     /conf=100.00
+                     /cscore=78.24
+                     /gc_cont=0.735
+                     /gene=dataset_5328_scaffold_2027_1
+                     /partial=10
+                     /codon_start=1
+                     /rbs_motif=None
+                     /rbs_spacer=None
+                     /rscore=0.00
+                     /score=81.46
+                     /inference=Prodigal_v2.6.3
+                     /sscore=3.22
+                     /start_type=Edge
+                     /translation=RLVEGARSHPEGRLRRLGVAVLYLLGEAGVWPRELFALRLEDFQPAARVLRVRGEKARSVPLSKEATEALKAYLEDRESVAGLAPLPSPYLLLRMTPKKGGLGRPLNRDTLKGLLERALEMGGLEHPRPTGALRWRAVRRYLQQGLSPQEVARRTGVASVLSLKD*
+                     /tscore=3.22
+                     /uscore=0.00
+ORIGIN
+        1 cccgcctggt ggagggagcc cgctcccacc ccgaagggcg gctgcggcgg ttgggggtgg
+       61 cggtgctgta cctgctgggg gaggccgggg tgtggcccag ggaactcttc gccctgcgcc
+      121 tggaggactt ccagcccgcc gcccgcgtct tgcgggtgcg gggggaaaaa gcccgcagcg
+      181 tgccgctttc caaggaggcg accgaggccc tcaaagccta tctggaagac cgggagagcg
+      241 tggccggcct ggccccgctg ccctcgccct acctccttct gcgcatgacc cccaagaaag
+      301 gcgggctggg acgacccctc aaccgcgaca ccctgaaggg gctgctggag cgggcgctgg
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2.gff Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,13 @@
+##gff-version  3
+# Sequence Data: seqnum=1;seqlen=6089;seqhdr="scaffold_2027"
+# Model Data: version=Prodigal.v2.6.3;run_type=Metagenomic;model="13|Catenulispora_acidiphila_DSM_44928|B|69.8|11|1";gc_cont=69.80;transl_table=11;uses_sd=1
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3 500 81.5 + 0 ID=dataset_5328_scaffold_2027_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.735;conf=100.00;score=81.46;cscore=78.24;sscore=3.22;rscore=0.00;uscore=0.00;tscore=3.22;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 550 1395 121.4 + 0 ID=dataset_5328_scaffold_2027_2;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.652;conf=100.00;score=122.16;cscore=123.21;sscore=-1.05;rscore=-4.48;uscore=-1.84;tscore=4.54;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 1362 1499 15.7 - 0 ID=dataset_5328_scaffold_2027_3;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.572;conf=97.34;score=15.66;cscore=6.69;sscore=8.97;rscore=6.72;uscore=-0.20;tscore=2.45;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 1583 2284 128.2 - 0 ID=dataset_5328_scaffold_2027_4;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.674;conf=100.00;score=129.04;cscore=129.30;sscore=-0.26;rscore=-4.48;uscore=-1.16;tscore=4.54;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 2324 2995 109.1 - 0 ID=dataset_5328_scaffold_2027_5;partial=00;start_type=TTG;rbs_motif=None;rbs_spacer=None;gc_cont=0.711;conf=100.00;score=109.08;cscore=123.84;sscore=-14.76;rscore=-4.48;uscore=-0.76;tscore=-9.51;Dbxref="ko:K02338";
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 2959 3363 42.5 - 0 ID=dataset_5328_scaffold_2027_6;partial=00;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=3-4bp;gc_cont=0.699;conf=99.99;score=41.89;cscore=22.38;sscore=19.51;rscore=14.94;uscore=0.69;tscore=4.54;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3435 3653 31.0 + 0 ID=dataset_5328_scaffold_2027_7;partial=00;start_type=ATG;rbs_motif=AGGAG;rbs_spacer=5-10bp;gc_cont=0.644;conf=99.92;score=31.04;cscore=15.02;sscore=16.03;rscore=10.75;uscore=1.35;tscore=3.92;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 3716 4126 90.1 + 0 ID=dataset_5328_scaffold_2027_8;partial=00;start_type=ATG;rbs_motif=AGGAGG;rbs_spacer=3-4bp;gc_cont=0.710;conf=100.00;score=90.07;cscore=68.99;sscore=21.08;rscore=14.94;uscore=1.61;tscore=4.54;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 4128 4658 85.9 - 0 ID=dataset_5328_scaffold_2027_9;partial=00;start_type=GTG;rbs_motif=GGAGG;rbs_spacer=5-10bp;gc_cont=0.718;conf=100.00;score=87.09;cscore=80.36;sscore=6.73;rscore=10.10;uscore=-1.14;tscore=-3.38;
+dataset_5328_scaffold_2027 Prodigal_v2.6.3 CDS 4642 6087 162.5 - 0 ID=dataset_5328_scaffold_2027_10;partial=01;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.717;conf=100.00;score=162.48;cscore=159.27;sscore=3.22;rscore=0.00;uscore=0.00;tscore=3.22;
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2.tabular Sat Dec 10 21:14:28 2022 +0000
[
@@ -0,0 +1,11 @@
+ fasta scaffold gene_position start_position end_position strandedness rank ko_id kegg_hit pfam_hits heme_regulatory_motif_count
+dataset_4710_scaffold_2027_1 dataset_4710 scaffold_2027 1 3 500 1 D Phage integrase family [PF00589.25] 0
+dataset_4710_scaffold_2027_2 dataset_4710 scaffold_2027 2 550 1395 1 D Protein of unknown function (DUF1385) [PF07136.14] 0
+dataset_4710_scaffold_2027_3 dataset_4710 scaffold_2027 3 1362 1499 -1 E 0
+dataset_4710_scaffold_2027_4 dataset_4710 scaffold_2027 4 1583 2284 -1 E 0
+dataset_4710_scaffold_2027_5 dataset_4710 scaffold_2027 5 2324 2995 -1 C K02338 DNA polymerase III subunit beta [EC:2.7.7.7] DNA polymerase III beta subunit, central domain [PF02767.19]; DNA polymerase III beta subunit, C-terminal domain [PF02768.18] 0
+dataset_4710_scaffold_2027_6 dataset_4710 scaffold_2027 6 2959 3363 -1 E 0
+dataset_4710_scaffold_2027_7 dataset_4710 scaffold_2027 7 3435 3653 1 E 0
+dataset_4710_scaffold_2027_8 dataset_4710 scaffold_2027 8 3716 4126 1 E 0
+dataset_4710_scaffold_2027_9 dataset_4710 scaffold_2027 9 4128 4658 -1 E 0
+dataset_4710_scaffold_2027_10 dataset_4710 scaffold_2027 10 4642 6087 -1 E 0
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_genes_faa.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2_genes_faa.fasta Sat Dec 10 21:14:28 2022 +0000
[
@@ -0,0 +1,20 @@
+>dataset_5328_scaffold_2027_1 rank: D; Phage integrase family [PF00589.25] (db=pfam)
+RLVEGARSHPEGRLRRLGVAVLYLLGEAGVWPRELFALRLEDFQPAARVLRVRGEKARSVPLSKEATEALKAYLEDRESVAGLAPLPSPYLLLRMTPKKGGLGRPLNRDTLKGLLERALEMGGLEHPRPTGALRWRAVRRYLQQGLSPQEVARRTGVASVLSLKD*
+>dataset_5328_scaffold_2027_2 rank: D; Protein of unknown function (DUF1385) [PF07136.14] (db=pfam)
+MDLNKLMGGMALPHGVVLMSTERVALGYYDKEGTLQLYTRELNNPSGGLKGLWTFFLEAARALWKTYPHQGEFRSVVAGVLAGVLTGIPIGLFLSRASLLPAWQMLLLSTSLVVLMFLALYRFYPPFRQGLQRMARYHGAEHKMIWALEKGEVSREGVRQQPLLHPACGSNLFALYLPFYLLSFPQSLLAPGFWWLQLLILPLLFPVFGWMRRHPEHPLARRLLALGYRFQRHTLAEPGEAELEAAWRALQGLEMETPSTSTVEGVRERCDYRRVKPRQAR*
+>dataset_5328_scaffold_2027_3 rank: E
+MIALFFLGSLTLTVAVAATLFFNFPAGLAVVAGIIYLAWRGFTRR*
+>dataset_5328_scaffold_2027_4 rank: E
+MDNPLEQIRSIPSRMLMALTWHGISLIQAPAALQRFDLPVRNLADLADPLTLLAALTPTTLYDFARSYTTATRWLETGRGEVVDLSYLHFVPGVFVQRAQEQAERGVLQGIYLLAMHPYMSLTAFTPVWPVLQITHPVLGPGFPLYELWPLQHWHLEDERRAVKGAFRTLLNVFAHTRLVPQGVLLDGSISGLCFEGYLHPAYALTEYRTAEWNLKEALTPRPEPGEWEPNDI*
+>dataset_5328_scaffold_2027_5 rank: C; DNA polymerase III subunit beta [EC:2.7.7.7] (db=kegg)
+MRDALEAVRYAVAREEYRKVLTGVRLEVGQTLRAVASDGYRLALQEFPLPAPLPAFQGVLPGAAVGDLLRLLAEEESVHLSLEKQMCYCQGERFRYATPLLSGEFPDYQRVMPTRYPAQAEVGPELSAALKRLEALSEDRVSKVQLTLQQEALHLRSENAYGLAEETVPAAVQGEPLHLTLNGRFLREALPDGGATLRFSGPATPLLITGQEGYQALLMPLRT*
+>dataset_5328_scaffold_2027_6 rank: E
+MVTVDKKPLHDAIGSLLRGLPQQERTVWFSAGENLKLQSFSHQRDLEVVVPLEAPAPAGFSLALDGSLLAEVVARMPEEIRLEREGEQLHLLGGTFQARLQTGWIDPPAPASARGRPPSPPGPCGTRWRRCATR*
+>dataset_5328_scaffold_2027_7 rank: E
+MKVKNPIFVRRILEEGAPLELLRHVAPDLPLEDLEEGRKLLRQAIWPEDVFDGERLFLRPRTPAQRALLQRL*
+>dataset_5328_scaffold_2027_8 rank: E
+MVPDFVLAYLPFVAALGLGGMVGRLLRVPLRAALWGLGLLGLGYVLLEPGQALALAQGWGRRGIEEALVWLGLPVQWAAYAFSPQWTWLVEQVSRHLVQQALAQADAGALERLNAYLQRADVGFVLGVLAGIDQRK*
+>dataset_5328_scaffold_2027_9 rank: E
+MLRVAESPQEALACDQALSLSQFQRRWPGLDPTALPGAVLLEREVAEITKARPRTVAFVALEALQEAEGFALRHLAGVAEMRYALGARPQDWQTDAAAVHLDATPDALWWSPEGVAAIEYDIRYNRDLVLRKAQDYRRIYVRQYWGATRLSRVQYLQRLLGPDPHTRVLLAPWLGG*
+>dataset_5328_scaffold_2027_10 rank: E
+ASHKRPSSVSYTSSTSSPDANSVPTPPPVASTPAPSGATPPSTATSAPPPPPLPTTTTPASQSPTPSAPASVPPPPSPPSSAGPSAPGERPYSGVVPPPPLPEYSATPAPAPGASPAAPAKPAFNPVKLQQAQQTASESGPVALKGEAASAQGKYAQLEREQAAKEPAYTAVLNPTSTKYTVFEGGKAEARNPVVLQGEQVQRSGYTPAIQSLSGPGGGYVALKTPQEQEKSAAAEPPTSAPTSSSDPLSPAPQASAALPAPTPAPTPASLSLSSPSVPTGQSPLSSPAPQAAASSPALAEYPFDVGEKQEGELVSGVVIPEGSSQAALMVRTKEGYVFFGVASLDRAGRLQMQFDRAYKGKTAYVVRAIALDERGVAGIPAQVSEQTPNLLTNLLRGAATGLVSYIDFYAKSSSTTILPGGGVASSNTPPPLGLTILSGSAKQIAAPPDSTSVVRVWSLDPGTKMQILIVPGEAAGAAGR*
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_genes_fna.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2_genes_fna.fasta Sat Dec 10 21:14:28 2022 +0000
[
@@ -0,0 +1,20 @@
+>dataset_5328_scaffold_2027_1 rank: D; Phage integrase family [PF00589.25] (db=pfam)
+CGCCTGGTGGAGGGAGCCCGCTCCCACCCCGAAGGGCGGCTGCGGCGGTTGGGGGTGGCGGTGCTGTACCTGCTGGGGGAGGCCGGGGTGTGGCCCAGGGAACTCTTCGCCCTGCGCCTGGAGGACTTCCAGCCCGCCGCCCGCGTCTTGCGGGTGCGGGGGGAAAAAGCCCGCAGCGTGCCGCTTTCCAAGGAGGCGACCGAGGCCCTCAAAGCCTATCTGGAAGACCGGGAGAGCGTGGCCGGCCTGGCCCCGCTGCCCTCGCCCTACCTCCTTCTGCGCATGACCCCCAAGAAAGGCGGGCTGGGACGACCCCTCAACCGCGACACCCTGAAGGGGCTGCTGGAGCGGGCGCTGGAGATGGGCGGGCTGGAGCACCCGCGCCCCACCGGGGCCCTGCGCTGGCGGGCGGTGCGGCGGTACTTGCAGCAGGGCCTCTCCCCCCAGGAGGTGGCCCGGCGCACCGGGGTGGCCAGCGTGCTGAGCCTCAAGGACTGA
+>dataset_5328_scaffold_2027_2 rank: D; Protein of unknown function (DUF1385) [PF07136.14] (db=pfam)
+ATGGACCTGAATAAACTCATGGGTGGCATGGCCCTGCCCCACGGGGTGGTGTTGATGAGCACGGAGCGGGTGGCGCTGGGCTACTACGACAAAGAGGGAACCTTGCAGCTCTACACCCGCGAACTCAACAATCCCTCGGGCGGCTTGAAAGGCCTGTGGACGTTTTTCTTGGAGGCCGCACGGGCCCTGTGGAAGACCTACCCGCATCAGGGCGAATTTCGGAGCGTCGTGGCGGGGGTGCTGGCGGGGGTGCTGACGGGGATTCCCATCGGGCTGTTCCTGTCCCGGGCCTCGCTGCTTCCCGCTTGGCAGATGCTCCTCTTGAGCACCTCCCTGGTGGTGCTGATGTTTCTGGCGCTGTACCGCTTCTATCCGCCCTTCCGCCAGGGGTTACAGCGCATGGCCCGCTACCACGGGGCCGAGCACAAGATGATCTGGGCCCTCGAGAAAGGCGAGGTCAGCCGGGAAGGAGTCCGGCAGCAGCCGCTGTTGCATCCCGCTTGCGGCAGCAACCTGTTCGCCCTGTACCTGCCGTTCTACCTTCTGAGCTTCCCGCAGTCTCTGCTTGCTCCCGGGTTCTGGTGGTTGCAGCTGCTGATCCTGCCGCTGCTTTTTCCGGTCTTCGGCTGGATGCGCCGCCACCCCGAGCACCCGCTGGCGCGCAGGCTGCTGGCCCTGGGCTACCGCTTCCAGCGCCACACGCTGGCCGAGCCGGGGGAGGCCGAACTGGAGGCGGCCTGGCGGGCTTTGCAGGGGCTGGAAATGGAAACCCCCTCGACTTCAACAGTCGAGGGGGTCAGGGAGCGCTGTGACTACCGGCGGGTAAAGCCCCGCCAGGCCAGATAG
+>dataset_5328_scaffold_2027_3 rank: E
+ATGATAGCACTGTTCTTTTTGGGGAGCTTGACCCTGACTGTGGCCGTCGCGGCCACACTTTTTTTCAACTTCCCGGCGGGGTTGGCCGTCGTTGCCGGGATTATCTATCTGGCCTGGCGGGGCTTTACCCGCCGGTAG
+>dataset_5328_scaffold_2027_4 rank: E
+ATGGACAACCCCCTCGAACAGATCCGCTCCATCCCCTCCCGAATGCTCATGGCGCTGACCTGGCACGGTATCAGCCTGATCCAGGCTCCCGCTGCCCTCCAGCGCTTCGACCTGCCGGTGCGCAACCTGGCTGACCTGGCCGACCCGCTGACCCTGCTGGCGGCCCTCACCCCCACCACCCTGTACGACTTCGCCCGGAGCTACACCACCGCAACGCGCTGGCTGGAGACCGGGCGGGGGGAGGTGGTCGATCTCAGCTACCTGCACTTCGTCCCCGGAGTCTTCGTACAGCGGGCCCAGGAACAGGCCGAGCGGGGGGTCTTGCAGGGAATCTACCTGCTGGCCATGCACCCCTACATGTCCCTCACCGCCTTCACCCCGGTCTGGCCGGTGTTGCAGATCACCCACCCGGTGCTGGGCCCCGGCTTTCCGCTGTACGAGCTGTGGCCGTTGCAGCACTGGCACCTCGAGGACGAGCGGCGGGCGGTCAAAGGGGCCTTCCGCACCCTGCTGAACGTCTTCGCCCACACCCGGCTGGTCCCCCAGGGGGTGCTCCTGGACGGCAGCATCAGCGGGCTGTGCTTCGAGGGCTACCTGCACCCGGCCTACGCCCTCACCGAGTACCGCACCGCCGAGTGGAACCTCAAGGAAGCCCTCACTCCCCGCCCCGAGCCTGGCGAATGGGAGCCAAATGATATATAA
+>dataset_5328_scaffold_2027_5 rank: C; DNA polymerase III subunit beta [EC:2.7.7.7] (db=kegg)
+TTGCGGGACGCGCTGGAGGCGGTGCGCTACGCGGTAGCGCGGGAGGAGTACCGCAAGGTGCTGACCGGGGTGCGGCTGGAGGTGGGCCAGACCCTGCGGGCGGTGGCCTCCGACGGCTACCGCCTGGCCCTCCAGGAGTTCCCCCTGCCCGCCCCTCTCCCGGCCTTCCAGGGAGTCCTGCCCGGCGCGGCGGTAGGGGATCTGCTGCGCCTTCTGGCCGAGGAGGAATCAGTCCACCTCTCCCTGGAGAAGCAGATGTGCTACTGCCAGGGGGAGCGCTTCCGCTACGCCACCCCCCTGCTCTCGGGGGAGTTCCCCGACTACCAGCGGGTGATGCCCACCCGCTACCCCGCCCAGGCCGAGGTGGGGCCGGAACTCTCCGCTGCGCTGAAGCGGCTGGAAGCGCTGTCGGAAGACCGGGTTTCCAAGGTGCAGCTCACCCTCCAGCAGGAGGCCTTACACCTGCGCAGCGAAAACGCATACGGCCTCGCTGAGGAGACGGTCCCGGCGGCGGTGCAGGGGGAACCCCTGCACCTGACCCTGAACGGTCGCTTCCTCCGCGAGGCCCTGCCCGACGGAGGGGCCACCCTGCGCTTCAGCGGCCCCGCCACCCCGCTGCTGATCACCGGCCAGGAGGGCTACCAGGCCCTCCTCATGCCCCTGAGGACCTGA
+>dataset_5328_scaffold_2027_6 rank: E
+ATGGTGACGGTAGATAAAAAACCCCTACACGACGCGATTGGCAGCCTGTTGCGGGGCCTCCCACAACAGGAGCGCACGGTCTGGTTCTCCGCCGGGGAGAACCTCAAGCTCCAGTCCTTCTCGCACCAGCGGGATCTGGAGGTGGTGGTCCCCCTGGAGGCCCCGGCCCCGGCGGGCTTCAGCCTGGCCCTGGACGGCTCCCTGCTGGCCGAGGTGGTGGCCCGGATGCCGGAGGAGATCCGGCTGGAGCGGGAGGGGGAACAGCTTCATCTACTCGGCGGCACCTTCCAGGCCCGGCTGCAAACCGGCTGGATAGACCCCCCCGCCCCCGCTTCGGCCAGGGGCAGGCCGCCCTCCCCGCCCGGTCCTTGCGGGACGCGCTGGAGGCGGTGCGCTACGCGGTAG
+>dataset_5328_scaffold_2027_7 rank: E
+ATGAAAGTAAAAAATCCAATTTTTGTCCGCCGGATTCTCGAGGAGGGTGCGCCGCTGGAACTCCTCCGGCATGTCGCCCCGGACCTGCCCCTGGAGGATCTGGAAGAGGGACGCAAGCTCCTCCGCCAGGCCATCTGGCCGGAGGACGTGTTCGACGGGGAGCGGCTTTTCCTGCGCCCCCGCACCCCGGCGCAGCGGGCGCTCTTGCAGCGTCTGTAA
+>dataset_5328_scaffold_2027_8 rank: E
+ATGGTACCGGACTTTGTACTGGCCTATCTGCCCTTTGTGGCCGCGCTGGGGCTGGGAGGGATGGTGGGGCGGCTGTTGCGGGTACCGCTGCGGGCGGCGCTGTGGGGACTGGGCTTGCTGGGGCTGGGCTACGTCCTGCTCGAGCCGGGTCAGGCCCTGGCCCTGGCCCAGGGGTGGGGACGCCGGGGGATAGAGGAGGCCCTGGTGTGGCTGGGGCTGCCCGTGCAGTGGGCGGCCTACGCCTTCAGCCCGCAATGGACCTGGCTGGTGGAGCAGGTCAGCCGGCACCTGGTCCAGCAGGCCCTGGCTCAGGCCGACGCCGGAGCCCTGGAGCGCCTGAACGCCTATCTCCAGCGCGCCGACGTGGGCTTCGTCCTGGGAGTGCTGGCCGGGATCGACCAGCGCAAATAG
+>dataset_5328_scaffold_2027_9 rank: E
+GTGCTGCGGGTCGCTGAGTCCCCCCAGGAAGCCCTGGCCTGCGACCAGGCCCTGAGCCTGTCTCAGTTCCAGCGCCGCTGGCCGGGCCTCGACCCCACCGCTTTGCCCGGGGCGGTGCTGCTGGAGCGGGAGGTGGCCGAGATCACCAAAGCCCGGCCCCGAACGGTGGCCTTCGTGGCCCTGGAGGCCCTTCAGGAGGCCGAAGGCTTCGCCCTGCGCCACCTGGCCGGGGTGGCCGAGATGCGCTACGCCCTGGGAGCCCGCCCCCAAGACTGGCAGACCGACGCCGCTGCGGTCCACCTCGACGCGACCCCCGACGCTTTGTGGTGGAGCCCGGAAGGGGTGGCGGCCATCGAGTACGACATCCGCTACAACCGCGACCTGGTGCTGCGCAAGGCCCAGGACTACCGCCGCATCTACGTGCGGCAATACTGGGGAGCCACCCGGCTTTCGCGGGTGCAGTACCTTCAGCGCCTGCTGGGCCCCGACCCCCACACGCGGGTGCTGCTGGCCCCCTGGCTAGGAGGTTGA
+>dataset_5328_scaffold_2027_10 rank: E
+GCCAGCCATAAGCGTCCGTCGAGCGTGAGCTACACCTCCTCCACCTCGAGCCCGGACGCCAATTCCGTCCCTACCCCGCCCCCCGTCGCCTCCACCCCCGCCCCTTCGGGCGCCACGCCCCCCTCCACCGCCACCTCCGCCCCGCCTCCCCCGCCGCTGCCCACCACCACCACTCCGGCCTCGCAAAGCCCCACACCGTCCGCCCCCGCCAGCGTTCCCCCACCCCCTAGCCCCCCCAGCAGCGCAGGGCCATCGGCCCCCGGCGAGCGCCCGTACTCCGGGGTGGTGCCGCCGCCCCCGCTGCCCGAGTACAGCGCCACCCCGGCCCCCGCGCCCGGCGCTTCCCCTGCGGCCCCGGCCAAACCGGCCTTCAACCCGGTCAAGCTGCAACAAGCCCAGCAGACGGCCAGCGAAAGCGGCCCGGTGGCCCTCAAGGGCGAGGCCGCCTCGGCTCAGGGCAAGTACGCTCAGCTGGAGCGGGAACAAGCCGCCAAGGAACCGGCCTACACCGCCGTGCTCAACCCCACGAGCACCAAGTACACCGTCTTCGAGGGGGGCAAAGCCGAAGCCCGCAACCCGGTGGTTTTGCAGGGCGAACAGGTCCAGCGCAGCGGCTATACCCCGGCCATCCAGTCCCTCTCCGGGCCCGGCGGAGGGTATGTGGCCCTGAAAACTCCACAGGAGCAGGAAAAGTCTGCCGCTGCCGAGCCTCCAACCAGCGCCCCGACCTCCTCCAGCGACCCCCTCAGCCCCGCCCCCCAGGCCAGCGCCGCACTGCCCGCTCCCACACCCGCTCCCACACCCGCTTCCCTCTCCCTTTCCAGCCCCAGCGTTCCCACCGGCCAAAGCCCCCTGAGCAGCCCCGCCCCCCAGGCCGCTGCTTCCTCCCCGGCCCTGGCCGAGTACCCCTTCGACGTGGGGGAAAAGCAGGAAGGGGAGTTGGTCTCGGGGGTGGTGATCCCCGAGGGCAGCTCCCAGGCCGCCTTGATGGTGCGCACCAAGGAAGGCTACGTCTTCTTCGGGGTGGCCAGCCTGGACCGGGCCGGGCGCTTGCAAATGCAGTTCGACCGGGCCTACAAGGGCAAGACCGCCTACGTGGTGCGGGCCATCGCCCTGGACGAACGGGGCGTGGCGGGGATTCCCGCCCAGGTCAGCGAGCAGACCCCCAACCTGCTCACCAACCTGCTGCGGGGGGCCGCCACCGGCCTGGTCAGCTACATCGACTTCTACGCCAAGAGCAGTTCCACTACCATCCTGCCCGGGGGCGGGGTGGCCAGCAGCAACACCCCCCCGCCGCTGGGATTGACCATCCTCTCCGGCTCGGCCAAGCAGATCGCCGCCCCTCCCGACTCGACCAGCGTGGTGCGGGTGTGGAGCCTCGACCCGGGCACCAAGATGCAGATCCTCATCGTCCCCGGGGAGGCCGCGGGTGCTGCGGGTCGCTGA
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_rrnas.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2_rrnas.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,2 @@
+scaffold fasta begin end strand type e-value note
+dataset_5328_scaffold_362 dataset_5328 61952 62058 - 5S rRNA 5.2e-16
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_scaffold.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2_scaffold.fasta Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,2 @@
+>dataset_5328_scaffold_2027
+CCCGCCTGGTGGAGGGAGCCCGCTCCCACCCCGAAGGGCGGCTGCGGCGGTTGGGGGTGGCGGTGCTGTACCTGCTGGGGGAGGCCGGGGTGTGGCCCAGGGAACTCTTCGCCCTGCGCCTGGAGGACTTCCAGCCCGCCGCCCGCGTCTTGCGGGTGCGGGGGGAAAAAGCCCGCAGCGTGCCGCTTTCCAAGGAGGCGACCGAGGCCCTCAAAGCCTATCTGGAAGACCGGGAGAGCGTGGCCGGCCTGGCCCCGCTGCCCTCGCCCTACCTCCTTCTGCGCATGACCCCCAAGAAAGGCGGGCTGGGACGACCCCTCAACCGCGACACCCTGAAGGGGCTGCTGGAGCGGGCGCTGGAGATGGGCGGGCTGGAGCACCCGCGCCCCACCGGGGCCCTGCGCTGGCGGGCGGTGCGGCGGTACTTGCAGCAGGGCCTCTCCCCCCAGGAGGTGGCCCGGCGCACCGGGGTGGCCAGCGTGCTGAGCCTCAAGGACTGAACTTCTCATCTGGCCCGGCGGGCGGCTTTTTCTCCTGTACCATCAGGGCATGGACCTGAATAAACTCATGGGTGGCATGGCCCTGCCCCACGGGGTGGTGTTGATGAGCACGGAGCGGGTGGCGCTGGGCTACTACGACAAAGAGGGAACCTTGCAGCTCTACACCCGCGAACTCAACAATCCCTCGGGCGGCTTGAAAGGCCTGTGGACGTTTTTCTTGGAGGCCGCACGGGCCCTGTGGAAGACCTACCCGCATCAGGGCGAATTTCGGAGCGTCGTGGCGGGGGTGCTGGCGGGGGTGCTGACGGGGATTCCCATCGGGCTGTTCCTGTCCCGGGCCTCGCTGCTTCCCGCTTGGCAGATGCTCCTCTTGAGCACCTCCCTGGTGGTGCTGATGTTTCTGGCGCTGTACCGCTTCTATCCGCCCTTCCGCCAGGGGTTACAGCGCATGGCCCGCTACCACGGGGCCGAGCACAAGATGATCTGGGCCCTCGAGAAAGGCGAGGTCAGCCGGGAAGGAGTCCGGCAGCAGCCGCTGTTGCATCCCGCTTGCGGCAGCAACCTGTTCGCCCTGTACCTGCCGTTCTACCTTCTGAGCTTCCCGCAGTCTCTGCTTGCTCCCGGGTTCTGGTGGTTGCAGCTGCTGATCCTGCCGCTGCTTTTTCCGGTCTTCGGCTGGATGCGCCGCCACCCCGAGCACCCGCTGGCGCGCAGGCTGCTGGCCCTGGGCTACCGCTTCCAGCGCCACACGCTGGCCGAGCCGGGGGAGGCCGAACTGGAGGCGGCCTGGCGGGCTTTGCAGGGGCTGGAAATGGAAACCCCCTCGACTTCAACAGTCGAGGGGGTCAGGGAGCGCTGTGACTACCGGCGGGTAAAGCCCCGCCAGGCCAGATAGATAATCCCGGCAACGACGGCCAACCCCGCCGGGAAGTTGAAAAAAAGTGTGGCCGCGACGGCCACAGTCAGGGTCAAGCTCCCCAAAAAGAACAGTGCTATCATTTTTTCCCTCCTATCGAGGAGGTACCTGTGACTTTTCAAGCTCCCTGGGGGGCCTGAAAAGTTCAGGCCCTCCTGGTTAGCTATTATATATCATTTGGCTCCCATTCGCCAGGCTCGGGGCGGGGAGTGAGGGCTTCCTTGAGGTTCCACTCGGCGGTGCGGTACTCGGTGAGGGCGTAGGCCGGGTGCAGGTAGCCCTCGAAGCACAGCCCGCTGATGCTGCCGTCCAGGAGCACCCCCTGGGGGACCAGCCGGGTGTGGGCGAAGACGTTCAGCAGGGTGCGGAAGGCCCCTTTGACCGCCCGCCGCTCGTCCTCGAGGTGCCAGTGCTGCAACGGCCACAGCTCGTACAGCGGAAAGCCGGGGCCCAGCACCGGGTGGGTGATCTGCAACACCGGCCAGACCGGGGTGAAGGCGGTGAGGGACATGTAGGGGTGCATGGCCAGCAGGTAGATTCCCTGCAAGACCCCCCGCTCGGCCTGTTCCTGGGCCCGCTGTAC
GAAGACTCCGGGGACGAAGTGCAGGTAGCTGAGATCGACCACCTCCCCCCGCCCGGTCTCCAGCCAGCGCGTTGCGGTGGTGTAGCTCCGGGCGAAGTCGTACAGGGTGGTGGGGGTGAGGGCCGCCAGCAGGGTCAGCGGGTCGGCCAGGTCAGCCAGGTTGCGCACCGGCAGGTCGAAGCGCTGGAGGGCAGCGGGAGCCTGGATCAGGCTGATACCGTGCCAGGTCAGCGCCATGAGCATTCGGGAGGGGATGGAGCGGATCTGTTCGAGGGGGTTGTCCATAAGAGAAAAGGGGAAGGGGCGTTGCCCCTTCCCGTCGGGTCAGGTCCTCAGGGGCATGAGGAGGGCCTGGTAGCCCTCCTGGCCGGTGATCAGCAGCGGGGTGGCGGGGCCGCTGAAGCGCAGGGTGGCCCCTCCGTCGGGCAGGGCCTCGCGGAGGAAGCGACCGTTCAGGGTCAGGTGCAGGGGTTCCCCCTGCACCGCCGCCGGGACCGTCTCCTCAGCGAGGCCGTATGCGTTTTCGCTGCGCAGGTGTAAGGCCTCCTGCTGGAGGGTGAGCTGCACCTTGGAAACCCGGTCTTCCGACAGCGCTTCCAGCCGCTTCAGCGCAGCGGAGAGTTCCGGCCCCACCTCGGCCTGGGCGGGGTAGCGGGTGGGCATCACCCGCTGGTAGTCGGGGAACTCCCCCGAGAGCAGGGGGGTGGCGTAGCGGAAGCGCTCCCCCTGGCAGTAGCACATCTGCTTCTCCAGGGAGAGGTGGACTGATTCCTCCTCGGCCAGAAGGCGCAGCAGATCCCCTACCGCCGCGCCGGGCAGGACTCCCTGGAAGGCCGGGAGAGGGGCGGGCAGGGGGAACTCCTGGAGGGCCAGGCGGTAGCCGTCGGAGGCCACCGCCCGCAGGGTCTGGCCCACCTCCAGCCGCACCCCGGTCAGCACCTTGCGGTACTCCTCCCGCGCTACCGCGTAGCGCACCGCCTCCAGCGCGTCCCGCAAGGACCGGGCGGGGAGGGCGGCCTGCCCCTGGCCGAAGCGGGGGCGGGGGGGTCTATCCAGCCGGTTTGCAGCCGGGCCTGGAAGGTGCCGCCGAGTAGATGAAGCTGTTCCCCCTCCCGCTCCAGCCGGATCTCCTCCGGCATCCGGGCCACCACCTCGGCCAGCAGGGAGCCGTCCAGGGCCAGGCTGAAGCCCGCCGGGGCCGGGGCCTCCAGGGGGACCACCACCTCCAGATCCCGCTGGTGCGAGAAGGACTGGAGCTTGAGGTTCTCCCCGGCGGAGAACCAGACCGTGCGCTCCTGTTGTGGGAGGCCCCGCAACAGGCTGCCAATCGCGTCGTGTAGGGGTTTTTTATCTACCGTCACCATTTTACCTCCTATCGGTGAGCATTATACCAAAAGGAGCTTTTTGCTATACTGAAACGCGATAGGAGGTAAGGATGAAAGTAAAAAATCCAATTTTTGTCCGCCGGATTCTCGAGGAGGGTGCGCCGCTGGAACTCCTCCGGCATGTCGCCCCGGACCTGCCCCTGGAGGATCTGGAAGAGGGACGCAAGCTCCTCCGCCAGGCCATCTGGCCGGAGGACGTGTTCGACGGGGAGCGGCTTTTCCTGCGCCCCCGCACCCCGGCGCAGCGGGCGCTCTTGCAGCGTCTGTAAGTCAGGGAGGCACAGCCGGATTCGCTCCTAAAGGAGCGAATCTTTTATAGGAAGGAGGCCGGATGGTACCGGACTTTGTACTGGCCTATCTGCCCTTTGTGGCCGCGCTGGGGCTGGGAGGGATGGTGGGGCGGCTGTTGCGGGTACCGCTGCGGGCGGCGCTGTGGGGACTGGGCTTGCTGGGGCTGGGCTACGTCCTGCTCGAGCCGGGTCAGGCCCTGGCCCTGGCCCAGGGGTGGGGACGCCGGGGGATAGAGGAGGCCCTGGTGTGGCTGGGGCTGCCCGTGCAGTGGGCGGCCTACGCCTTCAGCCCGCAATGGACCTGGCTGGTGGAGCAGGTCAGCCG
GCACCTGGTCCAGCAGGCCCTGGCTCAGGCCGACGCCGGAGCCCTGGAGCGCCTGAACGCCTATCTCCAGCGCGCCGACGTGGGCTTCGTCCTGGGAGTGCTGGCCGGGATCGACCAGCGCAAATAGCTCAACCTCCTAGCCAGGGGGCCAGCAGCACCCGCGTGTGGGGGTCGGGGCCCAGCAGGCGCTGAAGGTACTGCACCCGCGAAAGCCGGGTGGCTCCCCAGTATTGCCGCACGTAGATGCGGCGGTAGTCCTGGGCCTTGCGCAGCACCAGGTCGCGGTTGTAGCGGATGTCGTACTCGATGGCCGCCACCCCTTCCGGGCTCCACCACAAAGCGTCGGGGGTCGCGTCGAGGTGGACCGCAGCGGCGTCGGTCTGCCAGTCTTGGGGGCGGGCTCCCAGGGCGTAGCGCATCTCGGCCACCCCGGCCAGGTGGCGCAGGGCGAAGCCTTCGGCCTCCTGAAGGGCCTCCAGGGCCACGAAGGCCACCGTTCGGGGCCGGGCTTTGGTGATCTCGGCCACCTCCCGCTCCAGCAGCACCGCCCCGGGCAAAGCGGTGGGGTCGAGGCCCGGCCAGCGGCGCTGGAACTGAGACAGGCTCAGGGCCTGGTCGCAGGCCAGGGCTTCCTGGGGGGACTCAGCGACCCGCAGCACCCGCGGCCTCCCCGGGGACGATGAGGATCTGCATCTTGGTGCCCGGGTCGAGGCTCCACACCCGCACCACGCTGGTCGAGTCGGGAGGGGCGGCGATCTGCTTGGCCGAGCCGGAGAGGATGGTCAATCCCAGCGGCGGGGGGGTGTTGCTGCTGGCCACCCCGCCCCCGGGCAGGATGGTAGTGGAACTGCTCTTGGCGTAGAAGTCGATGTAGCTGACCAGGCCGGTGGCGGCCCCCCGCAGCAGGTTGGTGAGCAGGTTGGGGGTCTGCTCGCTGACCTGGGCGGGAATCCCCGCCACGCCCCGTTCGTCCAGGGCGATGGCCCGCACCACGTAGGCGGTCTTGCCCTTGTAGGCCCGGTCGAACTGCATTTGCAAGCGCCCGGCCCGGTCCAGGCTGGCCACCCCGAAGAAGACGTAGCCTTCCTTGGTGCGCACCATCAAGGCGGCCTGGGAGCTGCCCTCGGGGATCACCACCCCCGAGACCAACTCCCCTTCCTGCTTTTCCCCCACGTCGAAGGGGTACTCGGCCAGGGCCGGGGAGGAAGCAGCGGCCTGGGGGGCGGGGCTGCTCAGGGGGCTTTGGCCGGTGGGAACGCTGGGGCTGGAAAGGGAGAGGGAAGCGGGTGTGGGAGCGGGTGTGGGAGCGGGCAGTGCGGCGCTGGCCTGGGGGGCGGGGCTGAGGGGGTCGCTGGAGGAGGTCGGGGCGCTGGTTGGAGGCTCGGCAGCGGCAGACTTTTCCTGCTCCTGTGGAGTTTTCAGGGCCACATACCCTCCGCCGGGCCCGGAGAGGGACTGGATGGCCGGGGTATAGCCGCTGCGCTGGACCTGTTCGCCCTGCAAAACCACCGGGTTGCGGGCTTCGGCTTTGCCCCCCTCGAAGACGGTGTACTTGGTGCTCGTGGGGTTGAGCACGGCGGTGTAGGCCGGTTCCTTGGCGGCTTGTTCCCGCTCCAGCTGAGCGTACTTGCCCTGAGCCGAGGCGGCCTCGCCCTTGAGGGCCACCGGGCCGCTTTCGCTGGCCGTCTGCTGGGCTTGTTGCAGCTTGACCGGGTTGAAGGCCGGTTTGGCCGGGGCCGCAGGGGAAGCGCCGGGCGCGGGGGCCGGGGTGGCGCTGTACTCGGGCAGCGGGGGCGGCGGCACCACCCCGGAGTACGGGCGCTCGCCGGGGGCCGATGGCCCTGCGCTGCTGGGGGGGCTAGGGGGTGGGGGAACGCTGGCGGGGGCGGACGGTGTGGGGCTTTGCGAGGCCGGAGTGGTGGTGGTGGGCAGCGGCGGGGGAGGCGGGGCGGAGGTGGCGGTGGAGGGGGGCGTGGCGCCCGAAGGGGCGGGGGTGGAGG
CGACGGGGGGCGGGGTAGGGACGGAATTGGCGTCCGGGCTCGAGGTGGAGGAGGTGTAGCTCACGCTCGACGGACGCTTATGGCTGGCCA
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/annotated2_trnas.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotated2_trnas.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,2 @@
+fasta Name tRNA # Begin End Type Codon Score Note
+dataset_5328 dataset_5328_scaffold_1814  1 1368 1292 His GTG 73.0
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/distill_custom.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/distill_custom.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,32 @@
+gene_id gene_description module sheet header subheader potential_amg
+K02981 small subunit ribosomal protein S2e Ribosome, eukaryotes MISC Information systems TRUE
+K02985 small subunit ribosomal protein S3e Ribosome, eukaryotes MISC Information systems TRUE
+K02984 small subunit ribosomal protein S3Ae Ribosome, eukaryotes MISC Information systems TRUE
+K02987 small subunit ribosomal protein S4e Ribosome, eukaryotes MISC Information systems TRUE
+K02989 small subunit ribosomal protein S5e Ribosome, eukaryotes MISC Information systems TRUE
+K02991 small subunit ribosomal protein S6e Ribosome, eukaryotes MISC Information systems TRUE
+K02993 small subunit ribosomal protein S7e Ribosome, eukaryotes MISC Information systems TRUE
+K02995 small subunit ribosomal protein S8e Ribosome, eukaryotes MISC Information systems TRUE
+K02997 small subunit ribosomal protein S9e Ribosome, eukaryotes MISC Information systems TRUE
+K02947 small subunit ribosomal protein S10e Ribosome, eukaryotes MISC Information systems TRUE
+K02949 small subunit ribosomal protein S11e Ribosome, eukaryotes MISC Information systems TRUE
+K02951 small subunit ribosomal protein S12e Ribosome, eukaryotes MISC Information systems TRUE
+K02953 small subunit ribosomal protein S13e Ribosome, eukaryotes MISC Information systems TRUE
+K02955 small subunit ribosomal protein S14e Ribosome, eukaryotes MISC Information systems TRUE
+K02958 small subunit ribosomal protein S15e Ribosome, eukaryotes MISC Information systems TRUE
+K02957 small subunit ribosomal protein S15Ae Ribosome, eukaryotes MISC Information systems TRUE
+K02960 small subunit ribosomal protein S16e Ribosome, eukaryotes MISC Information systems TRUE
+K02962 small subunit ribosomal protein S17e Ribosome, eukaryotes MISC Information systems TRUE
+K02964 small subunit ribosomal protein S18e Ribosome, eukaryotes MISC Information systems TRUE
+K02966 small subunit ribosomal protein S19e Ribosome, eukaryotes MISC Information systems TRUE
+K02969 small subunit ribosomal protein S20e Ribosome, eukaryotes MISC Information systems TRUE
+K02971 small subunit ribosomal protein S21e Ribosome, eukaryotes MISC Information systems TRUE
+K02973 small subunit ribosomal protein S23e Ribosome, eukaryotes MISC Information systems TRUE
+K02974 small subunit ribosomal protein S24e Ribosome, eukaryotes MISC Information systems TRUE
+K02975 small subunit ribosomal protein S25e Ribosome, eukaryotes MISC Information systems TRUE
+K02976 small subunit ribosomal protein S26e Ribosome, eukaryotes MISC Information systems TRUE
+K02978 small subunit ribosomal protein S27e Ribosome, eukaryotes MISC Information systems TRUE
+K02977 small subunit ribosomal protein S27Ae Ribosome, eukaryotes MISC Information systems TRUE
+K02979 small subunit ribosomal protein S28e Ribosome, eukaryotes MISC Information systems TRUE
+K02980 small subunit ribosomal protein S29e Ribosome, eukaryotes MISC Information systems TRUE
+K02983 small subunit ribosomal protein S30e Ribosome, eukaryotes MISC Information systems TRUE
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_annotate1.fasta.gz
b
Binary file test-data/input_annotate1.fasta.gz has changed
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_distill_rrna1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_distill_rrna1.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,2 @@
+scaffold fasta begin end strand type e-value note
+dataset_4924_scaffold_361 dataset_4924 61952 62058 - 5S rRNA 5.2e-16
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/input_distill_trna1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_distill_trna1.tabular Sat Dec 10 21:14:28 2022 +0000
b
@@ -0,0 +1,7 @@
+fasta Name tRNA # Begin End Type Codon Score Note
+dataset_4924 dataset_4924_scaffold_1814  1 1368 1292 His GTG 73.0
+dataset_4924 dataset_4924_scaffold_1814  2 1285 1214 Gln TTG 60.8
+dataset_4924 dataset_4924_scaffold_1814  3 1182 1110 Lys TTT 77.5
+dataset_4924 dataset_4924_scaffold_361   1 29420 29495 Met CAT 73.6
+dataset_4924 dataset_4924_scaffold_361   2 56462 56380 Leu CAA 63.7
+dataset_4924 dataset_4924_scaffold_361   3 12681 12586 Cys GCA 26.9 pseudo
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/neighborhoods_genes_loc1.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/neighborhoods_genes_loc1.fna Sat Dec 10 21:14:28 2022 +0000
[
b'@@ -0,0 +1,1038 @@\n+>dataset_5073_scaffold_1510_1 rank: D; Sigma-54 interaction domain [PF00158.29]; Sigma-54 interaction domain [PF14532.9]; PAS fold [PF08448.13]; PAS domain [PF13426.10]; PAS fold [PF00989.28] (db=pfam)\n+ATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAA\n+>dataset_5073_scaffold_1510_2 rank: D; AMP-binding enzyme [PF00501.31]; AMP-binding enzyme C-terminal domain [PF13193.9] 
(db=pfam)\n+ATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAA\n+>dataset_5073_scaffold_1510_3 rank: D; 2-hydroxyglutaryl-CoA dehydratase, D-component [PF06050.16] 
(db=pfam)\n+ATGACTGACCGTAAAACCATTAAAGAAATCTGTGCACAATTTAAAGAAATTATTGCCGAGCCTGGTCTAAAGATTCAGCGGCTGCAAGCCGAAAAACCTGCTCCGGTAATCGGGTTTTTGCCCACTGACGTGCCTGAAGAATTAATCCATGCCTCGGGCGCTTACCCCTTCGGGCTGGTGGCTTATGATGGATTATGGGTCAACCGGGCCGACGCCCACTTGCAGACCTGGGCATGCTCTCTGGCACGATGTTCCTTCGGGATGTCTCTGGCCGGGAAGTTCGATTACCTGAACGGGCTGATCATCCCTCATATTTGCGACACCACCCGGATGATTTCAGACATTTGGAAACAAAACCGGCCTTACGATTTCATGGAGAACTTTATACTGCCCCGGCAGGTTGATCGTCCCAGTGCCAGGAGTTATCTCACCGGTGAATTGGGCCGGTTGAAGGCGCGTTTGGAGCAGTTTACGGGCAGGTCTAT'..b'CCTTTTCCGGGCTGGACCCTGCCGGGAGTGATCGGGGCGGGAGCCGCCCAGACTATGATGAATATCCACGGCCTAAAGCCGGGCAACCGGGTTTTGATGGTGGGCTCGGGCAACGTGGGGCTGGTGGTGGGGTACCAGCTGCTGCAGGCCGGTTGTCAGCTGGCGGCAGTTATAGACGTTACTTCACGCATCGGCGGCTACGGGGTGCATGCTGCCAAGGTAGCCCGCACCGGTGTACCTTTTCTCATGTCCCATACCATTAAGGAGGCCTGTGGGACCACCCGGGTGGAGGGAGCCACTATTATCCAGGTGGATGGACATTGGCAACCTGTCCCCGGCAGCGAAAAGCACCTGGAAGTAGATACAATCTGCCTGGCGGTGGGCCTGTCTCCCATGTCTCAGCTGGCCAGGATGTCTCACTGCTGGATGGAAAACAACTCCGGCCGGGGGGGGATGGTCCCGATTTGCAATGAATACGGGGAAACCTCTTTGCCCGGCGTGTACGCCGTGGGGGATGTGGCAGGTATTGAAGAGGCCAGTTCGGCCATGATCCAGGGCCGCGTCGCCGGGGCTGCGGTGGCCCGGGCGCGGGGCTACCTGGGTGAGGCTGAGTTTAAGGACCGTTATGGGGATTATCACTCCTCCCTGGGGCAGTTGCGGGAAGGAATGTTTGGACACAAAAATAAAGGGCGTACCGACCTGACTCATACCGAAGAAGGCTATGCGCTTTCCCGGACGCTGCTGGCCAGGGGCTATCTGGCCGAAGAGGAACTTGCGGGTTATTCCGGCGTTTGCTCCGGGGAAAAAAGGAAAAACGGTGTTTTTCCCATTATTGAGTGTACTCAGAACATCCCCTGCAATCCCTGCCGGGACGCTTGCAAGCAGGGCTGTATCAAGGTGAGCGGTAAAATCACCAATCTGCCCGTCGTTGACGAATCGGTTTCCTGTACGGGCTGCGGGATGTGTGTTGTGTCCTGTCCCGGTCAGGCTATCTTCCTGGTTGATGAAAGCTATGCACCGGGCTATGCGGCGGTGTCCATACCTTACGAATTTTACCCCCTGCCGGAGGTGGGGGCCAGGGGTTCGGCCCTGGACCGCTCGGGAGCAGTTGTGGGAGAAGCGGAAGTTATCGGGGTTAAAATCACCAGGGCTATGGACGAGACGGCGGTCCTGACTATGAAAGTACCGCTCGACTGCTCAATGAAGGCAAGATTTTTTAAACCCCTTTAA\n+>dataset_5073_scaffold_988_16 rank: D; Sarcosine oxidase A3 domain [PF17806.4]; BFD-like [2Fe-2S] binding domain [PF04324.18] 
(db=pfam)\n+ATGAGCAAGAGAATTATGCGAACCCCGCCTGAGGGTGAATTTGTAGCCCGGCCGGACGATTCGCTGATCATTTGTCGGTGTGAGGAAATCACCAGGGGAGAAATCAGAAGAGCGATATACGATGGAATGCGCACAATGAATGAAATTAAAAGGTACCTGCGGGTGGGCATGGGGCTCTGCCAGGGACAGACTTGCAATCGCCTGGTTCAGGGCATTATGGCTAAAGAGCTGGGACTGGATCCGGTTGAGGTGGACATCCCTTTATCCCGCTCACCGGCCCGGCCCATTCCTATGAGTGTATACGCCAACGACGGGGTTACAAAAGAAAAAGGTGAAAGAAAATATGAATAA\n+>dataset_5073_scaffold_988_17 rank: D; FAD dependent oxidoreductase [PF01266.27] (db=pfam)\n+ATGAATAAAAAGGAAATTATCATTGTCGGCGGCGGGGTGATCGGCTGCGCCCTGGCCTATTATCTGACCAAGCTAAAAATAAAAGCCCTGGTCATCGAAAAGAATGAGATCGGCATAGAAGCTTCCAGCCGCAACGGCGGCGGGGTCCGGCAGTCGGCCAGGGATTTGCGGGAGATGCCTCTGGCCAGGCATGCCGTGCAAAACCTTTGGCCGGGCCTTTCGGATGAACTGGGAGTTGACGTGGAATACGAGCGGAAAGGGAACCTGCGCCTGGGTAAAACCGAAGAGCATGCCAAAATTCTGGAGCGGATTGTCAGCCAGGGGCGTTCGGCGGGTCTTGACTTAAAACTCATCGACAGGCAAGAGGTGCGGGAAATCTGCCCTTATGCTTCGGAGGAAGTTATGGTGGCCAGCTACTGCCCCACAGACGGGCATGCCAACCCCATGCGGACCACCCTGGCTTTTTACAAAAGGGCCAGAGAAATGGGGGCCGAATTTGTTACCGGGGAAACAGTGCAGTCCATCCTGTTGCGTAAAGGTAAGGTAGGTGGCATTAAAACCGGTGCGGGTACTTATGAATCAGACCAGGTGTTGGTGGCAGCCGGTTTTGCTTCCCGGTTCATCGCTAATTCGGTGGGCATTGACGTGCCCATGCAAAAGGTGCTGGTGGAGGCCCTGGTGACGGGCCAACAGCCTCCCATGTTTCCCCAGATGATCGGTACGGCCGGTTCCGACTTTTACGGTCACCAGACCAAACACGGCTCTTTTGTCTTTGGGGGGATGACCGGCTTGGAACCATTTGCCTCGGAGGAATCCAGGCCTATGACCAGGAACATCACCGCCCCCAGCATCTGCCGGGCCATTCTCGGTTATTTTCCCGTCCTGGATCAGGCTGATATTATCCGCACCTGGTCGGGCTTTCTGGACGTAACAGCCGACCATGTCCCCGTATTAAGCAAGGTGGACGAAATACCGGGGCTGTTCCTGGCCTGCGGTTTCAGCGGACATGGCTACGGTATATCACCTGCTGTGGGACAGGTCATGGCGGAATTGGTGATCCATGACCGGCCGTCGCTGTCCCTGGACGCTTTTCGCTATGACCGCTTTATTCCCAAAAAATAA\n+>dataset_5073_scaffold_988_18 rank: C; 2-iminobutanoate/2-iminopropanoate deaminase [EC:3.5.99.10] 
(db=kegg)\n+ATGAATATGGAGAGAACTAATTATTCGTCCGGGGCACCCCTGGAAGATAAGGCGGGCTACAGCCGGGTGGTCAAGGTGGGGCCCTTCGTATACGTCGGCGGGACCACTTCGGTTCAGCCCGACGGCAGTGTCTACGGGGAAAATGACGGCTATGCCCAGACGAAATACATTTTGGAAAGGATGATTGGTTTTCTGGAGCAGGCCGGGTCCAGGCGGGAGGAAGTAATCAGGGTCAAGATGTATGCCACGGACATGACCCGGGCCAAGGAATACATTGAGGCTTACTCGGAATTTTTTAAGGACATCAAACCCCTGTGTACCCTGGTGGGGATATCCACCCTGTTTCGTCCTGCCCAGCTCATCGAAATAGAAATGGACGCTGTGATTGGATCAGCAAACTAG\n+>dataset_5073_scaffold_988_19 rank: D; Family of unknown function (DUF6144) [PF19641.2] (db=pfam)\n+ATGAATATGCCTATTGACAAGAATAAAAGGTGGATAACAGGGTTGCACGAAAGCATTAACCAACTTGGTAAAGATCTGCAAGCCGCAATAATGAAACCAGTTGGTAAGCAATGCGCTTCAGACCTATTCTCATTATGTGAAAGCTATTTGAGAAATAAAATAGATACAACTGAAGATCTTATAAATGGTTGGAATACTCTTCGGGAGAAACGAAACTTGAAAGGGAAATGGGAACTTGAAGGGGATAAAATCCGAGGCACCTTTTACGAATGTGGTTGTCCACTTATTCGATCGGGAATGATCGATCTTCATCCTATCCAATACTATTGCTCCCAAGGAATGATGGAGATGATTTTTTCCAAAGCAGCGAAAAAAACGGTAAAGGTTGAGATTAAACGTTCGATTGGTTGGGGTGACGATGTATGCGAATTCTTAATAAAACTCTAA\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/neighborhoods_scaffolds_loc1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/neighborhoods_scaffolds_loc1.fasta Sat Dec 10 21:14:28 2022 +0000
b
b'@@ -0,0 +1,38 @@\n+>dataset_5073_scaffold_1510\n+TACAGGATGCTTTTTTAAACCAGGTTAGAAAGGATAATATACCTGTAACAATCTTTTTGGTTAATGGATTTCAATTAAAAGGTATGGTTAAGGGTTTTGATAATTTTACAGTAATTATGGAAAGTGACGGCAAACAAATGATGGTATATAAACACGCTATTTCTACAGTTAGTCCTATGAAACCGGTAAATACTTCTTTTTCGGAAGTTAAGCCTGGCTAAATAAAAGGTGTAGGTCATGTCTTTTAGTATATTCCACTTAGAAAACATTCCAAATTCAATCATTCCGCCTGGCTTTTCATTGAAAAGCAGGCGGTTTCTATTTTATAGCGCAACTTTGGTATTGATTGGGATGACTATAATAGTCCATAATTACACATATACATAATCATTTAATCAAGCGTGGTTATGTGACTGTAATTCCACTGTTCGTTTGGAATTGAATATACTCTTTGGGATTGAGAAGGGAAGTGAATAAATGGGGCAGGATAGGCAAAATTCCCAAGAGTTACTGAATGAGCTTAATTATTTGCGTCAAAGGTTGGCTGAACTAGAAGAGATGAACAGGGACTATCTGGGGATGATCGAAAACTCTTACGACGCTATGAGTATAGCTGATTGTGACGGTAGGCTTCTTTTGATCAATCCGGCTTTTGAGAGAATTATGGGCATAACCAAATCGGAAACACTGAGCCGGACAATTCAGGACTTGACAAATGACGGGATTACCGATGCCAGCGCGGCCTTGAAGGCATTTGAAACCGGCAAGCAGGAATCGGTAATTATAAATACGCGTGCAGGGAGGCAAGTTTTAAGTACCGGAGTCCCGTTTTATGACCAGACCGGTAAAATTGTCAGAGTATACTGTAATATACGTGACGTAACGGAGTTGAATCATTTAAGGCAAAAATTTGAACAATCACAAAAACTGGCTTCGAGATATTTGTTTGAGTTATTGGAATTCAAAAGGGGGAAAACATTTAAATTTGTCGCCCACAGCAACAAGATCAAACAAATGTTGGAAACAGTCCATAGAATCGCCGTCGTGGACAGTACGGTGCTTATCCTCGGTGAATCCGGTGTGGGCAAGGACCTTGTGGCTCGTATAATCCACGAAGCCAGTTCGCGAAACGATTCCGGTTCTTTTTTAAAAATCAATTGTGCCGCCATCCCAGCCGAATTGTTGGAATCCGAACTTTTTGGTTATGAGGGAGGGGCGTTTACCGGAGCAAAAAAAGACGGCAAAGCCGGCTATTTTGAAATAGCGGATAAAGGAACCCTATTTCTTGATGAAATAGGGGAGCTTCCCCAAAAACTACAGGTAAAACTGCTGGCGGTAATTCAGGATCAAAAAATCACCCGGATTGGCGGGGTAAAAGAAAAGGACGTTGATGTCCGTATCATTGCCGCTACCAACCGGGATCTGGAGGAAATGGTGAAGCAAGGTAATTTTAGGGAAGATCTTTTTTACAGGCTAAATGTAATTCCAATCACCATTCCCCCGTTACGGGAAAGGAAAGAAGATATCCCTTTCCTGATCGTCCATTATACCGAACTTTTTAACAAGAAATATAACCGGGCGGTTAAGTTCAGCAAAGAGGCCATTGAAATGTTATGTAAGTATAATTGGCCCGGCAATGTCAGGGAACTTGCCAATCTTGTCGAAAGAGTCATCGTAATCGGCCAGGAGTCAATACTTAACCCGGAACATATACCCGGCAAGTACCATACAGCGGCCCAAAACATGGCCGAAACAGTTTCAGATTTCAAGTCCCTCAGTGATGCGGTGGAAAAATACGAGTTAAAGCTTGTCAAAAATACCCTGGAACTGTGCAAAACCCGGGAAGAGGCCGCCAGTAAACTTGGCATAAGTCTTTCGGGTTTAAGTAGAAGAATAAGAAGGCTGAAACAACTCGAAAATGAAGGTTTTATTTAAATGTTTTTTCAAGT
TTAAGTATAAATTAAAAAACTAGGTCAAAGTTGAATGTCATGTTTAACCTTGTCAAAAGTCTTTATTTTAGTACAATAACAGTTTCATATTTGGTTTTGTATGTCAAATTTGACACCCTCAAAATCTGACAATTCTGAAGATCGGGCTGAAAAGATGGTCTAAAAGTTGGCACGTATTTTGCGTAAAATACCATTATCATACATGAATAGAACATTAATGGTTAAGGAGGGAACATCTATGACAGCTTGAGGGAATGCTTTGCCTGTTCTATGGTGGATGTTTTCAGAACCTTCGGACTTGTCAAAAGAGAGGGATATGAAATGACGGTTAGTAAATGGATGCACGTCGGGGTGGCACTGAAGATGAATGCCCGCAATTACCCCGATAAGCTGGGTTGCCAGGACAAGAGAAAAAGTTATACCTTCAAGGAATGGAACGAGCGGTCGTGCCGCCTGGCATCCGCATTGAAGGACATGGGAGTGGGCTACGGTGAGCGGGTGGCCGTAATTGCCTACAACAGGGTGGAATGGATGGAAATTTACGCCGCCTGCGCCAAGGGCGGTCAGATTGTCGTTCCGGTAATGTTTCGCCTGACACCGCATGAATTTGAGTATATTGTTAATCATTCGGGCTGTAAAGCATTCATTGTGGAGGAACCTTTCGTGAAAGGGGTCGACAGTGTTCGCGACATTCTGACCACTATTCCCGAAGGGAATTATATTTACCTGGGTGACGGTGAGGCTCCGGAGGGCTATATCCATTACGAGTCGGTTATGGCTCAGGGCGACCCGTCCGAACCGGACATCTCGGTGGACGCCGCCGATCCCTGGACCATTATGTATACATCCGGGACCACCGGCAGGCCGAAGGGGGTTGTGCGGACCCATGAAAATTATCTGGGCCAGTATTTAATCAACAACATTAACATGGGAGTGCGGCCCAACGACAAGCCCTTGCTGGTCATGCCCATGTGTCATGTTAATTCCATTTATTATTCTTTTTGTTACACCTATATCAGCGCTCCGGTAATGGTTTATAACATGGTTAGCTTTGATCCTGAAGACCTGTTAAAAACCATTGTCGATTACAGGGTAACCTTTACTTCCCTGGTGCCGACCCACTACATCATGATCCTGGCCCTGCCCGATGAAATCAAACAAAAGTACGACACCAGTTGTATACGGCAGCTTTTAATCTCTTCGGCACCAGCCCGGCGGGATTTGAAACTGGCCATTATGAAGTATTTCAAATCGGCTGAGCTTTGGGAAGCTTACGGTTCCACGGAAACGTCCCTGGTCACTTACCTGCGCCCGGAAGACCAGTTAACTAAACTGGGTTCCATCGGTAAAGAAGTTTTTGGGTGCGACGAAATCAAGCTGCTGGATGAAAACGGGGAGGAAGTGCCGGTTGGGGAGGTGGGTGAACTCTACAGCCGTTCGCCGGGTATGTTCAAGGAGTATTGGAAAGACCCCGGCAAGACCAGTGAAGTGTTTCGCGGGAAGTGGTGCACCGCGGGCGACATGGGCAGACGGGACGAGGACGGTTATTACTACCTGGTGGACCGTAAAGCGAACATGATCATCTCGGGCGGTGAAAATGTCTACCCGTCGGAGGTGGAAAATGTCGTGGGTGCCCACCCGGCGGTGAAAGATGGGGCGGTAATCGGCGTTCCCGACCAGAAGTGGGGTGAGATTGTATTGGCCTTCATAATACTGCACGAGGGTTACCAGGCGGGAGATGAACTGGCCGGGGAGATTATCAACTTCTGTAAAGACCGGGTTGCCGGTTACAAGCGCCCCAAGTCGATCCGTTTTATTAGCGAGGAAGAAATGCCCCGGACCGGCAACGGCAAAATCATGCACCGGGTCTTGCGTGAAAAGTACGGCAAGTGGAGTGACTCTGTGTAAAATATAAGTCAGAATATAATGGAATATAGGAAAGATAATGGATTATTTAATAAGTTAATCCAAAGCAGCAATGGTGCCAGGTGTTTAAAGGT
TGA'..b'TTGAAGAGGCCAGTTCGGCCATGATCCAGGGCCGCGTCGCCGGGGCTGCGGTGGCCCGGGCGCGGGGCTACCTGGGTGAGGCTGAGTTTAAGGACCGTTATGGGGATTATCACTCCTCCCTGGGGCAGTTGCGGGAAGGAATGTTTGGACACAAAAATAAAGGGCGTACCGACCTGACTCATACCGAAGAAGGCTATGCGCTTTCCCGGACGCTGCTGGCCAGGGGCTATCTGGCCGAAGAGGAACTTGCGGGTTATTCCGGCGTTTGCTCCGGGGAAAAAAGGAAAAACGGTGTTTTTCCCATTATTGAGTGTACTCAGAACATCCCCTGCAATCCCTGCCGGGACGCTTGCAAGCAGGGCTGTATCAAGGTGAGCGGTAAAATCACCAATCTGCCCGTCGTTGACGAATCGGTTTCCTGTACGGGCTGCGGGATGTGTGTTGTGTCCTGTCCCGGTCAGGCTATCTTCCTGGTTGATGAAAGCTATGCACCGGGCTATGCGGCGGTGTCCATACCTTACGAATTTTACCCCCTGCCGGAGGTGGGGGCCAGGGGTTCGGCCCTGGACCGCTCGGGAGCAGTTGTGGGAGAAGCGGAAGTTATCGGGGTTAAAATCACCAGGGCTATGGACGAGACGGCGGTCCTGACTATGAAAGTACCGCTCGACTGCTCAATGAAGGCAAGATTTTTTAAACCCCTTTAAACTATAAAGCAATTTAACCTACTAACTTGGCGTAAGTCTCCCACTTCTATAAGTGGAGTACAACGCCAACTAAGTCATACATTTTGCAGTTCTAAAATTCAGGGAGAGTTGAATCTCCCCCTGAATTAAGAGCTTGCTTCAAAGTTAGCGGAGCCATGGGAACGATTCGAGTGTCCGGGAAGTGTGGCTAAAAAATCGAGGACATTCACCGAAGCGCTGTGTTCCTTTCCTTAACGGAAAAAGACGATGGACCGTTCTTGTGGCGCAGCTTCTTACAAGGAGGCCTGTAATGAGCAAGAGAATTATGCGAACCCCGCCTGAGGGTGAATTTGTAGCCCGGCCGGACGATTCGCTGATCATTTGTCGGTGTGAGGAAATCACCAGGGGAGAAATCAGAAGAGCGATATACGATGGAATGCGCACAATGAATGAAATTAAAAGGTACCTGCGGGTGGGCATGGGGCTCTGCCAGGGACAGACTTGCAATCGCCTGGTTCAGGGCATTATGGCTAAAGAGCTGGGACTGGATCCGGTTGAGGTGGACATCCCTTTATCCCGCTCACCGGCCCGGCCCATTCCTATGAGTGTATACGCCAACGACGGGGTTACAAAAGAAAAAGGTGAAAGAAAATATGAATAAAAAGGAAATTATCATTGTCGGCGGCGGGGTGATCGGCTGCGCCCTGGCCTATTATCTGACCAAGCTAAAAATAAAAGCCCTGGTCATCGAAAAGAATGAGATCGGCATAGAAGCTTCCAGCCGCAACGGCGGCGGGGTCCGGCAGTCGGCCAGGGATTTGCGGGAGATGCCTCTGGCCAGGCATGCCGTGCAAAACCTTTGGCCGGGCCTTTCGGATGAACTGGGAGTTGACGTGGAATACGAGCGGAAAGGGAACCTGCGCCTGGGTAAAACCGAAGAGCATGCCAAAATTCTGGAGCGGATTGTCAGCCAGGGGCGTTCGGCGGGTCTTGACTTAAAACTCATCGACAGGCAAGAGGTGCGGGAAATCTGCCCTTATGCTTCGGAGGAAGTTATGGTGGCCAGCTACTGCCCCACAGACGGGCATGCCAACCCCATGCGGACCACCCTGGCTTTTTACAAAAGGGCCAGAGAAATGGGGGCCGAATTTGTTACCGGGGAAACAGTGCAGTCCATCCTGTTGCGTAAAGGTAAGGTAGGTGGCATTAAAACCGGTGCGGGTACTTATGAATCAGACCAGGTGTTGGTGGCAGCCGGTTTTGCTTCCCGGTTCATCGCTAATTCGGTGGGCATTGACGTGCCCATGCA
AAAGGTGCTGGTGGAGGCCCTGGTGACGGGCCAACAGCCTCCCATGTTTCCCCAGATGATCGGTACGGCCGGTTCCGACTTTTACGGTCACCAGACCAAACACGGCTCTTTTGTCTTTGGGGGGATGACCGGCTTGGAACCATTTGCCTCGGAGGAATCCAGGCCTATGACCAGGAACATCACCGCCCCCAGCATCTGCCGGGCCATTCTCGGTTATTTTCCCGTCCTGGATCAGGCTGATATTATCCGCACCTGGTCGGGCTTTCTGGACGTAACAGCCGACCATGTCCCCGTATTAAGCAAGGTGGACGAAATACCGGGGCTGTTCCTGGCCTGCGGTTTCAGCGGACATGGCTACGGTATATCACCTGCTGTGGGACAGGTCATGGCGGAATTGGTGATCCATGACCGGCCGTCGCTGTCCCTGGACGCTTTTCGCTATGACCGCTTTATTCCCAAAAAATAATCACAAACCACAAACAGGAGGGAAATGTTGATGAATATGGAGAGAACTAATTATTCGTCCGGGGCACCCCTGGAAGATAAGGCGGGCTACAGCCGGGTGGTCAAGGTGGGGCCCTTCGTATACGTCGGCGGGACCACTTCGGTTCAGCCCGACGGCAGTGTCTACGGGGAAAATGACGGCTATGCCCAGACGAAATACATTTTGGAAAGGATGATTGGTTTTCTGGAGCAGGCCGGGTCCAGGCGGGAGGAAGTAATCAGGGTCAAGATGTATGCCACGGACATGACCCGGGCCAAGGAATACATTGAGGCTTACTCGGAATTTTTTAAGGACATCAAACCCCTGTGTACCCTGGTGGGGATATCCACCCTGTTTCGTCCTGCCCAGCTCATCGAAATAGAAATGGACGCTGTGATTGGATCAGCAAACTAGTACTACAGCCGCTTGTCATTGCGAGCGTTAGCCAAGCAATCTTTGGCTGTTACCCCCACTAATTGGGGACATTCCTCCGAAGGAGGTATGTCCCCTTTTTTTAGCCTTATTATTTTATCATGAGCCAGAAGCTTTTTTGAGGAACAACAAACCCTATTGGAACTTTAGGCAAGCTGGACGAGGTCGAAGAACAAGGCCGTGCTGGAAAGTGCTGCGTTCTGCGATGTTATGGCAGATTTAAGGAGCCGTATCCATGCTAAAGTATCCAACTAATATAATCGAAGCTGCAGAACAGGATTTAGCGAGTCTCCCGGAAATTATGAGCAAGAGAAGAATACTGGTAACCTTTAATTAGAGTTTTATTAAGAATTCGCATACATCGTCACCCCAACCAATCGAACGTTTAATCTCAACCTTTACCGTTTTTTTCGCTGCTTTGGAAAAAATCATCTCCATCATTCCTTGGGAGCAATAGTATTGGATAGGATGAAGATCGATCATTCCCGATCGAATAAGTGGACAACCACATTCGTAAAAGGTGCCTCGGATTTTATCCCCTTCAAGTTCCCATTTCCCTTTCAAGTTTCGTTTCTCCCGAAGAGTATTCCAACCATTTATAAGATCTTCAGTTGTATCTATTTTATTTCTCAAATAGCTTTCACATAATGAGAATAGGTCTGAAGCGCATTGCTTACCAACTGGTTTCATTATTGCGGCTTGCAGATCTTTACCAAGTTGGTTAATGCTTTCGTGCAACCCTGTTATCCACCTTTTATTCTTGTCAATAGGCATATTCATAGTTAATTGCTATTCCTCCTTTAGGGCTTGCTTCAAGTCTTGGATAAGATCACCGGCGTTCTCGATTCCTACCGAAAGTCTCAAGGATGAAGAAAAAGATCGTAATAGGTTCACGAGACTACGGTCTTGTTCTACCGGTGGTCAACTGTTTAGATATACGAGTGATCAGCATTTATTAATTAATGTTAAGAACTAATGAAAAGAATGTAAATGTTAAGTTTATAAATTAATTACTCAACTTTCGCAGTTCAAAAATCAGTCTAACCCCTTACAAAATAAGAATTTAGGCCAACTTTTTTAAGCA
AAAACA\n'
b
diff -r 000000000000 -r 2675f8d7b2a5 test-data/strainer_input_fasta1.fasta.gz
b
Binary file test-data/strainer_input_fasta1.fasta.gz has changed