changeset 0:ad7507073c3f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
author jjohnson
date Sun, 24 Nov 2019 21:56:00 -0500
parents
children 86cd2e70b0dc
files cat_add_names.xml cat_bins.xml cat_contigs.xml cat_prepare.xml cat_summarise.xml datatypes_conf.xml macros.xml tabpad.py test-data/cached_locally/CAT_prepare_test/CAT_database/protIDs test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz test-data/cached_locally/cat_database.loc test-data/contigs.fasta test-data/genome2.fna test-data/genome3.fna test-data/test_contig.contig2classification.names.txt test-data/test_contig.contig2classification.txt tool-data/cat_database.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 25 files changed, 1153 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_add_names.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,41 @@
+<tool id="cat_add_names" name="CAT add_names" version="@VERSION@.0">
+    <description>annotate with taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- Runs "CAT add_names" on the selected classification/ORF2LCA file, then pads the
+         result with tabpad.py so that every row has the same number of columns. -->
+    <command detect_errors="exit_code"><![CDATA[
+    CAT add_names -i '$input'
+    @CAT_TAXONOMY@
+    @ADD_NAMES_OPTIONS@
+    -o output_names.txt
+    && @TXT2TSV@ -i output_names.txt -o '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/>
+        <expand macro="cat_db" />
+        <expand macro="add_names_options" />
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- Uses the small cached test database declared by the test_catdb macro. -->
+        <test>
+            <param name="input" ftype="tabular" value="test_contig.contig2classification.txt"/>
+            <expand macro="test_catdb"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Firmicutes" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**CAT/BAT add_names** 
+Add names for the NCBI taxonomy IDs.
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_bins.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,71 @@
+<tool id="cat_bins" name="CAT bins" version="@VERSION@.0">
+    <description>annotate with taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- One MAG is classified with "CAT bin"; several MAGs are symlinked into a single
+         folder and classified together with "CAT bins". The links keep the dataset file
+         names, hence the '.dat' bin suffix. -->
+    <command detect_errors="exit_code"><![CDATA[
+    #set $bin_dir = None
+    #if len($mags) > 1:
+        #set $bin_dir = 'inputs'
+        mkdir -p $bin_dir &&
+        #for mag in $mags:
+            ln -s '$mag' $bin_dir/ &&
+        #end for
+    #end if
+    CAT
+    #if $bin_dir
+        bins -s '.dat' -b $bin_dir
+    #else
+        bin -b '$mags'
+    #end if
+    @CAT_DB@
+    @USE_INTERMEDIATES@
+    @CUSTOM_SETTINGS@
+    && @TXT2TSV@ *.ORF2LCA.txt *.bin2classification.txt
+    @ADD_NAMES@
+    @SUMMARISE@
+    ]]></command>
+    <inputs>
+        <param name="mags" type="data" format="fasta" multiple="true" label="metagenome assembled genomes (MAGs/bins)"/>
+        <expand macro="cat_db" />
+        <expand macro="use_intermediates" />
+        <expand macro="custom_settings" />
+        <expand macro="add_names" />
+        <expand macro="summarise" />
+        <expand macro="select_bat_outputs" />
+    </inputs>
+    <outputs>
+        <expand macro="outputs" />
+    </outputs>
+    <tests>
+        <!-- Single MAG: exercises the "CAT bin" branch. The BAT output selector only offers
+             bin2classification, and only *.bin2classification.txt is converted to .tsv. -->
+        <test>
+            <param name="mags" ftype="fasta" value="genome3.fna"/>
+            <expand macro="test_catdb"/>
+            <param name="select_outputs" value="bin2classification"/>
+            <output name="bin2classification">
+                <assert_contents>
+                    <has_text text="1639" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Two MAGs: exercises the "CAT bins" branch. -->
+        <test>
+            <param name="mags" ftype="fasta" value="genome2.fna,genome3.fna"/>
+            <expand macro="test_catdb"/>
+            <param name="select_outputs" value="bin2classification"/>
+            <output name="bin2classification">
+                <assert_contents>
+                    <has_text text="666" />
+                    <has_text text="1639" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**CAT bin or bins**
+Classify metagenomics assembled genomes. 
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_contigs.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,48 @@
+<tool id="cat_contigs" name="CAT contigs" version="@VERSION@.0">
+    <description>annotate with taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- Classifies the contigs, pads the ORF2LCA and contig2classification reports into
+         .tsv files, then optionally runs add_names and summarise on them. -->
+    <command detect_errors="exit_code"><![CDATA[
+    CAT contigs -c '$contigs_fasta'
+    @CAT_DB@
+    @USE_INTERMEDIATES@
+    @CUSTOM_SETTINGS@
+    && @TXT2TSV@ *.ORF2LCA.txt *.contig2classification.txt
+    @ADD_NAMES@
+    @SUMMARISE@
+    ]]></command>
+    <inputs>
+        <param name="contigs_fasta" type="data" format="fasta" label="Contigs fasta"/>
+        <expand macro="cat_db" />
+        <expand macro="use_intermediates" />
+        <expand macro="custom_settings" />
+        <expand macro="add_names" />
+        <expand macro="summarise" />
+        <expand macro="select_cat_outputs" />
+    </inputs>
+    <outputs>
+        <expand macro="outputs" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="contigs_fasta" ftype="fasta" value="contigs.fasta"/>
+            <expand macro="test_catdb"/>
+            <param name="select_outputs" value="contig2classification"/>
+            <output name="contig2classification">
+                <assert_contents>
+                    <has_text text="contig_38063" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**CAT contigs**
+Classify metagenomics contigs.
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_prepare.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,29 @@
+<tool id="cat_prepare" name="CAT prepare" version="@VERSION@.0">
+    <description>database for CAT - Contig Annotation Tool</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- The database and taxonomy folders are created inside the output dataset's extra
+         files directory; the dataset itself only holds a one-line description with the
+         build date and the two folder names. Paths are quoted so that directories
+         containing spaces or shell metacharacters do not break the command. -->
+    <command detect_errors="exit_code"><![CDATA[
+        #import os.path
+        #set $catdb = $cat_db.files_path
+        mkdir -p '$catdb' &&
+        echo CAT_DB `date '+%Y-%m-%d'` "@DATABASE_FOLDER@" "@TAXONOMY_FOLDER@" > '$cat_db' &&
+        CAT prepare --fresh
+        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+    ]]></command>
+    <inputs>
+    </inputs>
+    <outputs>
+        <data name="cat_db" format="cat_db" />
+    </outputs>
+    <help><![CDATA[
+**CAT prepare**
+Prepare CAT reference data for classifying metagenomic contigs or genome assemblies.
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_summarise.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,40 @@
+<tool id="cat_summarise" name="CAT summarise" version="@VERSION@.0">
+    <description>annotate with taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- contigs_fasta is optional, so "-c" is only passed when a dataset was selected;
+         otherwise the literal string 'None' would be handed to CAT as a file name. -->
+    <command detect_errors="exit_code"><![CDATA[
+    CAT summarise
+    #if $contigs_fasta
+        -c '$contigs_fasta'
+    #end if
+    -i '$input'
+    -o output_names_summary.txt
+    && @TXT2TSV@ -i output_names_summary.txt -o '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="classification.official_names.txt"
+            help="The classification must be made with only_official names"/>
+        <param name="contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta" 
+            help="Required if the input is a contig2classification.names file"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="tabular" value="test_contig.contig2classification.names.txt"/>
+            <!-- The fixture shipped in test-data is contigs.fasta. -->
+            <param name="contigs_fasta" ftype="fasta" value="contigs.fasta"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Firmicutes" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**CAT summarise** 
+Produce a summary report of assignments to the official taxonomic names. 
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Registers the "cat_db" extension used for the output of the CAT prepare tool,
+     declared as a subclass of Galaxy's plain Text datatype. -->
+<datatypes>
+    <registration>
+        <datatype
+            extension="cat_db"
+            type="galaxy.datatypes.data:Text"
+            subclass="true"/>
+    </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,267 @@
+<macros>
+    <!-- CAT release wrapped by this suite; each tool builds its version as "@VERSION@.0". -->
+    <token name="@VERSION@">5.0.3</token>
+    <!-- Package requirement shared by all tools; the yield lets a tool append more. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">cat</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command>CAT --version</version_command>
+    </xml>
+    <!-- Sub-folder names used below the extra files directory of a cat_db dataset. -->
+    <token name="@DATABASE_FOLDER@">CAT_database</token>
+    <token name="@TAXONOMY_FOLDER@">taxonomy</token>
+    <!-- Database selector: either an entry of the cat_databases data table or a history
+         dataset. The @CAT_DB@ and @CAT_TAXONOMY@ tokens read the values chosen here. -->
+    <xml name="cat_db">
+        <conditional name="db">
+            <param name="db_src" type="select" label="CAT database from">
+                <option value="cached">local cached database</option>
+                <option value="history">history</option>
+            </param>
+            <when value="cached">
+                <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator">
+                    <options from_data_table="cat_databases">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No CAT database is available." />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Command-line fragment passing both the database and the taxonomy folder to CAT.
+         Cached entries take the paths from the data table; history datasets use the
+         sub-folders of the dataset's extra files directory. All paths are quoted so that
+         directories containing spaces do not split into several arguments. -->
+    <token name="@CAT_DB@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --database_folder '$db.cat_builtin.fields.database_folder'
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Same source selection as @CAT_DB@, but only the taxonomy folder is passed (used by
+         "CAT add_names"). The cached path is quoted like the history one. -->
+    <token name="@CAT_TAXONOMY@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Test parameters selecting the cached "CAT_prepare_test" database entry. -->
+    <xml name="test_catdb">
+        <conditional name="db">
+            <param name="db_src" value="cached"/>
+            <param name="cat_builtin" value="CAT_prepare_test"/>
+        </conditional>
+    </xml>
+
+    <!-- Optional reuse of the intermediate files of an earlier run. The alignment input is
+         declared as tabular to match the alignment_diamond output of these tools, so that
+         a previous alignment can actually be selected. -->
+    <xml name="use_intermediates">
+        <conditional name="previous">
+            <param name="use_previous" type="select" label="Use previous gene prediction and diamond alignment">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param argument="--proteins_fasta" type="data" format="fasta" label="predicted proteins fasta"/>
+                <param argument="--diamond_alignment" type="data" format="tabular" label="alignments file"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Passes the previous proteins/alignment files when requested. The out_prefix is
+         always set to 'cat_output'; the outputs macro collects files by that prefix. -->
+    <token name="@USE_INTERMEDIATES@"><![CDATA[
+      #if $previous.use_previous == 'yes'
+      --proteins_fasta '$previous.proteins_fasta'
+      --diamond_alignment '$previous.diamond_alignment'
+      #end if
+      --out_prefix 'cat_output'
+]]></token>
+
+    <!-- The range and fraction parameters, and the token that puts them on the command line. -->
+    <xml name="custom_settings">
+        <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/>
+        <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/>
+    </xml>
+    <token name="@CUSTOM_SETTINGS@"><![CDATA[
+      --range $range
+      --fraction $fraction
+]]></token>
+    <!-- Boolean flags of "CAT add_names"; each parameter renders as its flag or as nothing. -->
+    <xml name="add_names_options">
+        <param argument="--only_official" type="boolean" truevalue="--only_official" falsevalue="" checked="true" 
+            label="Only output official level names."/>
+        <param argument="--exclude_scores" type="boolean" truevalue="--exclude_scores" falsevalue="" checked="false" 
+            label="Exclude bit-score support scores in the lineage."/>
+    </xml>
+    <!-- Top-level form of the flags, for tools that expand add_names_options directly. -->
+    <token name="@ADD_NAMES_OPTIONS@"><![CDATA[
+    $only_official $exclude_scores 
+]]></token>
+    <!-- Chooses which reports get an extra add_names pass; every choice other than "no"
+         exposes the add_names flags inside the "names" conditional. -->
+    <xml name="add_names">
+        <conditional name="names">
+            <param name="add_names" type="select" label="add_names">
+                <option value="no">No</option>
+                <option value="orf2lca">ORF2LCA.txt</option>
+                <option value="classification">classification.txt</option>
+                <option value="both">ORF2LCA.txt and classification.txt</option>
+            </param>
+            <when value="no"/>
+            <when value="orf2lca">
+                <expand macro="add_names_options"/>
+            </when>
+            <when value="classification">
+                <expand macro="add_names_options"/>
+            </when>
+            <when value="both">
+                <expand macro="add_names_options"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Optional add_names passes over the padded reports written by the calling tool.
+         The contig report is named cat_output.contig2classification.tsv (singular
+         "contig"), matching the file the outputs macro collects. add_names writes
+         classification_names.txt, so that exact name is what is handed to tabpad. -->
+    <token name="@ADD_NAMES@"><![CDATA[
+    #if $names.add_names in ['classification','both']:
+        && CAT add_names $names.only_official $names.exclude_scores 
+            @CAT_TAXONOMY@
+            #if $bcat == 'CAT'
+                -i cat_output.contig2classification.tsv
+            #else
+                -i cat_output.bin2classification.tsv
+            #end if
+            -o classification_names.txt
+        && @TXT2TSV@ -i classification_names.txt -o '$classification_names'
+    #end if
+    #if $names.add_names in ['orf2lca','both']:
+        && CAT add_names $names.only_official $names.exclude_scores 
+            @CAT_TAXONOMY@
+            -i cat_output.ORF2LCA.tsv 
+            -o orf2lca_names.txt
+        && @TXT2TSV@ -i orf2lca_names.txt -o '$orf2lca_names'
+    #end if
+]]></token>
+    <!-- Switch for the optional summarise step run on the classification report. -->
+    <xml name="summarise">
+        <param name="summarise" type="select" label="summarise">
+            <option value="no">No</option>
+            <option value="classification">classification.txt</option>
+        </param>
+    </xml>
+    <!-- Optional "CAT summarise" step. It reuses the names report when that was produced
+         with official names only; otherwise it first runs add_names with the
+         only_official flag into a scratch file. The scratch file name is a quoted string
+         literal (a bare word would be evaluated as an undefined Cheetah name), and the
+         contig report uses the singular "contig2classification" name that the calling
+         tool actually writes. -->
+    <token name="@SUMMARISE@"><![CDATA[
+    #if $summarise in ['classification']:
+        #if $names.add_names in ['classification','both'] and $names.only_official: 
+            #set $summary_input = $classification_names
+        #else 
+            #set $summary_input = 'classification_official_names.txt'
+            && CAT add_names --only_official
+                @CAT_TAXONOMY@
+                #if $bcat == 'CAT'
+                    -i cat_output.contig2classification.tsv
+                #else
+                    -i cat_output.bin2classification.tsv
+                #end if
+                -o '$summary_input'
+        #end if
+        && CAT summarise 
+            #if $bcat == 'CAT'
+                -c '$contigs_fasta'
+            #end if
+            -i '$summary_input'
+            -o classification_summary.txt
+        && @TXT2TSV@ -i classification_summary.txt -o '$classification_summary'
+    #end if
+]]></token>
+    
+    <!-- Output selector shared by the contig and bin tools; the yield receives the
+         tool-specific classification option. -->
+    <xml name="select_outputs">
+        <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs">
+            <option value="log" selected="true">CAT.log</option>
+            <option value="predicted_proteins_faa" selected="true">predicted_proteins.faa</option>
+            <option value="predicted_proteins_gff">predicted_proteins.gff</option>
+            <option value="alignment_diamond">alignment.diamond</option>
+            <option value="orf2lca" selected="true">ORF2LCA.txt</option>
+            <yield/>
+        </param>
+    </xml>
+    <!-- Contig variant: the hidden bcat/seqtype values are read by the command tokens
+         and by the output labels. -->
+    <xml name="select_cat_outputs">
+        <param name="bcat" type="hidden" value="CAT"/>
+        <param name="seqtype" type="hidden" value="contig"/>
+        <expand macro="select_outputs">
+            <option value="contig2classification" selected="true">contig2classification.txt</option>
+        </expand>
+    </xml>
+    <!-- Bin variant of the selector above. -->
+    <xml name="select_bat_outputs">
+        <param name="bcat" type="hidden" value="BAT"/>
+        <param name="seqtype" type="hidden" value="bin"/>
+        <expand macro="select_outputs">
+            <option value="bin2classification" selected="true">bin2classification.txt</option>
+        </expand>
+    </xml>
+    
+    <!-- Invocation of the bundled tabpad.py helper. It is run through the interpreter so
+         that it does not depend on the script's executable bit, and the path is quoted in
+         case the tool directory contains spaces. -->
+    <token name="@TXT2TSV@"><![CDATA[
+    python '$__tool_directory__/tabpad.py'
+]]></token>
+    <!-- Outputs shared by the contig and bin tools. The report files are collected from
+         the working directory by the 'cat_output' prefix; the names and summary datasets
+         are written directly by the command.
+         Filters are Python expressions evaluated against the parameter dictionary, so the
+         value inside the "names" conditional is read as names['add_names']. The "both"
+         choice is matched explicitly because it contains neither 'orf2lca' nor
+         'classification' as a substring. -->
+    <xml name="outputs">
+        <data name="log" format="txt" label="${bcat}.log" from_work_dir="cat_output.log">
+            <filter>'log' in select_outputs or not select_outputs</filter>
+        </data>
+        <data name="predicted_proteins_faa" format="fasta" label="${bcat}.predicted_proteins.faa" from_work_dir="cat_output.predicted_proteins.faa">
+            <filter>'predicted_proteins_faa' in select_outputs</filter>
+        </data>
+        <data name="predicted_proteins_gff" format="gff" label="${bcat}.predicted_proteins.gff" from_work_dir="cat_output.predicted_proteins.gff">
+            <filter>'predicted_proteins_gff' in select_outputs</filter>
+        </data>
+        <data name="alignment_diamond" format="tabular" label="${bcat}.alignment.diamond" from_work_dir="cat_output.alignment.diamond">
+            <filter>'alignment_diamond' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
+            </actions>
+        </data>
+        <data name="orf2lca" format="tabular" label="${bcat}.ORF2LCA.txt" from_work_dir="cat_output.ORF2LCA.tsv">
+            <filter>'orf2lca' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="ORF,lineage,bit-score" />
+            </actions>
+        </data>
+        <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv">
+            <filter>'contig2classification' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" />
+            </actions>
+        </data>
+        <data name="bin2classification" format="tabular" label="${bcat}.bin2classification.txt" from_work_dir="cat_output.bin2classification.tsv">
+            <filter>'bin2classification' in select_outputs</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" />
+            </actions>
+        </data>
+        <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt">
+            <filter>names['add_names'] in ('orf2lca', 'both')</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="ORF,lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" />
+            </actions>
+        </data>
+        <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt">
+            <filter>names['add_names'] in ('classification', 'both')</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" />
+            </actions>
+        </data>
+        <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt">
+            <filter>summarise == 'classification'</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="rank,clade,number of contigs,number of ORFs,number of positions" />
+            </actions>
+        </data>
+    </xml>
+    <!-- Help footer appended to the help section of every tool in the suite. -->
+    <token name="@COMMON_HELP@"><![CDATA[
+The CAT/BAT workflow is described at: https://github.com/dutilh/CAT    
+]]></token>
+    <!-- Citations shared by all tools. A citation of type "doi" carries the bare DOI, not
+         the resolver URL. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/072868</citation>
+            <citation type="doi">10.1186/s13059-019-1817-x</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabpad.py	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+
+def padfile(infile, outfile, fieldcnt=None):
+    """Copy ``infile`` to ``outfile``, right-padding short lines with TABs.
+
+    Each line is stripped of its trailing CR/LF and written back with a single
+    newline, followed by enough TAB characters to reach ``fieldcnt`` fields.
+    Lines that already have ``fieldcnt`` or more fields are written unchanged.
+
+    :param infile: path of the text file to read
+    :param outfile: path of the padded file to write
+    :param fieldcnt: target number of fields per line; when omitted (or 0) the
+        field count of the first line is used instead
+    """
+    tabs = '\t' * fieldcnt if fieldcnt is not None else None
+    # Both handles are managed by the with-statement so the output file is
+    # closed (and flushed) even when reading or writing raises.
+    with open(infile, 'r') as fh, open(outfile, 'w') as out:
+        for txtline in fh:
+            line = txtline.rstrip('\r\n')
+            fields = line.split('\t')
+            if not tabs:
+                # No usable width was given: take it from the first line.
+                tabs = '\t' * len(fields)
+            # The slice is empty once the line has at least the target width.
+            out.write('%s%s\n' % (line, tabs[len(fields):]))
+
+
+def fieldcount(infile):
+    """Return the largest number of TAB-separated fields found on any line.
+
+    :param infile: path of the text file to scan
+    :returns: the maximum field count, or 0 when the file has no lines
+    """
+    widest = 0
+    with open(infile, 'r') as handle:
+        for row in handle:
+            # A line with n TABs splits into n + 1 fields.
+            width = row.rstrip('\r\n').count('\t') + 1
+            if width > widest:
+                widest = width
+    return widest
+
+
+def tsvname(infile):
+    """Return the ``.tsv`` name for ``infile``: a trailing ``.txt`` is replaced,
+    any other name simply gets ``.tsv`` appended."""
+    # Raw string: '\.' in a normal string literal is an invalid escape sequence
+    # and triggers a warning on current Python versions.
+    return re.sub(r'\.txt$', '', infile) + '.tsv'
+
+
+def __main__():
+    """Command-line entry point.
+
+    Pads the file given with -i/--input (written to -o/--output, or to the
+    derived .tsv name when no output is given) and then every positional file
+    (each written to its derived .tsv name).
+    """
+    parser = argparse.ArgumentParser(
+        description='Pad a file with TABS for equal field size across lines')
+    parser.add_argument('-i', '--input', help='input file')
+    parser.add_argument('-o', '--output', help='output file')
+    parser.add_argument('files', nargs='*', help='.txt files')
+    args = parser.parse_args()
+
+    # Collect (source, target) pairs first: the -i file, then the positionals.
+    jobs = []
+    if args.input:
+        jobs.append((args.input, args.output or tsvname(args.input)))
+    jobs.extend((path, tsvname(path)) for path in args.files)
+
+    for source, target in jobs:
+        padfile(source, target, fieldcnt=fieldcount(source))
+
+
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/protIDs	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,11 @@
+WP_000159554.1
+WP_000214552.1
+WP_000346214.1
+WP_000568619.1
+WP_000958804.1
+WP_000991933.1
+WP_000996146.1
+WP_003722398.1
+WP_005378126.1
+XP_961517.1
+
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,10 @@
+WP_000159554.1	2
+WP_000214552.1	91061
+WP_000346214.1	91061
+WP_000568619.1	666
+WP_000958804.1	1301
+WP_000991933.1	666
+WP_000996146.1	666
+WP_003722398.1	1639
+WP_005378126.1	662
+XP_961517.1	5141
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,17 @@
+91061
+1
+641
+1224
+1236
+131567
+1637
+1639
+1783272
+2
+662
+13562
+13562
+641
+662
+666
+91061
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,252 @@
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
+2	|	Bacteria	|	Bacteria <prokaryotes>	|	scientific name	|
+2	|	Monera	|	Monera <Bacteria>	|	in-part	|
+2	|	Procaryotae	|	Procaryotae <Bacteria>	|	in-part	|
+2	|	Prokaryota	|	Prokaryota <Bacteria>	|	in-part	|
+2	|	Prokaryotae	|	Prokaryotae <Bacteria>	|	in-part	|
+2	|	bacteria	|	bacteria <blast2>	|	blast name	|
+2	|	eubacteria	|		|	genbank common name	|
+2	|	prokaryote	|	prokaryote <Bacteria>	|	in-part	|
+2	|	prokaryotes	|	prokaryotes <Bacteria>	|	in-part	|
+641	|	Vibrionaceae	|		|	scientific name	|
+641	|	Vibrionaceae Veron 1965	|		|	authority	|
+641	|	gamma-3 proteobacteria	|	gamma-3 proteobacteria <#3>	|	in-part	|
+662	|	"Microspira" Schroeter 1886	|		|	authority	|
+662	|	"Pacinia" Trevisan 1885	|		|	authority	|
+662	|	Beneckea	|		|	synonym	|
+662	|	Beneckea Campbell 1957	|		|	authority	|
+662	|	Listonella	|		|	synonym	|
+662	|	Listonella MacDonell and Colwell 1986	|		|	authority	|
+662	|	Microspira	|		|	synonym	|
+662	|	Pacinia	|		|	synonym	|
+662	|	Vibrio	|		|	scientific name	|
+662	|	Vibrio Pacini 1854	|		|	authority	|
+666	|	"Bacillo virgola del Koch" Trevisan 1884	|		|	authority	|
+666	|	"Bacillus cholerae" (Pacini 1854) Trevisan 1884	|		|	authority	|
+666	|	"Bacillus cholerae-asiaticae" Trevisan 1884	|		|	authority	|
+666	|	"Kommabacillus" Koch 1884	|		|	authority	|
+666	|	"Liquidivibrio cholerae" (Pacini 1854) Orla-Jensen 1909	|		|	authority	|
+666	|	"Microspira comma" Schroeter 1886	|		|	authority	|
+666	|	"Pacinia cholerae-asiaticae" (Trevisan 1884) Trevisan 1885	|		|	authority	|
+666	|	"Spirillum cholerae" (Pacini 1854) Mac1889	|		|	authority	|
+666	|	"Spirillum cholerae-asiaticae" (Trevisan 1884) Zopf 1885	|		|	authority	|
+666	|	"Vibrio cholera" (sic) Pacini 1854	|		|	authority	|
+666	|	"Vibrio cholerae-asiaticae" (Trevisan 1884) Pfeiffer 1896	|		|	authority	|
+666	|	"Vibrio comma" (Schroeter 1886) Blanchard 1906	|		|	authority	|
+666	|	ATCC 14035	|	ATCC 14035 <type strain>	|	type material	|
+666	|	ATCC 14547 [[Vibrio albensis]]	|	ATCC 14547 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	Bacillo virgola del Koch	|		|	synonym	|
+666	|	Bacillus cholerae	|		|	synonym	|
+666	|	Bacillus cholerae-asiaticae	|		|	synonym	|
+666	|	CCUG 48664 [[Vibrio albensis]]	|	CCUG 48664 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	CCUG 9118 A	|	CCUG 9118 A <type strain>	|	type material	|
+666	|	CECT 514	|	CECT 514 <type strain>	|	type material	|
+666	|	CIP 62.13	|	CIP 62.13 <type strain>	|	type material	|
+666	|	Kommabacillus	|		|	synonym	|
+666	|	LMG 4406 [[Vibrio albensis]]	|	LMG 4406 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	LMG:4406 [[Vibrio albensis]]	|	LMG:4406 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	Liquidivibrio cholerae	|		|	synonym	|
+666	|	Microspira comma	|		|	synonym	|
+666	|	NCIMB 41 [[Vibrio albensis]]	|	NCIMB 41 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	NCTC 8021	|	NCTC 8021 <type strain>	|	type material	|
+666	|	Pacinia cholerae-asiaticae	|		|	synonym	|
+666	|	Spirillum cholerae	|		|	synonym	|
+666	|	Spirillum cholerae-asiaticae	|		|	synonym	|
+666	|	Vibrio albensis	|		|	synonym	|
+666	|	Vibrio albensis Lehmann and Neumann 1896	|		|	authority	|
+666	|	Vibrio cholera	|		|	synonym	|
+666	|	Vibrio cholerae	|		|	scientific name	|
+666	|	Vibrio cholerae Pacini 1854	|		|	authority	|
+666	|	Vibrio cholerae biovar albensis	|		|	synonym	|
+666	|	Vibrio cholerae bv. albensis	|		|	synonym	|
+666	|	Vibrio cholerae-asiaticae	|		|	synonym	|
+666	|	Vibrio comma	|		|	synonym	|
+1224	|	Alphaproteobacteraeota	|		|	synonym	|
+1224	|	Alphaproteobacteraeota Oren et al. 2015	|		|	authority	|
+1224	|	Alphaproteobacteriota	|		|	synonym	|
+1224	|	Proteobacteria	|		|	scientific name	|
+1224	|	Proteobacteria Garrity et al. 2005	|		|	authority	|
+1224	|	Proteobacteria [class] Stackebrandt et al. 1988	|		|	authority	|
+1224	|	proteobacteria	|	proteobacteria <blast1224>	|	blast name	|
+1224	|	purple bacteria	|		|	common name	|
+1224	|	purple bacteria and relatives	|		|	common name	|
+1224	|	purple non-sulfur bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria and relatives	|		|	common name	|
+1236	|	Gammaproteobacteria	|		|	scientific name	|
+1236	|	Gammaproteobacteria Garrity et al. 2005 emend. Williams and Kelly 2013	|		|	authority	|
+1236	|	Proteobacteria gamma subdivision	|		|	synonym	|
+1236	|	Purple bacteria, gamma subdivision	|		|	synonym	|
+1236	|	g-proteobacteria	|		|	blast name	|
+1236	|	gamma proteobacteria	|		|	synonym	|
+1236	|	gamma subdivision	|		|	synonym	|
+1236	|	gamma subgroup	|		|	synonym	|
+1239	|	Bacillaeota	|		|	synonym	|
+1239	|	Bacillaeota Oren et al. 2015	|		|	authority	|
+1239	|	Bacillota	|		|	synonym	|
+1239	|	Bacillus/Clostridium group	|		|	synonym	|
+1239	|	Clostridium group firmicutes	|		|	synonym	|
+1239	|	Firmacutes	|		|	synonym	|
+1239	|	Firmicutes	|		|	scientific name	|
+1239	|	Firmicutes corrig. Gibbons and Murray 1978	|		|	authority	|
+1239	|	Low G+C firmicutes	|		|	synonym	|
+1239	|	clostridial firmicutes	|		|	synonym	|
+1239	|	firmicutes	|	firmicutes <blast1239>	|	blast name	|
+1239	|	low G+C Gram-positive bacteria	|		|	common name	|
+1239	|	low GC Gram+	|		|	common name	|
+1385	|	Bacillales	|		|	scientific name	|
+1385	|	Bacillales Prevot 1953	|		|	authority	|
+1385	|	Bacillus/Staphylococcus group	|		|	synonym	|
+1637	|	"Listerella" Pirie 1927 (nom. rej. Opin. 14)	|		|	authority	|
+1637	|	Listerella	|		|	synonym	|
+1637	|	Listeria	|		|	scientific name	|
+1637	|	Listeria Pirie 1940	|		|	authority	|
+1639	|	"Bacterium monocytogenes hominis" Nyfeldt 1932	|		|	authority	|
+1639	|	"Bacterium monocytogenes" Murray et al. 1926	|		|	authority	|
+1639	|	"Corynebacterium infantisepticum" Potel 1950	|		|	authority	|
+1639	|	"Corynebacterium parvulum" Schultz et al. 1934	|		|	authority	|
+1639	|	"Erysipelothrix monocytogenes" (Murray et al. 1926) Wilson and Miles 1946	|		|	authority	|
+1639	|	"Listerella hepatolytica" Pirie 1927	|		|	authority	|
+1639	|	ATCC 15313	|	ATCC 15313 <type strain>	|	type material	|
+1639	|	Bacterium monocytogenes	|		|	synonym	|
+1639	|	Bacterium monocytogenes hominis	|		|	synonym	|
+1639	|	CCUG 15526	|	CCUG 15526 <type strain>	|	type material	|
+1639	|	CIP 82.110	|	CIP 82.110 <type strain>	|	type material	|
+1639	|	Corynebacterium infantisepticum	|		|	synonym	|
+1639	|	Corynebacterium parvulum	|		|	synonym	|
+1639	|	DSM 20600	|	DSM 20600 <type strain>	|	type material	|
+1639	|	Erysipelothrix monocytogenes	|		|	synonym	|
+1639	|	Listerella hepatolytica	|		|	synonym	|
+1639	|	Listeria monocytogenes	|		|	scientific name	|
+1639	|	Listeria monocytogenes (Murray et al. 1926) Pirie 1940	|		|	authority	|
+1639	|	Listeria sp. FDA00013359	|		|	includes	|
+1639	|	Listeria sp. FDA00013360	|		|	includes	|
+1639	|	Listeria sp. FDA00013361	|		|	includes	|
+1639	|	Listeria sp. FDA00013362	|		|	includes	|
+1639	|	Listeria sp. FDA00013363	|		|	includes	|
+1639	|	Listeria sp. FDA00013364	|		|	includes	|
+1639	|	Listeria sp. FDA00013365	|		|	includes	|
+1639	|	Listeria sp. FDA00013366	|		|	includes	|
+1639	|	Listeria sp. FDA00013367	|		|	includes	|
+1639	|	Listeria sp. FDA00013503	|		|	includes	|
+1639	|	Listeria sp. FDA00013504	|		|	includes	|
+1639	|	Listeria sp. FDA00013505	|		|	includes	|
+1639	|	Listeria sp. FDA00013506	|		|	includes	|
+1639	|	Listeria sp. FDA00013507	|		|	includes	|
+1639	|	Listeria sp. FDA00013508	|		|	includes	|
+1639	|	Listeria sp. FDA00013509	|		|	includes	|
+1639	|	Listeria sp. FDA00013510	|		|	includes	|
+1639	|	Listeria sp. FDA00013511	|		|	includes	|
+1639	|	Listeria sp. FDA00013512	|		|	includes	|
+1639	|	Listeria sp. FDA00013536	|		|	includes	|
+1639	|	Listeria sp. FDA00013537	|		|	includes	|
+1639	|	Listeria sp. FDA00013538	|		|	includes	|
+1639	|	Listeria sp. FDA00013539	|		|	includes	|
+1639	|	Listeria sp. FDA00013540	|		|	includes	|
+1639	|	Listeria sp. FDA00013541	|		|	includes	|
+1639	|	Listeria sp. FDA00013542	|		|	includes	|
+1639	|	Listeria sp. FDA00013543	|		|	includes	|
+1639	|	Listeria sp. FDA00013544	|		|	includes	|
+1639	|	Listeria sp. FDA00013545	|		|	includes	|
+1639	|	Listeria sp. FDA00013546	|		|	includes	|
+1639	|	Listeria sp. FDA00013547	|		|	includes	|
+1639	|	Listeria sp. FDA00013548	|		|	includes	|
+1639	|	Listeria sp. FDA00013549	|		|	includes	|
+1639	|	Listeria sp. FDA00013550	|		|	includes	|
+1639	|	Listeria sp. FDA00013551	|		|	includes	|
+1639	|	Listeria sp. FDA00013552	|		|	includes	|
+1639	|	Listeria sp. FDA00013553	|		|	includes	|
+1639	|	Listeria sp. FDA00013554	|		|	includes	|
+1639	|	Listeria sp. FDA00013555	|		|	includes	|
+1639	|	Listeria sp. FDA00013556	|		|	includes	|
+1639	|	Listeria sp. FDA00013557	|		|	includes	|
+1639	|	Listeria sp. FDA00013558	|		|	includes	|
+1639	|	Listeria sp. FDA00013559	|		|	includes	|
+1639	|	Listeria sp. FDA00013560	|		|	includes	|
+1639	|	Listeria sp. FDA00013561	|		|	includes	|
+1639	|	Listeria sp. FDA00013562	|		|	includes	|
+1639	|	Listeria sp. FDA00013563	|		|	includes	|
+1639	|	Listeria sp. FDA00013564	|		|	includes	|
+1639	|	Listeria sp. FDA00013565	|		|	includes	|
+1639	|	Listeria sp. FDA00013566	|		|	includes	|
+1639	|	Listeria sp. FDA00013567	|		|	includes	|
+1639	|	Listeria sp. FDA00013568	|		|	includes	|
+1639	|	Listeria sp. FDA00013570	|		|	includes	|
+1639	|	Listeria sp. FDA00013571	|		|	includes	|
+1639	|	Listeria sp. FDA00013572	|		|	includes	|
+1639	|	Listeria sp. FDA00013573	|		|	includes	|
+1639	|	Listeria sp. FDA00013574	|		|	includes	|
+1639	|	Listeria sp. FDA00013575	|		|	includes	|
+1639	|	Listeria sp. FDA00013576	|		|	includes	|
+1639	|	Listeria sp. FDA00013577	|		|	includes	|
+1639	|	Listeria sp. FDA00013578	|		|	includes	|
+1639	|	Listeria sp. FDA00013579	|		|	includes	|
+1639	|	Listeria sp. FDA00013607	|		|	includes	|
+1639	|	NCTC 10357	|	NCTC 10357 <type strain>	|	type material	|
+1639	|	SLCC 53	|	SLCC 53 <type strain>	|	type material	|
+2157	|	"Archaea" Woese et al. 1990	|		|	authority	|
+2157	|	"Archaebacteria" (sic) Woese and Fox 1977	|		|	authority	|
+2157	|	Archaea	|		|	scientific name	|
+2157	|	Archaebacteria	|		|	synonym	|
+2157	|	Mendosicutes	|		|	synonym	|
+2157	|	Metabacteria	|		|	synonym	|
+2157	|	Monera	|	Monera <Archaea>	|	in-part	|
+2157	|	Procaryotae	|	Procaryotae <Archaea>	|	in-part	|
+2157	|	Prokaryota	|	Prokaryota <Archaea>	|	in-part	|
+2157	|	Prokaryotae	|	Prokaryotae <Archaea>	|	in-part	|
+2157	|	archaea	|	archaea <blast2157>	|	blast name	|
+2157	|	prokaryote	|	prokaryote <Archaea>	|	in-part	|
+2157	|	prokaryotes	|	prokaryotes <Archaea>	|	in-part	|
+2158	|	Methanobacteriales	|		|	scientific name	|
+2158	|	Methanobacteriales Balch and Wolfe 1981	|		|	authority	|
+2159	|	Methanobacteriaceae	|		|	scientific name	|
+2159	|	Methanobacteriaceae Barker 1956	|		|	authority	|
+2172	|	Methanobrevibacter	|		|	scientific name	|
+2172	|	Methanobrevibacter Balch and Wolfe 1981	|		|	authority	|
+28890	|	"Euryarchaeota" Woese et al. 1990	|		|	authority	|
+28890	|	Euryarchaeota	|		|	scientific name	|
+28890	|	Euryarchaeota Garrity and Holt 2002	|		|	authority	|
+28890	|	Methanobacteraeota	|		|	synonym	|
+28890	|	Methanobacteraeota Oren et al. 2015	|		|	authority	|
+28890	|	Methanobacteriota	|		|	synonym	|
+28890	|	euryarchaeotes	|	euryarchaeotes <blast28890>	|	blast name	|
+83816	|	ATCC 35063	|	ATCC 35063 <type strain>	|	type material	|
+83816	|	DSM 1093	|	DSM 1093 <type strain>	|	type material	|
+83816	|	JCM 13430	|	JCM 13430 <type strain>	|	type material	|
+83816	|	Methanobacterium ruminantium	|		|	synonym	|
+83816	|	Methanobacterium ruminantium Smith and Hungate 1958 (Approved Lists 1980)	|		|	authority	|
+83816	|	Methanobrevibacter ruminantium	|		|	scientific name	|
+83816	|	Methanobrevibacter ruminantium (Smith and Hungate 1958) Balch and Wolfe 1981	|		|	authority	|
+83816	|	OCM 146	|	OCM 146 <type strain>	|	type material	|
+83816	|	strain M1	|	strain M1 <type strain> <taxid 83816>	|	type material	|
+91061	|	Bacilli	|		|	scientific name	|
+91061	|	Bacilli Ludwig et al. 2010	|		|	authority	|
+91061	|	Bacillus/Lactobacillus/Streptococcus group	|		|	synonym	|
+91061	|	Firmibacteria	|		|	synonym	|
+91061	|	Firmibacteria Murray 1988	|		|	authority	|
+131567	|	biota	|		|	synonym	|
+131567	|	cellular organisms	|		|	scientific name	|
+135623	|	'Vibrionales'	|		|	synonym	|
+135623	|	Vibrionaceae group	|		|	synonym	|
+135623	|	Vibrionales	|		|	scientific name	|
+183925	|	Archaeobacteria	|		|	synonym	|
+183925	|	Archaeobacteria Murray 1988	|		|	authority	|
+183925	|	Methanobacteria	|		|	scientific name	|
+183925	|	Methanobacteria Boone 2002	|		|	authority	|
+183967	|	Thermoplasmata	|		|	scientific name	|
+183967	|	Thermoplasmata Reysenbach 2002	|		|	authority	|
+186820	|	Listeriaceae	|		|	scientific name	|
+186820	|	Listeriaceae Ludwig et al. 2010	|		|	authority	|
+1235850	|	"Methanoplasmatales" Paul et al. 2012	|		|	authority	|
+1235850	|	Methanomassiliicoccales	|		|	scientific name	|
+1235850	|	Methanomassiliicoccales Iino et al. 2013	|		|	authority	|
+1235850	|	Methanoplasmatales	|		|	synonym	|
+1783272	|	Terrabacteria group	|		|	scientific name	|
+2283794	|	"Methanomada" Petitjean et al. 2015	|		|	authority	|
+2283794	|	Methanogen Class I	|		|	synonym	|
+2283794	|	Methanomada	|		|	equivalent name	|
+2283794	|	Methanomada group	|		|	scientific name	|
+2283796	|	Diaforarchaea	|		|	equivalent name	|
+2283796	|	Diaforarchaea Petijean et al. 2015	|		|	authority	|
+2283796	|	Diaforarchaea group	|		|	scientific name	|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,27 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+641	|	135623	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+662	|	641	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+666	|	662	|	species	|	VC	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+1224	|	2	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1236	|	1224	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1239	|	1783272	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1385	|	91061	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1637	|	186820	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1639	|	1637	|	species	|	LM	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+2157	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+2158	|	183925	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2159	|	2158	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2172	|	2159	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+28890	|	2157	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+83816	|	2172	|	species	|	MR	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+91061	|	1239	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+131567	|	1	|	no rank	|		|	8	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+135623	|	1236	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+183925	|	2283794	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+183967	|	2283796	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+186820	|	1385	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1235850	|	183967	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1783272	|	2	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+2283794	|	28890	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2283796	|	28890	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
Binary file test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cat_database.loc	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,8 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value  name    database_folder	taxonomy_folder
+#2019-07-19_CAT_database	2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
+CAT_database_test	CAT_database_test	${__HERE__}/CAT_prepare_test/CAT_database	${__HERE__}/CAT_prepare_test/taxonomy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs.fasta	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,94 @@
+
+>contig_9952
+TGGTTATGTACGCAGACAGCTTACTCCTCCTCGGAGACCACTCGCGGTACTCTATTCCCTCCACGGTTGCGAGGCGCTCGCCGTAGATCCGCTTCCCGGG
+ACAGGCAGACAGGGTGTATAGCCTCCCCCTCTCGGAAAAAACCCCGGGCACGCGGTCCAAAGACTCCATGCCCGTAACAACGCCGTGGTTTTAGAAATAA
+TCTGTGCCGTCGGTTGCAAACCCTAAATACAGGGGGATATCAATGCGGTTGCATGGATATCCACATCCTTCGTGAGATCGCCGACGCAGTTCAGGCGGCG
+GTATCTCTCATACCCGACCCCTGCAGCAGGGGCAACGAGATATGCATGGGCAACGACGGCACACCCACATCCGAGATAGACAAAGTGGCTGAGAACGCGG
+TGCTCGGGTACATAGAGTCCAACCGCCTGGCTCTGAACGTGCTCAGCGAAGAGATAGGCTTCGTGGACAACGGCGCGTCGGAGGTTCTGGTCCTCGATCC
+CATCGATGGGACAAGCAATTCAGTGGCCGAGATACCTTTCTATACGATATCCATGGCCGTCGGCAAGGATTCGCTCTGCGGCATGCACACGGCCTACATC
+AGGAACCTGGCGACAGGGGACGAGTTCTGGGCGCACAAAGGGGATGGCGCTTATTACAACGGAAGGAGGATAAACGTCAGGAAGCCGGATTTCTCCAAAC
+TGTTCGCCCTTATATACATGGGGAACGCCGCTGTCGATGAAGCGTTCGCCCTTGCAAAGAACGTCAAGACCTCCCGCTCCATGGGCTGCGCCTCCCTTGA
+GATGACGCTCGTGGCACTAGGACACGCCGATATCTATTACATGAACACCTACCGTTACAACCGTGCCGTCAGGACTGTGGACATCGCCGCCAGCGCCCTG
+ATACTCAGGGAGGCGGGCGGCGAGATATTCGATATCGGCGGCAACAAGCTGGATATGCCGCTGGACAACGCTTACCACGCAAGCTTCGTGGCGTGCTCCT
+GCAAAGAGGTATTCGACCACATCATGAGGGCCCACATCGAGGAGCACGGCGCTACGCGTTACGGGATATACGCCAACGAGACCGTCCCCGGGGCGGCCGA
+GTATGTGAGGAGGGCGTACGATGCTTTGAGGGGGGAGAAGGTAACCCTCGACACGGCGGCCGCCAGGCTGATCGGGGCGGAAGGCGTGCCTATTTCGGAG
+ATCGAGGCGGACATCGTCGTGGTGATAGGAGGGGACGGCACGATACTCAGGGCGCTCAAGAAGACGGATGCCGCCGTGATAGGGATCAACGCCGGAGGCG
+TGGGGTTCCTGGCCGAGGTCGAGCCGGACGAGATAGAGGAGAGCATATCCCGCATCAGGCGCGGAGAGTACTCGGTTGAGGAGAGGATCAAGCTCAGGAC
+TTTTTACGAGGGGGAATATCTCTCGGAGGCCGTGAACGAGACAGTGATACACACTGATTCCGTGGCGAAGATCAGGCAGTTCAAGATATATGTCAACGAA
+CACCTGGCAACGGAGGTCCGCGCGGACGGCATAATCATCTCGACGCCCACAGGCTCCACCTGCTACGCCATGAGCCTCGGCGCGCCCATAACCGACCCGG
+GGGTCGGAGCTTTCCTGATAGTCCCCATGGCGGCGTTCAAGTTCGCTTCCCGTCCGTTCGTCGTTCCCTATACGGCGAAGATAACCGTCGAGGCGGTCAT
+GGACAAGGGCTGCCTCATCGTGGTGGACGGCCAGCACGAGTACCCGATGAGGGGAGGGACGCGGGCGGAATTCTCGCTTTCCGACAACCTCGCCAAATTC
+TCGGCCCCGGCGTTCCTGGCATCGACGGGCATCTCGAAGTAGATCTCGCCGCCCATCATGTTGATGTCGGCCTTAAAAGGTAAAGAAAGCCAGATGGCGT
+TCGAGACATCGGAGTCGTCCAGCTCGGCGAAGTAATCGCCGCTCGTCGTGACAATCTTCATTCTGCTCAAAGCGCCACCCGGCCGTCGGTTCAGTTCTTT
+TTCTTCTTTTCGAAGAGCTTCCTGATCTCCCTGCCCGTGACCTCGATCTGGAGGTCCTTCTCCTTCTCCTCCATTGCCTTCAGGCCCTTCAGGTCGTTGG
+CCCATTCGGAGGTCCAGTCGGCTTTGAATTTTCCGGATTCGATGTCGTCCAGAATCTTCTTCATGCCCTTCTCGGACTCTTCGGTGATCACCAGGTCCCT
+CCTGGTAAGGCCTCCGTACTCGGCAGTGTTGGAAACGACGTGCCACATCTTCTCGAAACCGCCCTCGTTTATGAGGTCGACGATGAGCTTCGCCTCATGG
+CATACTTCGAAGTAGGCCATTTCGGGAGGGTATCCTCCCTCGACCAGGGTCTTGAATCCCGACTTTATGAGGCCGGTGGTCCCTCCGCACAGCACGGCCT
+GCTCTCCGAACAGGTCTGTGAGCGTCTCGTTGTCGAAAGTGGTCTCGAAGACGCCGGCGCGGGTGGCTCCGAGCCCCTTTGCAAGTGCAAGGGCGATCTT
+CTTGGCGTTGCCGGTATAGTCCTGGTGGACGCAAACGAGGGCCGGAACTCCGAATCCCTCGACGAACACATCCCTTTCTTTGTCCCCGGGGGCCTTGGGA
+GCCATCATTATGACGTCGATGTTTTTCGGAGGAACGATGGTCTTGTAGGTCACAGCGAAACCGTGGGCGAACTCAAGTGCGCAGCCCTCCCTGATGTTGG
+GCTCGACGAATTCTTTGTATACCTTTGGCTGGACCTCGTCGGGCAGAAGCATCATGATGACGTCCGCGGTCTTGGCGGCCTCGGCGAAATCTACGACCTT
+GAAGCCGTCCTCTTTCGCTTTGTTCCATGATCTTCCGTCTTTCCTGAGCCCGATCACTACGTTGAGGCCGGAGTCCCTGAAGCACAGGGCCTGCGCTCTC
+CCCTGGGATCCGTAGCCCATGACGGCGACCGTTTTTCCTTTAAGGACATCTATGTCCACATCTGCATCGTGGTAAATCTTCATTATATCCACCTGTTTAG
+AGGTCCAACTGCTTTATAGACTAAAAGGTATCGTTCCCGCTCCGACATATAGGTCAGTTCAGTACTGGCAGCGTCCTTTGACCAGGGCCTGATTCGGATT
+GGCAGGCAGCATGGGCAACACGTCCTCCTCGGGATCGATGTGGATGTCCAGCAGGCACGTCTCGCCGCTGTCTATTGCGGTCTTCAGGGCGTCGGCTATC
+TCTCCCGGCTTCTCGACCAGCATTCCTCTGGCCCCGTAGGCCTCGGCTATCTTGGAGAAGTCCGGGTCGGCGCCAAGCTCGGTCTCGCTGTACCTCTTGT
+TCCAGAACAGCTTCTGCCACTGTTTGACCATTCCCAGCCATCCGTTGTTCAGCAGGACTATGACGACCGGCAGGTCCTCGGCCACCGAGGTGGCCAGCTC
+CTGTTGGACCATCTGGAATCCCCCGTCCCCTGTTATGGTCAGGACGGTGCTGTCGGGCTTGGCGGCCTTCGCCCCTATGGCGGAGGGGAGCCCGAAACCC
+ATCGTGCCGAAGCTTCCCGAGGAGAGGAGCTGTCTGGGCCTGTGGACGTGCAGATGGTGCATGGCCCACATTTGGTTCTGTCCCACGTCGGTGGTGACTA
+TCATGTCGTCGTCCTTGTCGATCAGCCTGTTGATCTCGTATATGACCTTCTGAGGGACGATCGGTGTAAGGTCTATGTCGATCTTGCACCTGCAACGCCT
+CCTGTACTCCGCATAGGTGCTGTTCCAGTCGGCATGGGTATCCCTGTATCCGGAGAGCCCGTCGATGAGCGCCGCGGTACCCTTCTTAGCATCGCAGAGA
+AGGTTGACGTCGTTGTTCTTGTGCTTGTCGAACTCCGTCGCGTCTATGTCTATCTGGACGACCCTGCATGCGCCGTCGAACCTGGTGTGGGGGCTGAACG
+TCCTGTCCGAGAACTTCGTGCCTATGGCTATTACCAGGTCGGCGTTGCGGAAAGTATCGAGGGCGCACATCTTGCCGTGCATCCCCAAGGGGCCCAGGCT
+GAGCGGGTGCTCGGTGGACATGGCACCCAATCCCATGAGCGTGAAGACCGCCGGCGCGCCGATGAGCTCTGCGAGCCTCGTAACTTCCTCGGACGCGTTC
+GCGCTTATCGTTCCGCCGCCGATCAGCAGGACGGGCCTCTGCGCTTCCTTGATCCATTGGACCGCGGTGCCCAGTTCGGACATGTCCTCCCTGGGCTCCT
+TGATCCCGTACGAGATGCCCAGGAGGCTCTCGTCGATCTCCGAGTTCATCTGGTCTGAGGGGAGGTCGATGTGGACAGGCCCCGGTCGCCCGGTCTGGCA
+CATCTTCCATGCCTCGTCCACCGCATGGGGCAGCCTGTTGACGTCTAGGACCCTGAAGTTGTGCTTCGTTATAGGCATGAGGAGGCTGTACGCGTCCACT
+TCCTGGAAAGCGCCGAGCCCCAGGGACCCGGTTCCGACCTGTCCGGTAAGTGCCAGCATGGGAGTTGAGTCCGCATACGCCGTGCCTATGCCGGTGATCA
+TGTTGGTGGCACCGGGCCCGCTGGTGGCCATGCAGACGCCCGGCCTCCCGCTGGCCCTGGCATATCCGTCTGCCATGTGGGCGGCGCACTGCTCGTGGCG
+TACTAGGACATGGTTTATCGATGAGTTCATTATCTCGTCGTAGATCGAGATTACGCTTCCGCCCGGATATCCGAACATGGTCTCGACACCTCTGTCCTCC
+AGCATTTGGAGCAATGCTCTGTTTCCTTTCATGGTTGGTCTCCGGCGACGTATCGCGCTTGTTTTTTATAATTCTATTTGGAAAAGCGCGCCGAAACGCG
+CCAGCGGAAGAAGTTTATGTATACGGGGGCCATATGCCCACGCAGGTGTTTCATGGCTGTAATAAAGGTCGGTATCAACGGATTCGGAACCATAGGGAAA
+AGGGTCGCCTCCGCAGTGAGCGCACAGGATGACATGGAAGTCGTAGGTGTGACGAAGACCCGCCCGTCCTTCGAGTCGGAGGTCGCAAGGTACAGGGGAT
+TCGACCTGTACGCGCCTCAGAAAAGCGTCGAACTGTTCGACAAAGCGAACGTGCCGGTCGGGGGGACCGTCGAAGACCTCTGCGGCAAGGTAGACATCAT
+GGTCGACTGCACGCCCGGAAACGTAGGGCAGGAATACAAGGCGATGTACGCCAAAGCAGGCATAAAGGCGATATTCCAGGGAGGGGAGGACCACAGCCTG
+ACGGGGATATCCTTCAACTCCACCGCCAACTACAAGGAGTCCTGGGGCGCCCAGTTCTCCCGTGTCGTTTCTTGCAACACCACGGGGCTGCTGAGGACGC
+TCTACCCCATAGACCGCGAGTTCGGTATCGAGAAGGCGTACGTAACGTTGGTCAGAAGGGCCGCGGACCCCGGTGACAGCAAGAACGGGCCGATCAACGG
+GCTGGAGCCCACCGTCAAGCTGCCGACCCACCACGGGCCGGACGTCCAGAGCATCATGCCATGGGTCAACATCAACACCATGGCGATAAAGGCCTCCACT
+ACGTTGATGCACATGCACACGGTCACGCTGGAGCTGAAGAACTCCGCTTCCACCGAGGCCGCGGTCGAAGCGATAAGGAACTCCTCGCGCGTCAGGATGG
+TGGACGCGGCGTCCGGCATCAGGTCCACGGCGGAGGTCATGGAGCTGTCGAGGGACCTGGCCAGGGACAGGTCCGACATGTACGAGATCGTGGTATGGGA
+
+>contig_38063
+CTATCTCCTCAGGAGGTCTGGGAATCTCTGATCGGGAAGAACAGTAACTACCGCATCATAGTCGTGGACCTCAATCTGACCCGTGTGCTGTTCGGCATGA
+TAGTGGGCGCCGGCCTGGCGGTGGCCGGTGCGGTCATGCAGGCCCTGTTCAAGAACCCGATGGCCTCGCCTTATACTCTCGGGCTCTCGTCAGGCGCCGC
+ATTGGGCGCCGCATTGGGGATTCTCTTCCCTCTTTCGTTCGTACCTGAGGTCGCATCGGTCCCAATCCTGGCTTTCGTTTTCTGTCTGGGGACCATGTTC
+CTCGTGTACTCTATTGCCAGAGTGGGCAACCAGACGCACATGGAGACTCTTCTGCTGGCCGGAATAGCCGTAGCGGCATTGGCGCAGGCGGCGGTCTCCC
+TGCTCACGTACATAGCGGGCGAGAGCATCACGGAGATAGTCTTCTGGGGAATGGGCAGCCTGACCGTCAGCCTCCCATGGGTCAAGATCCCGATAGTGCT
+GGTCCTCAGCGCCGTGGGCATATTCGCAATGCTCTACTACGCCAAGGACCTGAACGCCATGATGCTGGGGGACGCCCACGCCATGGACCTTGGAATAGAC
+GTAAAAAAGACAAGGCTGGCACTGTTGATCGCCTCGTCTCTCGTCACCGCGGCTGCGGTATGTTTCGTGGGGACCATCGGCTTCGTAGGCCTTGTGATCC
+CGCACATACTCAGGATACTTCTTGGTCCGGACAACCGTCTGCTTCTGCCGATGTGCGTGCTGACCGGAGGGATATATCTTGTAGGATGCGACTATCTGGC
+ACATCTCTTCGCCCAATCTCTGGGCGTCATGCCCATAGGCATAGTGACATCTCTGATAGGCGCCCCGTATTTCATCTATCTGCTCAGGAGAAGAAAAAAG
+GAGGTGGGATGGGTATGAGCCTGGATATCCGTGACTTATTCTACAATTACGATGGGAAGCCTGTTCTCAAAGACGTTTCGTTCCTGGTCAAGGAAGGAGA
+GGTCCTGGGGATACTGGGGCCCAACGGATGCGGAAAGACGACCCTGCTGGGCAATCTGAACAGGAATCTGAGCCCCAAAGGCGGATGCGTGCTTCTGGAC
+GGGGAGGACCTTCACAATTACAAGAAAAAAGACATCGCGAAGGAGATAGCGGTGGTTCCGCAGGACAGTCGCGTAGGTTTCTCGTTCACCGTAAGAGAGA
+TCGTCTCCATGGGCAGGATGCCATTCCAGGACGCCTTCCAGGGAGACTCCTCGGAAGACCTCAGGATAATCGAAGACGCGATGAGGAAGACCAACGTACT
+GGATATGGCAGACCGTTACGTGAACACCATGAGCGGCGGGGAAAGGCAGAAGGTCATAATCGCCAGGGCCATGGCGCAGACGCCCAAGATACTGCTGATG
+GACGAGCCC
+
+>contig_44250
+GGTGATGTACTGGGGCTTGTAGGCTACTTTGACCTTTGCGTCTATCTTGCCGCCGTCTGGAGGGATCTCTCCGGCCAGCATCTTTACGAAAGTGGTCTTT
+CCTGTGGCGTTGGGACCGACGACCCCGACGGATTCCCCCATCTTTATGGAACCGCCGACGACATCCAAAGTGAACTCTCCGAAGTCCTTGGACAGGCCCT
+CGAAGGAAAGCAGGTCGGAAGTGACCCAGTCGCTCCTGGGAGGAGACGCGAAGAACTCTATCGGCCTATCCCTGAAACGGATATTCTCTTCGGGAAGGTA
+ACCGTCCAGATATACGTTTATGGCGGTCCTGACCTGTCTTGCAAGAGTGAACACGCCGTACGCCCCCTCGGTACCGTATACAACGCTGACGATGTCGGCG
+AGGAAATCGAGTATGGCAAGATCGTGTTCTATCACGACCACCTGCTTTTCTGCGCTGAGTTCTTTGATGATGCGTGCCATCCTGATCCTCTGGTAGATGT
+CAAGGTACGAGGTGGGCTCGTCGAAGAAGTATACGTCCGCGTCCTTCATGACCGTGGCAGCCATGGCGACCCTCTGAAGCTCTCCTCCTGAAAGTTTCTT
+TATATCCCTGTCCAGAAGCTCGGTCAGCTCGAACATGATGGCGGCCTCCTCGAGTGTCAGGCGGCCTTTTATGCCGGAAAGCAGGTCCTTCACGGGCCCC
+GATGCGGCTTTGGGTATGAGGTCCACGTACTGTGGCTTTATGGCCGTCCTCACCTTGCCGGCGTAGACGTCCGTGAGATAGGATTTGACCTCGGTACCGT
+CGTAGTGCTGCAGCACTTCCTCTTTGGATGGAGGTTTCTCATAGTTGCCCAGGTTGGGGACGAGTTCCCCGGAAAGTATCTTGATCGCCGTGGATTTTCC
+GATCCCGTTCGGTCCAAGTATGCCCGTGACCATGCCTTTCTTCGGCACCGGGAGCCTATAGAGGCGGAAGGCGTTCTCGCCGTACTGGTGGACCATCTCC
+GTCTTCAGCTCGTCGGCCAGGCCTATGATCTTTATGGCGTCGAACTGGCATTTGTTGACGCATATCCCGCATCCCTGGCACAGGGATTCGGATATGATGG
+GCTTGCCCCTCTCGCCGAACACTATGCATTCCACGCCCGTTCTGACCAACGGGCAGAACTTATAGCATTCCTTGTTGCATTTTCTGTTCTGGCATCTGTC
+CTGCAGGACGGCCGCAATACGCATGTCCCCGCTTAGACCGATTTAAGATATAACCTTTAAGGATGGTATCGCAGATAAGCTGATAAGGGAAGACGGAGAC
+AGATGGGCATGGCCGAAGCGGATGGGACCACCGAGGACGTCAGGATACTTACGGGCGACTACAGGAGGGCGATAAGGCATCTCTCCATACCGATAGCCGT
+GGCTCTTGCGATACAGCATATCAACATACTCGTAGACACGTTCTGGGTCGCGGGCCTGGGGGCGGACCCGATGGCTTCAATAAGCATAGTATACCCGGTT
+TTCGCCACGGTCATGGGCATCGGAAGCGGGCTGGGGATCGGTGCTTCTTCCGCGATAGCCAGAAGCATCGGGCATAACAGGAGGAAGGAAGCCGGCACGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome2.fna	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,89 @@
+>contig_11394
+GCTTTTTACGCCCAACGGGCTTGTTCTTGCTCAGACAGTCCAAGGCTTTCCAGATATGATAATCGCTGAATTTCGGTATCGGGCCGCCTGCACCCCTGTG
+CTGCTGATTGGACATATGGATGTATCATTCATCCATTAAATAAAGTAATTGATTGTCATGTCGCAAGATACGGTATTAACGCCGTCAATAGCTGTTAAGC
+CAGTGCCCGCCGTCGCTTTCCATCCGTAATACTCGCGCGTTTTTTATATCGAAGGAACTCTTCACATCCATGGTTCAAAAGTCGGTGGACTTTGTTATTA
+TTGAGAATGTTTCCAAGAGGTTCGGGAACAAGACGGTCCTTAATAATGTGAGCGCCACTATACAGACCGGTAAGATACTGGGCCTGATAGGCAAGAGCGC
+CGCGGGCAAGAGCGTTTTGATAATGATGCTGAGGGGAAGCGAAGATTACGCACCCGACTCCGGAAGGGTGCTGTACAGGGTCAATAGGTGTTCCGGATGC
+GGAAACCTCGACCTCCCCCATGAAGGAACGCCCTGCTCGAAATGCGGGTGTGAAACAGGAACGATCACCGTGGATTTCTGGTCTTTGAAAGATGACGACC
+CTTTAAGACGCCAGCTCAAGAGCCGCATCGCCATAATGCTGCAGAGGACGTTCGCCCTTTTCGGGGATAAGACCGTGATCGAGAACATCTTCGAGGCCAT
+AGGCGACCGTGCAGAGGGCAAGGCCAGGACGGACATGGCGCTCCAGCTGCTGGAATTCGTGGGGATGACTCACAGGACCACACACATAGCCAGGGACCTG
+TCCGGAGGAGAGAAGCAGAGGATAGTCCTCGCAAGGCAGATAGCCAGGGATCCTCTCTTCTTCTTGGCGGACGAGCCGACGGGAACGCTTGACCCGTACA
+CGGCGGAATTGATGCACGAGCGTCTTGTGGACTACGTCGGGAAGAGAGGGATCTCGATGGTCTTCGCGTCCCATTGGCCCGAGGCCGTGGATAAGATGGC
+CGACGAGGCCATATGGCTGGATTCCGGCAACGTGCTGATGCAGGGCGACCCGAAGGAGATCGCCGATAAATTTATGGAAGGATACTCGTTCGAAAGGACA
+AAGGCCGCCGACCTGGGAGAGCCGATAATATCGCTCAAGGATGCGGAGAAGCACTTTTTCTCTGTCGTCAGAGGAGTCGTCAAGGCAGTGGACGGTGTAA
+CCTTCGATATAATGGAGCGCGAGGTGTTCGGCCTTGTGGGAAAGTCGGGCGCCGGCAAGACCACGACGTCAAGAATGGTCGCCGGCATGACGCCCGCCAC
+CCGCGGGTCCGTGAAGATAAGGATCGGCGACGACTGGGTTGATATGTCAGAGATGGGGCCGAGCGGGAAAGGCCGCGCCACCCCCTATATCGGGTTCCTC
+CATCAGGAATACACGCTCTATCCCTTCGACAACATACTCAGCAACCTTACGACCAGCATAGGCACCAGGATGCCAGCGGAACTTGCAAAGTTCAAGGCCA
+TACAAGTGCTTCAGAGCGTAGGGTTCGACAAGAAGAACATGGAGAGTCTTCTCTACTCTTACCCCGACACACTGAGCGTCGGAGAGTGCCAGAGGATAGC
+CTTCGCACAGGTCCTGATAAGGGAGCCCCGCATCATAGTGCTGGATGAGCCTACAGGGACAATGGACCCGATAACAAAGACCATCATAGCAAAATCCGTC
+ATCCGGGCGAGGGAGACCCTGGGCGAGACCTTCGTCGTGGTGAGCCACGACATGGATTTTGTCGAGAACGTCTGCGACCGCGTAGCGTTCATAAGGAACG
+GCGTCGTGGAAGACATGGGAACTCCCGAGTCGGTCATCCAGAGGTTCGGTCTGAAAGAGCTTCAGGATGACGACTCCGAGGGTGAATGAATGAAGCAGCA
+GATCGGGCGCCACCTCAGCTTCGTTGAATGCAGAGAGGCCATGGGGCTCGGCGTGGGCGGTGCCCTGGCACAGAGGGCGACCATCTCTGACAGCGGAAGG
+GACGTCGTTGCGGTGGCCATGGGCCCGGGCAAGAGGCACATAACCAAACCGGTATGCGAGATAACATATGCCCTCAGAGAAGAGGGCATAGATACCAGCG
+TCGCCTGAGCGCCTGGCACTTATTGCGGAGTATGTCAAGGACATGATGACCGAACTCGAACCGGACAACGCGGCCGTCTTCGAAGCGGGATGCGCCAGCT
+ACCGGGCCAAGGTAGATGTGCTGATAGGGCTTGAACAAGAATATCTGACAGGCAAGGCGACTACCGAGATCATCGTCTGGCACCCTTCCTGGGCGTATCT
+TCTTCCGGATAATGTGACCGAGGCAGAGCTCATGGAAGCAGCCGAGGCGGCATCCACGCCCTCATCGATCGCGATGCTGCAGGGAGGGACGCCGGAAAAT
+CCTATCAACGTGTTCCTTTCGGAACCCGAAGAGATCAACGGTCTTACCCAGCAGGGGCTTTGTGAAATGGGAATATATGTAAACATAATAGTGATTAACA
+TACTCGCCGGGGACTGGGTCGAATATCTGGGCCAGGTCATCGAGATACTGGGAGATAATATTCCGGATGCGGGGACATGAATTGATGATACCAATAGAAA
+TTAAGGACCTTACCGCTGGATATGACGGCCGAGCCGTTTTCAGCAACGTCGACCTGGAGCTCAGGGACAAAGACTTCCTGGCGGTCATAGGGCCCAACGG
+CGGCGGGAAGACAACGCTCTTCAGGGCGATCCTGGGCCTAATAAAACCCATGGGGGGGACCGTAAAAGTGTTCGGCAAGGAGCCGGCAGGTTCGCCCCCG
+GGCATAGGATACGTTCCGCAGAACGAGAATCTGGACTCAGAATATCCAATAAGTGCCAGGGAAGTCGTCCTTATGGGAATGAGGTGCAAGAAGGGCCTTA
+GGCCGTTCTATTCCAGTGAGGAGAAGGAGTCCGCAGAGAGGGCCATGGAGTACGCCGAGGTCTCGGATTTCGCAGACAGCCGAATAAGCAACCTGTCGGG
+AGGGCAGAGACAGAGAGTATACCTCGCAAGGGCTCTTGCCCCGGAACCGAAGATACTCATGCTGGACGAACCCACCGCGAGCCTGGACCCGTCGATGAAG
+GACTGCACCTACGACATACTCAGGAAGCTGAACAGGGACGGGATAGCCATAATGGTGATAACTCACGATATGAGCAGCATCTCTCATGATGTCAAACGTG
+TAGCATGCATGAACCGCAGGCTGATAGTCAACGATGCGCCCGAGATAACCCAGGAGATGATCGCATTGGGATTCCACTGCATCCCCGAGCTAGTGCACAT
+AGGTCCCTGCGATTGCGGAGGTCACAACGATGGTTGATTGGGTCGCGGCATTCTCGATGCCTCTGATTCAGAACATGTTCATGGTCGCGGCCATAGCATG
+CGTTCTTTGCGGAGTCGTGGGAACCCTGGTGGTCGTGAAACGGATGGTGTTCGTAACGGGTGGCATAGCACACACCACTTTCGGAGGTGTGGGTCTTGCA
+TATTATGTTATGTCCGTCGTCGCAGTCTCATGGTTCACCCCCATGATCGGCGCCGCACTGTTCGCGGTCGTTTCGGCGGTCATAATGGCGCTTCCCGCGG
+
+>contig_5089
+TGCCGAAAGGTACGAGGAGATCATGGAGGCGCTCCGGGAGCTGGAGGAGATGTCTTGGGAACGGGTCATACTCGTGGAGGGCCGAAGGGACGTCACCGCG
+CTGGAGCATCTCGGCATATTCGGGGACGTTTTCACCGTCCAGGCGTCGGGCGGCCCGGTAAAGGCCGCAGAATATGTGGCCGGCAGACGGAAGAAGGCGG
+TCATACTGACCGATTGGGACAGGAAGGGCGACATAATAGCCTCGGACCTGGAGGTCCACCTGAGCGCTCTCGATGTCCAGTACGACACGGCGGTAAGAAG
+CAGACTGGCGGGCCTTTGCAGGATCGACATCAAGGACGTGCAGTCCCTGGACGAACTGGTACACCGACTCGAGACGGCATGAAGTAATATATTCAGTAAA
+TCATATTGAACCGTTAAGGATGGCAGGCCGTTTCATTGTTTTCGAAGGCATCGACGGTGCGGGCAAAAGCACTTTGATAGATGAAGTTTCAAAAAAATTG
+GAGTCGGCGGGCATAAGGACCGTCGTAACCGCAGAGCCCACTGAAGGGCCGATAGGAATGCTGATACGGAGCGGGGCGGTCAAATGCATATCTCCGAACG
+CGGAAGCTCTGCTGTTCACCGCCGACCGTGCCTGCCACACCTCCGAGATAGTCGGATGGATGGAGGAGGGGACGACCGTCCTCTGCGACCGTTACTACGC
+CTCCACCATAGCGTACCAGTCCGCAGGACTCGACGGAACGGTGTCGGGCAAGGAATGGCTCATGGACATCAACCGTCCCGTCACCGTAGAACCCGACACG
+ACGATACTTCTGGACATCGACCCCGAGGCGGGGATGCGCCGGGTGGGGGAACGCGGAGCGAGGAGCAAGTACGAGGTCACCGAGTACCTCGGCAGAGTGC
+GCTCCAATTATCTGGAAATAGCGGAGGAGAAAGGATTCCGCATAATAGACGCTTCTCGTCCGAAGGACGAGGTGCTGAGAGAAACGATGAAAATCTTAGG
+TGAGTGAAATGCATCCGTCGGAAGAGATCTATTGTGAGAAGAGCAACAGGCTGAAGGGAAAGACAGTGGTACTCGGGATAACGGGAAGCATCGCCGCAAC
+GGAATGTTTCTCCACGATACGCGAGCTGATACGCCACGGCGCTACGGTTATACCTGTCATGACAAGGGCGGCCTGCGACATAGTGACCGAGCAGAGCATA
+GAGTTCGCATCCGGAAAAAAACCCATAACCGAGCTCACGGGCCAGACCGAGCATGTCAAGCTGATGGGCGACTCCCGCACCGCGGACCTTCTTATGATCT
+AGAATGGACCACAGATGCGGATACAATGCATGCGGACACGAGGATCGGCACTGTCTCGGGCTCCGCGAACCCCGAAACGAAGGACGAGACCGAGGGATAC
+GAGACCAGCATCAAAGCGGCCAGCGTGTTGACGTGAATGCCAGGCACCAGTCCGGTCACCGCGCCCATCAGGGCCCCCGCCATGCTCATCAGCGATACAA
+GAAGCAGGACGTCCGTACCCATGGCCTAAGATCTGTCGCCGTGCGGTATATATGCGGATATCTACAGTTAGCAATCCAGGAACGGTTTGCCGTCCCTCAC
+TCCCACGCGCAGCATCGCCTTCACACGGACACCCGGCACGGCATCTGCGAGGTCCCCGGTCTTATCGAAGACCGCCAATACCTCCACGAGCCTGATGCCG
+TGGGAGGCCAGCGCAAGGGCGAGGGCCCTCACGGTCCCGCCGGTGCTGAGGGTGTCCTCGACTATCACGGCCCTGTCGCCTCTTCCCGGTCCGTTTATAT
+ACAAACTTCCTGAAGAATAGCCGGTGCTCCTGTCTATGATTATTTCTCCGGGAAGCCCGTAACCGCGTTTCCTTACTATGCTGTAGGGGATGCCCAGCCT
+TAGGGATATTGGCACGGCCAGCGGGATGCCCATAGCCTCCGGCGCGAGTATGACGTCGCAGTCGAAATCTCCCAGGTCAATCAGCCCCTCCACGACTTCG
+TTCAGAAGCGCGGGGTCCACGCGGGGAACGCCGTCGCTTATGGGATTGACGAAATACGGGTAGCCGTTCCTGTCGATCACAGGGCTGGCCATAAAGCTCT
+TTCTCAGAAGCTCGTACACTGAACGTGCACGGCGATGTGGTTATTTACCTGTTCTTAAAAGGAGACATATTTAAGGGAAGGTACGATAACAGCAACGATA
+TCCGTCGAAACGGCTCCGTTGACAACGGTCACGAACACCTGGACGGAATCGGAATCGATCCCTCCGAACGCACCGGTGTTTGCGGATATGTAGTAAGCTT
+TGATCTTACCCTGTGTCCCGTTATCGATATTGTCGCTGAAGTCAGGGCTCACCACCAGAAGGTTAGAGAGGCCCTTCTCCTGCATCAGCGAGATCGTCTC
+CTCGTCGGTCAGTATGGAGGACGGGTACAGCTTGATCAGGTACTGGTCCCTGGTGGCGGGATCGTACATCCCGTCCAACGCGGAATTGACAGTGTCGTAA
+TATGTGGAAGAACTCTGATCGTCGTAGTTTATCACGCCCATCCTGACGGGCTCCATGGCCTGTTCGGTCTGGTCGCTGATCATATATCCCAGACCTACGA
+ACAGTATCATGACGACCGCTATCGAGATTAGGGACTCGAGGGTCATCAGCTCCCTGAGCTCTTTCTTGACCAGGTTAAACAGATTGTTCAACGGACTTCA
+CCCCCTTGATGAAGACCTCTTCAAGGTTTTTAGCGTCGTATTTCGCCTTGAGCTCCGCGGGAGTCCCCTGCATGATGATCTCTCCCTTGTTGATCATGGC
+GACGCGGTCGCACAGGGATTCGACCTCATACATGTTGTGCGAGGAGAGGAGAACCGTAACTCCGGACCCGGCTATCTCCCGGATCAGCTCCCTGATGTCG
+TGCGCGTTCATCACGTCCAATCCGGATGTTACCTCGTCCATGATGGCGAAACGCGGTGAGGTCATTATCGCCCTTGCGATGAGCAGGCGTCTCATCATGC
+CCTTGCTGTAAGTGTTGACCTTGCTGTCGATGCGGTCTCCCAGGTTGGCGATGTCCATCCCCCTCTGGGTCATTTTTTCTGTCTCCTCGCCGTCGGTGAA
+GAAGCCCGCGATGAAGCGCAGATAGGTGCGTCCCGTCAGGTCCTTGTAGGCGCCCGCGTCCTCGGGCAGATAGCTTATGGATTTTCTTACGTCGTCCCCC
+TGGGCGGCGACATCGTATCCGCAGACGGTTATCTTTCCGGAAGTTATGGTTATGAGCGTCGAGATCATCCTAAGCGCCGTGGTCTTCCCGGCGCCGTTCG
+GTCCGATGAGGCCAAAGATCTCTCCCTCTTTGACAGAGAAGTTTATTCCCTTGACGGCCTCGATGTTCCCGTATATTTTATGCACGTTCTCTACGCGCAG
+GGCATCCATGAAGCATTGAGGTATATCTCGCTTTTAAATTTATTGGAGTGCTGTTAAGTGTCTAAAAAAGTGTGTATTCGGGCCGGGGCCCGTAATGTGT
+TTACTGTTCCGCGCCGGCCGAGGCGTTGCCCATGGCCTTATTTATGGTCTCTTGGAGGGACTGGTATTTCTCGCGGAGACTTTTCTCCTGGCGGTCCAGG
+
+>contig_159
+TATAGCTCAGCTCGTTGGCGGAGACGCTGCTTCCGTACATCTGGCCGCCGCCGTTGATGCCGCCGCCCCAGGCGGCCGTCCCGATCCCCACGGGAGAAAT
+GTCCGTACCTCGGAACCTTATGTTTCTCACGGATTCCCGTATATGTTCCTGGATTATAACTGATACGCAATCCTGTTTCCGACGTCCGCCATGTTTAGAT
+AAATTGACGGTATAGCCGAAGGCATGGATATGGCAATGGAGCTGAGGAACGTCTCCGTAGTGAGGGACGGGAAGCGGATACTGGATTCCGTCTGCCTCGA
+TATCGGCGCCTCCGAGAACGTTGCCGTCATAGGGCCGAACGGTTCGGGGAAGACGACGCTCATCAAACTGCTGAGGGGCGATATTTATCCCTACTACGAC
+GAGGACCGCCCCGCGGAGATGAGGATCTTCGGTGAGAAGATATGGTCCATCTACGACATACGGAGCCGCATGGGCGTGGTCTCCATGGACCTCCAGGGCA
+TGTTCGGCGGCGAAACGCTGGTCGGAGACGTCATAATGTCGGGATACTTCAGCAGCCTGGACATTTTCCGCAACCATGAGGTCACCGACAACATGCGCTC
+CGGGGCCTCGCGAGCGGCCGGGTACATGGGAGTGGAACATCTCGTCGGCAGAGATCTGTCCGGCCTTTCTCTGGGAGAGATGAGGCGGACGCTGATCGCC
+CGGGCGCTGGTCACCGCCCCCGAGATGCTCGTCCTCGACGAACCGATGACGGGCCTCGATATTGTAATGAAATCCAAATTCAGGAAGATGTTCGACATCA
+TGACGGAAACGGGAGTGAGCATCGTCATGATAACCCACGACCTCACCGACATCCCCGTTTCCTTGAACCGCATAATAATGATCAAGGATGGGAAAGTGTT
+CGCGGACGGTCCTAAAAAAGACGTCCTGACGTCCGAGGTCGTCAGCGGGCTTTTCGATGAACCTATTAATGTACAATGCGTTAACGGGATATATTCAATG
+AGGATGGATGAGTGACAAGGTATATCTGTTCCGAATGCGGGAACGAGATTCCGTACGTTTCGGATTTCTGCTACCAGTGCGGTAGCCTGAAGAGCAAGGC
+GTTCAAGATAGACGAGGGCGGCGAGATGGAGGGCGGGGAGGTCCCGTGCCCCAACTGCGGAAAGCCCATAGAGGAGGACGCCCGGTACTGCAGGCACTGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome3.fna	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,60 @@
+>contig_4003
+ATCAAAGAGGAACTGAAGGCGGCGATGCTGCTGACCGGTTCCTCTGACATAAGAGAGCTCTCTGATGCAGAGTATATCGTCATGGGAGAGACACGCAAAT
+GGATGGAAGGCCTGAAATGACCGACGTCAAGAAGATATTGAAACAGATGTCCGACGAGCTGAGCAAGCCGATCGAATCATACATAGAAGACGAACTGCCC
+GCCAATCTCATCGAAGCGGCAAGACAGTACCCCTATGCCGGCGGAAAGAGGATGAGACCGGCCATGGTCATCGCCGCGTGCAGGGCGGTGGGAGGGGATG
+GCAGGAAGGCCGTTCCCCTTGCGGTTGCCATAGAGTACATACACAATTTCACGCTGATCCATGATGACCTCATGGACGGGGACGAGAAGNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCATGGACGGGGACGAGAAGCGCC
+GGGGCATGACCACATCCCATGTGAAGTACGGCATGCCCACAGCGGTGTTGGCGGGAGATGCCCTGTTCGCTAAGGCCTTCCAGATAATCGCCGACCTCGA
+TGCGGACGGCGAAACGGTCAGAGATGTCCTTAGAGTGGTCTCCCAATCCGTCTGGGACCTGGCCAGAGGTCAGCAGATGGATATAAACAACGAGAACGGG
+ACCGAGGTCACCATGGATGAGTACATCGAGACCATCAGACTGAAGACCAGCGTTCTGTTCGCCGCCGGTGCCGCCGGAGGCGCTATGATCGGAGGTGCGA
+GCAAGGAGGTCGTGGACGCCGTCCACGGCTACGCCATGAACCTGGGCGTCGCCTTCCAGATGTATGACGATATACTTGGGATAGTCGGGGACCCGGCCGT
+TACCGGTAAATCGTCCGGTAATGACATTCGCAAAGGGAAGAGCACCGTCATCGTGTGCCACGCCCTGAAGAACATAGCTGACAGGGCGGACCTGCTGGTC
+TTCCGCGATATCCTCGGCAAGACAGACGCCACCGATGCGGAGATAGACGAGGTCAGGAGCATACTCCGAAGAGCCTGCAGCCTGGATTACGCCATAGAGA
+CCGCAGAGGATTACATCAACAAAGCCGTCGACTGCCTGGATGCGCTGGAGCCCTCAAAGGACAAGGACTTCATGATAGCCCTGGCAGAATACACGATGAC
+CAGGACCCTTTAGTCGGAGATCCCCTTCTCCGTTATGGAGTATGTGGCTTTCCGGCCTTCCGGTATGCTGCGGTGCTTGACCATGATCGCGGCCCTGCGG
+CCGTTGCCTTTCTTCTCCAAGCGGATTATGGTCTTCGCGTTATGATGCATGGCGTGGCCTCCGAGGAACTCTATCGTACCGGCGCCTATGTTGGTGTATA
+
+>contig_4403
+CACCGGTCACCCGAAGGTCACGCGCGTATCGATGCGTGACATCGCAGACCTGGGAGAGAGGGGCCTGTACATCCTTCACGAGATCGGTACGGACCTCGTC
+GGCAAGATGGAGGGCTGCACCGGGTGCAAGAAGTGCGAGCACGAATGCCCCGAGAACGCGTTGACCGTAAGCAAGGACAAGACGATCACCGTGAAGACCA
+AGAACTGCCTCGGAACGGCATGCTACAGATGCCAGTACGTCTGTCCCGAGAAGGTCATGCAGTTCGACTCCCTAAGGCTGTCGTGATAAACGGTTTTGGG
+CGGGGCCGGCCCCGCCCTTTTTTCATTTACCGCCGTTCAGGGCCTCGGCGTGCACGGCAGGCCTGACATTCTCGTCCTCCAGCTCCGTAAGTATCTGCTT
+GCGCAGCCTGATGAACTCGGGCGAAGCGCGGTCCCTGGGCCGCGGAATGCCTATGTCCACGATGTCCTTGATGCTGGCAGGACGCTTGGTAAGGACGACT
+ATCCTGTCTGAAAGATAAACGGCCTCGTCGACCGAGTGGGTCACGAACAGGATCGTAGTGTCCGTCTTCTCGACTATCCTCAGCAGCTCGCCCTGCATGA
+TGTTGCGCGTCTGGGCGTCCAACGCGCCGAACGGCTCGTCCATGAGCAGCACGTCGGGCTTGGTAACAAGGGCCCTTGCGATGCCCACGCGCTGCTTCAT
+ACCTCCGCTGAGCTCGTGGACACGATGGTCCTCGAAACCTTCGAGGCCGACCGCCCTGATGTAGCGTTCGGCGGTCTTCCTGCGCTGCTCCGCCGGGACG
+CCGGCGATCTCCAGGCCGAACTCGACATTCTTCCTTACAGAACGCCAAGGGAACAGTGCGAACTCCTGGAACACCATGCCTCTGTCGGGGCCTGGCCCGG
+TGCACTTCTTCCCGCCTATCGACACTTCTCCGGAGGACGGCTCCATGAGCCCTGCTATAAGCCTGAGCAGAGTCGTCTTTCCGCATCCCGAGGGACCGAC
+TATGGATATAAGCTCGCCCTTCTGGATCTCCAGAGAGAAATCCTCCAGGGCCACGGTCTCCTGTTCATCGGTCTTGTAGACCTTCCTCAGATGATTGATA
+ACGATCTTCTCGCTCATTCTATCCCCATCCTTCTTGTTATGACCTTGTGCAGATAGTCGGCGAGGCTGGTCGTCAGTATTCCGAGGATTGCGATTATGAC
+TATGCCCGCGTAGACGTTGGGCCAGTACCCCATCTGCGCCTGTATGCTGATGAAGTATCCGACGCCTCCTCCGAACGATGCGTACAGCTCGGAGGCAACT
+ATGCACATCCACCCGACCCCCATGCCTATGCGGAGGCCGTTCATTATGTATGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+>contig_14302
+GATGTGGAATGCGGCCAAATGCCGCCTCGGTTTCAGCGGGGCAAGGCGTTCCATGTCCACGACCGCTGGAATACTGGTGAACGTTTTCATTACTTCTCTG
+GAGATAGCCGAACGTTCTCAGCCTGCGCTCGAAAGCAGGAACTACAACGGCAGTTTCCCCATTTATAGGATGCCCAACAAGATCGGTCTCTACTGGTTCG
+CCATCACGGCCGCCGCGTGCGCCTCGCTATACATCATCGGATACTACATCTCGACCCCGAACATGGCGGCAATACTCCTGGGGGCGGTCTGACGGTCTGC
+GACCTTCGTACGGATAAAGTGAGCTACAGGTACGGCAATTCGAATTACAATGCTATCGATTCTGTGGATTTCACAGCCTCCCATGGCAGAAGGACCGTCA
+TCCTGGGAGAGAACGGGTGCGGCAAGTCGACGCTGATATATCAGCTCAACGGAGTATACAAGCCTGTTTCCGGTACTGTGTTCTATGGAGATATGCCCAT
+ATCATACGACAAGGAGTTCCTTACGGAGCTGCGTTCCGACGTTTCCGTTGTTCTGCAGAATCCAGACGATCAGATTTTCTCTTCCACCGTCGAGGAGGAC
+GTGGCATTTGGACCGTTAAACTTGGGCCTTTCCCGGGAAGAGGTTGGAGAAAGAATCGGCCGGTCTTTGGAATGCGTGGGGATGTCGGGATTTGCCGAAG
+TGCCTGTTCAGCGCCTTTCATACGGTCAGAAAAAAAGAGTGTCACTCGCAGGCGCCCTAGCATCACATCCAAAGATACTGGTCCTGGACGAGCCTACCGC
+AGGCCTGGACCCGCAGATGTCCAGAGAGGTGATGGAGATCGCAAATTCTCTCATCAGAGAAGGGATCTCCGTCGTAGTATCCACCCATGACGTGAATCTG
+ATCTACAATTGGGTCGAGGACCTTTACGTGATGCGGAACGGACACATGGTCTTCTCCGGAGATGCGGACGAGTTCTTCTCCGACCGTCCGTCCGTTTATC
+TTTCAGGTCTGGAACAGCCCTCGATATTCAGCATAAACCACAATATGGAGACGTTAAGAGGGACGATTCCCGCGTCATATCCCAAGACCATGAGCCAGAT
+GGTCAGCAGATTATTCCCTTCAGGATCCTCGGCCGGAAGGATATTCATCTATCAGACTGAAGGCGAGCGCATCGACCAGGATGCGATCGAGGAGGCCGTG
+GGAAAGAAAGGGATGCCCATTGCAGTATACGGCCCCTCCGCGCGCAGGTCGGTGACCCGATCGAAGCTCAGGGTTGATTTCTATTTTAACGGCATAGAGT
+GTTGCATCAGGGAGGCCATGGTAAACCATGATTCCCTGATAATAGTCGACCGGGGCTTGAAGGGGATCGTCACGGAGGCGATTGAAGAGCTTAGGGCATA
+CGGAACCCGGATCAGTATCAGGGAGTTGGTTTTTTGAGCGCTCCCCTTTTCCGCACCGAAGGTCTTTTCTTCAGATACGAAGGCGGCCGGGGGGACGCGT
+TGGCAGACGTGAACATCACGATCAAAGAGGGTGCCAGAACTGTCATCATGGGAGCCAACGGAGCTGGAAAATCCACGTTCTTCTATCATCTTAACGGAGT
+CTTGAGGCCGTCGAAGGGCTCGGTGTTTTTCCGGGGAGAAAAAATACCGCACAGGGGAAAAGCTCTCAGGAAGCTGCGCTCGGAGGTCGCGGTGATGCTC
+CAAGACCCCAACGACCAGCTTTTTGCACCAAAAGTATCTGACGACATAGCATTCGGCCCGAAGAACCTGGGACTCGACGCTCAGACTGTAGGGGAGAGGG
+TCAGGGACGCCCTCTACATCACAGGCATCGAATCTCTGGAGGGTCGCAGCGTGATGCAGCTGTCGTTCGGCCAGAAGAAGAGGGTGGTGCTGGCCGGTGC
+CTTGGCGATGCATCCGAAGGTGCTTATAATGGACGAGCCCACCGCAGGTCTCGATCCCCAGATGTCCAAGGAGCTCATCGAGCTCGCGGACGAGCTGCAC
+CATCTTGGAACGACCGTTATTTTTTCAACCCATGACGTGGACCTCTCATATTCTTGGGCGGACGAGGTCCATGTCCTAAGAGGGGGCCGTAATGTATATT
+CGGGGAGCTCAGAAAGATTCTATGACGATACTTCGGAAGTTTATCTTTCGGGCCTTGTCGAACCGGCCATGTACGACATCAACGTCAGCATCTCCGAGCT
+TGCCGGATGCCCCGTTGAACCGTTTCCCAAAACCCTGCCTCAGCTTGTGGCCAAGGCAGTGCCGTCAGAGGGGCCGGGCACGGTTCACATCCTTCCCGTG
+GAAGGTCCGGTCGACCGGGAGCTGTTCTCCTCTCTGACGTCCGGGTCCGGGATGTCCGCAACAGGCGTCTACGGTACTAATGCAAGAAAATCTGCGGAGG
+CTTCCAAATTGCCGATAGATTATTTCTTCGGGGCCGACGAGGGATGCATAATAGAGGCTTTGCACGGCAAAGACACGCTGATATGCTGCGACAGGTCCCT
+TACAGATCTGCTGATATCGAAGATAGGCAGTATGTCCCGGTTCGGGACAGAGGTCCCTTATTCTCTGCACTGAACATTTCTTTTTTCCGGGGGTTCGAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.names.txt	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,4 @@
+# contig	classification	reason	lineage	lineage scores	superkingdom	phylum	class	order	family	genus	species
+contig_38063	classified	based on 1/2 ORFs	1;131567;2;1783272;1239;91061;1385;186820;1637;1639	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Firmicutes: 1.00	Bacilli: 1.00	Bacillales: 1.00	Listeriaceae: 1.00	Listeria: 1.00	Listeria monocytogenes: 1.00
+contig_44250	classified	based on 1/2 ORFs	1;131567;2;1224;1236;135623;641;662;666	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Proteobacteria: 1.00	Gammaproteobacteria: 1.00	Vibrionales: 1.00	Vibrionaceae: 1.00	Vibrio: 1.00	Vibrio cholerae: 1.00
+contig_9952	classified	based on 1/5 ORFs	1;131567;2;1783272;1239;91061*	1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Firmicutes: 1.00	Bacilli*: 1.00	not classified	not classified	not classified	not classified
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.txt	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,4 @@
+# contig	classification	reason	lineage	lineage scores
+contig_38063	classified	based on 1/2 ORFs	1;131567;2;1783272;1239;91061;1385;186820;1637;1639	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_44250	classified	based on 1/2 ORFs	1;131567;2;1224;1236;135623;641;662;666	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_9952	classified	based on 1/5 ORFs	1;131567;2;1783272;1239;91061*	1.00;1.00;1.00;1.00;1.00;1.00
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/cat_database.loc.sample	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value	name	database_folder	taxonomy_folder
+#2019-07-19_CAT_database	2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases: the cat_database table is read from tool-data/cat_database.loc; each entry supplies the columns value, name, database_folder and taxonomy_folder, and "#" is the comment character -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="tool-data/cat_database.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases (test configuration): the cat_database table is read from test-data/cached_locally/cat_database.loc; NOTE(review): ${__HERE__} is presumably expanded to this file's directory by the test harness, confirm -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="${__HERE__}/test-data/cached_locally/cat_database.loc" />
+    </table>
+</tables>