changeset 0:11a61934bfb3 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author iuc
date Tue, 10 Dec 2019 16:05:34 -0500
parents
children c6c77d840993
files cat_contigs.xml macros.xml tabpad.py test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz test-data/cached_locally/cat_database.loc test-data/contigs.fasta test-data/genome2.fna test-data/genome3.fna test-data/test_contig.contig2classification.names.txt test-data/test_contig.contig2classification.txt tool-data/cat_database.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 19 files changed, 1095 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_contigs.xml	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,92 @@
+<tool id="cat_contigs" name="CAT contigs" version="@VERSION@.0">
+    <description>annotate with taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!-- Command: run "CAT contigs" on the selected fasta. Every @NAME@ placeholder is a token
+         defined in macros.xml. Once CAT succeeds, @TXT2TSV@ (tabpad.py) is applied to the
+         ORF2LCA and contig2classification .txt files to write TAB-padded .tsv copies;
+         @ADD_NAMES@ and @SUMMARISE@ append further steps only when those options are selected. -->
+    <command detect_errors="exit_code"><![CDATA[
+    CAT contigs 
+    -c '$contigs_fasta'
+    @CAT_DB@
+    @USE_INTERMEDIATES@
+    @CUSTOM_SETTINGS@
+    @DIAMOND_OPTIONS@
+    && @TXT2TSV@ *.ORF2LCA.txt *.contig2classification.txt
+    @ADD_NAMES@
+    @SUMMARISE@
+    ]]></command>
+    <inputs>
+        <!-- Contigs to classify; handed to CAT through the -c option. -->
+        <param name="contigs_fasta" label="Contigs fasta" type="data" format="fasta" />
+        <!-- Parameter groups shared through macros.xml. -->
+        <expand macro="cat_db"/>
+        <expand macro="use_intermediates"/>
+        <expand macro="custom_settings"/>
+        <expand macro="diamond_options"/>
+        <expand macro="add_names"/>
+        <expand macro="summarise"/>
+        <expand macro="select_cat_outputs"/>
+    </inputs>
+    <outputs>
+        <!-- Every dataset is declared by the shared "outputs" macro in macros.xml. -->
+        <expand macro="outputs"/>
+    </outputs>
+    <tests>
+        <!-- add_names and summarise are left unset; only the contig classification table is requested. -->
+        <test>
+            <param name="contigs_fasta" value="contigs.fasta" ftype="fasta" />
+            <expand macro="test_catdb" />
+            <param name="select_outputs" value="contig2classification" />
+            <output name="contig2classification">
+                <assert_contents>
+                    <has_text text="contig_38063"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- add_names for both tables plus the classification summary. -->
+        <test>
+            <param name="contigs_fasta" value="contigs.fasta" ftype="fasta" />
+            <expand macro="test_catdb" />
+            <param name="select_outputs" value="predicted_proteins_faa" />
+            <conditional name="names">
+                <param name="add_names" value="both" />
+            </conditional>
+            <param name="summarise" value="classification" />
+            <output name="predicted_proteins_faa">
+                <assert_contents>
+                    <has_text text=">contig_9952"/>
+                </assert_contents>
+            </output>
+            <output name="orf2lca_names">
+                <assert_contents>
+                    <has_text text="Listeria"/>
+                </assert_contents>
+            </output>
+            <output name="classification_summary">
+                <assert_contents>
+                    <has_text text="Listeria"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**CAT contigs**
+
+Classify metagenomic contigs.
+
+@COMMON_HELP@
+
+@OUTPUTS_HELP@
+
+
+**CAT contigs**
+
+Run Contig Annotation Tool (CAT).
+
+Required arguments:
+  -c, --contigs_fasta    Path to contigs fasta file.
+  -d, --database_folder  Path to folder that contains database files.
+  -t, --taxonomy_folder  Path to folder that contains taxonomy files.
+
+
+@OPTIONS_HELP@
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,389 @@
+<macros>
+    <!-- CAT release number: pins the "cat" package requirement and prefixes the tool version. -->
+    <token name="@VERSION@">5.0.3</token>
+    <!-- Tool dependencies: the "cat" package pinned to @VERSION@; callers may add more through yield. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement version="@VERSION@" type="package">cat</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <!-- Version probe: runs CAT with its version flag. -->
+    <xml name="version_command">
+        <version_command>CAT --version</version_command>
+    </xml>
+    <!-- Sub-directory names joined onto a history dataset's extra_files_path to locate the
+         CAT database and taxonomy folders (used by @CAT_DB@ and @CAT_TAXONOMY@). -->
+    <token name="@DATABASE_FOLDER@">CAT_database</token>
+    <token name="@TAXONOMY_FOLDER@">taxonomy</token>
+    <!-- Database selector: either an entry of the cat_database data table or a history
+         dataset labelled as coming from the CAT prepare tool. -->
+    <xml name="cat_db">
+        <conditional name="db">
+            <param name="db_src" type="select"
+                   label="CAT database (--database_folder,--taxonomy_folder) from">
+                <option value="cached">local cached database</option>
+                <option value="history">history</option>
+            </param>
+            <when value="cached">
+                <param name="cat_builtin" type="select"
+                       label="Use a built-in CAT database"
+                       help="If the CAT database of interest is not listed, contact your Galaxy administrator">
+                    <options from_data_table="cat_database">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No CAT database is available."/>
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="cat_db" type="data" format="txt"
+                       label="A history dataset from CAT prepare tool"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Emits the database_folder and taxonomy_folder options. For db_src 'cached' the paths
+         come from the selected data table entry; otherwise they are the @DATABASE_FOLDER@ and
+         @TAXONOMY_FOLDER@ sub-directories of the history dataset's extra_files_path. -->
+    <token name="@CAT_DB@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --database_folder '$db.cat_builtin.fields.database_folder'
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Same source selection as @CAT_DB@, but emits only the taxonomy_folder option
+         (used by the add_names steps in @ADD_NAMES@ and @SUMMARISE@). -->
+    <token name="@CAT_TAXONOMY@"><![CDATA[
+        #if $db.db_src == 'cached':
+        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
+        #else
+        #import os.path
+        #set $catdb = $db.cat_db.extra_files_path
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+        #end if
+]]></token>
+    <!-- Test helper: selects the cached database entry named "CAT_prepare_test". -->
+    <xml name="test_catdb">
+        <conditional name="db">
+            <param name="db_src" value="cached" />
+            <param name="cat_builtin" value="CAT_prepare_test" />
+        </conditional>
+    </xml>
+    <!-- Optionally reuse the predicted proteins and DIAMOND alignment of an earlier CAT run. -->
+    <xml name="use_intermediates">
+        <conditional name="previous">
+            <param name="use_previous" type="select"
+                   label="Use previous prodigal gene prediction and diamond alignment">
+                <help>predicted_proteins.faa and alignment.diamond from previous CAT run.</help>
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param argument="--proteins_fasta" type="data" format="fasta"
+                       label="prodigal predicted proteins fasta"/>
+                <param argument="--diamond_alignment" type="data" format="tabular"
+                       label="alignment.diamond file"/>
+            </when>
+            <when value="no"/>
+        </conditional>
+    </xml>
+    <!-- Passes the earlier proteins fasta and DIAMOND alignment when use_previous is 'yes',
+         and always sets the output prefix to 'cat_output'; the from_work_dir paths of the
+         "outputs" macro start with that prefix. -->
+    <token name="@USE_INTERMEDIATES@"><![CDATA[
+      #if $previous.use_previous == 'yes'
+      --proteins_fasta '$previous.proteins_fasta'
+      --diamond_alignment '$previous.diamond_alignment'
+      #end if
+      --out_prefix 'cat_output'
+]]></token>
+    <!-- Classification thresholds; the bounds mirror the [0-49] and [0-0.99] ranges quoted in the help. -->
+    <xml name="custom_settings">
+        <param argument="--range" type="integer" min="0" max="49" value="10" label="range" />
+        <param argument="--fraction" type="float" min="0" max="0.99" value="0.5" label="fraction" />
+    </xml>
+    <!-- Always passes the range and fraction parameters declared by the custom_settings macro. -->
+    <token name="@CUSTOM_SETTINGS@"><![CDATA[
+      --range '$range'
+      --fraction '$fraction'
+]]></token>
+    <!-- Advanced DIAMOND settings, shown only when the user opts in. -->
+    <xml name="diamond_options">
+        <conditional name="diamond">
+            <param name="set_diamond_opts" type="select" label="Set advanced diamond options">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param argument="--sensitive" type="boolean"
+                       truevalue="--sensitive" falsevalue="" checked="false"
+                       label="Run DIAMOND in sensitive mode (considerably slower)"/>
+                <param argument="--block_size" type="float"
+                       value="2.0" min="1" max="10"
+                       label="DIAMOND block-size parameter."
+                       help="lower will decrease memory and temporary disk space usage, higher will increase performance."/>
+                <param argument="--index_chunks" type="integer"
+                       value="4" min="1" max="10"
+                       label="DIAMOND index-chunks parameter"
+                       help="Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage."/>
+                <param argument="--top" type="integer"
+                       value="50" min="1" max="50"
+                       label="DIAMOND top parameter"
+                       help="Governs hits within range of best hit that are written to the alignment file. This implies you know what you are doing."/>
+            </when>
+            <when value="no"/>
+        </conditional>
+    </xml>
+    <!-- Emitted only when advanced DIAMOND options are enabled. The top option, together with
+         the I_know_what_Im_doing flag, is passed only when top was lowered below 50. -->
+    <token name="@DIAMOND_OPTIONS@"><![CDATA[
+        #if $diamond.set_diamond_opts == 'yes':
+            $diamond.sensitive
+            --block_size '$diamond.block_size'
+            --index_chunks '$diamond.index_chunks'
+            #if $diamond.top < 50:
+                --I_know_what_Im_doing
+                --top '$diamond.top'
+            #end if
+        #end if
+]]></token>
+
+    <!-- Boolean switches for "CAT add_names"; each one renders as its flag when checked. -->
+    <xml name="add_names_options">
+        <param argument="--only_official" type="boolean"
+               truevalue="--only_official" falsevalue="" checked="true"
+               label="Only output official level names."/>
+        <param argument="--exclude_scores" type="boolean"
+               truevalue="--exclude_scores" falsevalue="" checked="false"
+               label="Exclude bit-score support scores in the lineage."/>
+    </xml>
+    <!-- Renders the two add_names booleans by their unqualified names.
+         NOTE(review): the add_names macro in this file nests these parameters in the "names"
+         conditional and @ADD_NAMES@ uses the $names.-qualified form; this token presumably
+         serves tools that expand add_names_options at top level. Confirm against its users. -->
+    <token name="@ADD_NAMES_OPTIONS@"><![CDATA[
+    $only_official $exclude_scores 
+]]></token>
+    <!-- Chooses which result tables get taxonomic names added; every choice other than
+         "no" exposes the shared add_names_options switches. -->
+    <xml name="add_names">
+        <conditional name="names">
+            <param name="add_names" type="select"
+                   label="CAT add_names for"
+                   help="annotate with taxonomic names.">
+                <option value="no">No</option>
+                <option value="orf2lca">ORF2LCA.names.txt</option>
+                <option value="classification">classification.names.txt</option>
+                <option value="both">ORF2LCA.names.txt and classification.names.txt</option>
+            </param>
+            <when value="no"/>
+            <when value="orf2lca">
+                <expand macro="add_names_options" />
+            </when>
+            <when value="classification">
+                <expand macro="add_names_options" />
+            </when>
+            <when value="both">
+                <expand macro="add_names_options" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Command prefix for the bundled tabpad.py helper. The path is quoted so a tool
+         directory containing spaces survives shell word splitting, and the script is run
+         through python so it does not depend on its executable bit. -->
+    <token name="@TXT2TSV@">python '${__tool_directory__}/tabpad.py'</token>
+    <!-- Optional "CAT add_names" steps. For 'classification'/'both' the contig (CAT) or bin (BAT)
+         classification table is annotated; for 'orf2lca'/'both' the ORF2LCA table is annotated.
+         Each result is TAB-padded by tabpad.py straight into the matching Galaxy output.
+         tabpad.py is called through python with a quoted path so that spaces in the tool
+         directory and a missing executable bit cannot break the command. -->
+    <token name="@ADD_NAMES@"><![CDATA[
+    #if $names.add_names in ['classification','both']:
+        && CAT add_names $names.only_official $names.exclude_scores
+            @CAT_TAXONOMY@
+            #if $bcat == 'CAT'
+                -i 'cat_output.contig2classification.tsv'
+            #else
+                -i 'cat_output.bin2classification.tsv'
+            #end if
+            -o 'classification_names.txt'
+        && python '${__tool_directory__}/tabpad.py' -i 'classification_names.txt' -o '$classification_names'
+    #end if
+    #if $names.add_names in ['orf2lca','both']:
+        && CAT add_names $names.only_official $names.exclude_scores
+            @CAT_TAXONOMY@
+            -i 'cat_output.ORF2LCA.tsv'
+            -o 'orf2lca_names.txt'
+        && python '${__tool_directory__}/tabpad.py' -i 'orf2lca_names.txt' -o '$orf2lca_names'
+    #end if
+]]></token>
+    <!-- Switch for the optional "CAT summarise" report. -->
+    <xml name="summarise">
+        <param name="summarise" type="select"
+               label="CAT summarise report"
+               help="Report the number of assignments to each taxonomic name">
+            <option value="no">No</option>
+            <option value="classification">classification.summary.txt</option>
+        </param>
+    </xml>
+    <!-- Optional "CAT summarise" step. It reuses the classification names output when
+         add_names already produced it with only_official set; otherwise it first runs
+         add_names with only_official into a scratch file. The contigs fasta is passed
+         for CAT but not for BAT. The summary is TAB-padded into the Galaxy output by tabpad.py,
+         called through python with a quoted path so that spaces in the tool directory and
+         a missing executable bit cannot break the command. -->
+    <token name="@SUMMARISE@"><![CDATA[
+    #if $summarise in ['classification']:
+        #if $names.add_names in ['classification','both'] and $names.only_official:
+            #set $summary_input = $classification_names
+        #else
+            #set $summary_input = 'classification_offical_names'
+            && CAT add_names --only_official
+                @CAT_TAXONOMY@
+                #if $bcat == 'CAT'
+                    -i 'cat_output.contig2classification.tsv'
+                #else
+                    -i 'cat_output.bin2classification.tsv'
+                #end if
+                -o '$summary_input'
+        #end if
+        && CAT summarise
+            #if $bcat == 'CAT'
+                -c '$contigs_fasta'
+            #end if
+            -i '$summary_input'
+            -o 'classification_summary.txt'
+        && python '${__tool_directory__}/tabpad.py' -i 'classification_summary.txt' -o '$classification_summary'
+    #end if
+]]></token>
+    
+    <!-- Multi-select of result files to keep; the expanding macro adds its own option through yield. -->
+    <xml name="select_outputs">
+        <param name="select_outputs" type="select" multiple="true" optional="false"
+               label="Select outputs">
+            <option value="log" selected="true">log</option>
+            <option value="predicted_proteins_faa" selected="true">Prodigal predicted_proteins.faa</option>
+            <option value="predicted_proteins_gff">Prodigal predicted_proteins.gff</option>
+            <option value="alignment_diamond">Diamond blastp alignment.diamond</option>
+            <option value="orf2lca" selected="true">ORF2LCA.txt (taxonomic assignment per predicted ORF)</option>
+            <yield />
+        </param>
+    </xml>
+    <!-- CAT flavour of the output selector. The hidden values feed the output labels, the
+         column-name metadata and the bcat tests in the command tokens. -->
+    <xml name="select_cat_outputs">
+        <param name="bcat" type="hidden" value="CAT" />
+        <param name="seqtype" type="hidden" value="contig" />
+        <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions" />
+        <param name="bin_col" type="hidden" value="" />
+        <expand macro="select_outputs">
+            <option value="contig2classification" selected="true">contig2classification.txt (taxonomic assignment per contig)</option>
+        </expand>
+    </xml>
+    <!-- BAT flavour of the output selector; same hidden parameters as the CAT flavour
+         with bin-oriented values. -->
+    <xml name="select_bat_outputs">
+        <param name="bcat" type="hidden" value="BAT" />
+        <param name="seqtype" type="hidden" value="bin" />
+        <param name="sum_titles" type="hidden" value="bins" />
+        <param name="bin_col" type="hidden" value="bin," />
+        <expand macro="select_outputs">
+            <option value="bin2classification" selected="true">bin2classification.txt (taxonomic assignment per metagenome assembly)</option>
+        </expand>
+    </xml>
+    <!-- Output datasets shared by the CAT and BAT tools. Files written by CAT itself are
+         collected from the working directory under the 'cat_output' prefix; the names and
+         summary datasets carry no from_work_dir because the command writes them directly. -->
+    <xml name="outputs">
+        <data name="log" format="txt" from_work_dir="cat_output.log" label="${bcat}.log">
+            <filter>'log' in select_outputs or not select_outputs</filter>
+        </data>
+        <!-- Prodigal and DIAMOND intermediates exist only when they were not supplied as inputs. -->
+        <data name="predicted_proteins_faa" format="fasta"
+              from_work_dir="cat_output.predicted_proteins.faa"
+              label="${bcat}.predicted_proteins.faa">
+            <filter>'predicted_proteins_faa' in select_outputs and previous['use_previous'] == 'no'</filter>
+        </data>
+        <data name="predicted_proteins_gff" format="gff"
+              from_work_dir="cat_output.predicted_proteins.gff"
+              label="${bcat}.predicted_proteins.gff">
+            <filter>'predicted_proteins_gff' in select_outputs and previous['use_previous'] == 'no'</filter>
+        </data>
+        <data name="alignment_diamond" format="tabular"
+              from_work_dir="cat_output.alignment.diamond"
+              label="${bcat}.alignment.diamond">
+            <filter>'alignment_diamond' in select_outputs and previous['use_previous'] == 'no'</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
+            </actions>
+        </data>
+        <!-- Classification tables: the TAB-padded .tsv copies written by tabpad.py. -->
+        <data name="orf2lca" format="tabular"
+              from_work_dir="cat_output.ORF2LCA.tsv"
+              label="${bcat}.ORF2LCA.txt">
+            <filter>'orf2lca' in select_outputs</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="ORF,${bin_col}lineage,bit-score"/>
+            </actions>
+        </data>
+        <data name="contig2classification" format="tabular"
+              from_work_dir="cat_output.contig2classification.tsv"
+              label="${bcat}.contig2classification.txt">
+            <filter>'contig2classification' in select_outputs</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="contig,classification,reason,lineage,lineage scores"/>
+            </actions>
+        </data>
+        <data name="bin2classification" format="tabular"
+              from_work_dir="cat_output.bin2classification.tsv"
+              label="${bcat}.bin2classification.txt">
+            <filter>'bin2classification' in select_outputs</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="bin,classification,reason,lineage,lineage scores"/>
+            </actions>
+        </data>
+        <!-- Optional add_names and summarise results. -->
+        <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt">
+            <filter>names['add_names'] in ['both','orf2lca']</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species"/>
+            </actions>
+        </data>
+        <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt">
+            <filter>names['add_names'] in ['both','classification']</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="1"/>
+                <action type="metadata" name="column_names" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species"/>
+            </actions>
+        </data>
+        <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt">
+            <filter>'classification' in summarise</filter>
+            <actions>
+                <action type="metadata" name="comment_lines" default="4"/>
+                <action type="metadata" name="column_names" default="rank,clade,number of ${sum_titles}"/>
+            </actions>
+        </data>
+    </xml>
+    <token name="@COMMON_HELP@"><![CDATA[
+The Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) workflows are described at: https://github.com/dutilh/CAT
+
+  - CAT contigs/CAT bins - runs Prodigal_ prokaryotic protein prediction on the fasta input. 
+  - CAT contigs/CAT bins - runs Diamond_ to align predicted proteins to the reference proteins in the CAT database.
+  - CAT contigs/CAT bins - assigns taxonomic classification to fasta entries and ORFs based on alignments. 
+  - CAT add_names - annotates outputs with taxonomic names. 
+  - CAT summarise - reports the number of assignments to each taxonomic name.
+
+A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool.
+
+.. _Prodigal: https://github.com/hyattpd/Prodigal
+.. _Diamond: https://github.com/bbuchfink/diamond
+
+]]></token>
+    <token name="@OUTPUTS_HELP@"><![CDATA[
+
+**OUTPUTS**
+
+Any of the files produced by the CAT workflow are available as outputs
+  - Prodigal
+
+    - predicted_proteins.faa
+    - predicted_proteins.gff
+
+  - Diamond
+
+    - alignment.diamond
+
+  - CAT contigs/bins
+
+    - contigs/bin2classification.txt
+    - ORF2LCA.txt
+
+  - CAT add_names (optional)
+
+    - contigs/bin2classification.names.txt
+    - ORF2LCA.names.txt
+
+  - CAT summarise (optional)
+
+    - contigs/bin2classification.summary.txt
+
+
+]]></token>
+ 
+    <token name="@OPTIONS_HELP@"><![CDATA[
+
+Optional arguments:
+  -r, --range               cut-off range after alignment [0-49] (default: 10).
+  -f, --fraction            fraction of bit-score support for each classification
+                            [0-0.99] (default: 0.5).
+  -p, --proteins_fasta
+                            Path to predicted proteins fasta file. If supplied,
+                            CAT will skip the protein prediction step.
+  -a, --diamond_alignment
+                            Path to DIAMOND alignment table. If supplied, CAT will
+                            skip the DIAMOND alignment step and directly classify
+                            the sequences. A predicted proteins fasta file should
+                            also be supplied with argument [-p / --proteins_fasta].
+
+
+DIAMOND specific optional arguments:
+  --sensitive     Run DIAMOND in sensitive mode (default: not enabled).
+
+  --block_size    DIAMOND block-size parameter (default: 2.0). Lower
+                  numbers will decrease memory and temporary disk space
+                  usage.
+
+  --index_chunks
+                  DIAMOND index-chunks parameter (default: 4). Set to 1
+                  on high memory machines. The parameter has no effect
+                  on temporary disk space usage.
+
+  --top
+                  DIAMOND top parameter [0-50] (default: 50). Governs
+                  hits within range of best hit that are written to the
+                  alignment file. This is not the [-r / --range]
+                  parameter!
+
+
+Setting the DIAMOND --top parameter
+
+You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.
+
+You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this!
+
+If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.
+
+]]></token>
+    <!-- Citations are given as bare DOIs. Galaxy expects the identifier itself in a
+         type="doi" entry and builds the resolver URL on its own, so a full
+         https://doi.org/ URL here cannot be resolved. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/072868</citation>
+            <citation type="doi">10.1186/s13059-019-1817-x</citation>
+            <citation type="doi">10.1038/nmeth.3176</citation>
+            <citation type="doi">10.1186/1471-2105-11-119</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabpad.py	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+
+def padfile(infile, outfile, fieldcnt=None):
+    """Copy ``infile`` to ``outfile``, padding lines with TABs to ``fieldcnt`` fields.
+
+    Data lines (those not starting with ``#`` after leading whitespace) are
+    right-padded with TAB characters until they hold ``fieldcnt`` fields. In a
+    run of consecutive comment lines only the last one is padded (it is treated
+    as the column header); the earlier ones are copied verbatim. Padded lines
+    always end in a single ``\\n``.
+
+    :param infile: path of the text file to read
+    :param outfile: path of the file to write (overwritten)
+    :param fieldcnt: target number of fields; ``None`` or ``0`` disables padding
+    """
+    tabs = '\t' * fieldcnt if fieldcnt is not None else None
+
+    def pad_line(out, txtline):
+        line = txtline.rstrip('\r\n')
+        fields = line.split('\t')
+        # Without a usable field count the line is its own reference: no padding.
+        padding = tabs if tabs else '\t' * len(fields)
+        out.write('%s%s\n' % (line, padding[len(fields):]))
+
+    def flush_comments(out, pending):
+        for comment in pending[:-1]:
+            out.write(comment)
+        if pending:
+            pad_line(out, pending[-1])
+
+    # Both handles are managed by the with-statement so the output file is
+    # closed even when reading or writing fails part-way through.
+    with open(infile, 'r') as fh, open(outfile, 'w') as out:
+        commentlines = []
+        for txtline in fh:
+            if txtline.lstrip().startswith('#'):
+                commentlines.append(txtline)
+                continue
+            flush_comments(out, commentlines)
+            commentlines = []
+            pad_line(out, txtline)
+        # Comment lines at the end of the file (or a comment-only file) used to
+        # be dropped silently; write them out like any other comment run.
+        flush_comments(out, commentlines)
+
+
+def fieldcount(infile):
+    """Return the largest number of TAB-separated fields on any line of ``infile``.
+
+    Line endings are stripped before splitting; an empty file yields 0.
+    """
+    widest = 0
+    with open(infile, 'r') as handle:
+        for raw in handle:
+            columns = raw.rstrip('\r\n').split('\t')
+            if len(columns) > widest:
+                widest = len(columns)
+    return widest
+
+
+def tsvname(infile):
+    """Return the ``.tsv`` name for ``infile``: a trailing ``.txt`` is replaced, else ``.tsv`` is appended."""
+    # The dot is escaped so only a literal ".txt" suffix is removed; the bare
+    # '.' used previously matched any character (e.g. "notes_txt" -> "notes").
+    return re.sub(r'\.txt$', '', infile) + '.tsv'
+
+
+def __main__():
+    """Command-line entry point.
+
+    ``-i FILE`` pads one file into ``-o FILE`` (or into the ``.tsv`` name derived
+    from the input when ``-o`` is omitted); every positional file is padded into
+    its derived ``.tsv`` name. Each file is padded to its own widest line.
+    """
+    parser = argparse.ArgumentParser(description='Pad a file with TABS for equal field size across lines')
+    parser.add_argument('-i', '--input', help='input file')
+    parser.add_argument('-o', '--output', help='output file')
+    parser.add_argument('files', nargs='*', help='.txt files')
+    args = parser.parse_args()
+
+    # Collect (source, target) pairs first: the -i file, then the positional files.
+    jobs = []
+    if args.input:
+        jobs.append((args.input, args.output if args.output else tsvname(args.input)))
+    jobs.extend((path, tsvname(path)) for path in args.files)
+
+    for source, target in jobs:
+        padfile(source, target, fieldcnt=fieldcount(source))
+
+if __name__ == "__main__":
+    __main__()
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,10 @@
+WP_000159554.1	2
+WP_000214552.1	91061
+WP_000346214.1	91061
+WP_000568619.1	666
+WP_000958804.1	1301
+WP_000991933.1	666
+WP_000996146.1	666
+WP_003722398.1	1639
+WP_005378126.1	662
+XP_961517.1	5141
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,17 @@
+91061
+1
+641
+1224
+1236
+131567
+1637
+1639
+1783272
+2
+662
+13562
+13562
+641
+662
+666
+91061
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,252 @@
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
+2	|	Bacteria	|	Bacteria <prokaryotes>	|	scientific name	|
+2	|	Monera	|	Monera <Bacteria>	|	in-part	|
+2	|	Procaryotae	|	Procaryotae <Bacteria>	|	in-part	|
+2	|	Prokaryota	|	Prokaryota <Bacteria>	|	in-part	|
+2	|	Prokaryotae	|	Prokaryotae <Bacteria>	|	in-part	|
+2	|	bacteria	|	bacteria <blast2>	|	blast name	|
+2	|	eubacteria	|		|	genbank common name	|
+2	|	prokaryote	|	prokaryote <Bacteria>	|	in-part	|
+2	|	prokaryotes	|	prokaryotes <Bacteria>	|	in-part	|
+641	|	Vibrionaceae	|		|	scientific name	|
+641	|	Vibrionaceae Veron 1965	|		|	authority	|
+641	|	gamma-3 proteobacteria	|	gamma-3 proteobacteria <#3>	|	in-part	|
+662	|	"Microspira" Schroeter 1886	|		|	authority	|
+662	|	"Pacinia" Trevisan 1885	|		|	authority	|
+662	|	Beneckea	|		|	synonym	|
+662	|	Beneckea Campbell 1957	|		|	authority	|
+662	|	Listonella	|		|	synonym	|
+662	|	Listonella MacDonell and Colwell 1986	|		|	authority	|
+662	|	Microspira	|		|	synonym	|
+662	|	Pacinia	|		|	synonym	|
+662	|	Vibrio	|		|	scientific name	|
+662	|	Vibrio Pacini 1854	|		|	authority	|
+666	|	"Bacillo virgola del Koch" Trevisan 1884	|		|	authority	|
+666	|	"Bacillus cholerae" (Pacini 1854) Trevisan 1884	|		|	authority	|
+666	|	"Bacillus cholerae-asiaticae" Trevisan 1884	|		|	authority	|
+666	|	"Kommabacillus" Koch 1884	|		|	authority	|
+666	|	"Liquidivibrio cholerae" (Pacini 1854) Orla-Jensen 1909	|		|	authority	|
+666	|	"Microspira comma" Schroeter 1886	|		|	authority	|
+666	|	"Pacinia cholerae-asiaticae" (Trevisan 1884) Trevisan 1885	|		|	authority	|
+666	|	"Spirillum cholerae" (Pacini 1854) Mac1889	|		|	authority	|
+666	|	"Spirillum cholerae-asiaticae" (Trevisan 1884) Zopf 1885	|		|	authority	|
+666	|	"Vibrio cholera" (sic) Pacini 1854	|		|	authority	|
+666	|	"Vibrio cholerae-asiaticae" (Trevisan 1884) Pfeiffer 1896	|		|	authority	|
+666	|	"Vibrio comma" (Schroeter 1886) Blanchard 1906	|		|	authority	|
+666	|	ATCC 14035	|	ATCC 14035 <type strain>	|	type material	|
+666	|	ATCC 14547 [[Vibrio albensis]]	|	ATCC 14547 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	Bacillo virgola del Koch	|		|	synonym	|
+666	|	Bacillus cholerae	|		|	synonym	|
+666	|	Bacillus cholerae-asiaticae	|		|	synonym	|
+666	|	CCUG 48664 [[Vibrio albensis]]	|	CCUG 48664 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	CCUG 9118 A	|	CCUG 9118 A <type strain>	|	type material	|
+666	|	CECT 514	|	CECT 514 <type strain>	|	type material	|
+666	|	CIP 62.13	|	CIP 62.13 <type strain>	|	type material	|
+666	|	Kommabacillus	|		|	synonym	|
+666	|	LMG 4406 [[Vibrio albensis]]	|	LMG 4406 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	LMG:4406 [[Vibrio albensis]]	|	LMG:4406 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	Liquidivibrio cholerae	|		|	synonym	|
+666	|	Microspira comma	|		|	synonym	|
+666	|	NCIMB 41 [[Vibrio albensis]]	|	NCIMB 41 [[Vibrio albensis]] <type strain>	|	type material	|
+666	|	NCTC 8021	|	NCTC 8021 <type strain>	|	type material	|
+666	|	Pacinia cholerae-asiaticae	|		|	synonym	|
+666	|	Spirillum cholerae	|		|	synonym	|
+666	|	Spirillum cholerae-asiaticae	|		|	synonym	|
+666	|	Vibrio albensis	|		|	synonym	|
+666	|	Vibrio albensis Lehmann and Neumann 1896	|		|	authority	|
+666	|	Vibrio cholera	|		|	synonym	|
+666	|	Vibrio cholerae	|		|	scientific name	|
+666	|	Vibrio cholerae Pacini 1854	|		|	authority	|
+666	|	Vibrio cholerae biovar albensis	|		|	synonym	|
+666	|	Vibrio cholerae bv. albensis	|		|	synonym	|
+666	|	Vibrio cholerae-asiaticae	|		|	synonym	|
+666	|	Vibrio comma	|		|	synonym	|
+1224	|	Alphaproteobacteraeota	|		|	synonym	|
+1224	|	Alphaproteobacteraeota Oren et al. 2015	|		|	authority	|
+1224	|	Alphaproteobacteriota	|		|	synonym	|
+1224	|	Proteobacteria	|		|	scientific name	|
+1224	|	Proteobacteria Garrity et al. 2005	|		|	authority	|
+1224	|	Proteobacteria [class] Stackebrandt et al. 1988	|		|	authority	|
+1224	|	proteobacteria	|	proteobacteria <blast1224>	|	blast name	|
+1224	|	purple bacteria	|		|	common name	|
+1224	|	purple bacteria and relatives	|		|	common name	|
+1224	|	purple non-sulfur bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria and relatives	|		|	common name	|
+1236	|	Gammaproteobacteria	|		|	scientific name	|
+1236	|	Gammaproteobacteria Garrity et al. 2005 emend. Williams and Kelly 2013	|		|	authority	|
+1236	|	Proteobacteria gamma subdivision	|		|	synonym	|
+1236	|	Purple bacteria, gamma subdivision	|		|	synonym	|
+1236	|	g-proteobacteria	|		|	blast name	|
+1236	|	gamma proteobacteria	|		|	synonym	|
+1236	|	gamma subdivision	|		|	synonym	|
+1236	|	gamma subgroup	|		|	synonym	|
+1239	|	Bacillaeota	|		|	synonym	|
+1239	|	Bacillaeota Oren et al. 2015	|		|	authority	|
+1239	|	Bacillota	|		|	synonym	|
+1239	|	Bacillus/Clostridium group	|		|	synonym	|
+1239	|	Clostridium group firmicutes	|		|	synonym	|
+1239	|	Firmacutes	|		|	synonym	|
+1239	|	Firmicutes	|		|	scientific name	|
+1239	|	Firmicutes corrig. Gibbons and Murray 1978	|		|	authority	|
+1239	|	Low G+C firmicutes	|		|	synonym	|
+1239	|	clostridial firmicutes	|		|	synonym	|
+1239	|	firmicutes	|	firmicutes <blast1239>	|	blast name	|
+1239	|	low G+C Gram-positive bacteria	|		|	common name	|
+1239	|	low GC Gram+	|		|	common name	|
+1385	|	Bacillales	|		|	scientific name	|
+1385	|	Bacillales Prevot 1953	|		|	authority	|
+1385	|	Bacillus/Staphylococcus group	|		|	synonym	|
+1637	|	"Listerella" Pirie 1927 (nom. rej. Opin. 14)	|		|	authority	|
+1637	|	Listerella	|		|	synonym	|
+1637	|	Listeria	|		|	scientific name	|
+1637	|	Listeria Pirie 1940	|		|	authority	|
+1639	|	"Bacterium monocytogenes hominis" Nyfeldt 1932	|		|	authority	|
+1639	|	"Bacterium monocytogenes" Murray et al. 1926	|		|	authority	|
+1639	|	"Corynebacterium infantisepticum" Potel 1950	|		|	authority	|
+1639	|	"Corynebacterium parvulum" Schultz et al. 1934	|		|	authority	|
+1639	|	"Erysipelothrix monocytogenes" (Murray et al. 1926) Wilson and Miles 1946	|		|	authority	|
+1639	|	"Listerella hepatolytica" Pirie 1927	|		|	authority	|
+1639	|	ATCC 15313	|	ATCC 15313 <type strain>	|	type material	|
+1639	|	Bacterium monocytogenes	|		|	synonym	|
+1639	|	Bacterium monocytogenes hominis	|		|	synonym	|
+1639	|	CCUG 15526	|	CCUG 15526 <type strain>	|	type material	|
+1639	|	CIP 82.110	|	CIP 82.110 <type strain>	|	type material	|
+1639	|	Corynebacterium infantisepticum	|		|	synonym	|
+1639	|	Corynebacterium parvulum	|		|	synonym	|
+1639	|	DSM 20600	|	DSM 20600 <type strain>	|	type material	|
+1639	|	Erysipelothrix monocytogenes	|		|	synonym	|
+1639	|	Listerella hepatolytica	|		|	synonym	|
+1639	|	Listeria monocytogenes	|		|	scientific name	|
+1639	|	Listeria monocytogenes (Murray et al. 1926) Pirie 1940	|		|	authority	|
+1639	|	Listeria sp. FDA00013359	|		|	includes	|
+1639	|	Listeria sp. FDA00013360	|		|	includes	|
+1639	|	Listeria sp. FDA00013361	|		|	includes	|
+1639	|	Listeria sp. FDA00013362	|		|	includes	|
+1639	|	Listeria sp. FDA00013363	|		|	includes	|
+1639	|	Listeria sp. FDA00013364	|		|	includes	|
+1639	|	Listeria sp. FDA00013365	|		|	includes	|
+1639	|	Listeria sp. FDA00013366	|		|	includes	|
+1639	|	Listeria sp. FDA00013367	|		|	includes	|
+1639	|	Listeria sp. FDA00013503	|		|	includes	|
+1639	|	Listeria sp. FDA00013504	|		|	includes	|
+1639	|	Listeria sp. FDA00013505	|		|	includes	|
+1639	|	Listeria sp. FDA00013506	|		|	includes	|
+1639	|	Listeria sp. FDA00013507	|		|	includes	|
+1639	|	Listeria sp. FDA00013508	|		|	includes	|
+1639	|	Listeria sp. FDA00013509	|		|	includes	|
+1639	|	Listeria sp. FDA00013510	|		|	includes	|
+1639	|	Listeria sp. FDA00013511	|		|	includes	|
+1639	|	Listeria sp. FDA00013512	|		|	includes	|
+1639	|	Listeria sp. FDA00013536	|		|	includes	|
+1639	|	Listeria sp. FDA00013537	|		|	includes	|
+1639	|	Listeria sp. FDA00013538	|		|	includes	|
+1639	|	Listeria sp. FDA00013539	|		|	includes	|
+1639	|	Listeria sp. FDA00013540	|		|	includes	|
+1639	|	Listeria sp. FDA00013541	|		|	includes	|
+1639	|	Listeria sp. FDA00013542	|		|	includes	|
+1639	|	Listeria sp. FDA00013543	|		|	includes	|
+1639	|	Listeria sp. FDA00013544	|		|	includes	|
+1639	|	Listeria sp. FDA00013545	|		|	includes	|
+1639	|	Listeria sp. FDA00013546	|		|	includes	|
+1639	|	Listeria sp. FDA00013547	|		|	includes	|
+1639	|	Listeria sp. FDA00013548	|		|	includes	|
+1639	|	Listeria sp. FDA00013549	|		|	includes	|
+1639	|	Listeria sp. FDA00013550	|		|	includes	|
+1639	|	Listeria sp. FDA00013551	|		|	includes	|
+1639	|	Listeria sp. FDA00013552	|		|	includes	|
+1639	|	Listeria sp. FDA00013553	|		|	includes	|
+1639	|	Listeria sp. FDA00013554	|		|	includes	|
+1639	|	Listeria sp. FDA00013555	|		|	includes	|
+1639	|	Listeria sp. FDA00013556	|		|	includes	|
+1639	|	Listeria sp. FDA00013557	|		|	includes	|
+1639	|	Listeria sp. FDA00013558	|		|	includes	|
+1639	|	Listeria sp. FDA00013559	|		|	includes	|
+1639	|	Listeria sp. FDA00013560	|		|	includes	|
+1639	|	Listeria sp. FDA00013561	|		|	includes	|
+1639	|	Listeria sp. FDA00013562	|		|	includes	|
+1639	|	Listeria sp. FDA00013563	|		|	includes	|
+1639	|	Listeria sp. FDA00013564	|		|	includes	|
+1639	|	Listeria sp. FDA00013565	|		|	includes	|
+1639	|	Listeria sp. FDA00013566	|		|	includes	|
+1639	|	Listeria sp. FDA00013567	|		|	includes	|
+1639	|	Listeria sp. FDA00013568	|		|	includes	|
+1639	|	Listeria sp. FDA00013570	|		|	includes	|
+1639	|	Listeria sp. FDA00013571	|		|	includes	|
+1639	|	Listeria sp. FDA00013572	|		|	includes	|
+1639	|	Listeria sp. FDA00013573	|		|	includes	|
+1639	|	Listeria sp. FDA00013574	|		|	includes	|
+1639	|	Listeria sp. FDA00013575	|		|	includes	|
+1639	|	Listeria sp. FDA00013576	|		|	includes	|
+1639	|	Listeria sp. FDA00013577	|		|	includes	|
+1639	|	Listeria sp. FDA00013578	|		|	includes	|
+1639	|	Listeria sp. FDA00013579	|		|	includes	|
+1639	|	Listeria sp. FDA00013607	|		|	includes	|
+1639	|	NCTC 10357	|	NCTC 10357 <type strain>	|	type material	|
+1639	|	SLCC 53	|	SLCC 53 <type strain>	|	type material	|
+2157	|	"Archaea" Woese et al. 1990	|		|	authority	|
+2157	|	"Archaebacteria" (sic) Woese and Fox 1977	|		|	authority	|
+2157	|	Archaea	|		|	scientific name	|
+2157	|	Archaebacteria	|		|	synonym	|
+2157	|	Mendosicutes	|		|	synonym	|
+2157	|	Metabacteria	|		|	synonym	|
+2157	|	Monera	|	Monera <Archaea>	|	in-part	|
+2157	|	Procaryotae	|	Procaryotae <Archaea>	|	in-part	|
+2157	|	Prokaryota	|	Prokaryota <Archaea>	|	in-part	|
+2157	|	Prokaryotae	|	Prokaryotae <Archaea>	|	in-part	|
+2157	|	archaea	|	archaea <blast2157>	|	blast name	|
+2157	|	prokaryote	|	prokaryote <Archaea>	|	in-part	|
+2157	|	prokaryotes	|	prokaryotes <Archaea>	|	in-part	|
+2158	|	Methanobacteriales	|		|	scientific name	|
+2158	|	Methanobacteriales Balch and Wolfe 1981	|		|	authority	|
+2159	|	Methanobacteriaceae	|		|	scientific name	|
+2159	|	Methanobacteriaceae Barker 1956	|		|	authority	|
+2172	|	Methanobrevibacter	|		|	scientific name	|
+2172	|	Methanobrevibacter Balch and Wolfe 1981	|		|	authority	|
+28890	|	"Euryarchaeota" Woese et al. 1990	|		|	authority	|
+28890	|	Euryarchaeota	|		|	scientific name	|
+28890	|	Euryarchaeota Garrity and Holt 2002	|		|	authority	|
+28890	|	Methanobacteraeota	|		|	synonym	|
+28890	|	Methanobacteraeota Oren et al. 2015	|		|	authority	|
+28890	|	Methanobacteriota	|		|	synonym	|
+28890	|	euryarchaeotes	|	euryarchaeotes <blast28890>	|	blast name	|
+83816	|	ATCC 35063	|	ATCC 35063 <type strain>	|	type material	|
+83816	|	DSM 1093	|	DSM 1093 <type strain>	|	type material	|
+83816	|	JCM 13430	|	JCM 13430 <type strain>	|	type material	|
+83816	|	Methanobacterium ruminantium	|		|	synonym	|
+83816	|	Methanobacterium ruminantium Smith and Hungate 1958 (Approved Lists 1980)	|		|	authority	|
+83816	|	Methanobrevibacter ruminantium	|		|	scientific name	|
+83816	|	Methanobrevibacter ruminantium (Smith and Hungate 1958) Balch and Wolfe 1981	|		|	authority	|
+83816	|	OCM 146	|	OCM 146 <type strain>	|	type material	|
+83816	|	strain M1	|	strain M1 <type strain> <taxid 83816>	|	type material	|
+91061	|	Bacilli	|		|	scientific name	|
+91061	|	Bacilli Ludwig et al. 2010	|		|	authority	|
+91061	|	Bacillus/Lactobacillus/Streptococcus group	|		|	synonym	|
+91061	|	Firmibacteria	|		|	synonym	|
+91061	|	Firmibacteria Murray 1988	|		|	authority	|
+131567	|	biota	|		|	synonym	|
+131567	|	cellular organisms	|		|	scientific name	|
+135623	|	'Vibrionales'	|		|	synonym	|
+135623	|	Vibrionaceae group	|		|	synonym	|
+135623	|	Vibrionales	|		|	scientific name	|
+183925	|	Archaeobacteria	|		|	synonym	|
+183925	|	Archaeobacteria Murray 1988	|		|	authority	|
+183925	|	Methanobacteria	|		|	scientific name	|
+183925	|	Methanobacteria Boone 2002	|		|	authority	|
+183967	|	Thermoplasmata	|		|	scientific name	|
+183967	|	Thermoplasmata Reysenbach 2002	|		|	authority	|
+186820	|	Listeriaceae	|		|	scientific name	|
+186820	|	Listeriaceae Ludwig et al. 2010	|		|	authority	|
+1235850	|	"Methanoplasmatales" Paul et al. 2012	|		|	authority	|
+1235850	|	Methanomassiliicoccales	|		|	scientific name	|
+1235850	|	Methanomassiliicoccales Iino et al. 2013	|		|	authority	|
+1235850	|	Methanoplasmatales	|		|	synonym	|
+1783272	|	Terrabacteria group	|		|	scientific name	|
+2283794	|	"Methanomada" Petitjean et al. 2015	|		|	authority	|
+2283794	|	Methanogen Class I	|		|	synonym	|
+2283794	|	Methanomada	|		|	equivalent name	|
+2283794	|	Methanomada group	|		|	scientific name	|
+2283796	|	Diaforarchaea	|		|	equivalent name	|
+2283796	|	Diaforarchaea Petijean et al. 2015	|		|	authority	|
+2283796	|	Diaforarchaea group	|		|	scientific name	|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,27 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+641	|	135623	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+662	|	641	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+666	|	662	|	species	|	VC	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+1224	|	2	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1236	|	1224	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1239	|	1783272	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1385	|	91061	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1637	|	186820	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1639	|	1637	|	species	|	LM	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+2157	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+2158	|	183925	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2159	|	2158	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2172	|	2159	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+28890	|	2157	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+83816	|	2172	|	species	|	MR	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+91061	|	1239	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+131567	|	1	|	no rank	|		|	8	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+135623	|	1236	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+183925	|	2283794	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+183967	|	2283796	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+186820	|	1385	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1235850	|	183967	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1783272	|	2	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+2283794	|	28890	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2283796	|	28890	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
Binary file test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cat_database.loc	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,8 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value	name	database_folder	taxonomy_folder
+#2019-07-19_CAT_database	2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
+CAT_prepare_test	CAT_prepare_test	${__HERE__}/CAT_prepare_test/CAT_database	${__HERE__}/CAT_prepare_test/taxonomy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs.fasta	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,93 @@
+>contig_9952
+TGGTTATGTACGCAGACAGCTTACTCCTCCTCGGAGACCACTCGCGGTACTCTATTCCCTCCACGGTTGCGAGGCGCTCGCCGTAGATCCGCTTCCCGGG
+ACAGGCAGACAGGGTGTATAGCCTCCCCCTCTCGGAAAAAACCCCGGGCACGCGGTCCAAAGACTCCATGCCCGTAACAACGCCGTGGTTTTAGAAATAA
+TCTGTGCCGTCGGTTGCAAACCCTAAATACAGGGGGATATCAATGCGGTTGCATGGATATCCACATCCTTCGTGAGATCGCCGACGCAGTTCAGGCGGCG
+GTATCTCTCATACCCGACCCCTGCAGCAGGGGCAACGAGATATGCATGGGCAACGACGGCACACCCACATCCGAGATAGACAAAGTGGCTGAGAACGCGG
+TGCTCGGGTACATAGAGTCCAACCGCCTGGCTCTGAACGTGCTCAGCGAAGAGATAGGCTTCGTGGACAACGGCGCGTCGGAGGTTCTGGTCCTCGATCC
+CATCGATGGGACAAGCAATTCAGTGGCCGAGATACCTTTCTATACGATATCCATGGCCGTCGGCAAGGATTCGCTCTGCGGCATGCACACGGCCTACATC
+AGGAACCTGGCGACAGGGGACGAGTTCTGGGCGCACAAAGGGGATGGCGCTTATTACAACGGAAGGAGGATAAACGTCAGGAAGCCGGATTTCTCCAAAC
+TGTTCGCCCTTATATACATGGGGAACGCCGCTGTCGATGAAGCGTTCGCCCTTGCAAAGAACGTCAAGACCTCCCGCTCCATGGGCTGCGCCTCCCTTGA
+GATGACGCTCGTGGCACTAGGACACGCCGATATCTATTACATGAACACCTACCGTTACAACCGTGCCGTCAGGACTGTGGACATCGCCGCCAGCGCCCTG
+ATACTCAGGGAGGCGGGCGGCGAGATATTCGATATCGGCGGCAACAAGCTGGATATGCCGCTGGACAACGCTTACCACGCAAGCTTCGTGGCGTGCTCCT
+GCAAAGAGGTATTCGACCACATCATGAGGGCCCACATCGAGGAGCACGGCGCTACGCGTTACGGGATATACGCCAACGAGACCGTCCCCGGGGCGGCCGA
+GTATGTGAGGAGGGCGTACGATGCTTTGAGGGGGGAGAAGGTAACCCTCGACACGGCGGCCGCCAGGCTGATCGGGGCGGAAGGCGTGCCTATTTCGGAG
+ATCGAGGCGGACATCGTCGTGGTGATAGGAGGGGACGGCACGATACTCAGGGCGCTCAAGAAGACGGATGCCGCCGTGATAGGGATCAACGCCGGAGGCG
+TGGGGTTCCTGGCCGAGGTCGAGCCGGACGAGATAGAGGAGAGCATATCCCGCATCAGGCGCGGAGAGTACTCGGTTGAGGAGAGGATCAAGCTCAGGAC
+TTTTTACGAGGGGGAATATCTCTCGGAGGCCGTGAACGAGACAGTGATACACACTGATTCCGTGGCGAAGATCAGGCAGTTCAAGATATATGTCAACGAA
+CACCTGGCAACGGAGGTCCGCGCGGACGGCATAATCATCTCGACGCCCACAGGCTCCACCTGCTACGCCATGAGCCTCGGCGCGCCCATAACCGACCCGG
+GGGTCGGAGCTTTCCTGATAGTCCCCATGGCGGCGTTCAAGTTCGCTTCCCGTCCGTTCGTCGTTCCCTATACGGCGAAGATAACCGTCGAGGCGGTCAT
+GGACAAGGGCTGCCTCATCGTGGTGGACGGCCAGCACGAGTACCCGATGAGGGGAGGGACGCGGGCGGAATTCTCGCTTTCCGACAACCTCGCCAAATTC
+TCGGCCCCGGCGTTCCTGGCATCGACGGGCATCTCGAAGTAGATCTCGCCGCCCATCATGTTGATGTCGGCCTTAAAAGGTAAAGAAAGCCAGATGGCGT
+TCGAGACATCGGAGTCGTCCAGCTCGGCGAAGTAATCGCCGCTCGTCGTGACAATCTTCATTCTGCTCAAAGCGCCACCCGGCCGTCGGTTCAGTTCTTT
+TTCTTCTTTTCGAAGAGCTTCCTGATCTCCCTGCCCGTGACCTCGATCTGGAGGTCCTTCTCCTTCTCCTCCATTGCCTTCAGGCCCTTCAGGTCGTTGG
+CCCATTCGGAGGTCCAGTCGGCTTTGAATTTTCCGGATTCGATGTCGTCCAGAATCTTCTTCATGCCCTTCTCGGACTCTTCGGTGATCACCAGGTCCCT
+CCTGGTAAGGCCTCCGTACTCGGCAGTGTTGGAAACGACGTGCCACATCTTCTCGAAACCGCCCTCGTTTATGAGGTCGACGATGAGCTTCGCCTCATGG
+CATACTTCGAAGTAGGCCATTTCGGGAGGGTATCCTCCCTCGACCAGGGTCTTGAATCCCGACTTTATGAGGCCGGTGGTCCCTCCGCACAGCACGGCCT
+GCTCTCCGAACAGGTCTGTGAGCGTCTCGTTGTCGAAAGTGGTCTCGAAGACGCCGGCGCGGGTGGCTCCGAGCCCCTTTGCAAGTGCAAGGGCGATCTT
+CTTGGCGTTGCCGGTATAGTCCTGGTGGACGCAAACGAGGGCCGGAACTCCGAATCCCTCGACGAACACATCCCTTTCTTTGTCCCCGGGGGCCTTGGGA
+GCCATCATTATGACGTCGATGTTTTTCGGAGGAACGATGGTCTTGTAGGTCACAGCGAAACCGTGGGCGAACTCAAGTGCGCAGCCCTCCCTGATGTTGG
+GCTCGACGAATTCTTTGTATACCTTTGGCTGGACCTCGTCGGGCAGAAGCATCATGATGACGTCCGCGGTCTTGGCGGCCTCGGCGAAATCTACGACCTT
+GAAGCCGTCCTCTTTCGCTTTGTTCCATGATCTTCCGTCTTTCCTGAGCCCGATCACTACGTTGAGGCCGGAGTCCCTGAAGCACAGGGCCTGCGCTCTC
+CCCTGGGATCCGTAGCCCATGACGGCGACCGTTTTTCCTTTAAGGACATCTATGTCCACATCTGCATCGTGGTAAATCTTCATTATATCCACCTGTTTAG
+AGGTCCAACTGCTTTATAGACTAAAAGGTATCGTTCCCGCTCCGACATATAGGTCAGTTCAGTACTGGCAGCGTCCTTTGACCAGGGCCTGATTCGGATT
+GGCAGGCAGCATGGGCAACACGTCCTCCTCGGGATCGATGTGGATGTCCAGCAGGCACGTCTCGCCGCTGTCTATTGCGGTCTTCAGGGCGTCGGCTATC
+TCTCCCGGCTTCTCGACCAGCATTCCTCTGGCCCCGTAGGCCTCGGCTATCTTGGAGAAGTCCGGGTCGGCGCCAAGCTCGGTCTCGCTGTACCTCTTGT
+TCCAGAACAGCTTCTGCCACTGTTTGACCATTCCCAGCCATCCGTTGTTCAGCAGGACTATGACGACCGGCAGGTCCTCGGCCACCGAGGTGGCCAGCTC
+CTGTTGGACCATCTGGAATCCCCCGTCCCCTGTTATGGTCAGGACGGTGCTGTCGGGCTTGGCGGCCTTCGCCCCTATGGCGGAGGGGAGCCCGAAACCC
+ATCGTGCCGAAGCTTCCCGAGGAGAGGAGCTGTCTGGGCCTGTGGACGTGCAGATGGTGCATGGCCCACATTTGGTTCTGTCCCACGTCGGTGGTGACTA
+TCATGTCGTCGTCCTTGTCGATCAGCCTGTTGATCTCGTATATGACCTTCTGAGGGACGATCGGTGTAAGGTCTATGTCGATCTTGCACCTGCAACGCCT
+CCTGTACTCCGCATAGGTGCTGTTCCAGTCGGCATGGGTATCCCTGTATCCGGAGAGCCCGTCGATGAGCGCCGCGGTACCCTTCTTAGCATCGCAGAGA
+AGGTTGACGTCGTTGTTCTTGTGCTTGTCGAACTCCGTCGCGTCTATGTCTATCTGGACGACCCTGCATGCGCCGTCGAACCTGGTGTGGGGGCTGAACG
+TCCTGTCCGAGAACTTCGTGCCTATGGCTATTACCAGGTCGGCGTTGCGGAAAGTATCGAGGGCGCACATCTTGCCGTGCATCCCCAAGGGGCCCAGGCT
+GAGCGGGTGCTCGGTGGACATGGCACCCAATCCCATGAGCGTGAAGACCGCCGGCGCGCCGATGAGCTCTGCGAGCCTCGTAACTTCCTCGGACGCGTTC
+GCGCTTATCGTTCCGCCGCCGATCAGCAGGACGGGCCTCTGCGCTTCCTTGATCCATTGGACCGCGGTGCCCAGTTCGGACATGTCCTCCCTGGGCTCCT
+TGATCCCGTACGAGATGCCCAGGAGGCTCTCGTCGATCTCCGAGTTCATCTGGTCTGAGGGGAGGTCGATGTGGACAGGCCCCGGTCGCCCGGTCTGGCA
+CATCTTCCATGCCTCGTCCACCGCATGGGGCAGCCTGTTGACGTCTAGGACCCTGAAGTTGTGCTTCGTTATAGGCATGAGGAGGCTGTACGCGTCCACT
+TCCTGGAAAGCGCCGAGCCCCAGGGACCCGGTTCCGACCTGTCCGGTAAGTGCCAGCATGGGAGTTGAGTCCGCATACGCCGTGCCTATGCCGGTGATCA
+TGTTGGTGGCACCGGGCCCGCTGGTGGCCATGCAGACGCCCGGCCTCCCGCTGGCCCTGGCATATCCGTCTGCCATGTGGGCGGCGCACTGCTCGTGGCG
+TACTAGGACATGGTTTATCGATGAGTTCATTATCTCGTCGTAGATCGAGATTACGCTTCCGCCCGGATATCCGAACATGGTCTCGACACCTCTGTCCTCC
+AGCATTTGGAGCAATGCTCTGTTTCCTTTCATGGTTGGTCTCCGGCGACGTATCGCGCTTGTTTTTTATAATTCTATTTGGAAAAGCGCGCCGAAACGCG
+CCAGCGGAAGAAGTTTATGTATACGGGGGCCATATGCCCACGCAGGTGTTTCATGGCTGTAATAAAGGTCGGTATCAACGGATTCGGAACCATAGGGAAA
+AGGGTCGCCTCCGCAGTGAGCGCACAGGATGACATGGAAGTCGTAGGTGTGACGAAGACCCGCCCGTCCTTCGAGTCGGAGGTCGCAAGGTACAGGGGAT
+TCGACCTGTACGCGCCTCAGAAAAGCGTCGAACTGTTCGACAAAGCGAACGTGCCGGTCGGGGGGACCGTCGAAGACCTCTGCGGCAAGGTAGACATCAT
+GGTCGACTGCACGCCCGGAAACGTAGGGCAGGAATACAAGGCGATGTACGCCAAAGCAGGCATAAAGGCGATATTCCAGGGAGGGGAGGACCACAGCCTG
+ACGGGGATATCCTTCAACTCCACCGCCAACTACAAGGAGTCCTGGGGCGCCCAGTTCTCCCGTGTCGTTTCTTGCAACACCACGGGGCTGCTGAGGACGC
+TCTACCCCATAGACCGCGAGTTCGGTATCGAGAAGGCGTACGTAACGTTGGTCAGAAGGGCCGCGGACCCCGGTGACAGCAAGAACGGGCCGATCAACGG
+GCTGGAGCCCACCGTCAAGCTGCCGACCCACCACGGGCCGGACGTCCAGAGCATCATGCCATGGGTCAACATCAACACCATGGCGATAAAGGCCTCCACT
+ACGTTGATGCACATGCACACGGTCACGCTGGAGCTGAAGAACTCCGCTTCCACCGAGGCCGCGGTCGAAGCGATAAGGAACTCCTCGCGCGTCAGGATGG
+TGGACGCGGCGTCCGGCATCAGGTCCACGGCGGAGGTCATGGAGCTGTCGAGGGACCTGGCCAGGGACAGGTCCGACATGTACGAGATCGTGGTATGGGA
+
+>contig_38063
+CTATCTCCTCAGGAGGTCTGGGAATCTCTGATCGGGAAGAACAGTAACTACCGCATCATAGTCGTGGACCTCAATCTGACCCGTGTGCTGTTCGGCATGA
+TAGTGGGCGCCGGCCTGGCGGTGGCCGGTGCGGTCATGCAGGCCCTGTTCAAGAACCCGATGGCCTCGCCTTATACTCTCGGGCTCTCGTCAGGCGCCGC
+ATTGGGCGCCGCATTGGGGATTCTCTTCCCTCTTTCGTTCGTACCTGAGGTCGCATCGGTCCCAATCCTGGCTTTCGTTTTCTGTCTGGGGACCATGTTC
+CTCGTGTACTCTATTGCCAGAGTGGGCAACCAGACGCACATGGAGACTCTTCTGCTGGCCGGAATAGCCGTAGCGGCATTGGCGCAGGCGGCGGTCTCCC
+TGCTCACGTACATAGCGGGCGAGAGCATCACGGAGATAGTCTTCTGGGGAATGGGCAGCCTGACCGTCAGCCTCCCATGGGTCAAGATCCCGATAGTGCT
+GGTCCTCAGCGCCGTGGGCATATTCGCAATGCTCTACTACGCCAAGGACCTGAACGCCATGATGCTGGGGGACGCCCACGCCATGGACCTTGGAATAGAC
+GTAAAAAAGACAAGGCTGGCACTGTTGATCGCCTCGTCTCTCGTCACCGCGGCTGCGGTATGTTTCGTGGGGACCATCGGCTTCGTAGGCCTTGTGATCC
+CGCACATACTCAGGATACTTCTTGGTCCGGACAACCGTCTGCTTCTGCCGATGTGCGTGCTGACCGGAGGGATATATCTTGTAGGATGCGACTATCTGGC
+ACATCTCTTCGCCCAATCTCTGGGCGTCATGCCCATAGGCATAGTGACATCTCTGATAGGCGCCCCGTATTTCATCTATCTGCTCAGGAGAAGAAAAAAG
+GAGGTGGGATGGGTATGAGCCTGGATATCCGTGACTTATTCTACAATTACGATGGGAAGCCTGTTCTCAAAGACGTTTCGTTCCTGGTCAAGGAAGGAGA
+GGTCCTGGGGATACTGGGGCCCAACGGATGCGGAAAGACGACCCTGCTGGGCAATCTGAACAGGAATCTGAGCCCCAAAGGCGGATGCGTGCTTCTGGAC
+GGGGAGGACCTTCACAATTACAAGAAAAAAGACATCGCGAAGGAGATAGCGGTGGTTCCGCAGGACAGTCGCGTAGGTTTCTCGTTCACCGTAAGAGAGA
+TCGTCTCCATGGGCAGGATGCCATTCCAGGACGCCTTCCAGGGAGACTCCTCGGAAGACCTCAGGATAATCGAAGACGCGATGAGGAAGACCAACGTACT
+GGATATGGCAGACCGTTACGTGAACACCATGAGCGGCGGGGAAAGGCAGAAGGTCATAATCGCCAGGGCCATGGCGCAGACGCCCAAGATACTGCTGATG
+GACGAGCCC
+
+>contig_44250
+GGTGATGTACTGGGGCTTGTAGGCTACTTTGACCTTTGCGTCTATCTTGCCGCCGTCTGGAGGGATCTCTCCGGCCAGCATCTTTACGAAAGTGGTCTTT
+CCTGTGGCGTTGGGACCGACGACCCCGACGGATTCCCCCATCTTTATGGAACCGCCGACGACATCCAAAGTGAACTCTCCGAAGTCCTTGGACAGGCCCT
+CGAAGGAAAGCAGGTCGGAAGTGACCCAGTCGCTCCTGGGAGGAGACGCGAAGAACTCTATCGGCCTATCCCTGAAACGGATATTCTCTTCGGGAAGGTA
+ACCGTCCAGATATACGTTTATGGCGGTCCTGACCTGTCTTGCAAGAGTGAACACGCCGTACGCCCCCTCGGTACCGTATACAACGCTGACGATGTCGGCG
+AGGAAATCGAGTATGGCAAGATCGTGTTCTATCACGACCACCTGCTTTTCTGCGCTGAGTTCTTTGATGATGCGTGCCATCCTGATCCTCTGGTAGATGT
+CAAGGTACGAGGTGGGCTCGTCGAAGAAGTATACGTCCGCGTCCTTCATGACCGTGGCAGCCATGGCGACCCTCTGAAGCTCTCCTCCTGAAAGTTTCTT
+TATATCCCTGTCCAGAAGCTCGGTCAGCTCGAACATGATGGCGGCCTCCTCGAGTGTCAGGCGGCCTTTTATGCCGGAAAGCAGGTCCTTCACGGGCCCC
+GATGCGGCTTTGGGTATGAGGTCCACGTACTGTGGCTTTATGGCCGTCCTCACCTTGCCGGCGTAGACGTCCGTGAGATAGGATTTGACCTCGGTACCGT
+CGTAGTGCTGCAGCACTTCCTCTTTGGATGGAGGTTTCTCATAGTTGCCCAGGTTGGGGACGAGTTCCCCGGAAAGTATCTTGATCGCCGTGGATTTTCC
+GATCCCGTTCGGTCCAAGTATGCCCGTGACCATGCCTTTCTTCGGCACCGGGAGCCTATAGAGGCGGAAGGCGTTCTCGCCGTACTGGTGGACCATCTCC
+GTCTTCAGCTCGTCGGCCAGGCCTATGATCTTTATGGCGTCGAACTGGCATTTGTTGACGCATATCCCGCATCCCTGGCACAGGGATTCGGATATGATGG
+GCTTGCCCCTCTCGCCGAACACTATGCATTCCACGCCCGTTCTGACCAACGGGCAGAACTTATAGCATTCCTTGTTGCATTTTCTGTTCTGGCATCTGTC
+CTGCAGGACGGCCGCAATACGCATGTCCCCGCTTAGACCGATTTAAGATATAACCTTTAAGGATGGTATCGCAGATAAGCTGATAAGGGAAGACGGAGAC
+AGATGGGCATGGCCGAAGCGGATGGGACCACCGAGGACGTCAGGATACTTACGGGCGACTACAGGAGGGCGATAAGGCATCTCTCCATACCGATAGCCGT
+GGCTCTTGCGATACAGCATATCAACATACTCGTAGACACGTTCTGGGTCGCGGGCCTGGGGGCGGACCCGATGGCTTCAATAAGCATAGTATACCCGGTT
+TTCGCCACGGTCATGGGCATCGGAAGCGGGCTGGGGATCGGTGCTTCTTCCGCGATAGCCAGAAGCATCGGGCATAACAGGAGGAAGGAAGCCGGCACGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome2.fna	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,51 @@
+>contig_11394
+GCTTTTTACGCCCAACGGGCTTGTTCTTGCTCAGACAGTCCAAGGCTTTCCAGATATGATAATCGCTGAATTTCGGTATCGGGCCGCCTGCACCCCTGTG
+CTGCTGATTGGACATATGGATGTATCATTCATCCATTAAATAAAGTAATTGATTGTCATGTCGCAAGATACGGTATTAACGCCGTCAATAGCTGTTAAGC
+CAGTGCCCGCCGTCGCTTTCCATCCGTAATACTCGCGCGTTTTTTATATCGAAGGAACTCTTCACATCCATGGTTCAAAAGTCGGTGGACTTTGTTATTA
+TTGAGAATGTTTCCAAGAGGTTCGGGAACAAGACGGTCCTTAATAATGTGAGCGCCACTATACAGACCGGTAAGATACTGGGCCTGATAGGCAAGAGCGC
+CGCGGGCAAGAGCGTTTTGATAATGATGCTGAGGGGAAGCGAAGATTACGCACCCGACTCCGGAAGGGTGCTGTACAGGGTCAATAGGTGTTCCGGATGC
+GGAAACCTCGACCTCCCCCATGAAGGAACGCCCTGCTCGAAATGCGGGTGTGAAACAGGAACGATCACCGTGGATTTCTGGTCTTTGAAAGATGACGACC
+CTTTAAGACGCCAGCTCAAGAGCCGCATCGCCATAATGCTGCAGAGGACGTTCGCCCTTTTCGGGGATAAGACCGTGATCGAGAACATCTTCGAGGCCAT
+AGGCGACCGTGCAGAGGGCAAGGCCAGGACGGACATGGCGCTCCAGCTGCTGGAATTCGTGGGGATGACTCACAGGACCACACACATAGCCAGGGACCTG
+TCCGGAGGAGAGAAGCAGAGGATAGTCCTCGCAAGGCAGATAGCCAGGGATCCTCTCTTCTTCTTGGCGGACGAGCCGACGGGAACGCTTGACCCGTACA
+CGGCGGAATTGATGCACGAGCGTCTTGTGGACTACGTCGGGAAGAGAGGGATCTCGATGGTCTTCGCGTCCCATTGGCCCGAGGCCGTGGATAAGATGGC
+CGACGAGGCCATATGGCTGGATTCCGGCAACGTGCTGATGCAGGGCGACCCGAAGGAGATCGCCGATAAATTTATGGAAGGATACTCGTTCGAAAGGACA
+AAGGCCGCCGACCTGGGAGAGCCGATAATATCGCTCAAGGATGCGGAGAAGCACTTTTTCTCTGTCGTCAGAGGAGTCGTCAAGGCAGTGGACGGTGTAA
+CCTTCGATATAATGGAGCGCGAGGTGTTCGGCCTTGTGGGAAAGTCGGGCGCCGGCAAGACCACGACGTCAAGAATGGTCGCCGGCATGACGCCCGCCAC
+CCGCGGGTCCGTGAAGATAAGGATCGGCGACGACTGGGTTGATATGTCAGAGATGGGGCCGAGCGGGAAAGGCCGCGCCACCCCCTATATCGGGTTCCTC
+CATCAGGAATACACGCTCTATCCCTTCGACAACATACTCAGCAACCTTACGACCAGCATAGGCACCAGGATGCCAGCGGAACTTGCAAAGTTCAAGGCCA
+TACAAGTGCTTCAGAGCGTAGGGTTCGACAAGAAGAACATGGAGAGTCTTCTCTACTCTTACCCCGACACACTGAGCGTCGGAGAGTGCCAGAGGATAGC
+CTTCGCACAGGTCCTGATAAGGGAGCCCCGCATCATAGTGCTGGATGAGCCTACAGGGACAATGGACCCGATAACAAAGACCATCATAGCAAAATCCGTC
+ATCCGGGCGAGGGAGACCCTGGGCGAGACCTTCGTCGTGGTGAGCCACGACATGGATTTTGTCGAGAACGTCTGCGACCGCGTAGCGTTCATAAGGAACG
+GCGTCGTGGAAGACATGGGAACTCCCGAGTCGGTCATCCAGAGGTTCGGTCTGAAAGAGCTTCAGGATGACGACTCCGAGGGTGAATGAATGAAGCAGCA
+GATCGGGCGCCACCTCAGCTTCGTTGAATGCAGAGAGGCCATGGGGCTCGGCGTGGGCGGTGCCCTGGCACAGAGGGCGACCATCTCTGACAGCGGAAGG
+GACGTCGTTGCGGTGGCCATGGGCCCGGGCAAGAGGCACATAACCAAACCGGTATGCGAGATAACATATGCCCTCAGAGAAGAGGGCATAGATACCAGCG
+TCGCCTGAGCGCCTGGCACTTATTGCGGAGTATGTCAAGGACATGATGACCGAACTCGAACCGGACAACGCGGCCGTCTTCGAAGCGGGATGCGCCAGCT
+ACCGGGCCAAGGTAGATGTGCTGATAGGGCTTGAACAAGAATATCTGACAGGCAAGGCGACTACCGAGATCATCGTCTGGCACCCTTCCTGGGCGTATCT
+TCTTCCGGATAATGTGACCGAGGCAGAGCTCATGGAAGCAGCCGAGGCGGCATCCACGCCCTCATCGATCGCGATGCTGCAGGGAGGGACGCCGGAAAAT
+CCTATCAACGTGTTCCTTTCGGAACCCGAAGAGATCAACGGTCTTACCCAGCAGGGGCTTTGTGAAATGGGAATATATGTAAACATAATAGTGATTAACA
+TACTCGCCGGGGACTGGGTCGAATATCTGGGCCAGGTCATCGAGATACTGGGAGATAATATTCCGGATGCGGGGACATGAATTGATGATACCAATAGAAA
+TTAAGGACCTTACCGCTGGATATGACGGCCGAGCCGTTTTCAGCAACGTCGACCTGGAGCTCAGGGACAAAGACTTCCTGGCGGTCATAGGGCCCAACGG
+CGGCGGGAAGACAACGCTCTTCAGGGCGATCCTGGGCCTAATAAAACCCATGGGGGGGACCGTAAAAGTGTTCGGCAAGGAGCCGGCAGGTTCGCCCCCG
+GGCATAGGATACGTTCCGCAGAACGAGAATCTGGACTCAGAATATCCAATAAGTGCCAGGGAAGTCGTCCTTATGGGAATGAGGTGCAAGAAGGGCCTTA
+GGCCGTTCTATTCCAGTGAGGAGAAGGAGTCCGCAGAGAGGGCCATGGAGTACGCCGAGGTCTCGGATTTCGCAGACAGCCGAATAAGCAACCTGTCGGG
+AGGGCAGAGACAGAGAGTATACCTCGCAAGGGCTCTTGCCCCGGAACCGAAGATACTCATGCTGGACGAACCCACCGCGAGCCTGGACCCGTCGATGAAG
+GACTGCACCTACGACATACTCAGGAAGCTGAACAGGGACGGGATAGCCATAATGGTGATAACTCACGATATGAGCAGCATCTCTCATGATGTCAAACGTG
+TAGCATGCATGAACCGCAGGCTGATAGTCAACGATGCGCCCGAGATAACCCAGGAGATGATCGCATTGGGATTCCACTGCATCCCCGAGCTAGTGCACAT
+AGGTCCCTGCGATTGCGGAGGTCACAACGATGGTTGATTGGGTCGCGGCATTCTCGATGCCTCTGATTCAGAACATGTTCATGGTCGCGGCCATAGCATG
+CGTTCTTTGCGGAGTCGTGGGAACCCTGGTGGTCGTGAAACGGATGGTGTTCGTAACGGGTGGCATAGCACACACCACTTTCGGAGGTGTGGGTCTTGCA
+TATTATGTTATGTCCGTCGTCGCAGTCTCATGGTTCACCCCCATGATCGGCGCCGCACTGTTCGCGGTCGTTTCGGCGGTCATAATGGCGCTTCCCGCGG
+
+>contig_159
+TATAGCTCAGCTCGTTGGCGGAGACGCTGCTTCCGTACATCTGGCCGCCGCCGTTGATGCCGCCGCCCCAGGCGGCCGTCCCGATCCCCACGGGAGAAAT
+GTCCGTACCTCGGAACCTTATGTTTCTCACGGATTCCCGTATATGTTCCTGGATTATAACTGATACGCAATCCTGTTTCCGACGTCCGCCATGTTTAGAT
+AAATTGACGGTATAGCCGAAGGCATGGATATGGCAATGGAGCTGAGGAACGTCTCCGTAGTGAGGGACGGGAAGCGGATACTGGATTCCGTCTGCCTCGA
+TATCGGCGCCTCCGAGAACGTTGCCGTCATAGGGCCGAACGGTTCGGGGAAGACGACGCTCATCAAACTGCTGAGGGGCGATATTTATCCCTACTACGAC
+GAGGACCGCCCCGCGGAGATGAGGATCTTCGGTGAGAAGATATGGTCCATCTACGACATACGGAGCCGCATGGGCGTGGTCTCCATGGACCTCCAGGGCA
+TGTTCGGCGGCGAAACGCTGGTCGGAGACGTCATAATGTCGGGATACTTCAGCAGCCTGGACATTTTCCGCAACCATGAGGTCACCGACAACATGCGCTC
+CGGGGCCTCGCGAGCGGCCGGGTACATGGGAGTGGAACATCTCGTCGGCAGAGATCTGTCCGGCCTTTCTCTGGGAGAGATGAGGCGGACGCTGATCGCC
+CGGGCGCTGGTCACCGCCCCCGAGATGCTCGTCCTCGACGAACCGATGACGGGCCTCGATATTGTAATGAAATCCAAATTCAGGAAGATGTTCGACATCA
+TGACGGAAACGGGAGTGAGCATCGTCATGATAACCCACGACCTCACCGACATCCCCGTTTCCTTGAACCGCATAATAATGATCAAGGATGGGAAAGTGTT
+CGCGGACGGTCCTAAAAAAGACGTCCTGACGTCCGAGGTCGTCAGCGGGCTTTTCGATGAACCTATTAATGTACAATGCGTTAACGGGATATATTCAATG
+AGGATGGATGAGTGACAAGGTATATCTGTTCCGAATGCGGGAACGAGATTCCGTACGTTTCGGATTTCTGCTACCAGTGCGGTAGCCTGAAGAGCAAGGC
+GTTCAAGATAGACGAGGGCGGCGAGATGGAGGGCGGGGAGGTCCCGTGCCCCAACTGCGGAAAGCCCATAGAGGAGGACGCCCGGTACTGCAGGCACTGC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome3.fna	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,60 @@
+>contig_4003
+ATCAAAGAGGAACTGAAGGCGGCGATGCTGCTGACCGGTTCCTCTGACATAAGAGAGCTCTCTGATGCAGAGTATATCGTCATGGGAGAGACACGCAAAT
+GGATGGAAGGCCTGAAATGACCGACGTCAAGAAGATATTGAAACAGATGTCCGACGAGCTGAGCAAGCCGATCGAATCATACATAGAAGACGAACTGCCC
+GCCAATCTCATCGAAGCGGCAAGACAGTACCCCTATGCCGGCGGAAAGAGGATGAGACCGGCCATGGTCATCGCCGCGTGCAGGGCGGTGGGAGGGGATG
+GCAGGAAGGCCGTTCCCCTTGCGGTTGCCATAGAGTACATACACAATTTCACGCTGATCCATGATGACCTCATGGACGGGGACGAGAAGNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCATGGACGGGGACGAGAAGCGCC
+GGGGCATGACCACATCCCATGTGAAGTACGGCATGCCCACAGCGGTGTTGGCGGGAGATGCCCTGTTCGCTAAGGCCTTCCAGATAATCGCCGACCTCGA
+TGCGGACGGCGAAACGGTCAGAGATGTCCTTAGAGTGGTCTCCCAATCCGTCTGGGACCTGGCCAGAGGTCAGCAGATGGATATAAACAACGAGAACGGG
+ACCGAGGTCACCATGGATGAGTACATCGAGACCATCAGACTGAAGACCAGCGTTCTGTTCGCCGCCGGTGCCGCCGGAGGCGCTATGATCGGAGGTGCGA
+GCAAGGAGGTCGTGGACGCCGTCCACGGCTACGCCATGAACCTGGGCGTCGCCTTCCAGATGTATGACGATATACTTGGGATAGTCGGGGACCCGGCCGT
+TACCGGTAAATCGTCCGGTAATGACATTCGCAAAGGGAAGAGCACCGTCATCGTGTGCCACGCCCTGAAGAACATAGCTGACAGGGCGGACCTGCTGGTC
+TTCCGCGATATCCTCGGCAAGACAGACGCCACCGATGCGGAGATAGACGAGGTCAGGAGCATACTCCGAAGAGCCTGCAGCCTGGATTACGCCATAGAGA
+CCGCAGAGGATTACATCAACAAAGCCGTCGACTGCCTGGATGCGCTGGAGCCCTCAAAGGACAAGGACTTCATGATAGCCCTGGCAGAATACACGATGAC
+CAGGACCCTTTAGTCGGAGATCCCCTTCTCCGTTATGGAGTATGTGGCTTTCCGGCCTTCCGGTATGCTGCGGTGCTTGACCATGATCGCGGCCCTGCGG
+CCGTTGCCTTTCTTCTCCAAGCGGATTATGGTCTTCGCGTTATGATGCATGGCGTGGCCTCCGAGGAACTCTATCGTACCGGCGCCTATGTTGGTGTATA
+
+>contig_4403
+CACCGGTCACCCGAAGGTCACGCGCGTATCGATGCGTGACATCGCAGACCTGGGAGAGAGGGGCCTGTACATCCTTCACGAGATCGGTACGGACCTCGTC
+GGCAAGATGGAGGGCTGCACCGGGTGCAAGAAGTGCGAGCACGAATGCCCCGAGAACGCGTTGACCGTAAGCAAGGACAAGACGATCACCGTGAAGACCA
+AGAACTGCCTCGGAACGGCATGCTACAGATGCCAGTACGTCTGTCCCGAGAAGGTCATGCAGTTCGACTCCCTAAGGCTGTCGTGATAAACGGTTTTGGG
+CGGGGCCGGCCCCGCCCTTTTTTCATTTACCGCCGTTCAGGGCCTCGGCGTGCACGGCAGGCCTGACATTCTCGTCCTCCAGCTCCGTAAGTATCTGCTT
+GCGCAGCCTGATGAACTCGGGCGAAGCGCGGTCCCTGGGCCGCGGAATGCCTATGTCCACGATGTCCTTGATGCTGGCAGGACGCTTGGTAAGGACGACT
+ATCCTGTCTGAAAGATAAACGGCCTCGTCGACCGAGTGGGTCACGAACAGGATCGTAGTGTCCGTCTTCTCGACTATCCTCAGCAGCTCGCCCTGCATGA
+TGTTGCGCGTCTGGGCGTCCAACGCGCCGAACGGCTCGTCCATGAGCAGCACGTCGGGCTTGGTAACAAGGGCCCTTGCGATGCCCACGCGCTGCTTCAT
+ACCTCCGCTGAGCTCGTGGACACGATGGTCCTCGAAACCTTCGAGGCCGACCGCCCTGATGTAGCGTTCGGCGGTCTTCCTGCGCTGCTCCGCCGGGACG
+CCGGCGATCTCCAGGCCGAACTCGACATTCTTCCTTACAGAACGCCAAGGGAACAGTGCGAACTCCTGGAACACCATGCCTCTGTCGGGGCCTGGCCCGG
+TGCACTTCTTCCCGCCTATCGACACTTCTCCGGAGGACGGCTCCATGAGCCCTGCTATAAGCCTGAGCAGAGTCGTCTTTCCGCATCCCGAGGGACCGAC
+TATGGATATAAGCTCGCCCTTCTGGATCTCCAGAGAGAAATCCTCCAGGGCCACGGTCTCCTGTTCATCGGTCTTGTAGACCTTCCTCAGATGATTGATA
+ACGATCTTCTCGCTCATTCTATCCCCATCCTTCTTGTTATGACCTTGTGCAGATAGTCGGCGAGGCTGGTCGTCAGTATTCCGAGGATTGCGATTATGAC
+TATGCCCGCGTAGACGTTGGGCCAGTACCCCATCTGCGCCTGTATGCTGATGAAGTATCCGACGCCTCCTCCGAACGATGCGTACAGCTCGGAGGCAACT
+ATGCACATCCACCCGACCCCCATGCCTATGCGGAGGCCGTTCATTATGTATGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+>contig_14302
+GATGTGGAATGCGGCCAAATGCCGCCTCGGTTTCAGCGGGGCAAGGCGTTCCATGTCCACGACCGCTGGAATACTGGTGAACGTTTTCATTACTTCTCTG
+GAGATAGCCGAACGTTCTCAGCCTGCGCTCGAAAGCAGGAACTACAACGGCAGTTTCCCCATTTATAGGATGCCCAACAAGATCGGTCTCTACTGGTTCG
+CCATCACGGCCGCCGCGTGCGCCTCGCTATACATCATCGGATACTACATCTCGACCCCGAACATGGCGGCAATACTCCTGGGGGCGGTCTGACGGTCTGC
+GACCTTCGTACGGATAAAGTGAGCTACAGGTACGGCAATTCGAATTACAATGCTATCGATTCTGTGGATTTCACAGCCTCCCATGGCAGAAGGACCGTCA
+TCCTGGGAGAGAACGGGTGCGGCAAGTCGACGCTGATATATCAGCTCAACGGAGTATACAAGCCTGTTTCCGGTACTGTGTTCTATGGAGATATGCCCAT
+ATCATACGACAAGGAGTTCCTTACGGAGCTGCGTTCCGACGTTTCCGTTGTTCTGCAGAATCCAGACGATCAGATTTTCTCTTCCACCGTCGAGGAGGAC
+GTGGCATTTGGACCGTTAAACTTGGGCCTTTCCCGGGAAGAGGTTGGAGAAAGAATCGGCCGGTCTTTGGAATGCGTGGGGATGTCGGGATTTGCCGAAG
+TGCCTGTTCAGCGCCTTTCATACGGTCAGAAAAAAAGAGTGTCACTCGCAGGCGCCCTAGCATCACATCCAAAGATACTGGTCCTGGACGAGCCTACCGC
+AGGCCTGGACCCGCAGATGTCCAGAGAGGTGATGGAGATCGCAAATTCTCTCATCAGAGAAGGGATCTCCGTCGTAGTATCCACCCATGACGTGAATCTG
+ATCTACAATTGGGTCGAGGACCTTTACGTGATGCGGAACGGACACATGGTCTTCTCCGGAGATGCGGACGAGTTCTTCTCCGACCGTCCGTCCGTTTATC
+TTTCAGGTCTGGAACAGCCCTCGATATTCAGCATAAACCACAATATGGAGACGTTAAGAGGGACGATTCCCGCGTCATATCCCAAGACCATGAGCCAGAT
+GGTCAGCAGATTATTCCCTTCAGGATCCTCGGCCGGAAGGATATTCATCTATCAGACTGAAGGCGAGCGCATCGACCAGGATGCGATCGAGGAGGCCGTG
+GGAAAGAAAGGGATGCCCATTGCAGTATACGGCCCCTCCGCGCGCAGGTCGGTGACCCGATCGAAGCTCAGGGTTGATTTCTATTTTAACGGCATAGAGT
+GTTGCATCAGGGAGGCCATGGTAAACCATGATTCCCTGATAATAGTCGACCGGGGCTTGAAGGGGATCGTCACGGAGGCGATTGAAGAGCTTAGGGCATA
+CGGAACCCGGATCAGTATCAGGGAGTTGGTTTTTTGAGCGCTCCCCTTTTCCGCACCGAAGGTCTTTTCTTCAGATACGAAGGCGGCCGGGGGGACGCGT
+TGGCAGACGTGAACATCACGATCAAAGAGGGTGCCAGAACTGTCATCATGGGAGCCAACGGAGCTGGAAAATCCACGTTCTTCTATCATCTTAACGGAGT
+CTTGAGGCCGTCGAAGGGCTCGGTGTTTTTCCGGGGAGAAAAAATACCGCACAGGGGAAAAGCTCTCAGGAAGCTGCGCTCGGAGGTCGCGGTGATGCTC
+CAAGACCCCAACGACCAGCTTTTTGCACCAAAAGTATCTGACGACATAGCATTCGGCCCGAAGAACCTGGGACTCGACGCTCAGACTGTAGGGGAGAGGG
+TCAGGGACGCCCTCTACATCACAGGCATCGAATCTCTGGAGGGTCGCAGCGTGATGCAGCTGTCGTTCGGCCAGAAGAAGAGGGTGGTGCTGGCCGGTGC
+CTTGGCGATGCATCCGAAGGTGCTTATAATGGACGAGCCCACCGCAGGTCTCGATCCCCAGATGTCCAAGGAGCTCATCGAGCTCGCGGACGAGCTGCAC
+CATCTTGGAACGACCGTTATTTTTTCAACCCATGACGTGGACCTCTCATATTCTTGGGCGGACGAGGTCCATGTCCTAAGAGGGGGCCGTAATGTATATT
+CGGGGAGCTCAGAAAGATTCTATGACGATACTTCGGAAGTTTATCTTTCGGGCCTTGTCGAACCGGCCATGTACGACATCAACGTCAGCATCTCCGAGCT
+TGCCGGATGCCCCGTTGAACCGTTTCCCAAAACCCTGCCTCAGCTTGTGGCCAAGGCAGTGCCGTCAGAGGGGCCGGGCACGGTTCACATCCTTCCCGTG
+GAAGGTCCGGTCGACCGGGAGCTGTTCTCCTCTCTGACGTCCGGGTCCGGGATGTCCGCAACAGGCGTCTACGGTACTAATGCAAGAAAATCTGCGGAGG
+CTTCCAAATTGCCGATAGATTATTTCTTCGGGGCCGACGAGGGATGCATAATAGAGGCTTTGCACGGCAAAGACACGCTGATATGCTGCGACAGGTCCCT
+TACAGATCTGCTGATATCGAAGATAGGCAGTATGTCCCGGTTCGGGACAGAGGTCCCTTATTCTCTGCACTGAACATTTCTTTTTTCCGGGGGTTCGAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.names.txt	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,4 @@
+# contig	classification	reason	lineage	lineage scores	superkingdom	phylum	class	order	family	genus	species
+contig_38063	classified	based on 1/2 ORFs	1;131567;2;1783272;1239;91061;1385;186820;1637;1639	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Firmicutes: 1.00	Bacilli: 1.00	Bacillales: 1.00	Listeriaceae: 1.00	Listeria: 1.00	Listeria monocytogenes: 1.00
+contig_44250	classified	based on 1/2 ORFs	1;131567;2;1224;1236;135623;641;662;666	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Proteobacteria: 1.00	Gammaproteobacteria: 1.00	Vibrionales: 1.00	Vibrionaceae: 1.00	Vibrio: 1.00	Vibrio cholerae: 1.00
+contig_9952	classified	based on 1/5 ORFs	1;131567;2;1783272;1239;91061*	1.00;1.00;1.00;1.00;1.00;1.00	Bacteria: 1.00	Firmicutes: 1.00	Bacilli*: 1.00	not classified	not classified	not classified	not classified
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.txt	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,4 @@
+# contig	classification	reason	lineage	lineage scores
+contig_38063	classified	based on 1/2 ORFs	1;131567;2;1783272;1239;91061;1385;186820;1637;1639	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_44250	classified	based on 1/2 ORFs	1;131567;2;1224;1236;135623;641;662;666	1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_9952	classified	based on 1/5 ORFs	1;131567;2;1783272;1239;91061*	1.00;1.00;1.00;1.00;1.00;1.00
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/cat_database.loc.sample	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,7 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value	name	database_folder	taxonomy_folder
+#2019-07-19_CAT_database	2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database	/opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases: each entry pairs a CAT database folder with its taxonomy folder; lines starting with "#" in the .loc file are comments and duplicate entries are disallowed -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="tool-data/cat_database.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Dec 10 16:05:34 2019 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases (test configuration): same cat_database table layout, but entries are read from the .loc file kept under test-data/cached_locally -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="${__HERE__}/test-data/cached_locally/cat_database.loc" />
+    </table>
+</tables>