Mercurial > repos > jjohnson > contig_annotation_tool
changeset 0:ad7507073c3f draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
line wrap: on
line diff
<!-- cat_add_names.xml: Galaxy tool wrapper for "CAT add_names". -->
<tool id="cat_add_names" name="CAT add_names" version="@VERSION@.0">
    <description>annotate with taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## Annotate the classification file with names, then convert the
        ## result to the tabular history output.
        CAT add_names -i '$input'
        @CAT_TAXONOMY@
        @ADD_NAMES_OPTIONS@
        -o output_names.txt
        && @TXT2TSV@ -i output_names.txt -o '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular,txt" label="classification.txt or ORF2LCA.txt"/>
        <expand macro="cat_db" />
        <expand macro="add_names_options" />
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <test>
            <param name="input" ftype="tabular" value="test_contig.contig2classification.txt"/>
            <expand macro="test_catdb"/>
            <output name="output">
                <assert_contents>
                    <has_text text="Firmicutes" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**CAT/BAT add_names**
Add names for the NCBI taxonomy IDs.

@COMMON_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- cat_bins.xml: Galaxy tool wrapper for "CAT bin" (one MAG) and "CAT bins" (several MAGs). -->
<tool id="cat_bins" name="CAT bins" version="@VERSION@.0">
    <description>annotate with taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## With more than one MAG, link every dataset into a single directory
        ## and run "CAT bins" on it; with exactly one MAG run "CAT bin".
        #set $bin_dir = None
        #if len($mags) > 1:
            #set $bin_dir = 'inputs'
            mkdir -p $bin_dir &&
            #for $mag in $mags:
                ln -s '$mag' $bin_dir/ &&
            #end for
        #end if
        CAT
        #if $bin_dir
            bins -s '.dat' -b $bin_dir
        #else
            bin -b '$mags'
        #end if
        @CAT_DB@
        @USE_INTERMEDIATES@
        @CUSTOM_SETTINGS@
        && @TXT2TSV@ *.ORF2LCA.txt *.bin2classification.txt
        @ADD_NAMES@
        @SUMMARISE@
    ]]></command>
    <inputs>
        <param name="mags" type="data" format="fasta" multiple="true" label="metagenome assembled genomes (MAGs/bins)"/>
        <expand macro="cat_db" />
        <expand macro="use_intermediates" />
        <expand macro="custom_settings" />
        <expand macro="add_names" />
        <expand macro="summarise" />
        <expand macro="select_bat_outputs" />
    </inputs>
    <outputs>
        <expand macro="outputs" />
    </outputs>
    <tests>
        <!-- Single MAG: exercises the "CAT bin" branch. -->
        <test>
            <param name="mags" ftype="fasta" value="genome3.fna"/>
            <expand macro="test_catdb"/>
            <param name="select_outputs" value="bin2classification"/>
            <output name="bin2classification">
                <assert_contents>
                    <has_text text="1639" />
                </assert_contents>
            </output>
        </test>
        <!-- Two MAGs: exercises the "CAT bins" branch. -->
        <test>
            <param name="mags" ftype="fasta" value="genome2.fna,genome3.fna"/>
            <expand macro="test_catdb"/>
            <param name="select_outputs" value="bin2classification"/>
            <output name="bin2classification">
                <assert_contents>
                    <has_text text="666" />
                    <has_text text="1639" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**CAT bin or bins**
Classify metagenomics assembled genomes.

@COMMON_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- cat_contigs.xml: Galaxy tool wrapper for "CAT contigs". -->
<tool id="cat_contigs" name="CAT contigs" version="@VERSION@.0">
    <description>annotate with taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## Classify the contigs, convert the .txt reports to .tsv, then run the
        ## optional add_names / summarise steps.
        CAT contigs -c '$contigs_fasta'
        @CAT_DB@
        @USE_INTERMEDIATES@
        @CUSTOM_SETTINGS@
        && @TXT2TSV@ *.ORF2LCA.txt *.contig2classification.txt
        @ADD_NAMES@
        @SUMMARISE@
    ]]></command>
    <inputs>
        <param name="contigs_fasta" type="data" format="fasta" label="Contigs fasta"/>
        <expand macro="cat_db" />
        <expand macro="use_intermediates" />
        <expand macro="custom_settings" />
        <expand macro="add_names" />
        <expand macro="summarise" />
        <expand macro="select_cat_outputs" />
    </inputs>
    <outputs>
        <expand macro="outputs" />
    </outputs>
    <tests>
        <test>
            <param name="contigs_fasta" ftype="fasta" value="contigs.fasta"/>
            <expand macro="test_catdb"/>
            <param name="select_outputs" value="contig2classification"/>
            <output name="contig2classification">
                <assert_contents>
                    <has_text text="contig_38063" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**CAT contigs**
Classify metagenomics contigs.

@COMMON_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- cat_prepare.xml: Galaxy tool wrapper for "CAT prepare". -->
<tool id="cat_prepare" name="CAT prepare" version="@VERSION@.0">
    <description>database for CAT - Contig Annotation Tool</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        ## The database and taxonomy folders are written below the output
        ## dataset's files directory; the dataset itself holds one summary line.
        #import os.path
        #set $catdb = $cat_db.files_path
        mkdir -p '$catdb' &&
        echo CAT_DB `date '+%Y-%m-%d'` "@DATABASE_FOLDER@" "@TAXONOMY_FOLDER@" > '$cat_db' &&
        CAT prepare --fresh
        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
    ]]></command>
    <inputs>
    </inputs>
    <outputs>
        <data name="cat_db" format="cat_db" />
    </outputs>
    <help><![CDATA[
**CAT prepare**
Prepare CAT reference data for classifying metagenomic contigs or genome assemblies.

@COMMON_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- cat_summarise.xml: Galaxy tool wrapper for "CAT summarise". -->
<tool id="cat_summarise" name="CAT summarise" version="@VERSION@.0">
    <description>annotate with taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
        CAT summarise
        ## contigs_fasta is optional: only pass -c when a dataset was supplied,
        ## otherwise the command line would contain -c 'None'.
        #if $contigs_fasta
            -c '$contigs_fasta'
        #end if
        -i '$input'
        -o output_names_summary.txt
        && @TXT2TSV@ -i output_names_summary.txt -o '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="classification.official_names.txt"
               help="The classification names must have been added with the only_official option"/>
        <param name="contigs_fasta" type="data" format="fasta" optional="true" label="contigs.fasta"
               help="Required if the input is a contig2classification.names file"/>
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <test>
            <param name="input" ftype="tabular" value="test_contig.contig2classification.names.txt"/>
            <param name="contigs_fasta" ftype="fasta" value="contigs.fasta"/>
            <output name="output">
                <assert_contents>
                    <has_text text="Firmicutes" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**CAT summarise**
Produce a summary report of assignments to the official taxonomic names.

@COMMON_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>
<?xml version="1.0"?>
<!-- datatypes_conf.xml: registers the cat_db extension as a subclass of Galaxy's Text datatype. -->
<datatypes>
    <registration>
        <datatype
            extension="cat_db"
            type="galaxy.datatypes.data:Text"
            subclass="true"/>
    </registration>
</datatypes>
<!-- macros.xml: tokens and XML macros shared by the CAT/BAT Galaxy tool wrappers. -->
<macros>
    <token name="@VERSION@">5.0.3</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@VERSION@">cat</requirement>
            <yield/>
        </requirements>
    </xml>
    <xml name="version_command">
        <version_command>CAT --version</version_command>
    </xml>
    <token name="@DATABASE_FOLDER@">CAT_database</token>
    <token name="@TAXONOMY_FOLDER@">taxonomy</token>

    <!-- Database selection: a cached data-table entry or a history dataset. -->
    <xml name="cat_db">
        <conditional name="db">
            <param name="db_src" type="select" label="CAT database from">
                <option value="cached">local cached database</option>
                <option value="history">history</option>
            </param>
            <when value="cached">
                <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator">
                    <options from_data_table="cat_databases">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No CAT database is available." />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/>
            </when>
        </conditional>
    </xml>
    <token name="@CAT_DB@"><![CDATA[
    #if $db.db_src == 'cached':
        --database_folder '$db.cat_builtin.fields.database_folder'
        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
    #else
        #import os.path
        #set $catdb = $db.cat_db.extra_files_path
        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
    #end if
]]></token>
    <token name="@CAT_TAXONOMY@"><![CDATA[
    #if $db.db_src == 'cached':
        --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder'
    #else
        #import os.path
        #set $catdb = $db.cat_db.extra_files_path
        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
    #end if
]]></token>
    <xml name="test_catdb">
        <conditional name="db">
            <param name="db_src" value="cached"/>
            <param name="cat_builtin" value="CAT_prepare_test"/>
        </conditional>
    </xml>

    <xml name="use_intermediates">
        <conditional name="previous">
            <param name="use_previous" type="select" label="Use previous gene prediction and diamond alignment">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param argument="--proteins_fasta" type="data" format="fasta" label="predicted proteins fasta"/>
                <!-- tabular, matching the alignment_diamond output declared in the outputs macro below -->
                <param argument="--diamond_alignment" type="data" format="tabular" label="alignments file"/>
            </when>
        </conditional>
    </xml>
    <token name="@USE_INTERMEDIATES@"><![CDATA[
    #if $previous.use_previous == 'yes'
        --proteins_fasta '$previous.proteins_fasta'
        --diamond_alignment '$previous.diamond_alignment'
    #end if
    --out_prefix 'cat_output'
]]></token>

    <xml name="custom_settings">
        <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/>
        <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/>
    </xml>
    <token name="@CUSTOM_SETTINGS@"><![CDATA[
    --range $range
    --fraction $fraction
]]></token>
    <xml name="add_names_options">
        <param argument="--only_official" type="boolean" truevalue="--only_official" falsevalue="" checked="true"
               label="Only output official level names."/>
        <param argument="--exclude_scores" type="boolean" truevalue="--exclude_scores" falsevalue="" checked="false"
               label="Exclude bit-score support scores in the lineage."/>
    </xml>
    <token name="@ADD_NAMES_OPTIONS@"><![CDATA[
    $only_official $exclude_scores
]]></token>
    <xml name="add_names">
        <conditional name="names">
            <param name="add_names" type="select" label="add_names">
                <option value="no">No</option>
                <option value="orf2lca">ORF2LCA.txt</option>
                <option value="classification">classification.txt</option>
                <option value="both">ORF2LCA.txt and classification.txt</option>
            </param>
            <when value="no"/>
            <when value="orf2lca">
                <expand macro="add_names_options"/>
            </when>
            <when value="classification">
                <expand macro="add_names_options"/>
            </when>
            <when value="both">
                <expand macro="add_names_options"/>
            </when>
        </conditional>
    </xml>
    <token name="@ADD_NAMES@"><![CDATA[
    #if $names.add_names in ['classification','both']:
        && CAT add_names $names.only_official $names.exclude_scores
        @CAT_TAXONOMY@
        #if $bcat == 'CAT'
            -i cat_output.contig2classification.tsv
        #else
            -i cat_output.bin2classification.tsv
        #end if
        -o classification_names.txt
        && @TXT2TSV@ -i classification_names.txt -o '$classification_names'
    #end if
    #if $names.add_names in ['orf2lca','both']:
        && CAT add_names $names.only_official $names.exclude_scores
        @CAT_TAXONOMY@
        -i cat_output.ORF2LCA.tsv
        -o orf2lca_names.txt
        && @TXT2TSV@ -i orf2lca_names.txt -o '$orf2lca_names'
    #end if
]]></token>
    <xml name="summarise">
        <param name="summarise" type="select" label="summarise">
            <option value="no">No</option>
            <option value="classification">classification.txt</option>
        </param>
    </xml>
    <token name="@SUMMARISE@"><![CDATA[
    #if $summarise in ['classification']:
        #if $names.add_names in ['classification','both'] and $names.only_official:
            #set $summary_input = $classification_names
        #else
            #set $summary_input = 'classification_official_names.txt'
            && CAT add_names --only_official
            @CAT_TAXONOMY@
            #if $bcat == 'CAT'
                -i cat_output.contig2classification.tsv
            #else
                -i cat_output.bin2classification.tsv
            #end if
            -o '$summary_input'
        #end if
        && CAT summarise
        #if $bcat == 'CAT'
            -c '$contigs_fasta'
        #end if
        -i '$summary_input'
        -o classification_summary.txt
        && @TXT2TSV@ -i classification_summary.txt -o '$classification_summary'
    #end if
]]></token>

    <xml name="select_outputs">
        <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs">
            <option value="log" selected="true">CAT.log</option>
            <option value="predicted_proteins_faa" selected="true">predicted_proteins.faa</option>
            <option value="predicted_proteins_gff">predicted_proteins.gff</option>
            <option value="alignment_diamond">alignment.diamond</option>
            <option value="orf2lca" selected="true">ORF2LCA.txt</option>
            <yield/>
        </param>
    </xml>
    <xml name="select_cat_outputs">
        <param name="bcat" type="hidden" value="CAT"/>
        <param name="seqtype" type="hidden" value="contig"/>
        <expand macro="select_outputs">
            <option value="contig2classification" selected="true">contig2classification.txt</option>
        </expand>
    </xml>
    <xml name="select_bat_outputs">
        <param name="bcat" type="hidden" value="BAT"/>
        <param name="seqtype" type="hidden" value="bin"/>
        <expand macro="select_outputs">
            <option value="bin2classification" selected="true">bin2classification.txt</option>
        </expand>
    </xml>

    <!-- Run the padding script through the interpreter so it does not depend on the file's executable bit. -->
    <token name="@TXT2TSV@"><![CDATA[
    python '$__tool_directory__/tabpad.py'
]]></token>
    <xml name="outputs">
        <data name="log" format="txt" label="${bcat}.log" from_work_dir="cat_output.log">
            <filter>'log' in select_outputs or not select_outputs</filter>
        </data>
        <data name="predicted_proteins_faa" format="fasta" label="${bcat}.predicted_proteins.faa" from_work_dir="cat_output.predicted_proteins.faa">
            <filter>'predicted_proteins_faa' in select_outputs</filter>
        </data>
        <data name="predicted_proteins_gff" format="gff" label="${bcat}.predicted_proteins.gff" from_work_dir="cat_output.predicted_proteins.gff">
            <filter>'predicted_proteins_gff' in select_outputs</filter>
        </data>
        <data name="alignment_diamond" format="tabular" label="${bcat}.alignment.diamond" from_work_dir="cat_output.alignment.diamond">
            <filter>'alignment_diamond' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
            </actions>
        </data>
        <data name="orf2lca" format="tabular" label="${bcat}.ORF2LCA.txt" from_work_dir="cat_output.ORF2LCA.tsv">
            <filter>'orf2lca' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="ORF,lineage,bit-score" />
            </actions>
        </data>
        <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv">
            <filter>'contig2classification' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" />
            </actions>
        </data>
        <data name="bin2classification" format="tabular" label="${bcat}.bin2classification.txt" from_work_dir="cat_output.bin2classification.tsv">
            <filter>'bin2classification' in select_outputs</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" />
            </actions>
        </data>
        <!-- The two names outputs must also be produced when add_names is "both". -->
        <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt">
            <filter>names['add_names'] in ('orf2lca', 'both')</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="ORF,lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" />
            </actions>
        </data>
        <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt">
            <filter>names['add_names'] in ('classification', 'both')</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" />
            </actions>
        </data>
        <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt">
            <filter>summarise == 'classification'</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="rank,clade,number of contigs,number of ORFs,number of positions" />
            </actions>
        </data>
    </xml>
    <token name="@COMMON_HELP@"><![CDATA[
The CAT/BAT workflow is described at: https://github.com/dutilh/CAT
]]></token>
    <xml name="citations">
        <citations>
            <!-- type="doi" takes the bare DOI, not a resolver URL -->
            <citation type="doi">10.1101/072868</citation>
            <citation type="doi">10.1186/s13059-019-1817-x</citation>
            <yield />
        </citations>
    </xml>
</macros>
#!/usr/bin/env python
"""tabpad.py: pad tab-separated text files so every line has the same number of fields."""

import argparse
import re


def padfile(infile, outfile, fieldcnt=None):
    """Copy ``infile`` to ``outfile``, right-padding each line with TABs.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (created or truncated).
        fieldcnt: number of tab-separated fields every output line should
            have.  When it is ``None`` (or 0) the field count of the first
            line read is used instead.

    Line endings (``\\r``/``\\n``) are stripped on input and every output line
    is terminated with a single ``\\n``.  Lines that already have at least the
    target number of fields are written unchanged.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None
    # Both handles live in one ``with`` so the output file is closed even when
    # reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        for txtline in fh:
            line = txtline.rstrip('\r\n')
            fields = line.split('\t')
            if not tabs:
                tabs = '\t' * len(fields)
            # A line with k fields already holds k-1 TABs; slicing off the
            # first k characters of the pad leaves exactly the missing ones.
            out.write('%s%s\n' % (line, tabs[len(fields):]))


def fieldcount(infile):
    """Return the largest number of tab-separated fields on any line of ``infile``.

    An empty file yields 0.
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            fieldcnt = max(fieldcnt, len(line.rstrip('\r\n').split('\t')))
    return fieldcnt


def tsvname(infile):
    """Derive the output name: a trailing ``.txt`` is replaced by ``.tsv``.

    Names that do not end in ``.txt`` simply get ``.tsv`` appended.
    """
    # Raw string: '\.' in a normal string literal is an invalid escape.
    return re.sub(r'\.txt$', '', infile) + '.tsv'


def __main__():
    """Command-line entry point.

    ``-i/--input`` (with optional ``-o/--output``) converts one file; every
    positional file is converted to a sibling named by :func:`tsvname`.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument(
        '-i', '--input', help='input file')
    parser.add_argument(
        '-o', '--output', help='output file')
    parser.add_argument(
        'files', nargs='*', help='.txt files')
    args = parser.parse_args()

    if args.input:
        outfile = args.output if args.output else tsvname(args.input)
        fieldcnt = fieldcount(args.input)
        padfile(args.input, outfile, fieldcnt=fieldcnt)
    for infile in args.files:
        outfile = tsvname(infile)
        fieldcnt = fieldcount(infile)
        padfile(infile, outfile, fieldcnt=fieldcnt)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/protIDs Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,11 @@ +WP_000159554.1 +WP_000214552.1 +WP_000346214.1 +WP_000568619.1 +WP_000958804.1 +WP_000991933.1 +WP_000996146.1 +WP_003722398.1 +WP_005378126.1 +XP_961517.1 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,10 @@ +WP_000159554.1 2 +WP_000214552.1 91061 +WP_000346214.1 91061 +WP_000568619.1 666 +WP_000958804.1 1301 +WP_000991933.1 666 +WP_000996146.1 666 +WP_003722398.1 1639 +WP_005378126.1 662 +XP_961517.1 5141
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,17 @@ +91061 +1 +641 +1224 +1236 +131567 +1637 +1639 +1783272 +2 +662 +13562 +13562 +641 +662 +666 +91061
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,252 @@ +1 | all | | synonym | +1 | root | | scientific name | +2 | Bacteria | Bacteria <prokaryotes> | scientific name | +2 | Monera | Monera <Bacteria> | in-part | +2 | Procaryotae | Procaryotae <Bacteria> | in-part | +2 | Prokaryota | Prokaryota <Bacteria> | in-part | +2 | Prokaryotae | Prokaryotae <Bacteria> | in-part | +2 | bacteria | bacteria <blast2> | blast name | +2 | eubacteria | | genbank common name | +2 | prokaryote | prokaryote <Bacteria> | in-part | +2 | prokaryotes | prokaryotes <Bacteria> | in-part | +641 | Vibrionaceae | | scientific name | +641 | Vibrionaceae Veron 1965 | | authority | +641 | gamma-3 proteobacteria | gamma-3 proteobacteria <#3> | in-part | +662 | "Microspira" Schroeter 1886 | | authority | +662 | "Pacinia" Trevisan 1885 | | authority | +662 | Beneckea | | synonym | +662 | Beneckea Campbell 1957 | | authority | +662 | Listonella | | synonym | +662 | Listonella MacDonell and Colwell 1986 | | authority | +662 | Microspira | | synonym | +662 | Pacinia | | synonym | +662 | Vibrio | | scientific name | +662 | Vibrio Pacini 1854 | | authority | +666 | "Bacillo virgola del Koch" Trevisan 1884 | | authority | +666 | "Bacillus cholerae" (Pacini 1854) Trevisan 1884 | | authority | +666 | "Bacillus cholerae-asiaticae" Trevisan 1884 | | authority | +666 | "Kommabacillus" Koch 1884 | | authority | +666 | "Liquidivibrio cholerae" (Pacini 1854) Orla-Jensen 1909 | | authority | +666 | "Microspira comma" Schroeter 1886 | | authority | +666 | "Pacinia cholerae-asiaticae" (Trevisan 1884) Trevisan 1885 | | authority | +666 | "Spirillum cholerae" (Pacini 1854) Mac1889 | | authority | +666 | "Spirillum cholerae-asiaticae" (Trevisan 1884) Zopf 1885 | | authority | +666 | "Vibrio cholera" (sic) Pacini 1854 | | authority | +666 | "Vibrio cholerae-asiaticae" (Trevisan 1884) Pfeiffer 1896 | | authority | 
+666 | "Vibrio comma" (Schroeter 1886) Blanchard 1906 | | authority | +666 | ATCC 14035 | ATCC 14035 <type strain> | type material | +666 | ATCC 14547 [[Vibrio albensis]] | ATCC 14547 [[Vibrio albensis]] <type strain> | type material | +666 | Bacillo virgola del Koch | | synonym | +666 | Bacillus cholerae | | synonym | +666 | Bacillus cholerae-asiaticae | | synonym | +666 | CCUG 48664 [[Vibrio albensis]] | CCUG 48664 [[Vibrio albensis]] <type strain> | type material | +666 | CCUG 9118 A | CCUG 9118 A <type strain> | type material | +666 | CECT 514 | CECT 514 <type strain> | type material | +666 | CIP 62.13 | CIP 62.13 <type strain> | type material | +666 | Kommabacillus | | synonym | +666 | LMG 4406 [[Vibrio albensis]] | LMG 4406 [[Vibrio albensis]] <type strain> | type material | +666 | LMG:4406 [[Vibrio albensis]] | LMG:4406 [[Vibrio albensis]] <type strain> | type material | +666 | Liquidivibrio cholerae | | synonym | +666 | Microspira comma | | synonym | +666 | NCIMB 41 [[Vibrio albensis]] | NCIMB 41 [[Vibrio albensis]] <type strain> | type material | +666 | NCTC 8021 | NCTC 8021 <type strain> | type material | +666 | Pacinia cholerae-asiaticae | | synonym | +666 | Spirillum cholerae | | synonym | +666 | Spirillum cholerae-asiaticae | | synonym | +666 | Vibrio albensis | | synonym | +666 | Vibrio albensis Lehmann and Neumann 1896 | | authority | +666 | Vibrio cholera | | synonym | +666 | Vibrio cholerae | | scientific name | +666 | Vibrio cholerae Pacini 1854 | | authority | +666 | Vibrio cholerae biovar albensis | | synonym | +666 | Vibrio cholerae bv. albensis | | synonym | +666 | Vibrio cholerae-asiaticae | | synonym | +666 | Vibrio comma | | synonym | +1224 | Alphaproteobacteraeota | | synonym | +1224 | Alphaproteobacteraeota Oren et al. 2015 | | authority | +1224 | Alphaproteobacteriota | | synonym | +1224 | Proteobacteria | | scientific name | +1224 | Proteobacteria Garrity et al. 2005 | | authority | +1224 | Proteobacteria [class] Stackebrandt et al. 
1988 | | authority | +1224 | proteobacteria | proteobacteria <blast1224> | blast name | +1224 | purple bacteria | | common name | +1224 | purple bacteria and relatives | | common name | +1224 | purple non-sulfur bacteria | | common name | +1224 | purple photosynthetic bacteria | | common name | +1224 | purple photosynthetic bacteria and relatives | | common name | +1236 | Gammaproteobacteria | | scientific name | +1236 | Gammaproteobacteria Garrity et al. 2005 emend. Williams and Kelly 2013 | | authority | +1236 | Proteobacteria gamma subdivision | | synonym | +1236 | Purple bacteria, gamma subdivision | | synonym | +1236 | g-proteobacteria | | blast name | +1236 | gamma proteobacteria | | synonym | +1236 | gamma subdivision | | synonym | +1236 | gamma subgroup | | synonym | +1239 | Bacillaeota | | synonym | +1239 | Bacillaeota Oren et al. 2015 | | authority | +1239 | Bacillota | | synonym | +1239 | Bacillus/Clostridium group | | synonym | +1239 | Clostridium group firmicutes | | synonym | +1239 | Firmacutes | | synonym | +1239 | Firmicutes | | scientific name | +1239 | Firmicutes corrig. Gibbons and Murray 1978 | | authority | +1239 | Low G+C firmicutes | | synonym | +1239 | clostridial firmicutes | | synonym | +1239 | firmicutes | firmicutes <blast1239> | blast name | +1239 | low G+C Gram-positive bacteria | | common name | +1239 | low GC Gram+ | | common name | +1385 | Bacillales | | scientific name | +1385 | Bacillales Prevot 1953 | | authority | +1385 | Bacillus/Staphylococcus group | | synonym | +1637 | "Listerella" Pirie 1927 (nom. rej. Opin. 14) | | authority | +1637 | Listerella | | synonym | +1637 | Listeria | | scientific name | +1637 | Listeria Pirie 1940 | | authority | +1639 | "Bacterium monocytogenes hominis" Nyfeldt 1932 | | authority | +1639 | "Bacterium monocytogenes" Murray et al. 1926 | | authority | +1639 | "Corynebacterium infantisepticum" Potel 1950 | | authority | +1639 | "Corynebacterium parvulum" Schultz et al. 
1934 | | authority | +1639 | "Erysipelothrix monocytogenes" (Murray et al. 1926) Wilson and Miles 1946 | | authority | +1639 | "Listerella hepatolytica" Pirie 1927 | | authority | +1639 | ATCC 15313 | ATCC 15313 <type strain> | type material | +1639 | Bacterium monocytogenes | | synonym | +1639 | Bacterium monocytogenes hominis | | synonym | +1639 | CCUG 15526 | CCUG 15526 <type strain> | type material | +1639 | CIP 82.110 | CIP 82.110 <type strain> | type material | +1639 | Corynebacterium infantisepticum | | synonym | +1639 | Corynebacterium parvulum | | synonym | +1639 | DSM 20600 | DSM 20600 <type strain> | type material | +1639 | Erysipelothrix monocytogenes | | synonym | +1639 | Listerella hepatolytica | | synonym | +1639 | Listeria monocytogenes | | scientific name | +1639 | Listeria monocytogenes (Murray et al. 1926) Pirie 1940 | | authority | +1639 | Listeria sp. FDA00013359 | | includes | +1639 | Listeria sp. FDA00013360 | | includes | +1639 | Listeria sp. FDA00013361 | | includes | +1639 | Listeria sp. FDA00013362 | | includes | +1639 | Listeria sp. FDA00013363 | | includes | +1639 | Listeria sp. FDA00013364 | | includes | +1639 | Listeria sp. FDA00013365 | | includes | +1639 | Listeria sp. FDA00013366 | | includes | +1639 | Listeria sp. FDA00013367 | | includes | +1639 | Listeria sp. FDA00013503 | | includes | +1639 | Listeria sp. FDA00013504 | | includes | +1639 | Listeria sp. FDA00013505 | | includes | +1639 | Listeria sp. FDA00013506 | | includes | +1639 | Listeria sp. FDA00013507 | | includes | +1639 | Listeria sp. FDA00013508 | | includes | +1639 | Listeria sp. FDA00013509 | | includes | +1639 | Listeria sp. FDA00013510 | | includes | +1639 | Listeria sp. FDA00013511 | | includes | +1639 | Listeria sp. FDA00013512 | | includes | +1639 | Listeria sp. FDA00013536 | | includes | +1639 | Listeria sp. FDA00013537 | | includes | +1639 | Listeria sp. FDA00013538 | | includes | +1639 | Listeria sp. FDA00013539 | | includes | +1639 | Listeria sp. 
FDA00013540 | | includes | +1639 | Listeria sp. FDA00013541 | | includes | +1639 | Listeria sp. FDA00013542 | | includes | +1639 | Listeria sp. FDA00013543 | | includes | +1639 | Listeria sp. FDA00013544 | | includes | +1639 | Listeria sp. FDA00013545 | | includes | +1639 | Listeria sp. FDA00013546 | | includes | +1639 | Listeria sp. FDA00013547 | | includes | +1639 | Listeria sp. FDA00013548 | | includes | +1639 | Listeria sp. FDA00013549 | | includes | +1639 | Listeria sp. FDA00013550 | | includes | +1639 | Listeria sp. FDA00013551 | | includes | +1639 | Listeria sp. FDA00013552 | | includes | +1639 | Listeria sp. FDA00013553 | | includes | +1639 | Listeria sp. FDA00013554 | | includes | +1639 | Listeria sp. FDA00013555 | | includes | +1639 | Listeria sp. FDA00013556 | | includes | +1639 | Listeria sp. FDA00013557 | | includes | +1639 | Listeria sp. FDA00013558 | | includes | +1639 | Listeria sp. FDA00013559 | | includes | +1639 | Listeria sp. FDA00013560 | | includes | +1639 | Listeria sp. FDA00013561 | | includes | +1639 | Listeria sp. FDA00013562 | | includes | +1639 | Listeria sp. FDA00013563 | | includes | +1639 | Listeria sp. FDA00013564 | | includes | +1639 | Listeria sp. FDA00013565 | | includes | +1639 | Listeria sp. FDA00013566 | | includes | +1639 | Listeria sp. FDA00013567 | | includes | +1639 | Listeria sp. FDA00013568 | | includes | +1639 | Listeria sp. FDA00013570 | | includes | +1639 | Listeria sp. FDA00013571 | | includes | +1639 | Listeria sp. FDA00013572 | | includes | +1639 | Listeria sp. FDA00013573 | | includes | +1639 | Listeria sp. FDA00013574 | | includes | +1639 | Listeria sp. FDA00013575 | | includes | +1639 | Listeria sp. FDA00013576 | | includes | +1639 | Listeria sp. FDA00013577 | | includes | +1639 | Listeria sp. FDA00013578 | | includes | +1639 | Listeria sp. FDA00013579 | | includes | +1639 | Listeria sp. 
FDA00013607 | | includes | +1639 | NCTC 10357 | NCTC 10357 <type strain> | type material | +1639 | SLCC 53 | SLCC 53 <type strain> | type material | +2157 | "Archaea" Woese et al. 1990 | | authority | +2157 | "Archaebacteria" (sic) Woese and Fox 1977 | | authority | +2157 | Archaea | | scientific name | +2157 | Archaebacteria | | synonym | +2157 | Mendosicutes | | synonym | +2157 | Metabacteria | | synonym | +2157 | Monera | Monera <Archaea> | in-part | +2157 | Procaryotae | Procaryotae <Archaea> | in-part | +2157 | Prokaryota | Prokaryota <Archaea> | in-part | +2157 | Prokaryotae | Prokaryotae <Archaea> | in-part | +2157 | archaea | archaea <blast2157> | blast name | +2157 | prokaryote | prokaryote <Archaea> | in-part | +2157 | prokaryotes | prokaryotes <Archaea> | in-part | +2158 | Methanobacteriales | | scientific name | +2158 | Methanobacteriales Balch and Wolfe 1981 | | authority | +2159 | Methanobacteriaceae | | scientific name | +2159 | Methanobacteriaceae Barker 1956 | | authority | +2172 | Methanobrevibacter | | scientific name | +2172 | Methanobrevibacter Balch and Wolfe 1981 | | authority | +28890 | "Euryarchaeota" Woese et al. 1990 | | authority | +28890 | Euryarchaeota | | scientific name | +28890 | Euryarchaeota Garrity and Holt 2002 | | authority | +28890 | Methanobacteraeota | | synonym | +28890 | Methanobacteraeota Oren et al. 
2015 | | authority | +28890 | Methanobacteriota | | synonym | +28890 | euryarchaeotes | euryarchaeotes <blast28890> | blast name | +83816 | ATCC 35063 | ATCC 35063 <type strain> | type material | +83816 | DSM 1093 | DSM 1093 <type strain> | type material | +83816 | JCM 13430 | JCM 13430 <type strain> | type material | +83816 | Methanobacterium ruminantium | | synonym | +83816 | Methanobacterium ruminantium Smith and Hungate 1958 (Approved Lists 1980) | | authority | +83816 | Methanobrevibacter ruminantium | | scientific name | +83816 | Methanobrevibacter ruminantium (Smith and Hungate 1958) Balch and Wolfe 1981 | | authority | +83816 | OCM 146 | OCM 146 <type strain> | type material | +83816 | strain M1 | strain M1 <type strain> <taxid 83816> | type material | +91061 | Bacilli | | scientific name | +91061 | Bacilli Ludwig et al. 2010 | | authority | +91061 | Bacillus/Lactobacillus/Streptococcus group | | synonym | +91061 | Firmibacteria | | synonym | +91061 | Firmibacteria Murray 1988 | | authority | +131567 | biota | | synonym | +131567 | cellular organisms | | scientific name | +135623 | 'Vibrionales' | | synonym | +135623 | Vibrionaceae group | | synonym | +135623 | Vibrionales | | scientific name | +183925 | Archaeobacteria | | synonym | +183925 | Archaeobacteria Murray 1988 | | authority | +183925 | Methanobacteria | | scientific name | +183925 | Methanobacteria Boone 2002 | | authority | +183967 | Thermoplasmata | | scientific name | +183967 | Thermoplasmata Reysenbach 2002 | | authority | +186820 | Listeriaceae | | scientific name | +186820 | Listeriaceae Ludwig et al. 2010 | | authority | +1235850 | "Methanoplasmatales" Paul et al. 2012 | | authority | +1235850 | Methanomassiliicoccales | | scientific name | +1235850 | Methanomassiliicoccales Iino et al. 2013 | | authority | +1235850 | Methanoplasmatales | | synonym | +1783272 | Terrabacteria group | | scientific name | +2283794 | "Methanomada" Petitjean et al. 
2015 | | authority | +2283794 | Methanogen Class I | | synonym | +2283794 | Methanomada | | equivalent name | +2283794 | Methanomada group | | scientific name | +2283796 | Diaforarchaea | | equivalent name | +2283796 | Diaforarchaea Petijean et al. 2015 | | authority | +2283796 | Diaforarchaea group | | scientific name |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,27 @@ +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +641 | 135623 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +662 | 641 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +666 | 662 | species | VC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1239 | 1783272 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1385 | 91061 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1637 | 186820 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1639 | 1637 | species | LM | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +2157 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +2158 | 183925 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2159 | 2158 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2172 | 2159 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +28890 | 2157 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +83816 | 2172 | species | MR | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +91061 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +135623 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +183925 | 2283794 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +183967 | 2283796 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +186820 | 1385 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1235850 | 183967 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1783272 | 2 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +2283794 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2283796 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
Binary file test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/cat_database.loc Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,8 @@ +## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz +# ls CAT_prepare_20190719/ +# 2019-07-19.CAT_prepare.fresh.log +# 2019-07-19_CAT_database +# 2019-07-19_taxonomy +#value name database_folder taxonomy_folder +#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy +CAT_database_test CAT_database_test ${__HERE__}/CAT_prepare_test/CAT_database ${__HERE__}/CAT_prepare_test/taxonomy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/contigs.fasta Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,94 @@ + +>contig_9952 +TGGTTATGTACGCAGACAGCTTACTCCTCCTCGGAGACCACTCGCGGTACTCTATTCCCTCCACGGTTGCGAGGCGCTCGCCGTAGATCCGCTTCCCGGG +ACAGGCAGACAGGGTGTATAGCCTCCCCCTCTCGGAAAAAACCCCGGGCACGCGGTCCAAAGACTCCATGCCCGTAACAACGCCGTGGTTTTAGAAATAA +TCTGTGCCGTCGGTTGCAAACCCTAAATACAGGGGGATATCAATGCGGTTGCATGGATATCCACATCCTTCGTGAGATCGCCGACGCAGTTCAGGCGGCG +GTATCTCTCATACCCGACCCCTGCAGCAGGGGCAACGAGATATGCATGGGCAACGACGGCACACCCACATCCGAGATAGACAAAGTGGCTGAGAACGCGG +TGCTCGGGTACATAGAGTCCAACCGCCTGGCTCTGAACGTGCTCAGCGAAGAGATAGGCTTCGTGGACAACGGCGCGTCGGAGGTTCTGGTCCTCGATCC +CATCGATGGGACAAGCAATTCAGTGGCCGAGATACCTTTCTATACGATATCCATGGCCGTCGGCAAGGATTCGCTCTGCGGCATGCACACGGCCTACATC +AGGAACCTGGCGACAGGGGACGAGTTCTGGGCGCACAAAGGGGATGGCGCTTATTACAACGGAAGGAGGATAAACGTCAGGAAGCCGGATTTCTCCAAAC +TGTTCGCCCTTATATACATGGGGAACGCCGCTGTCGATGAAGCGTTCGCCCTTGCAAAGAACGTCAAGACCTCCCGCTCCATGGGCTGCGCCTCCCTTGA +GATGACGCTCGTGGCACTAGGACACGCCGATATCTATTACATGAACACCTACCGTTACAACCGTGCCGTCAGGACTGTGGACATCGCCGCCAGCGCCCTG +ATACTCAGGGAGGCGGGCGGCGAGATATTCGATATCGGCGGCAACAAGCTGGATATGCCGCTGGACAACGCTTACCACGCAAGCTTCGTGGCGTGCTCCT +GCAAAGAGGTATTCGACCACATCATGAGGGCCCACATCGAGGAGCACGGCGCTACGCGTTACGGGATATACGCCAACGAGACCGTCCCCGGGGCGGCCGA +GTATGTGAGGAGGGCGTACGATGCTTTGAGGGGGGAGAAGGTAACCCTCGACACGGCGGCCGCCAGGCTGATCGGGGCGGAAGGCGTGCCTATTTCGGAG +ATCGAGGCGGACATCGTCGTGGTGATAGGAGGGGACGGCACGATACTCAGGGCGCTCAAGAAGACGGATGCCGCCGTGATAGGGATCAACGCCGGAGGCG +TGGGGTTCCTGGCCGAGGTCGAGCCGGACGAGATAGAGGAGAGCATATCCCGCATCAGGCGCGGAGAGTACTCGGTTGAGGAGAGGATCAAGCTCAGGAC +TTTTTACGAGGGGGAATATCTCTCGGAGGCCGTGAACGAGACAGTGATACACACTGATTCCGTGGCGAAGATCAGGCAGTTCAAGATATATGTCAACGAA +CACCTGGCAACGGAGGTCCGCGCGGACGGCATAATCATCTCGACGCCCACAGGCTCCACCTGCTACGCCATGAGCCTCGGCGCGCCCATAACCGACCCGG +GGGTCGGAGCTTTCCTGATAGTCCCCATGGCGGCGTTCAAGTTCGCTTCCCGTCCGTTCGTCGTTCCCTATACGGCGAAGATAACCGTCGAGGCGGTCAT +GGACAAGGGCTGCCTCATCGTGGTGGACGGCCAGCACGAGTACCCGATGAGGGGAGGGACGCGGGCGGAATTCTCGCTTTCCGACAACCTCGCCAAATTC 
+TCGGCCCCGGCGTTCCTGGCATCGACGGGCATCTCGAAGTAGATCTCGCCGCCCATCATGTTGATGTCGGCCTTAAAAGGTAAAGAAAGCCAGATGGCGT +TCGAGACATCGGAGTCGTCCAGCTCGGCGAAGTAATCGCCGCTCGTCGTGACAATCTTCATTCTGCTCAAAGCGCCACCCGGCCGTCGGTTCAGTTCTTT +TTCTTCTTTTCGAAGAGCTTCCTGATCTCCCTGCCCGTGACCTCGATCTGGAGGTCCTTCTCCTTCTCCTCCATTGCCTTCAGGCCCTTCAGGTCGTTGG +CCCATTCGGAGGTCCAGTCGGCTTTGAATTTTCCGGATTCGATGTCGTCCAGAATCTTCTTCATGCCCTTCTCGGACTCTTCGGTGATCACCAGGTCCCT +CCTGGTAAGGCCTCCGTACTCGGCAGTGTTGGAAACGACGTGCCACATCTTCTCGAAACCGCCCTCGTTTATGAGGTCGACGATGAGCTTCGCCTCATGG +CATACTTCGAAGTAGGCCATTTCGGGAGGGTATCCTCCCTCGACCAGGGTCTTGAATCCCGACTTTATGAGGCCGGTGGTCCCTCCGCACAGCACGGCCT +GCTCTCCGAACAGGTCTGTGAGCGTCTCGTTGTCGAAAGTGGTCTCGAAGACGCCGGCGCGGGTGGCTCCGAGCCCCTTTGCAAGTGCAAGGGCGATCTT +CTTGGCGTTGCCGGTATAGTCCTGGTGGACGCAAACGAGGGCCGGAACTCCGAATCCCTCGACGAACACATCCCTTTCTTTGTCCCCGGGGGCCTTGGGA +GCCATCATTATGACGTCGATGTTTTTCGGAGGAACGATGGTCTTGTAGGTCACAGCGAAACCGTGGGCGAACTCAAGTGCGCAGCCCTCCCTGATGTTGG +GCTCGACGAATTCTTTGTATACCTTTGGCTGGACCTCGTCGGGCAGAAGCATCATGATGACGTCCGCGGTCTTGGCGGCCTCGGCGAAATCTACGACCTT +GAAGCCGTCCTCTTTCGCTTTGTTCCATGATCTTCCGTCTTTCCTGAGCCCGATCACTACGTTGAGGCCGGAGTCCCTGAAGCACAGGGCCTGCGCTCTC +CCCTGGGATCCGTAGCCCATGACGGCGACCGTTTTTCCTTTAAGGACATCTATGTCCACATCTGCATCGTGGTAAATCTTCATTATATCCACCTGTTTAG +AGGTCCAACTGCTTTATAGACTAAAAGGTATCGTTCCCGCTCCGACATATAGGTCAGTTCAGTACTGGCAGCGTCCTTTGACCAGGGCCTGATTCGGATT +GGCAGGCAGCATGGGCAACACGTCCTCCTCGGGATCGATGTGGATGTCCAGCAGGCACGTCTCGCCGCTGTCTATTGCGGTCTTCAGGGCGTCGGCTATC +TCTCCCGGCTTCTCGACCAGCATTCCTCTGGCCCCGTAGGCCTCGGCTATCTTGGAGAAGTCCGGGTCGGCGCCAAGCTCGGTCTCGCTGTACCTCTTGT +TCCAGAACAGCTTCTGCCACTGTTTGACCATTCCCAGCCATCCGTTGTTCAGCAGGACTATGACGACCGGCAGGTCCTCGGCCACCGAGGTGGCCAGCTC +CTGTTGGACCATCTGGAATCCCCCGTCCCCTGTTATGGTCAGGACGGTGCTGTCGGGCTTGGCGGCCTTCGCCCCTATGGCGGAGGGGAGCCCGAAACCC +ATCGTGCCGAAGCTTCCCGAGGAGAGGAGCTGTCTGGGCCTGTGGACGTGCAGATGGTGCATGGCCCACATTTGGTTCTGTCCCACGTCGGTGGTGACTA +TCATGTCGTCGTCCTTGTCGATCAGCCTGTTGATCTCGTATATGACCTTCTGAGGGACGATCGGTGTAAGGTCTATGTCGATCTTGCACCTGCAACGCCT 
+CCTGTACTCCGCATAGGTGCTGTTCCAGTCGGCATGGGTATCCCTGTATCCGGAGAGCCCGTCGATGAGCGCCGCGGTACCCTTCTTAGCATCGCAGAGA +AGGTTGACGTCGTTGTTCTTGTGCTTGTCGAACTCCGTCGCGTCTATGTCTATCTGGACGACCCTGCATGCGCCGTCGAACCTGGTGTGGGGGCTGAACG +TCCTGTCCGAGAACTTCGTGCCTATGGCTATTACCAGGTCGGCGTTGCGGAAAGTATCGAGGGCGCACATCTTGCCGTGCATCCCCAAGGGGCCCAGGCT +GAGCGGGTGCTCGGTGGACATGGCACCCAATCCCATGAGCGTGAAGACCGCCGGCGCGCCGATGAGCTCTGCGAGCCTCGTAACTTCCTCGGACGCGTTC +GCGCTTATCGTTCCGCCGCCGATCAGCAGGACGGGCCTCTGCGCTTCCTTGATCCATTGGACCGCGGTGCCCAGTTCGGACATGTCCTCCCTGGGCTCCT +TGATCCCGTACGAGATGCCCAGGAGGCTCTCGTCGATCTCCGAGTTCATCTGGTCTGAGGGGAGGTCGATGTGGACAGGCCCCGGTCGCCCGGTCTGGCA +CATCTTCCATGCCTCGTCCACCGCATGGGGCAGCCTGTTGACGTCTAGGACCCTGAAGTTGTGCTTCGTTATAGGCATGAGGAGGCTGTACGCGTCCACT +TCCTGGAAAGCGCCGAGCCCCAGGGACCCGGTTCCGACCTGTCCGGTAAGTGCCAGCATGGGAGTTGAGTCCGCATACGCCGTGCCTATGCCGGTGATCA +TGTTGGTGGCACCGGGCCCGCTGGTGGCCATGCAGACGCCCGGCCTCCCGCTGGCCCTGGCATATCCGTCTGCCATGTGGGCGGCGCACTGCTCGTGGCG +TACTAGGACATGGTTTATCGATGAGTTCATTATCTCGTCGTAGATCGAGATTACGCTTCCGCCCGGATATCCGAACATGGTCTCGACACCTCTGTCCTCC +AGCATTTGGAGCAATGCTCTGTTTCCTTTCATGGTTGGTCTCCGGCGACGTATCGCGCTTGTTTTTTATAATTCTATTTGGAAAAGCGCGCCGAAACGCG +CCAGCGGAAGAAGTTTATGTATACGGGGGCCATATGCCCACGCAGGTGTTTCATGGCTGTAATAAAGGTCGGTATCAACGGATTCGGAACCATAGGGAAA +AGGGTCGCCTCCGCAGTGAGCGCACAGGATGACATGGAAGTCGTAGGTGTGACGAAGACCCGCCCGTCCTTCGAGTCGGAGGTCGCAAGGTACAGGGGAT +TCGACCTGTACGCGCCTCAGAAAAGCGTCGAACTGTTCGACAAAGCGAACGTGCCGGTCGGGGGGACCGTCGAAGACCTCTGCGGCAAGGTAGACATCAT +GGTCGACTGCACGCCCGGAAACGTAGGGCAGGAATACAAGGCGATGTACGCCAAAGCAGGCATAAAGGCGATATTCCAGGGAGGGGAGGACCACAGCCTG +ACGGGGATATCCTTCAACTCCACCGCCAACTACAAGGAGTCCTGGGGCGCCCAGTTCTCCCGTGTCGTTTCTTGCAACACCACGGGGCTGCTGAGGACGC +TCTACCCCATAGACCGCGAGTTCGGTATCGAGAAGGCGTACGTAACGTTGGTCAGAAGGGCCGCGGACCCCGGTGACAGCAAGAACGGGCCGATCAACGG +GCTGGAGCCCACCGTCAAGCTGCCGACCCACCACGGGCCGGACGTCCAGAGCATCATGCCATGGGTCAACATCAACACCATGGCGATAAAGGCCTCCACT +ACGTTGATGCACATGCACACGGTCACGCTGGAGCTGAAGAACTCCGCTTCCACCGAGGCCGCGGTCGAAGCGATAAGGAACTCCTCGCGCGTCAGGATGG 
+TGGACGCGGCGTCCGGCATCAGGTCCACGGCGGAGGTCATGGAGCTGTCGAGGGACCTGGCCAGGGACAGGTCCGACATGTACGAGATCGTGGTATGGGA + +>contig_38063 +CTATCTCCTCAGGAGGTCTGGGAATCTCTGATCGGGAAGAACAGTAACTACCGCATCATAGTCGTGGACCTCAATCTGACCCGTGTGCTGTTCGGCATGA +TAGTGGGCGCCGGCCTGGCGGTGGCCGGTGCGGTCATGCAGGCCCTGTTCAAGAACCCGATGGCCTCGCCTTATACTCTCGGGCTCTCGTCAGGCGCCGC +ATTGGGCGCCGCATTGGGGATTCTCTTCCCTCTTTCGTTCGTACCTGAGGTCGCATCGGTCCCAATCCTGGCTTTCGTTTTCTGTCTGGGGACCATGTTC +CTCGTGTACTCTATTGCCAGAGTGGGCAACCAGACGCACATGGAGACTCTTCTGCTGGCCGGAATAGCCGTAGCGGCATTGGCGCAGGCGGCGGTCTCCC +TGCTCACGTACATAGCGGGCGAGAGCATCACGGAGATAGTCTTCTGGGGAATGGGCAGCCTGACCGTCAGCCTCCCATGGGTCAAGATCCCGATAGTGCT +GGTCCTCAGCGCCGTGGGCATATTCGCAATGCTCTACTACGCCAAGGACCTGAACGCCATGATGCTGGGGGACGCCCACGCCATGGACCTTGGAATAGAC +GTAAAAAAGACAAGGCTGGCACTGTTGATCGCCTCGTCTCTCGTCACCGCGGCTGCGGTATGTTTCGTGGGGACCATCGGCTTCGTAGGCCTTGTGATCC +CGCACATACTCAGGATACTTCTTGGTCCGGACAACCGTCTGCTTCTGCCGATGTGCGTGCTGACCGGAGGGATATATCTTGTAGGATGCGACTATCTGGC +ACATCTCTTCGCCCAATCTCTGGGCGTCATGCCCATAGGCATAGTGACATCTCTGATAGGCGCCCCGTATTTCATCTATCTGCTCAGGAGAAGAAAAAAG +GAGGTGGGATGGGTATGAGCCTGGATATCCGTGACTTATTCTACAATTACGATGGGAAGCCTGTTCTCAAAGACGTTTCGTTCCTGGTCAAGGAAGGAGA +GGTCCTGGGGATACTGGGGCCCAACGGATGCGGAAAGACGACCCTGCTGGGCAATCTGAACAGGAATCTGAGCCCCAAAGGCGGATGCGTGCTTCTGGAC +GGGGAGGACCTTCACAATTACAAGAAAAAAGACATCGCGAAGGAGATAGCGGTGGTTCCGCAGGACAGTCGCGTAGGTTTCTCGTTCACCGTAAGAGAGA +TCGTCTCCATGGGCAGGATGCCATTCCAGGACGCCTTCCAGGGAGACTCCTCGGAAGACCTCAGGATAATCGAAGACGCGATGAGGAAGACCAACGTACT +GGATATGGCAGACCGTTACGTGAACACCATGAGCGGCGGGGAAAGGCAGAAGGTCATAATCGCCAGGGCCATGGCGCAGACGCCCAAGATACTGCTGATG +GACGAGCCC + +>contig_44250 +GGTGATGTACTGGGGCTTGTAGGCTACTTTGACCTTTGCGTCTATCTTGCCGCCGTCTGGAGGGATCTCTCCGGCCAGCATCTTTACGAAAGTGGTCTTT +CCTGTGGCGTTGGGACCGACGACCCCGACGGATTCCCCCATCTTTATGGAACCGCCGACGACATCCAAAGTGAACTCTCCGAAGTCCTTGGACAGGCCCT +CGAAGGAAAGCAGGTCGGAAGTGACCCAGTCGCTCCTGGGAGGAGACGCGAAGAACTCTATCGGCCTATCCCTGAAACGGATATTCTCTTCGGGAAGGTA +ACCGTCCAGATATACGTTTATGGCGGTCCTGACCTGTCTTGCAAGAGTGAACACGCCGTACGCCCCCTCGGTACCGTATACAACGCTGACGATGTCGGCG 
+AGGAAATCGAGTATGGCAAGATCGTGTTCTATCACGACCACCTGCTTTTCTGCGCTGAGTTCTTTGATGATGCGTGCCATCCTGATCCTCTGGTAGATGT +CAAGGTACGAGGTGGGCTCGTCGAAGAAGTATACGTCCGCGTCCTTCATGACCGTGGCAGCCATGGCGACCCTCTGAAGCTCTCCTCCTGAAAGTTTCTT +TATATCCCTGTCCAGAAGCTCGGTCAGCTCGAACATGATGGCGGCCTCCTCGAGTGTCAGGCGGCCTTTTATGCCGGAAAGCAGGTCCTTCACGGGCCCC +GATGCGGCTTTGGGTATGAGGTCCACGTACTGTGGCTTTATGGCCGTCCTCACCTTGCCGGCGTAGACGTCCGTGAGATAGGATTTGACCTCGGTACCGT +CGTAGTGCTGCAGCACTTCCTCTTTGGATGGAGGTTTCTCATAGTTGCCCAGGTTGGGGACGAGTTCCCCGGAAAGTATCTTGATCGCCGTGGATTTTCC +GATCCCGTTCGGTCCAAGTATGCCCGTGACCATGCCTTTCTTCGGCACCGGGAGCCTATAGAGGCGGAAGGCGTTCTCGCCGTACTGGTGGACCATCTCC +GTCTTCAGCTCGTCGGCCAGGCCTATGATCTTTATGGCGTCGAACTGGCATTTGTTGACGCATATCCCGCATCCCTGGCACAGGGATTCGGATATGATGG +GCTTGCCCCTCTCGCCGAACACTATGCATTCCACGCCCGTTCTGACCAACGGGCAGAACTTATAGCATTCCTTGTTGCATTTTCTGTTCTGGCATCTGTC +CTGCAGGACGGCCGCAATACGCATGTCCCCGCTTAGACCGATTTAAGATATAACCTTTAAGGATGGTATCGCAGATAAGCTGATAAGGGAAGACGGAGAC +AGATGGGCATGGCCGAAGCGGATGGGACCACCGAGGACGTCAGGATACTTACGGGCGACTACAGGAGGGCGATAAGGCATCTCTCCATACCGATAGCCGT +GGCTCTTGCGATACAGCATATCAACATACTCGTAGACACGTTCTGGGTCGCGGGCCTGGGGGCGGACCCGATGGCTTCAATAAGCATAGTATACCCGGTT +TTCGCCACGGTCATGGGCATCGGAAGCGGGCTGGGGATCGGTGCTTCTTCCGCGATAGCCAGAAGCATCGGGCATAACAGGAGGAAGGAAGCCGGCACGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome2.fna Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,89 @@ +>contig_11394 +GCTTTTTACGCCCAACGGGCTTGTTCTTGCTCAGACAGTCCAAGGCTTTCCAGATATGATAATCGCTGAATTTCGGTATCGGGCCGCCTGCACCCCTGTG +CTGCTGATTGGACATATGGATGTATCATTCATCCATTAAATAAAGTAATTGATTGTCATGTCGCAAGATACGGTATTAACGCCGTCAATAGCTGTTAAGC +CAGTGCCCGCCGTCGCTTTCCATCCGTAATACTCGCGCGTTTTTTATATCGAAGGAACTCTTCACATCCATGGTTCAAAAGTCGGTGGACTTTGTTATTA +TTGAGAATGTTTCCAAGAGGTTCGGGAACAAGACGGTCCTTAATAATGTGAGCGCCACTATACAGACCGGTAAGATACTGGGCCTGATAGGCAAGAGCGC +CGCGGGCAAGAGCGTTTTGATAATGATGCTGAGGGGAAGCGAAGATTACGCACCCGACTCCGGAAGGGTGCTGTACAGGGTCAATAGGTGTTCCGGATGC +GGAAACCTCGACCTCCCCCATGAAGGAACGCCCTGCTCGAAATGCGGGTGTGAAACAGGAACGATCACCGTGGATTTCTGGTCTTTGAAAGATGACGACC +CTTTAAGACGCCAGCTCAAGAGCCGCATCGCCATAATGCTGCAGAGGACGTTCGCCCTTTTCGGGGATAAGACCGTGATCGAGAACATCTTCGAGGCCAT +AGGCGACCGTGCAGAGGGCAAGGCCAGGACGGACATGGCGCTCCAGCTGCTGGAATTCGTGGGGATGACTCACAGGACCACACACATAGCCAGGGACCTG +TCCGGAGGAGAGAAGCAGAGGATAGTCCTCGCAAGGCAGATAGCCAGGGATCCTCTCTTCTTCTTGGCGGACGAGCCGACGGGAACGCTTGACCCGTACA +CGGCGGAATTGATGCACGAGCGTCTTGTGGACTACGTCGGGAAGAGAGGGATCTCGATGGTCTTCGCGTCCCATTGGCCCGAGGCCGTGGATAAGATGGC +CGACGAGGCCATATGGCTGGATTCCGGCAACGTGCTGATGCAGGGCGACCCGAAGGAGATCGCCGATAAATTTATGGAAGGATACTCGTTCGAAAGGACA +AAGGCCGCCGACCTGGGAGAGCCGATAATATCGCTCAAGGATGCGGAGAAGCACTTTTTCTCTGTCGTCAGAGGAGTCGTCAAGGCAGTGGACGGTGTAA +CCTTCGATATAATGGAGCGCGAGGTGTTCGGCCTTGTGGGAAAGTCGGGCGCCGGCAAGACCACGACGTCAAGAATGGTCGCCGGCATGACGCCCGCCAC +CCGCGGGTCCGTGAAGATAAGGATCGGCGACGACTGGGTTGATATGTCAGAGATGGGGCCGAGCGGGAAAGGCCGCGCCACCCCCTATATCGGGTTCCTC +CATCAGGAATACACGCTCTATCCCTTCGACAACATACTCAGCAACCTTACGACCAGCATAGGCACCAGGATGCCAGCGGAACTTGCAAAGTTCAAGGCCA +TACAAGTGCTTCAGAGCGTAGGGTTCGACAAGAAGAACATGGAGAGTCTTCTCTACTCTTACCCCGACACACTGAGCGTCGGAGAGTGCCAGAGGATAGC +CTTCGCACAGGTCCTGATAAGGGAGCCCCGCATCATAGTGCTGGATGAGCCTACAGGGACAATGGACCCGATAACAAAGACCATCATAGCAAAATCCGTC +ATCCGGGCGAGGGAGACCCTGGGCGAGACCTTCGTCGTGGTGAGCCACGACATGGATTTTGTCGAGAACGTCTGCGACCGCGTAGCGTTCATAAGGAACG 
+GCGTCGTGGAAGACATGGGAACTCCCGAGTCGGTCATCCAGAGGTTCGGTCTGAAAGAGCTTCAGGATGACGACTCCGAGGGTGAATGAATGAAGCAGCA +GATCGGGCGCCACCTCAGCTTCGTTGAATGCAGAGAGGCCATGGGGCTCGGCGTGGGCGGTGCCCTGGCACAGAGGGCGACCATCTCTGACAGCGGAAGG +GACGTCGTTGCGGTGGCCATGGGCCCGGGCAAGAGGCACATAACCAAACCGGTATGCGAGATAACATATGCCCTCAGAGAAGAGGGCATAGATACCAGCG +TCGCCTGAGCGCCTGGCACTTATTGCGGAGTATGTCAAGGACATGATGACCGAACTCGAACCGGACAACGCGGCCGTCTTCGAAGCGGGATGCGCCAGCT +ACCGGGCCAAGGTAGATGTGCTGATAGGGCTTGAACAAGAATATCTGACAGGCAAGGCGACTACCGAGATCATCGTCTGGCACCCTTCCTGGGCGTATCT +TCTTCCGGATAATGTGACCGAGGCAGAGCTCATGGAAGCAGCCGAGGCGGCATCCACGCCCTCATCGATCGCGATGCTGCAGGGAGGGACGCCGGAAAAT +CCTATCAACGTGTTCCTTTCGGAACCCGAAGAGATCAACGGTCTTACCCAGCAGGGGCTTTGTGAAATGGGAATATATGTAAACATAATAGTGATTAACA +TACTCGCCGGGGACTGGGTCGAATATCTGGGCCAGGTCATCGAGATACTGGGAGATAATATTCCGGATGCGGGGACATGAATTGATGATACCAATAGAAA +TTAAGGACCTTACCGCTGGATATGACGGCCGAGCCGTTTTCAGCAACGTCGACCTGGAGCTCAGGGACAAAGACTTCCTGGCGGTCATAGGGCCCAACGG +CGGCGGGAAGACAACGCTCTTCAGGGCGATCCTGGGCCTAATAAAACCCATGGGGGGGACCGTAAAAGTGTTCGGCAAGGAGCCGGCAGGTTCGCCCCCG +GGCATAGGATACGTTCCGCAGAACGAGAATCTGGACTCAGAATATCCAATAAGTGCCAGGGAAGTCGTCCTTATGGGAATGAGGTGCAAGAAGGGCCTTA +GGCCGTTCTATTCCAGTGAGGAGAAGGAGTCCGCAGAGAGGGCCATGGAGTACGCCGAGGTCTCGGATTTCGCAGACAGCCGAATAAGCAACCTGTCGGG +AGGGCAGAGACAGAGAGTATACCTCGCAAGGGCTCTTGCCCCGGAACCGAAGATACTCATGCTGGACGAACCCACCGCGAGCCTGGACCCGTCGATGAAG +GACTGCACCTACGACATACTCAGGAAGCTGAACAGGGACGGGATAGCCATAATGGTGATAACTCACGATATGAGCAGCATCTCTCATGATGTCAAACGTG +TAGCATGCATGAACCGCAGGCTGATAGTCAACGATGCGCCCGAGATAACCCAGGAGATGATCGCATTGGGATTCCACTGCATCCCCGAGCTAGTGCACAT +AGGTCCCTGCGATTGCGGAGGTCACAACGATGGTTGATTGGGTCGCGGCATTCTCGATGCCTCTGATTCAGAACATGTTCATGGTCGCGGCCATAGCATG +CGTTCTTTGCGGAGTCGTGGGAACCCTGGTGGTCGTGAAACGGATGGTGTTCGTAACGGGTGGCATAGCACACACCACTTTCGGAGGTGTGGGTCTTGCA +TATTATGTTATGTCCGTCGTCGCAGTCTCATGGTTCACCCCCATGATCGGCGCCGCACTGTTCGCGGTCGTTTCGGCGGTCATAATGGCGCTTCCCGCGG + +>contig_5089 +TGCCGAAAGGTACGAGGAGATCATGGAGGCGCTCCGGGAGCTGGAGGAGATGTCTTGGGAACGGGTCATACTCGTGGAGGGCCGAAGGGACGTCACCGCG 
+CTGGAGCATCTCGGCATATTCGGGGACGTTTTCACCGTCCAGGCGTCGGGCGGCCCGGTAAAGGCCGCAGAATATGTGGCCGGCAGACGGAAGAAGGCGG +TCATACTGACCGATTGGGACAGGAAGGGCGACATAATAGCCTCGGACCTGGAGGTCCACCTGAGCGCTCTCGATGTCCAGTACGACACGGCGGTAAGAAG +CAGACTGGCGGGCCTTTGCAGGATCGACATCAAGGACGTGCAGTCCCTGGACGAACTGGTACACCGACTCGAGACGGCATGAAGTAATATATTCAGTAAA +TCATATTGAACCGTTAAGGATGGCAGGCCGTTTCATTGTTTTCGAAGGCATCGACGGTGCGGGCAAAAGCACTTTGATAGATGAAGTTTCAAAAAAATTG +GAGTCGGCGGGCATAAGGACCGTCGTAACCGCAGAGCCCACTGAAGGGCCGATAGGAATGCTGATACGGAGCGGGGCGGTCAAATGCATATCTCCGAACG +CGGAAGCTCTGCTGTTCACCGCCGACCGTGCCTGCCACACCTCCGAGATAGTCGGATGGATGGAGGAGGGGACGACCGTCCTCTGCGACCGTTACTACGC +CTCCACCATAGCGTACCAGTCCGCAGGACTCGACGGAACGGTGTCGGGCAAGGAATGGCTCATGGACATCAACCGTCCCGTCACCGTAGAACCCGACACG +ACGATACTTCTGGACATCGACCCCGAGGCGGGGATGCGCCGGGTGGGGGAACGCGGAGCGAGGAGCAAGTACGAGGTCACCGAGTACCTCGGCAGAGTGC +GCTCCAATTATCTGGAAATAGCGGAGGAGAAAGGATTCCGCATAATAGACGCTTCTCGTCCGAAGGACGAGGTGCTGAGAGAAACGATGAAAATCTTAGG +TGAGTGAAATGCATCCGTCGGAAGAGATCTATTGTGAGAAGAGCAACAGGCTGAAGGGAAAGACAGTGGTACTCGGGATAACGGGAAGCATCGCCGCAAC +GGAATGTTTCTCCACGATACGCGAGCTGATACGCCACGGCGCTACGGTTATACCTGTCATGACAAGGGCGGCCTGCGACATAGTGACCGAGCAGAGCATA +GAGTTCGCATCCGGAAAAAAACCCATAACCGAGCTCACGGGCCAGACCGAGCATGTCAAGCTGATGGGCGACTCCCGCACCGCGGACCTTCTTATGATCT +AGAATGGACCACAGATGCGGATACAATGCATGCGGACACGAGGATCGGCACTGTCTCGGGCTCCGCGAACCCCGAAACGAAGGACGAGACCGAGGGATAC +GAGACCAGCATCAAAGCGGCCAGCGTGTTGACGTGAATGCCAGGCACCAGTCCGGTCACCGCGCCCATCAGGGCCCCCGCCATGCTCATCAGCGATACAA +GAAGCAGGACGTCCGTACCCATGGCCTAAGATCTGTCGCCGTGCGGTATATATGCGGATATCTACAGTTAGCAATCCAGGAACGGTTTGCCGTCCCTCAC +TCCCACGCGCAGCATCGCCTTCACACGGACACCCGGCACGGCATCTGCGAGGTCCCCGGTCTTATCGAAGACCGCCAATACCTCCACGAGCCTGATGCCG +TGGGAGGCCAGCGCAAGGGCGAGGGCCCTCACGGTCCCGCCGGTGCTGAGGGTGTCCTCGACTATCACGGCCCTGTCGCCTCTTCCCGGTCCGTTTATAT +ACAAACTTCCTGAAGAATAGCCGGTGCTCCTGTCTATGATTATTTCTCCGGGAAGCCCGTAACCGCGTTTCCTTACTATGCTGTAGGGGATGCCCAGCCT +TAGGGATATTGGCACGGCCAGCGGGATGCCCATAGCCTCCGGCGCGAGTATGACGTCGCAGTCGAAATCTCCCAGGTCAATCAGCCCCTCCACGACTTCG 
+TTCAGAAGCGCGGGGTCCACGCGGGGAACGCCGTCGCTTATGGGATTGACGAAATACGGGTAGCCGTTCCTGTCGATCACAGGGCTGGCCATAAAGCTCT +TTCTCAGAAGCTCGTACACTGAACGTGCACGGCGATGTGGTTATTTACCTGTTCTTAAAAGGAGACATATTTAAGGGAAGGTACGATAACAGCAACGATA +TCCGTCGAAACGGCTCCGTTGACAACGGTCACGAACACCTGGACGGAATCGGAATCGATCCCTCCGAACGCACCGGTGTTTGCGGATATGTAGTAAGCTT +TGATCTTACCCTGTGTCCCGTTATCGATATTGTCGCTGAAGTCAGGGCTCACCACCAGAAGGTTAGAGAGGCCCTTCTCCTGCATCAGCGAGATCGTCTC +CTCGTCGGTCAGTATGGAGGACGGGTACAGCTTGATCAGGTACTGGTCCCTGGTGGCGGGATCGTACATCCCGTCCAACGCGGAATTGACAGTGTCGTAA +TATGTGGAAGAACTCTGATCGTCGTAGTTTATCACGCCCATCCTGACGGGCTCCATGGCCTGTTCGGTCTGGTCGCTGATCATATATCCCAGACCTACGA +ACAGTATCATGACGACCGCTATCGAGATTAGGGACTCGAGGGTCATCAGCTCCCTGAGCTCTTTCTTGACCAGGTTAAACAGATTGTTCAACGGACTTCA +CCCCCTTGATGAAGACCTCTTCAAGGTTTTTAGCGTCGTATTTCGCCTTGAGCTCCGCGGGAGTCCCCTGCATGATGATCTCTCCCTTGTTGATCATGGC +GACGCGGTCGCACAGGGATTCGACCTCATACATGTTGTGCGAGGAGAGGAGAACCGTAACTCCGGACCCGGCTATCTCCCGGATCAGCTCCCTGATGTCG +TGCGCGTTCATCACGTCCAATCCGGATGTTACCTCGTCCATGATGGCGAAACGCGGTGAGGTCATTATCGCCCTTGCGATGAGCAGGCGTCTCATCATGC +CCTTGCTGTAAGTGTTGACCTTGCTGTCGATGCGGTCTCCCAGGTTGGCGATGTCCATCCCCCTCTGGGTCATTTTTTCTGTCTCCTCGCCGTCGGTGAA +GAAGCCCGCGATGAAGCGCAGATAGGTGCGTCCCGTCAGGTCCTTGTAGGCGCCCGCGTCCTCGGGCAGATAGCTTATGGATTTTCTTACGTCGTCCCCC +TGGGCGGCGACATCGTATCCGCAGACGGTTATCTTTCCGGAAGTTATGGTTATGAGCGTCGAGATCATCCTAAGCGCCGTGGTCTTCCCGGCGCCGTTCG +GTCCGATGAGGCCAAAGATCTCTCCCTCTTTGACAGAGAAGTTTATTCCCTTGACGGCCTCGATGTTCCCGTATATTTTATGCACGTTCTCTACGCGCAG +GGCATCCATGAAGCATTGAGGTATATCTCGCTTTTAAATTTATTGGAGTGCTGTTAAGTGTCTAAAAAAGTGTGTATTCGGGCCGGGGCCCGTAATGTGT +TTACTGTTCCGCGCCGGCCGAGGCGTTGCCCATGGCCTTATTTATGGTCTCTTGGAGGGACTGGTATTTCTCGCGGAGACTTTTCTCCTGGCGGTCCAGG + +>contig_159 +TATAGCTCAGCTCGTTGGCGGAGACGCTGCTTCCGTACATCTGGCCGCCGCCGTTGATGCCGCCGCCCCAGGCGGCCGTCCCGATCCCCACGGGAGAAAT +GTCCGTACCTCGGAACCTTATGTTTCTCACGGATTCCCGTATATGTTCCTGGATTATAACTGATACGCAATCCTGTTTCCGACGTCCGCCATGTTTAGAT +AAATTGACGGTATAGCCGAAGGCATGGATATGGCAATGGAGCTGAGGAACGTCTCCGTAGTGAGGGACGGGAAGCGGATACTGGATTCCGTCTGCCTCGA 
+TATCGGCGCCTCCGAGAACGTTGCCGTCATAGGGCCGAACGGTTCGGGGAAGACGACGCTCATCAAACTGCTGAGGGGCGATATTTATCCCTACTACGAC +GAGGACCGCCCCGCGGAGATGAGGATCTTCGGTGAGAAGATATGGTCCATCTACGACATACGGAGCCGCATGGGCGTGGTCTCCATGGACCTCCAGGGCA +TGTTCGGCGGCGAAACGCTGGTCGGAGACGTCATAATGTCGGGATACTTCAGCAGCCTGGACATTTTCCGCAACCATGAGGTCACCGACAACATGCGCTC +CGGGGCCTCGCGAGCGGCCGGGTACATGGGAGTGGAACATCTCGTCGGCAGAGATCTGTCCGGCCTTTCTCTGGGAGAGATGAGGCGGACGCTGATCGCC +CGGGCGCTGGTCACCGCCCCCGAGATGCTCGTCCTCGACGAACCGATGACGGGCCTCGATATTGTAATGAAATCCAAATTCAGGAAGATGTTCGACATCA +TGACGGAAACGGGAGTGAGCATCGTCATGATAACCCACGACCTCACCGACATCCCCGTTTCCTTGAACCGCATAATAATGATCAAGGATGGGAAAGTGTT +CGCGGACGGTCCTAAAAAAGACGTCCTGACGTCCGAGGTCGTCAGCGGGCTTTTCGATGAACCTATTAATGTACAATGCGTTAACGGGATATATTCAATG +AGGATGGATGAGTGACAAGGTATATCTGTTCCGAATGCGGGAACGAGATTCCGTACGTTTCGGATTTCTGCTACCAGTGCGGTAGCCTGAAGAGCAAGGC +GTTCAAGATAGACGAGGGCGGCGAGATGGAGGGCGGGGAGGTCCCGTGCCCCAACTGCGGAAAGCCCATAGAGGAGGACGCCCGGTACTGCAGGCACTGC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome3.fna Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,60 @@ +>contig_4003 +ATCAAAGAGGAACTGAAGGCGGCGATGCTGCTGACCGGTTCCTCTGACATAAGAGAGCTCTCTGATGCAGAGTATATCGTCATGGGAGAGACACGCAAAT +GGATGGAAGGCCTGAAATGACCGACGTCAAGAAGATATTGAAACAGATGTCCGACGAGCTGAGCAAGCCGATCGAATCATACATAGAAGACGAACTGCCC +GCCAATCTCATCGAAGCGGCAAGACAGTACCCCTATGCCGGCGGAAAGAGGATGAGACCGGCCATGGTCATCGCCGCGTGCAGGGCGGTGGGAGGGGATG +GCAGGAAGGCCGTTCCCCTTGCGGTTGCCATAGAGTACATACACAATTTCACGCTGATCCATGATGACCTCATGGACGGGGACGAGAAGNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCATGGACGGGGACGAGAAGCGCC +GGGGCATGACCACATCCCATGTGAAGTACGGCATGCCCACAGCGGTGTTGGCGGGAGATGCCCTGTTCGCTAAGGCCTTCCAGATAATCGCCGACCTCGA +TGCGGACGGCGAAACGGTCAGAGATGTCCTTAGAGTGGTCTCCCAATCCGTCTGGGACCTGGCCAGAGGTCAGCAGATGGATATAAACAACGAGAACGGG +ACCGAGGTCACCATGGATGAGTACATCGAGACCATCAGACTGAAGACCAGCGTTCTGTTCGCCGCCGGTGCCGCCGGAGGCGCTATGATCGGAGGTGCGA +GCAAGGAGGTCGTGGACGCCGTCCACGGCTACGCCATGAACCTGGGCGTCGCCTTCCAGATGTATGACGATATACTTGGGATAGTCGGGGACCCGGCCGT +TACCGGTAAATCGTCCGGTAATGACATTCGCAAAGGGAAGAGCACCGTCATCGTGTGCCACGCCCTGAAGAACATAGCTGACAGGGCGGACCTGCTGGTC +TTCCGCGATATCCTCGGCAAGACAGACGCCACCGATGCGGAGATAGACGAGGTCAGGAGCATACTCCGAAGAGCCTGCAGCCTGGATTACGCCATAGAGA +CCGCAGAGGATTACATCAACAAAGCCGTCGACTGCCTGGATGCGCTGGAGCCCTCAAAGGACAAGGACTTCATGATAGCCCTGGCAGAATACACGATGAC +CAGGACCCTTTAGTCGGAGATCCCCTTCTCCGTTATGGAGTATGTGGCTTTCCGGCCTTCCGGTATGCTGCGGTGCTTGACCATGATCGCGGCCCTGCGG +CCGTTGCCTTTCTTCTCCAAGCGGATTATGGTCTTCGCGTTATGATGCATGGCGTGGCCTCCGAGGAACTCTATCGTACCGGCGCCTATGTTGGTGTATA + +>contig_4403 +CACCGGTCACCCGAAGGTCACGCGCGTATCGATGCGTGACATCGCAGACCTGGGAGAGAGGGGCCTGTACATCCTTCACGAGATCGGTACGGACCTCGTC +GGCAAGATGGAGGGCTGCACCGGGTGCAAGAAGTGCGAGCACGAATGCCCCGAGAACGCGTTGACCGTAAGCAAGGACAAGACGATCACCGTGAAGACCA +AGAACTGCCTCGGAACGGCATGCTACAGATGCCAGTACGTCTGTCCCGAGAAGGTCATGCAGTTCGACTCCCTAAGGCTGTCGTGATAAACGGTTTTGGG 
+CGGGGCCGGCCCCGCCCTTTTTTCATTTACCGCCGTTCAGGGCCTCGGCGTGCACGGCAGGCCTGACATTCTCGTCCTCCAGCTCCGTAAGTATCTGCTT +GCGCAGCCTGATGAACTCGGGCGAAGCGCGGTCCCTGGGCCGCGGAATGCCTATGTCCACGATGTCCTTGATGCTGGCAGGACGCTTGGTAAGGACGACT +ATCCTGTCTGAAAGATAAACGGCCTCGTCGACCGAGTGGGTCACGAACAGGATCGTAGTGTCCGTCTTCTCGACTATCCTCAGCAGCTCGCCCTGCATGA +TGTTGCGCGTCTGGGCGTCCAACGCGCCGAACGGCTCGTCCATGAGCAGCACGTCGGGCTTGGTAACAAGGGCCCTTGCGATGCCCACGCGCTGCTTCAT +ACCTCCGCTGAGCTCGTGGACACGATGGTCCTCGAAACCTTCGAGGCCGACCGCCCTGATGTAGCGTTCGGCGGTCTTCCTGCGCTGCTCCGCCGGGACG +CCGGCGATCTCCAGGCCGAACTCGACATTCTTCCTTACAGAACGCCAAGGGAACAGTGCGAACTCCTGGAACACCATGCCTCTGTCGGGGCCTGGCCCGG +TGCACTTCTTCCCGCCTATCGACACTTCTCCGGAGGACGGCTCCATGAGCCCTGCTATAAGCCTGAGCAGAGTCGTCTTTCCGCATCCCGAGGGACCGAC +TATGGATATAAGCTCGCCCTTCTGGATCTCCAGAGAGAAATCCTCCAGGGCCACGGTCTCCTGTTCATCGGTCTTGTAGACCTTCCTCAGATGATTGATA +ACGATCTTCTCGCTCATTCTATCCCCATCCTTCTTGTTATGACCTTGTGCAGATAGTCGGCGAGGCTGGTCGTCAGTATTCCGAGGATTGCGATTATGAC +TATGCCCGCGTAGACGTTGGGCCAGTACCCCATCTGCGCCTGTATGCTGATGAAGTATCCGACGCCTCCTCCGAACGATGCGTACAGCTCGGAGGCAACT +ATGCACATCCACCCGACCCCCATGCCTATGCGGAGGCCGTTCATTATGTATGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN + +>contig_14302 +GATGTGGAATGCGGCCAAATGCCGCCTCGGTTTCAGCGGGGCAAGGCGTTCCATGTCCACGACCGCTGGAATACTGGTGAACGTTTTCATTACTTCTCTG +GAGATAGCCGAACGTTCTCAGCCTGCGCTCGAAAGCAGGAACTACAACGGCAGTTTCCCCATTTATAGGATGCCCAACAAGATCGGTCTCTACTGGTTCG +CCATCACGGCCGCCGCGTGCGCCTCGCTATACATCATCGGATACTACATCTCGACCCCGAACATGGCGGCAATACTCCTGGGGGCGGTCTGACGGTCTGC +GACCTTCGTACGGATAAAGTGAGCTACAGGTACGGCAATTCGAATTACAATGCTATCGATTCTGTGGATTTCACAGCCTCCCATGGCAGAAGGACCGTCA +TCCTGGGAGAGAACGGGTGCGGCAAGTCGACGCTGATATATCAGCTCAACGGAGTATACAAGCCTGTTTCCGGTACTGTGTTCTATGGAGATATGCCCAT +ATCATACGACAAGGAGTTCCTTACGGAGCTGCGTTCCGACGTTTCCGTTGTTCTGCAGAATCCAGACGATCAGATTTTCTCTTCCACCGTCGAGGAGGAC +GTGGCATTTGGACCGTTAAACTTGGGCCTTTCCCGGGAAGAGGTTGGAGAAAGAATCGGCCGGTCTTTGGAATGCGTGGGGATGTCGGGATTTGCCGAAG +TGCCTGTTCAGCGCCTTTCATACGGTCAGAAAAAAAGAGTGTCACTCGCAGGCGCCCTAGCATCACATCCAAAGATACTGGTCCTGGACGAGCCTACCGC 
+AGGCCTGGACCCGCAGATGTCCAGAGAGGTGATGGAGATCGCAAATTCTCTCATCAGAGAAGGGATCTCCGTCGTAGTATCCACCCATGACGTGAATCTG +ATCTACAATTGGGTCGAGGACCTTTACGTGATGCGGAACGGACACATGGTCTTCTCCGGAGATGCGGACGAGTTCTTCTCCGACCGTCCGTCCGTTTATC +TTTCAGGTCTGGAACAGCCCTCGATATTCAGCATAAACCACAATATGGAGACGTTAAGAGGGACGATTCCCGCGTCATATCCCAAGACCATGAGCCAGAT +GGTCAGCAGATTATTCCCTTCAGGATCCTCGGCCGGAAGGATATTCATCTATCAGACTGAAGGCGAGCGCATCGACCAGGATGCGATCGAGGAGGCCGTG +GGAAAGAAAGGGATGCCCATTGCAGTATACGGCCCCTCCGCGCGCAGGTCGGTGACCCGATCGAAGCTCAGGGTTGATTTCTATTTTAACGGCATAGAGT +GTTGCATCAGGGAGGCCATGGTAAACCATGATTCCCTGATAATAGTCGACCGGGGCTTGAAGGGGATCGTCACGGAGGCGATTGAAGAGCTTAGGGCATA +CGGAACCCGGATCAGTATCAGGGAGTTGGTTTTTTGAGCGCTCCCCTTTTCCGCACCGAAGGTCTTTTCTTCAGATACGAAGGCGGCCGGGGGGACGCGT +TGGCAGACGTGAACATCACGATCAAAGAGGGTGCCAGAACTGTCATCATGGGAGCCAACGGAGCTGGAAAATCCACGTTCTTCTATCATCTTAACGGAGT +CTTGAGGCCGTCGAAGGGCTCGGTGTTTTTCCGGGGAGAAAAAATACCGCACAGGGGAAAAGCTCTCAGGAAGCTGCGCTCGGAGGTCGCGGTGATGCTC +CAAGACCCCAACGACCAGCTTTTTGCACCAAAAGTATCTGACGACATAGCATTCGGCCCGAAGAACCTGGGACTCGACGCTCAGACTGTAGGGGAGAGGG +TCAGGGACGCCCTCTACATCACAGGCATCGAATCTCTGGAGGGTCGCAGCGTGATGCAGCTGTCGTTCGGCCAGAAGAAGAGGGTGGTGCTGGCCGGTGC +CTTGGCGATGCATCCGAAGGTGCTTATAATGGACGAGCCCACCGCAGGTCTCGATCCCCAGATGTCCAAGGAGCTCATCGAGCTCGCGGACGAGCTGCAC +CATCTTGGAACGACCGTTATTTTTTCAACCCATGACGTGGACCTCTCATATTCTTGGGCGGACGAGGTCCATGTCCTAAGAGGGGGCCGTAATGTATATT +CGGGGAGCTCAGAAAGATTCTATGACGATACTTCGGAAGTTTATCTTTCGGGCCTTGTCGAACCGGCCATGTACGACATCAACGTCAGCATCTCCGAGCT +TGCCGGATGCCCCGTTGAACCGTTTCCCAAAACCCTGCCTCAGCTTGTGGCCAAGGCAGTGCCGTCAGAGGGGCCGGGCACGGTTCACATCCTTCCCGTG +GAAGGTCCGGTCGACCGGGAGCTGTTCTCCTCTCTGACGTCCGGGTCCGGGATGTCCGCAACAGGCGTCTACGGTACTAATGCAAGAAAATCTGCGGAGG +CTTCCAAATTGCCGATAGATTATTTCTTCGGGGCCGACGAGGGATGCATAATAGAGGCTTTGCACGGCAAAGACACGCTGATATGCTGCGACAGGTCCCT +TACAGATCTGCTGATATCGAAGATAGGCAGTATGTCCCGGTTCGGGACAGAGGTCCCTTATTCTCTGCACTGAACATTTCTTTTTTCCGGGGGTTCGAAC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_contig.contig2classification.names.txt Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,4 @@ +# contig classification reason lineage lineage scores superkingdom phylum class order family genus species +contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli: 1.00 Bacillales: 1.00 Listeriaceae: 1.00 Listeria: 1.00 Listeria monocytogenes: 1.00 +contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Proteobacteria: 1.00 Gammaproteobacteria: 1.00 Vibrionales: 1.00 Vibrionaceae: 1.00 Vibrio: 1.00 Vibrio cholerae: 1.00 +contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli*: 1.00 not classified not classified not classified not classified
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_contig.contig2classification.txt Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,4 @@ +# contig classification reason lineage lineage scores +contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 +contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 +contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/cat_database.loc.sample Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,7 @@ +## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz +# ls CAT_prepare_20190719/ +# 2019-07-19.CAT_prepare.fresh.log +# 2019-07-19_CAT_database +# 2019-07-19_taxonomy +#value name database_folder taxonomy_folder +#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of CAT databases --> + <table name="cat_database" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, database_folder, taxonomy_folder</columns> + <file path="tool-data/cat_database.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Sun Nov 24 21:56:00 2019 -0500 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of CAT databases --> + <table name="cat_database" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, database_folder, taxonomy_folder</columns> + <file path="${__HERE__}/test-data/cached_locally/cat_database.loc" /> + </table> +</tables>