Repository 'cat_prepare'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/cat_prepare

Changeset 0:b6c5e7343617 (2019-12-10)
Next changeset 1:8315b5cebb82 (2020-01-08)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
added:
cat_prepare.xml
macros.xml
tabpad.py
test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd
test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid
test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz
test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring
test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp
test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp
test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz
test-data/cached_locally/cat_database.loc
test-data/contigs.fasta
test-data/genome2.fna
test-data/genome3.fna
test-data/test_contig.contig2classification.names.txt
test-data/test_contig.contig2classification.txt
tool-data/cat_database.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r b6c5e7343617 cat_prepare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_prepare.xml Tue Dec 10 16:07:39 2019 -0500
[
@@ -0,0 +1,32 @@
+<tool id="cat_prepare" name="CAT prepare" version="@VERSION@.0">
+    <description>a database for CAT - Contig Annotation Tool</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <!--
+        Builds a CAT database inside the output dataset's files_path directory.
+        The output dataset itself only receives a one-line marker
+        (CAT_DB, build date, database folder name, taxonomy folder name);
+        the folder names come from the @DATABASE_FOLDER@ and @TAXONOMY_FOLDER@
+        tokens in macros.xml.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #import os.path
+        #set $catdb = $cat_db.files_path
+        ## Paths are single-quoted, like the folder arguments below, so that a
+        ## path containing spaces or shell metacharacters is passed as one word.
+        mkdir -p '$catdb' &&
+        echo CAT_DB `date '+%Y-%m-%d'` "@DATABASE_FOLDER@" "@TAXONOMY_FOLDER@" > '$cat_db' &&
+        CAT prepare --fresh
+        --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+        --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+    ]]></command>
+    <!-- The tool takes no user inputs. -->
+    <inputs>
+    </inputs>
+    <outputs>
+        <data name="cat_db" format="txt" />
+    </outputs>
+    <help><![CDATA[
+**CAT prepare**
+
+Prepare CAT reference data for classifying metagenomic contigs or genome assemblies.
+
+**NOTE:** This requires over 100 GB of RAM, 250 GB of disk space, and up to 24 hours.
+
+@COMMON_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r b6c5e7343617 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Dec 10 16:07:39 2019 -0500
[
b'@@ -0,0 +1,389 @@\n+<macros>\n+    <token name="@VERSION@">5.0.3</token>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@VERSION@">cat</requirement>\n+            <yield/>\n+        </requirements>\n+    </xml>\n+    <xml name="version_command">\n+        <version_command><![CDATA[CAT --version]]></version_command>\n+    </xml>\n+    <token name="@DATABASE_FOLDER@">CAT_database</token>\n+    <token name="@TAXONOMY_FOLDER@">taxonomy</token>\n+    <xml name="cat_db">\n+        <conditional name="db">\n+            <param name="db_src" type="select" label="CAT database (--database_folder,--taxonomy_folder) from">\n+                <option value="cached">local cached database</option>\n+                <option value="history">history</option>\n+            </param>\n+            <when value="cached">\n+                <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator">\n+                    <options from_data_table="cat_database">\n+                        <filter type="sort_by" column="2" />\n+                        <validator type="no_options" message="No CAT database is available." 
/>\n+                    </options>\n+                </param>\n+            </when>\n+            <when value="history">\n+                <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/>\n+            </when>\n+        </conditional>\n+    </xml>\n+    <token name="@CAT_DB@"><![CDATA[\n+        #if $db.db_src == \'cached\':\n+        --database_folder \'$db.cat_builtin.fields.database_folder\'\n+        --taxonomy_folder \'$db.cat_builtin.fields.taxonomy_folder\'\n+        #else\n+        #import os.path\n+        #set $catdb = $db.cat_db.extra_files_path\n+        --database_folder \'$os.path.join($catdb,"@DATABASE_FOLDER@")\'\n+        --taxonomy_folder \'$os.path.join($catdb,"@TAXONOMY_FOLDER@")\'\n+        #end if\n+]]></token>\n+    <token name="@CAT_TAXONOMY@"><![CDATA[\n+        #if $db.db_src == \'cached\':\n+        --taxonomy_folder \'$db.cat_builtin.fields.taxonomy_folder\'\n+        #else\n+        #import os.path\n+        #set $catdb = $db.cat_db.extra_files_path\n+        --taxonomy_folder \'$os.path.join($catdb,"@TAXONOMY_FOLDER@")\'\n+        #end if\n+]]></token>\n+    <xml name="test_catdb">\n+        <conditional name="db">\n+            <param name="db_src" value="cached"/>\n+            <param name="cat_builtin" value="CAT_prepare_test"/>\n+        </conditional>\n+    </xml>\n+    <xml name="use_intermediates">\n+        <conditional name="previous">\n+            <param name="use_previous" type="select" label="Use previous prodigal gene prediction and diamond alignment">\n+                <help>predicted_proteins.faa and alignment.diamond from previous CAT run.</help> \n+                <option value="yes">Yes</option>\n+                <option value="no" selected="true">No</option>\n+            </param>\n+            <when value="yes">\n+                <param argument="--proteins_fasta" type="data" format="fasta" label="prodigal predicted proteins fasta"/>\n+                <param 
argument="--diamond_alignment" type="data" format="tabular" label="alignment.diamond file"/>\n+            </when>\n+            <when value="no"/>\n+        </conditional>\n+    </xml>\n+    <token name="@USE_INTERMEDIATES@"><![CDATA[\n+      #if $previous.use_previous == \'yes\'\n+      --proteins_fasta \'$previous.proteins_fasta\'\n+      --diamond_alignment \'$previous.diamond_alignment\'\n+      #end if\n+      --out_prefix \'cat_output\'\n+]]></token>\n+    <xml name="custom_settings">\n+        <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/>\n+        <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/>\n+    </xml>\n+    <token name="@CUSTOM_SETTINGS@"><![CDATA[\n+      --range \'$range\'\n+      --fraction \'$fraction\'\n+]]></token>\n+    <xml name="diamon'..b'tputs with taxonomic names. \n+  - CAT summerise - reports number of assignments to each taxonomic name.\n+\n+A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool.\n+\n+.. _Prodigal: https://github.com/hyattpd/Prodigal\n+.. 
_Diamond: https://github.com/bbuchfink/diamond\n+\n+]]></token>\n+    <token name="@OUTPUTS_HELP@"><![CDATA[\n+\n+**OUTPUTS**\n+\n+Any of the files produced by the CAT workflow are available as outputs\n+  - Prodigal\n+\n+    - predicted_proteins.faa\n+    - predicted_proteins.gff\n+\n+  - Diamond\n+\n+    - alignment.diamond\n+\n+  - CAT contigs/bins\n+\n+    - contigs/bin2classification.txt\n+    - ORF2LCA.txt\n+\n+  - CAT add_names (optional)\n+\n+    - contigs/bin2classification.names.txt\n+    - ORF2LCA.names.txt\n+\n+  - CAT summarise (optional)\n+\n+    - contigs/bin2classification.summary.txt\n+\n+\n+]]></token>\n+ \n+    <token name="@OPTIONS_HELP@"><![CDATA[\n+\n+Optional arguments:\n+  -r, --range               cut-off range after alignment [0-49] (default: 10).\n+  -f, --fraction            fraction of bit-score support for each classification\n+                            [0-0.99] (default: 0.5).\n+  -p, --proteins_fasta\n+                            Path to predicted proteins fasta file. If supplied,\n+                            CAT will skip the protein prediction step.\n+  -a, --diamond_alignment\n+                            Path to DIAMOND alignment table. If supplied, CAT will\n+                            skip the DIAMOND alignment step and directly classify\n+                            the sequences. A predicted proteins fasta file should\n+                            also be supplied with argument [-p / --proteins].\n+\n+\n+DIAMOND specific optional arguments:\n+  --sensitive     Run DIAMOND in sensitive mode (default: not enabled).\n+\n+  --block_size    DIAMOND block-size parameter (default: 2.0). Lower\n+                  numbers will decrease memory and temporary disk space\n+                  usage.\n+\n+  --index_chunks\n+                  DIAMOND index-chunks parameter (default: 4). Set to 1\n+                  on high memory machines. 
The parameter has no effect\n+                  on temporary disk space usage.\n+\n+  --top\n+                  DIAMOND top parameter [0-50] (default: 50). Governs\n+                  hits within range of best hit that are written to the\n+                  alignment file. This is not the [-r / --range]\n+                  parameter!\n+\n+\n+Setting the DIAMOND --top parameter\n+\n+You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file.\n+\n+You have to be very carefull to 1) not confuse this parameter with the r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it\'s up to you to remember this!\n+\n+If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6.\n+\n+]]></token>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">https://doi.org/10.1101/072868</citation>\n+            <citation type="doi">https://doi.org/10.1186/s13059-019-1817-x</citation>\n+            <citation type="doi">https://doi.org/10.1038/nmeth.3176</citation>\n+            <citation type="doi">https://doi.org/10.1186/1471-2105-11-119</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r b6c5e7343617 tabpad.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tabpad.py Tue Dec 10 16:07:39 2019 -0500
[
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+
+def padfile(infile, outfile, fieldcnt=None):
+    with open(infile, 'r') as fh:
+        out = open(outfile, 'w')
+        commentlines = []
+        tabs = '\t' * fieldcnt if fieldcnt is not None else None
+
+        def pad_line(txtline, tabs=None):
+            line = txtline.rstrip('\r\n')
+            fields = line.split('\t')
+            if not tabs:
+                tabs = '\t' * len(fields)
+            out.write('%s%s\n' % (line, tabs[len(fields):]))
+
+        for i, txtline in enumerate(fh):
+            if txtline.lstrip().startswith('#'):
+                commentlines.append(txtline)
+            else:
+                if commentlines:
+                    for i in range(len(commentlines) - 1):
+                        out.write(commentlines[i])
+                    pad_line(commentlines[-1], tabs=tabs)
+                    commentlines = []
+                pad_line(txtline, tabs=tabs)
+        out.close()
+
+
+def fieldcount(infile):
+    fieldcnt = 0
+    with open(infile, 'r') as fh:
+        for i, line in enumerate(fh):
+            fieldcnt = max(fieldcnt, len(line.rstrip('\r\n').split('\t')))
+    return fieldcnt
+
+
+def tsvname(infile):
+    return re.sub('.txt$', '', infile) + '.tsv'
+
+
+def __main__():
+    parser = argparse.ArgumentParser(
+        description='Pad a file with TABS for equal field size across lines')
+    parser.add_argument(
+        '-i', '--input', help='input file')
+    parser.add_argument(
+        '-o', '--output', help='output file')
+    parser.add_argument(
+        'files', nargs='*', help='.txt files')
+    args = parser.parse_args()
+
+    if args.input:
+        outfile = args.output if args.output else tsvname(args.input)
+        fieldcnt = fieldcount(args.input)
+        padfile(args.input, outfile, fieldcnt=fieldcnt)
+    for infile in args.files:
+        outfile = tsvname(infile)
+        fieldcnt = fieldcount(infile)
+        padfile(infile, outfile, fieldcnt=fieldcnt)
+
+
+if __name__ == "__main__":
+    __main__()
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd
b
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd has changed
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,10 @@
+WP_000159554.1 2
+WP_000214552.1 91061
+WP_000346214.1 91061
+WP_000568619.1 666
+WP_000958804.1 1301
+WP_000991933.1 666
+WP_000996146.1 666
+WP_003722398.1 1639
+WP_005378126.1 662
+XP_961517.1 5141
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz
b
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz has changed
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,17 @@
+91061
+1
+641
+1224
+1236
+131567
+1637
+1639
+1783272
+2
+662
+13562
+13562
+641
+662
+666
+91061
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp Tue Dec 10 16:07:39 2019 -0500
[
b'@@ -0,0 +1,252 @@\n+1\t|\tall\t|\t\t|\tsynonym\t|\n+1\t|\troot\t|\t\t|\tscientific name\t|\n+2\t|\tBacteria\t|\tBacteria <prokaryotes>\t|\tscientific name\t|\n+2\t|\tMonera\t|\tMonera <Bacteria>\t|\tin-part\t|\n+2\t|\tProcaryotae\t|\tProcaryotae <Bacteria>\t|\tin-part\t|\n+2\t|\tProkaryota\t|\tProkaryota <Bacteria>\t|\tin-part\t|\n+2\t|\tProkaryotae\t|\tProkaryotae <Bacteria>\t|\tin-part\t|\n+2\t|\tbacteria\t|\tbacteria <blast2>\t|\tblast name\t|\n+2\t|\teubacteria\t|\t\t|\tgenbank common name\t|\n+2\t|\tprokaryote\t|\tprokaryote <Bacteria>\t|\tin-part\t|\n+2\t|\tprokaryotes\t|\tprokaryotes <Bacteria>\t|\tin-part\t|\n+641\t|\tVibrionaceae\t|\t\t|\tscientific name\t|\n+641\t|\tVibrionaceae Veron 1965\t|\t\t|\tauthority\t|\n+641\t|\tgamma-3 proteobacteria\t|\tgamma-3 proteobacteria <#3>\t|\tin-part\t|\n+662\t|\t"Microspira" Schroeter 1886\t|\t\t|\tauthority\t|\n+662\t|\t"Pacinia" Trevisan 1885\t|\t\t|\tauthority\t|\n+662\t|\tBeneckea\t|\t\t|\tsynonym\t|\n+662\t|\tBeneckea Campbell 1957\t|\t\t|\tauthority\t|\n+662\t|\tListonella\t|\t\t|\tsynonym\t|\n+662\t|\tListonella MacDonell and Colwell 1986\t|\t\t|\tauthority\t|\n+662\t|\tMicrospira\t|\t\t|\tsynonym\t|\n+662\t|\tPacinia\t|\t\t|\tsynonym\t|\n+662\t|\tVibrio\t|\t\t|\tscientific name\t|\n+662\t|\tVibrio Pacini 1854\t|\t\t|\tauthority\t|\n+666\t|\t"Bacillo virgola del Koch" Trevisan 1884\t|\t\t|\tauthority\t|\n+666\t|\t"Bacillus cholerae" (Pacini 1854) Trevisan 1884\t|\t\t|\tauthority\t|\n+666\t|\t"Bacillus cholerae-asiaticae" Trevisan 1884\t|\t\t|\tauthority\t|\n+666\t|\t"Kommabacillus" Koch 1884\t|\t\t|\tauthority\t|\n+666\t|\t"Liquidivibrio cholerae" (Pacini 1854) Orla-Jensen 1909\t|\t\t|\tauthority\t|\n+666\t|\t"Microspira comma" Schroeter 1886\t|\t\t|\tauthority\t|\n+666\t|\t"Pacinia cholerae-asiaticae" (Trevisan 1884) Trevisan 1885\t|\t\t|\tauthority\t|\n+666\t|\t"Spirillum cholerae" (Pacini 1854) Mac1889\t|\t\t|\tauthority\t|\n+666\t|\t"Spirillum cholerae-asiaticae" (Trevisan 1884) Zopf 
1885\t|\t\t|\tauthority\t|\n+666\t|\t"Vibrio cholera" (sic) Pacini 1854\t|\t\t|\tauthority\t|\n+666\t|\t"Vibrio cholerae-asiaticae" (Trevisan 1884) Pfeiffer 1896\t|\t\t|\tauthority\t|\n+666\t|\t"Vibrio comma" (Schroeter 1886) Blanchard 1906\t|\t\t|\tauthority\t|\n+666\t|\tATCC 14035\t|\tATCC 14035 <type strain>\t|\ttype material\t|\n+666\t|\tATCC 14547 [[Vibrio albensis]]\t|\tATCC 14547 [[Vibrio albensis]] <type strain>\t|\ttype material\t|\n+666\t|\tBacillo virgola del Koch\t|\t\t|\tsynonym\t|\n+666\t|\tBacillus cholerae\t|\t\t|\tsynonym\t|\n+666\t|\tBacillus cholerae-asiaticae\t|\t\t|\tsynonym\t|\n+666\t|\tCCUG 48664 [[Vibrio albensis]]\t|\tCCUG 48664 [[Vibrio albensis]] <type strain>\t|\ttype material\t|\n+666\t|\tCCUG 9118 A\t|\tCCUG 9118 A <type strain>\t|\ttype material\t|\n+666\t|\tCECT 514\t|\tCECT 514 <type strain>\t|\ttype material\t|\n+666\t|\tCIP 62.13\t|\tCIP 62.13 <type strain>\t|\ttype material\t|\n+666\t|\tKommabacillus\t|\t\t|\tsynonym\t|\n+666\t|\tLMG 4406 [[Vibrio albensis]]\t|\tLMG 4406 [[Vibrio albensis]] <type strain>\t|\ttype material\t|\n+666\t|\tLMG:4406 [[Vibrio albensis]]\t|\tLMG:4406 [[Vibrio albensis]] <type strain>\t|\ttype material\t|\n+666\t|\tLiquidivibrio cholerae\t|\t\t|\tsynonym\t|\n+666\t|\tMicrospira comma\t|\t\t|\tsynonym\t|\n+666\t|\tNCIMB 41 [[Vibrio albensis]]\t|\tNCIMB 41 [[Vibrio albensis]] <type strain>\t|\ttype material\t|\n+666\t|\tNCTC 8021\t|\tNCTC 8021 <type strain>\t|\ttype material\t|\n+666\t|\tPacinia cholerae-asiaticae\t|\t\t|\tsynonym\t|\n+666\t|\tSpirillum cholerae\t|\t\t|\tsynonym\t|\n+666\t|\tSpirillum cholerae-asiaticae\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio albensis\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio albensis Lehmann and Neumann 1896\t|\t\t|\tauthority\t|\n+666\t|\tVibrio cholera\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio cholerae\t|\t\t|\tscientific name\t|\n+666\t|\tVibrio cholerae Pacini 1854\t|\t\t|\tauthority\t|\n+666\t|\tVibrio cholerae biovar albensis\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio cholerae bv. 
albensis\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio cholerae-asiaticae\t|\t\t|\tsynonym\t|\n+666\t|\tVibrio comma\t|\t\t|\tsynonym\t|\n+1224\t|\tAlphaproteobacteraeota\t|\t\t|\tsynonym\t|\n+1224\t|\tAlphaproteobacteraeota Oren et al. 2015\t|\t\t|\tauthority\t|\n+1224\t|\tAlphaproteobacteriota\t|\t\t|\tsynonym\t|\n+1224\t|\tProteobacteria\t|\t\t|\tscientific name\t|\n+1224\t|\tProteobacteria Garrity et al. 2005\t|\t\t|\tauthority\t|\n+1224\t|\tProteobacteria [class] Stackebrandt et al. 1988\t|\t\t|\tauthority\t|\n+1224\t|\tproteobacteria\t|\tproteobacteria <blast1224>\t|\tblast name\t|\n+1224\t|\tpu'..b'teria sp. FDA00013574\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013575\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013576\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013577\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013578\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013579\t|\t\t|\tincludes\t|\n+1639\t|\tListeria sp. FDA00013607\t|\t\t|\tincludes\t|\n+1639\t|\tNCTC 10357\t|\tNCTC 10357 <type strain>\t|\ttype material\t|\n+1639\t|\tSLCC 53\t|\tSLCC 53 <type strain>\t|\ttype material\t|\n+2157\t|\t"Archaea" Woese et al. 
1990\t|\t\t|\tauthority\t|\n+2157\t|\t"Archaebacteria" (sic) Woese and Fox 1977\t|\t\t|\tauthority\t|\n+2157\t|\tArchaea\t|\t\t|\tscientific name\t|\n+2157\t|\tArchaebacteria\t|\t\t|\tsynonym\t|\n+2157\t|\tMendosicutes\t|\t\t|\tsynonym\t|\n+2157\t|\tMetabacteria\t|\t\t|\tsynonym\t|\n+2157\t|\tMonera\t|\tMonera <Archaea>\t|\tin-part\t|\n+2157\t|\tProcaryotae\t|\tProcaryotae <Archaea>\t|\tin-part\t|\n+2157\t|\tProkaryota\t|\tProkaryota <Archaea>\t|\tin-part\t|\n+2157\t|\tProkaryotae\t|\tProkaryotae <Archaea>\t|\tin-part\t|\n+2157\t|\tarchaea\t|\tarchaea <blast2157>\t|\tblast name\t|\n+2157\t|\tprokaryote\t|\tprokaryote <Archaea>\t|\tin-part\t|\n+2157\t|\tprokaryotes\t|\tprokaryotes <Archaea>\t|\tin-part\t|\n+2158\t|\tMethanobacteriales\t|\t\t|\tscientific name\t|\n+2158\t|\tMethanobacteriales Balch and Wolfe 1981\t|\t\t|\tauthority\t|\n+2159\t|\tMethanobacteriaceae\t|\t\t|\tscientific name\t|\n+2159\t|\tMethanobacteriaceae Barker 1956\t|\t\t|\tauthority\t|\n+2172\t|\tMethanobrevibacter\t|\t\t|\tscientific name\t|\n+2172\t|\tMethanobrevibacter Balch and Wolfe 1981\t|\t\t|\tauthority\t|\n+28890\t|\t"Euryarchaeota" Woese et al. 1990\t|\t\t|\tauthority\t|\n+28890\t|\tEuryarchaeota\t|\t\t|\tscientific name\t|\n+28890\t|\tEuryarchaeota Garrity and Holt 2002\t|\t\t|\tauthority\t|\n+28890\t|\tMethanobacteraeota\t|\t\t|\tsynonym\t|\n+28890\t|\tMethanobacteraeota Oren et al. 
2015\t|\t\t|\tauthority\t|\n+28890\t|\tMethanobacteriota\t|\t\t|\tsynonym\t|\n+28890\t|\teuryarchaeotes\t|\teuryarchaeotes <blast28890>\t|\tblast name\t|\n+83816\t|\tATCC 35063\t|\tATCC 35063 <type strain>\t|\ttype material\t|\n+83816\t|\tDSM 1093\t|\tDSM 1093 <type strain>\t|\ttype material\t|\n+83816\t|\tJCM 13430\t|\tJCM 13430 <type strain>\t|\ttype material\t|\n+83816\t|\tMethanobacterium ruminantium\t|\t\t|\tsynonym\t|\n+83816\t|\tMethanobacterium ruminantium Smith and Hungate 1958 (Approved Lists 1980)\t|\t\t|\tauthority\t|\n+83816\t|\tMethanobrevibacter ruminantium\t|\t\t|\tscientific name\t|\n+83816\t|\tMethanobrevibacter ruminantium (Smith and Hungate 1958) Balch and Wolfe 1981\t|\t\t|\tauthority\t|\n+83816\t|\tOCM 146\t|\tOCM 146 <type strain>\t|\ttype material\t|\n+83816\t|\tstrain M1\t|\tstrain M1 <type strain> <taxid 83816>\t|\ttype material\t|\n+91061\t|\tBacilli\t|\t\t|\tscientific name\t|\n+91061\t|\tBacilli Ludwig et al. 2010\t|\t\t|\tauthority\t|\n+91061\t|\tBacillus/Lactobacillus/Streptococcus group\t|\t\t|\tsynonym\t|\n+91061\t|\tFirmibacteria\t|\t\t|\tsynonym\t|\n+91061\t|\tFirmibacteria Murray 1988\t|\t\t|\tauthority\t|\n+131567\t|\tbiota\t|\t\t|\tsynonym\t|\n+131567\t|\tcellular organisms\t|\t\t|\tscientific name\t|\n+135623\t|\t\'Vibrionales\'\t|\t\t|\tsynonym\t|\n+135623\t|\tVibrionaceae group\t|\t\t|\tsynonym\t|\n+135623\t|\tVibrionales\t|\t\t|\tscientific name\t|\n+183925\t|\tArchaeobacteria\t|\t\t|\tsynonym\t|\n+183925\t|\tArchaeobacteria Murray 1988\t|\t\t|\tauthority\t|\n+183925\t|\tMethanobacteria\t|\t\t|\tscientific name\t|\n+183925\t|\tMethanobacteria Boone 2002\t|\t\t|\tauthority\t|\n+183967\t|\tThermoplasmata\t|\t\t|\tscientific name\t|\n+183967\t|\tThermoplasmata Reysenbach 2002\t|\t\t|\tauthority\t|\n+186820\t|\tListeriaceae\t|\t\t|\tscientific name\t|\n+186820\t|\tListeriaceae Ludwig et al. 2010\t|\t\t|\tauthority\t|\n+1235850\t|\t"Methanoplasmatales" Paul et al. 
2012\t|\t\t|\tauthority\t|\n+1235850\t|\tMethanomassiliicoccales\t|\t\t|\tscientific name\t|\n+1235850\t|\tMethanomassiliicoccales Iino et al. 2013\t|\t\t|\tauthority\t|\n+1235850\t|\tMethanoplasmatales\t|\t\t|\tsynonym\t|\n+1783272\t|\tTerrabacteria group\t|\t\t|\tscientific name\t|\n+2283794\t|\t"Methanomada" Petitjean et al. 2015\t|\t\t|\tauthority\t|\n+2283794\t|\tMethanogen Class I\t|\t\t|\tsynonym\t|\n+2283794\t|\tMethanomada\t|\t\t|\tequivalent name\t|\n+2283794\t|\tMethanomada group\t|\t\t|\tscientific name\t|\n+2283796\t|\tDiaforarchaea\t|\t\t|\tequivalent name\t|\n+2283796\t|\tDiaforarchaea Petijean et al. 2015\t|\t\t|\tauthority\t|\n+2283796\t|\tDiaforarchaea group\t|\t\t|\tscientific name\t|\n'
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,27 @@
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+641 | 135623 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+662 | 641 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+666 | 662 | species | VC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1239 | 1783272 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1385 | 91061 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1637 | 186820 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1639 | 1637 | species | LM | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+2157 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+2158 | 183925 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2159 | 2158 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2172 | 2159 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+28890 | 2157 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+83816 | 2172 | species | MR | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+91061 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+135623 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+183925 | 2283794 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+183967 | 2283796 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+186820 | 1385 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1235850 | 183967 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1783272 | 2 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+2283794 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2283796 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz
b
Binary file test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz has changed
b
diff -r 000000000000 -r b6c5e7343617 test-data/cached_locally/cat_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cat_database.loc Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,8 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value  name    database_folder taxonomy_folder
+#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
+CAT_prepare_test CAT_prepare_test ${__HERE__}/CAT_prepare_test/CAT_database ${__HERE__}/CAT_prepare_test/taxonomy
b
diff -r 000000000000 -r b6c5e7343617 test-data/contigs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs.fasta Tue Dec 10 16:07:39 2019 -0500
b
b'@@ -0,0 +1,93 @@\n+>contig_9952\n+TGGTTATGTACGCAGACAGCTTACTCCTCCTCGGAGACCACTCGCGGTACTCTATTCCCTCCACGGTTGCGAGGCGCTCGCCGTAGATCCGCTTCCCGGG\n+ACAGGCAGACAGGGTGTATAGCCTCCCCCTCTCGGAAAAAACCCCGGGCACGCGGTCCAAAGACTCCATGCCCGTAACAACGCCGTGGTTTTAGAAATAA\n+TCTGTGCCGTCGGTTGCAAACCCTAAATACAGGGGGATATCAATGCGGTTGCATGGATATCCACATCCTTCGTGAGATCGCCGACGCAGTTCAGGCGGCG\n+GTATCTCTCATACCCGACCCCTGCAGCAGGGGCAACGAGATATGCATGGGCAACGACGGCACACCCACATCCGAGATAGACAAAGTGGCTGAGAACGCGG\n+TGCTCGGGTACATAGAGTCCAACCGCCTGGCTCTGAACGTGCTCAGCGAAGAGATAGGCTTCGTGGACAACGGCGCGTCGGAGGTTCTGGTCCTCGATCC\n+CATCGATGGGACAAGCAATTCAGTGGCCGAGATACCTTTCTATACGATATCCATGGCCGTCGGCAAGGATTCGCTCTGCGGCATGCACACGGCCTACATC\n+AGGAACCTGGCGACAGGGGACGAGTTCTGGGCGCACAAAGGGGATGGCGCTTATTACAACGGAAGGAGGATAAACGTCAGGAAGCCGGATTTCTCCAAAC\n+TGTTCGCCCTTATATACATGGGGAACGCCGCTGTCGATGAAGCGTTCGCCCTTGCAAAGAACGTCAAGACCTCCCGCTCCATGGGCTGCGCCTCCCTTGA\n+GATGACGCTCGTGGCACTAGGACACGCCGATATCTATTACATGAACACCTACCGTTACAACCGTGCCGTCAGGACTGTGGACATCGCCGCCAGCGCCCTG\n+ATACTCAGGGAGGCGGGCGGCGAGATATTCGATATCGGCGGCAACAAGCTGGATATGCCGCTGGACAACGCTTACCACGCAAGCTTCGTGGCGTGCTCCT\n+GCAAAGAGGTATTCGACCACATCATGAGGGCCCACATCGAGGAGCACGGCGCTACGCGTTACGGGATATACGCCAACGAGACCGTCCCCGGGGCGGCCGA\n+GTATGTGAGGAGGGCGTACGATGCTTTGAGGGGGGAGAAGGTAACCCTCGACACGGCGGCCGCCAGGCTGATCGGGGCGGAAGGCGTGCCTATTTCGGAG\n+ATCGAGGCGGACATCGTCGTGGTGATAGGAGGGGACGGCACGATACTCAGGGCGCTCAAGAAGACGGATGCCGCCGTGATAGGGATCAACGCCGGAGGCG\n+TGGGGTTCCTGGCCGAGGTCGAGCCGGACGAGATAGAGGAGAGCATATCCCGCATCAGGCGCGGAGAGTACTCGGTTGAGGAGAGGATCAAGCTCAGGAC\n+TTTTTACGAGGGGGAATATCTCTCGGAGGCCGTGAACGAGACAGTGATACACACTGATTCCGTGGCGAAGATCAGGCAGTTCAAGATATATGTCAACGAA\n+CACCTGGCAACGGAGGTCCGCGCGGACGGCATAATCATCTCGACGCCCACAGGCTCCACCTGCTACGCCATGAGCCTCGGCGCGCCCATAACCGACCCGG\n+GGGTCGGAGCTTTCCTGATAGTCCCCATGGCGGCGTTCAAGTTCGCTTCCCGTCCGTTCGTCGTTCCCTATACGGCGAAGATAACCGTCGAGGCGGTCAT\n+GGACAAGGGCTGCCTCATCGTGGTGGACGGCCAGCACGAGTACCCGATGAGGGGAGGGACGCGGGCGGAATTCTCGCTTTCCGACAACCTCGCCAAATTC\n+TCGGCCCCGGCGTTCCTGGCATCGACGGGCATCTCGAAGTAGATCTCGCCGCCCATCATGTTGATGTCGGCCTTAAAAGGTAAAGAAAGCCAGATGGCGT\n+TCGAGAC
ATCGGAGTCGTCCAGCTCGGCGAAGTAATCGCCGCTCGTCGTGACAATCTTCATTCTGCTCAAAGCGCCACCCGGCCGTCGGTTCAGTTCTTT\n+TTCTTCTTTTCGAAGAGCTTCCTGATCTCCCTGCCCGTGACCTCGATCTGGAGGTCCTTCTCCTTCTCCTCCATTGCCTTCAGGCCCTTCAGGTCGTTGG\n+CCCATTCGGAGGTCCAGTCGGCTTTGAATTTTCCGGATTCGATGTCGTCCAGAATCTTCTTCATGCCCTTCTCGGACTCTTCGGTGATCACCAGGTCCCT\n+CCTGGTAAGGCCTCCGTACTCGGCAGTGTTGGAAACGACGTGCCACATCTTCTCGAAACCGCCCTCGTTTATGAGGTCGACGATGAGCTTCGCCTCATGG\n+CATACTTCGAAGTAGGCCATTTCGGGAGGGTATCCTCCCTCGACCAGGGTCTTGAATCCCGACTTTATGAGGCCGGTGGTCCCTCCGCACAGCACGGCCT\n+GCTCTCCGAACAGGTCTGTGAGCGTCTCGTTGTCGAAAGTGGTCTCGAAGACGCCGGCGCGGGTGGCTCCGAGCCCCTTTGCAAGTGCAAGGGCGATCTT\n+CTTGGCGTTGCCGGTATAGTCCTGGTGGACGCAAACGAGGGCCGGAACTCCGAATCCCTCGACGAACACATCCCTTTCTTTGTCCCCGGGGGCCTTGGGA\n+GCCATCATTATGACGTCGATGTTTTTCGGAGGAACGATGGTCTTGTAGGTCACAGCGAAACCGTGGGCGAACTCAAGTGCGCAGCCCTCCCTGATGTTGG\n+GCTCGACGAATTCTTTGTATACCTTTGGCTGGACCTCGTCGGGCAGAAGCATCATGATGACGTCCGCGGTCTTGGCGGCCTCGGCGAAATCTACGACCTT\n+GAAGCCGTCCTCTTTCGCTTTGTTCCATGATCTTCCGTCTTTCCTGAGCCCGATCACTACGTTGAGGCCGGAGTCCCTGAAGCACAGGGCCTGCGCTCTC\n+CCCTGGGATCCGTAGCCCATGACGGCGACCGTTTTTCCTTTAAGGACATCTATGTCCACATCTGCATCGTGGTAAATCTTCATTATATCCACCTGTTTAG\n+AGGTCCAACTGCTTTATAGACTAAAAGGTATCGTTCCCGCTCCGACATATAGGTCAGTTCAGTACTGGCAGCGTCCTTTGACCAGGGCCTGATTCGGATT\n+GGCAGGCAGCATGGGCAACACGTCCTCCTCGGGATCGATGTGGATGTCCAGCAGGCACGTCTCGCCGCTGTCTATTGCGGTCTTCAGGGCGTCGGCTATC\n+TCTCCCGGCTTCTCGACCAGCATTCCTCTGGCCCCGTAGGCCTCGGCTATCTTGGAGAAGTCCGGGTCGGCGCCAAGCTCGGTCTCGCTGTACCTCTTGT\n+TCCAGAACAGCTTCTGCCACTGTTTGACCATTCCCAGCCATCCGTTGTTCAGCAGGACTATGACGACCGGCAGGTCCTCGGCCACCGAGGTGGCCAGCTC\n+CTGTTGGACCATCTGGAATCCCCCGTCCCCTGTTATGGTCAGGACGGTGCTGTCGGGCTTGGCGGCCTTCGCCCCTATGGCGGAGGGGAGCCCGAAACCC\n+ATCGTGCCGAAGCTTCCCGAGGAGAGGAGCTGTCTGGGCCTGTGGACGTGCAGATGGTGCATGGCCCACATTTGGTTCTGTCCCACGTCGGTGGTGACTA\n+TCATGTCGTCGTCCTTGTCGATCAGCCTGTTGATCTCGTATATGACCTTCTGAGGGACGATCGGTGTAAGGTCTATGTCGATCTTGCACCTGCAACGCCT\n+CCTGTACTCCGCATAGGTGCTGTTCCAGTCGGCATGGGTATCCCTGTATCCGGAGAGCCCGTCGATGAGCGCCGCGGTACCCTTCTTAGCATCGCAGAGA\n+AGGTTGACGTCGTTGTTCTTGTGCTTGTCGAACTCCGTCGCGTCTATGTC
TATCTGGACGACCCTGCATGCGCCGTCGAACCTGGTGTGGG'..b'CGGGGGCCATATGCCCACGCAGGTGTTTCATGGCTGTAATAAAGGTCGGTATCAACGGATTCGGAACCATAGGGAAA\n+AGGGTCGCCTCCGCAGTGAGCGCACAGGATGACATGGAAGTCGTAGGTGTGACGAAGACCCGCCCGTCCTTCGAGTCGGAGGTCGCAAGGTACAGGGGAT\n+TCGACCTGTACGCGCCTCAGAAAAGCGTCGAACTGTTCGACAAAGCGAACGTGCCGGTCGGGGGGACCGTCGAAGACCTCTGCGGCAAGGTAGACATCAT\n+GGTCGACTGCACGCCCGGAAACGTAGGGCAGGAATACAAGGCGATGTACGCCAAAGCAGGCATAAAGGCGATATTCCAGGGAGGGGAGGACCACAGCCTG\n+ACGGGGATATCCTTCAACTCCACCGCCAACTACAAGGAGTCCTGGGGCGCCCAGTTCTCCCGTGTCGTTTCTTGCAACACCACGGGGCTGCTGAGGACGC\n+TCTACCCCATAGACCGCGAGTTCGGTATCGAGAAGGCGTACGTAACGTTGGTCAGAAGGGCCGCGGACCCCGGTGACAGCAAGAACGGGCCGATCAACGG\n+GCTGGAGCCCACCGTCAAGCTGCCGACCCACCACGGGCCGGACGTCCAGAGCATCATGCCATGGGTCAACATCAACACCATGGCGATAAAGGCCTCCACT\n+ACGTTGATGCACATGCACACGGTCACGCTGGAGCTGAAGAACTCCGCTTCCACCGAGGCCGCGGTCGAAGCGATAAGGAACTCCTCGCGCGTCAGGATGG\n+TGGACGCGGCGTCCGGCATCAGGTCCACGGCGGAGGTCATGGAGCTGTCGAGGGACCTGGCCAGGGACAGGTCCGACATGTACGAGATCGTGGTATGGGA\n+\n+>contig_38063\n+CTATCTCCTCAGGAGGTCTGGGAATCTCTGATCGGGAAGAACAGTAACTACCGCATCATAGTCGTGGACCTCAATCTGACCCGTGTGCTGTTCGGCATGA\n+TAGTGGGCGCCGGCCTGGCGGTGGCCGGTGCGGTCATGCAGGCCCTGTTCAAGAACCCGATGGCCTCGCCTTATACTCTCGGGCTCTCGTCAGGCGCCGC\n+ATTGGGCGCCGCATTGGGGATTCTCTTCCCTCTTTCGTTCGTACCTGAGGTCGCATCGGTCCCAATCCTGGCTTTCGTTTTCTGTCTGGGGACCATGTTC\n+CTCGTGTACTCTATTGCCAGAGTGGGCAACCAGACGCACATGGAGACTCTTCTGCTGGCCGGAATAGCCGTAGCGGCATTGGCGCAGGCGGCGGTCTCCC\n+TGCTCACGTACATAGCGGGCGAGAGCATCACGGAGATAGTCTTCTGGGGAATGGGCAGCCTGACCGTCAGCCTCCCATGGGTCAAGATCCCGATAGTGCT\n+GGTCCTCAGCGCCGTGGGCATATTCGCAATGCTCTACTACGCCAAGGACCTGAACGCCATGATGCTGGGGGACGCCCACGCCATGGACCTTGGAATAGAC\n+GTAAAAAAGACAAGGCTGGCACTGTTGATCGCCTCGTCTCTCGTCACCGCGGCTGCGGTATGTTTCGTGGGGACCATCGGCTTCGTAGGCCTTGTGATCC\n+CGCACATACTCAGGATACTTCTTGGTCCGGACAACCGTCTGCTTCTGCCGATGTGCGTGCTGACCGGAGGGATATATCTTGTAGGATGCGACTATCTGGC\n+ACATCTCTTCGCCCAATCTCTGGGCGTCATGCCCATAGGCATAGTGACATCTCTGATAGGCGCCCCGTATTTCATCTATCTGCTCAGGAGAAGAAAAAAG\n+GAGGTGGGATGGGTATGAGCCTGGATATCCGTGACTTATTCTACAATTACGATGGGAAGCCTGTTCTCAAAGACGTTTCGTTCCTGGTCAAGGAAGGAGA\n+G
GTCCTGGGGATACTGGGGCCCAACGGATGCGGAAAGACGACCCTGCTGGGCAATCTGAACAGGAATCTGAGCCCCAAAGGCGGATGCGTGCTTCTGGAC\n+GGGGAGGACCTTCACAATTACAAGAAAAAAGACATCGCGAAGGAGATAGCGGTGGTTCCGCAGGACAGTCGCGTAGGTTTCTCGTTCACCGTAAGAGAGA\n+TCGTCTCCATGGGCAGGATGCCATTCCAGGACGCCTTCCAGGGAGACTCCTCGGAAGACCTCAGGATAATCGAAGACGCGATGAGGAAGACCAACGTACT\n+GGATATGGCAGACCGTTACGTGAACACCATGAGCGGCGGGGAAAGGCAGAAGGTCATAATCGCCAGGGCCATGGCGCAGACGCCCAAGATACTGCTGATG\n+GACGAGCCC\n+\n+>contig_44250\n+GGTGATGTACTGGGGCTTGTAGGCTACTTTGACCTTTGCGTCTATCTTGCCGCCGTCTGGAGGGATCTCTCCGGCCAGCATCTTTACGAAAGTGGTCTTT\n+CCTGTGGCGTTGGGACCGACGACCCCGACGGATTCCCCCATCTTTATGGAACCGCCGACGACATCCAAAGTGAACTCTCCGAAGTCCTTGGACAGGCCCT\n+CGAAGGAAAGCAGGTCGGAAGTGACCCAGTCGCTCCTGGGAGGAGACGCGAAGAACTCTATCGGCCTATCCCTGAAACGGATATTCTCTTCGGGAAGGTA\n+ACCGTCCAGATATACGTTTATGGCGGTCCTGACCTGTCTTGCAAGAGTGAACACGCCGTACGCCCCCTCGGTACCGTATACAACGCTGACGATGTCGGCG\n+AGGAAATCGAGTATGGCAAGATCGTGTTCTATCACGACCACCTGCTTTTCTGCGCTGAGTTCTTTGATGATGCGTGCCATCCTGATCCTCTGGTAGATGT\n+CAAGGTACGAGGTGGGCTCGTCGAAGAAGTATACGTCCGCGTCCTTCATGACCGTGGCAGCCATGGCGACCCTCTGAAGCTCTCCTCCTGAAAGTTTCTT\n+TATATCCCTGTCCAGAAGCTCGGTCAGCTCGAACATGATGGCGGCCTCCTCGAGTGTCAGGCGGCCTTTTATGCCGGAAAGCAGGTCCTTCACGGGCCCC\n+GATGCGGCTTTGGGTATGAGGTCCACGTACTGTGGCTTTATGGCCGTCCTCACCTTGCCGGCGTAGACGTCCGTGAGATAGGATTTGACCTCGGTACCGT\n+CGTAGTGCTGCAGCACTTCCTCTTTGGATGGAGGTTTCTCATAGTTGCCCAGGTTGGGGACGAGTTCCCCGGAAAGTATCTTGATCGCCGTGGATTTTCC\n+GATCCCGTTCGGTCCAAGTATGCCCGTGACCATGCCTTTCTTCGGCACCGGGAGCCTATAGAGGCGGAAGGCGTTCTCGCCGTACTGGTGGACCATCTCC\n+GTCTTCAGCTCGTCGGCCAGGCCTATGATCTTTATGGCGTCGAACTGGCATTTGTTGACGCATATCCCGCATCCCTGGCACAGGGATTCGGATATGATGG\n+GCTTGCCCCTCTCGCCGAACACTATGCATTCCACGCCCGTTCTGACCAACGGGCAGAACTTATAGCATTCCTTGTTGCATTTTCTGTTCTGGCATCTGTC\n+CTGCAGGACGGCCGCAATACGCATGTCCCCGCTTAGACCGATTTAAGATATAACCTTTAAGGATGGTATCGCAGATAAGCTGATAAGGGAAGACGGAGAC\n+AGATGGGCATGGCCGAAGCGGATGGGACCACCGAGGACGTCAGGATACTTACGGGCGACTACAGGAGGGCGATAAGGCATCTCTCCATACCGATAGCCGT\n+GGCTCTTGCGATACAGCATATCAACATACTCGTAGACACGTTCTGGGTCGCGGGCCTGGGGGCGGACCCGATGGCTTCAATAAGCATAGTATACCCGGTT\n+TTCGCCACGGTCA
TGGGCATCGGAAGCGGGCTGGGGATCGGTGCTTCTTCCGCGATAGCCAGAAGCATCGGGCATAACAGGAGGAAGGAAGCCGGCACGA\n'
b
diff -r 000000000000 -r b6c5e7343617 test-data/genome2.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome2.fna Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,51 @@
+>contig_11394
+GCTTTTTACGCCCAACGGGCTTGTTCTTGCTCAGACAGTCCAAGGCTTTCCAGATATGATAATCGCTGAATTTCGGTATCGGGCCGCCTGCACCCCTGTG
+CTGCTGATTGGACATATGGATGTATCATTCATCCATTAAATAAAGTAATTGATTGTCATGTCGCAAGATACGGTATTAACGCCGTCAATAGCTGTTAAGC
+CAGTGCCCGCCGTCGCTTTCCATCCGTAATACTCGCGCGTTTTTTATATCGAAGGAACTCTTCACATCCATGGTTCAAAAGTCGGTGGACTTTGTTATTA
+TTGAGAATGTTTCCAAGAGGTTCGGGAACAAGACGGTCCTTAATAATGTGAGCGCCACTATACAGACCGGTAAGATACTGGGCCTGATAGGCAAGAGCGC
+CGCGGGCAAGAGCGTTTTGATAATGATGCTGAGGGGAAGCGAAGATTACGCACCCGACTCCGGAAGGGTGCTGTACAGGGTCAATAGGTGTTCCGGATGC
+GGAAACCTCGACCTCCCCCATGAAGGAACGCCCTGCTCGAAATGCGGGTGTGAAACAGGAACGATCACCGTGGATTTCTGGTCTTTGAAAGATGACGACC
+CTTTAAGACGCCAGCTCAAGAGCCGCATCGCCATAATGCTGCAGAGGACGTTCGCCCTTTTCGGGGATAAGACCGTGATCGAGAACATCTTCGAGGCCAT
+AGGCGACCGTGCAGAGGGCAAGGCCAGGACGGACATGGCGCTCCAGCTGCTGGAATTCGTGGGGATGACTCACAGGACCACACACATAGCCAGGGACCTG
+TCCGGAGGAGAGAAGCAGAGGATAGTCCTCGCAAGGCAGATAGCCAGGGATCCTCTCTTCTTCTTGGCGGACGAGCCGACGGGAACGCTTGACCCGTACA
+CGGCGGAATTGATGCACGAGCGTCTTGTGGACTACGTCGGGAAGAGAGGGATCTCGATGGTCTTCGCGTCCCATTGGCCCGAGGCCGTGGATAAGATGGC
+CGACGAGGCCATATGGCTGGATTCCGGCAACGTGCTGATGCAGGGCGACCCGAAGGAGATCGCCGATAAATTTATGGAAGGATACTCGTTCGAAAGGACA
+AAGGCCGCCGACCTGGGAGAGCCGATAATATCGCTCAAGGATGCGGAGAAGCACTTTTTCTCTGTCGTCAGAGGAGTCGTCAAGGCAGTGGACGGTGTAA
+CCTTCGATATAATGGAGCGCGAGGTGTTCGGCCTTGTGGGAAAGTCGGGCGCCGGCAAGACCACGACGTCAAGAATGGTCGCCGGCATGACGCCCGCCAC
+CCGCGGGTCCGTGAAGATAAGGATCGGCGACGACTGGGTTGATATGTCAGAGATGGGGCCGAGCGGGAAAGGCCGCGCCACCCCCTATATCGGGTTCCTC
+CATCAGGAATACACGCTCTATCCCTTCGACAACATACTCAGCAACCTTACGACCAGCATAGGCACCAGGATGCCAGCGGAACTTGCAAAGTTCAAGGCCA
+TACAAGTGCTTCAGAGCGTAGGGTTCGACAAGAAGAACATGGAGAGTCTTCTCTACTCTTACCCCGACACACTGAGCGTCGGAGAGTGCCAGAGGATAGC
+CTTCGCACAGGTCCTGATAAGGGAGCCCCGCATCATAGTGCTGGATGAGCCTACAGGGACAATGGACCCGATAACAAAGACCATCATAGCAAAATCCGTC
+ATCCGGGCGAGGGAGACCCTGGGCGAGACCTTCGTCGTGGTGAGCCACGACATGGATTTTGTCGAGAACGTCTGCGACCGCGTAGCGTTCATAAGGAACG
+GCGTCGTGGAAGACATGGGAACTCCCGAGTCGGTCATCCAGAGGTTCGGTCTGAAAGAGCTTCAGGATGACGACTCCGAGGGTGAATGAATGAAGCAGCA
+GATCGGGCGCCACCTCAGCTTCGTTGAATGCAGAGAGGCCATGGGGCTCGGCGTGGGCGGTGCCCTGGCACAGAGGGCGACCATCTCTGACAGCGGAAGG
+GACGTCGTTGCGGTGGCCATGGGCCCGGGCAAGAGGCACATAACCAAACCGGTATGCGAGATAACATATGCCCTCAGAGAAGAGGGCATAGATACCAGCG
+TCGCCTGAGCGCCTGGCACTTATTGCGGAGTATGTCAAGGACATGATGACCGAACTCGAACCGGACAACGCGGCCGTCTTCGAAGCGGGATGCGCCAGCT
+ACCGGGCCAAGGTAGATGTGCTGATAGGGCTTGAACAAGAATATCTGACAGGCAAGGCGACTACCGAGATCATCGTCTGGCACCCTTCCTGGGCGTATCT
+TCTTCCGGATAATGTGACCGAGGCAGAGCTCATGGAAGCAGCCGAGGCGGCATCCACGCCCTCATCGATCGCGATGCTGCAGGGAGGGACGCCGGAAAAT
+CCTATCAACGTGTTCCTTTCGGAACCCGAAGAGATCAACGGTCTTACCCAGCAGGGGCTTTGTGAAATGGGAATATATGTAAACATAATAGTGATTAACA
+TACTCGCCGGGGACTGGGTCGAATATCTGGGCCAGGTCATCGAGATACTGGGAGATAATATTCCGGATGCGGGGACATGAATTGATGATACCAATAGAAA
+TTAAGGACCTTACCGCTGGATATGACGGCCGAGCCGTTTTCAGCAACGTCGACCTGGAGCTCAGGGACAAAGACTTCCTGGCGGTCATAGGGCCCAACGG
+CGGCGGGAAGACAACGCTCTTCAGGGCGATCCTGGGCCTAATAAAACCCATGGGGGGGACCGTAAAAGTGTTCGGCAAGGAGCCGGCAGGTTCGCCCCCG
+GGCATAGGATACGTTCCGCAGAACGAGAATCTGGACTCAGAATATCCAATAAGTGCCAGGGAAGTCGTCCTTATGGGAATGAGGTGCAAGAAGGGCCTTA
+GGCCGTTCTATTCCAGTGAGGAGAAGGAGTCCGCAGAGAGGGCCATGGAGTACGCCGAGGTCTCGGATTTCGCAGACAGCCGAATAAGCAACCTGTCGGG
+AGGGCAGAGACAGAGAGTATACCTCGCAAGGGCTCTTGCCCCGGAACCGAAGATACTCATGCTGGACGAACCCACCGCGAGCCTGGACCCGTCGATGAAG
+GACTGCACCTACGACATACTCAGGAAGCTGAACAGGGACGGGATAGCCATAATGGTGATAACTCACGATATGAGCAGCATCTCTCATGATGTCAAACGTG
+TAGCATGCATGAACCGCAGGCTGATAGTCAACGATGCGCCCGAGATAACCCAGGAGATGATCGCATTGGGATTCCACTGCATCCCCGAGCTAGTGCACAT
+AGGTCCCTGCGATTGCGGAGGTCACAACGATGGTTGATTGGGTCGCGGCATTCTCGATGCCTCTGATTCAGAACATGTTCATGGTCGCGGCCATAGCATG
+CGTTCTTTGCGGAGTCGTGGGAACCCTGGTGGTCGTGAAACGGATGGTGTTCGTAACGGGTGGCATAGCACACACCACTTTCGGAGGTGTGGGTCTTGCA
+TATTATGTTATGTCCGTCGTCGCAGTCTCATGGTTCACCCCCATGATCGGCGCCGCACTGTTCGCGGTCGTTTCGGCGGTCATAATGGCGCTTCCCGCGG
+
+>contig_159
+TATAGCTCAGCTCGTTGGCGGAGACGCTGCTTCCGTACATCTGGCCGCCGCCGTTGATGCCGCCGCCCCAGGCGGCCGTCCCGATCCCCACGGGAGAAAT
+GTCCGTACCTCGGAACCTTATGTTTCTCACGGATTCCCGTATATGTTCCTGGATTATAACTGATACGCAATCCTGTTTCCGACGTCCGCCATGTTTAGAT
+AAATTGACGGTATAGCCGAAGGCATGGATATGGCAATGGAGCTGAGGAACGTCTCCGTAGTGAGGGACGGGAAGCGGATACTGGATTCCGTCTGCCTCGA
+TATCGGCGCCTCCGAGAACGTTGCCGTCATAGGGCCGAACGGTTCGGGGAAGACGACGCTCATCAAACTGCTGAGGGGCGATATTTATCCCTACTACGAC
+GAGGACCGCCCCGCGGAGATGAGGATCTTCGGTGAGAAGATATGGTCCATCTACGACATACGGAGCCGCATGGGCGTGGTCTCCATGGACCTCCAGGGCA
+TGTTCGGCGGCGAAACGCTGGTCGGAGACGTCATAATGTCGGGATACTTCAGCAGCCTGGACATTTTCCGCAACCATGAGGTCACCGACAACATGCGCTC
+CGGGGCCTCGCGAGCGGCCGGGTACATGGGAGTGGAACATCTCGTCGGCAGAGATCTGTCCGGCCTTTCTCTGGGAGAGATGAGGCGGACGCTGATCGCC
+CGGGCGCTGGTCACCGCCCCCGAGATGCTCGTCCTCGACGAACCGATGACGGGCCTCGATATTGTAATGAAATCCAAATTCAGGAAGATGTTCGACATCA
+TGACGGAAACGGGAGTGAGCATCGTCATGATAACCCACGACCTCACCGACATCCCCGTTTCCTTGAACCGCATAATAATGATCAAGGATGGGAAAGTGTT
+CGCGGACGGTCCTAAAAAAGACGTCCTGACGTCCGAGGTCGTCAGCGGGCTTTTCGATGAACCTATTAATGTACAATGCGTTAACGGGATATATTCAATG
+AGGATGGATGAGTGACAAGGTATATCTGTTCCGAATGCGGGAACGAGATTCCGTACGTTTCGGATTTCTGCTACCAGTGCGGTAGCCTGAAGAGCAAGGC
+GTTCAAGATAGACGAGGGCGGCGAGATGGAGGGCGGGGAGGTCCCGTGCCCCAACTGCGGAAAGCCCATAGAGGAGGACGCCCGGTACTGCAGGCACTGC
b
diff -r 000000000000 -r b6c5e7343617 test-data/genome3.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome3.fna Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,60 @@
+>contig_4003
+ATCAAAGAGGAACTGAAGGCGGCGATGCTGCTGACCGGTTCCTCTGACATAAGAGAGCTCTCTGATGCAGAGTATATCGTCATGGGAGAGACACGCAAAT
+GGATGGAAGGCCTGAAATGACCGACGTCAAGAAGATATTGAAACAGATGTCCGACGAGCTGAGCAAGCCGATCGAATCATACATAGAAGACGAACTGCCC
+GCCAATCTCATCGAAGCGGCAAGACAGTACCCCTATGCCGGCGGAAAGAGGATGAGACCGGCCATGGTCATCGCCGCGTGCAGGGCGGTGGGAGGGGATG
+GCAGGAAGGCCGTTCCCCTTGCGGTTGCCATAGAGTACATACACAATTTCACGCTGATCCATGATGACCTCATGGACGGGGACGAGAAGNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCATGGACGGGGACGAGAAGCGCC
+GGGGCATGACCACATCCCATGTGAAGTACGGCATGCCCACAGCGGTGTTGGCGGGAGATGCCCTGTTCGCTAAGGCCTTCCAGATAATCGCCGACCTCGA
+TGCGGACGGCGAAACGGTCAGAGATGTCCTTAGAGTGGTCTCCCAATCCGTCTGGGACCTGGCCAGAGGTCAGCAGATGGATATAAACAACGAGAACGGG
+ACCGAGGTCACCATGGATGAGTACATCGAGACCATCAGACTGAAGACCAGCGTTCTGTTCGCCGCCGGTGCCGCCGGAGGCGCTATGATCGGAGGTGCGA
+GCAAGGAGGTCGTGGACGCCGTCCACGGCTACGCCATGAACCTGGGCGTCGCCTTCCAGATGTATGACGATATACTTGGGATAGTCGGGGACCCGGCCGT
+TACCGGTAAATCGTCCGGTAATGACATTCGCAAAGGGAAGAGCACCGTCATCGTGTGCCACGCCCTGAAGAACATAGCTGACAGGGCGGACCTGCTGGTC
+TTCCGCGATATCCTCGGCAAGACAGACGCCACCGATGCGGAGATAGACGAGGTCAGGAGCATACTCCGAAGAGCCTGCAGCCTGGATTACGCCATAGAGA
+CCGCAGAGGATTACATCAACAAAGCCGTCGACTGCCTGGATGCGCTGGAGCCCTCAAAGGACAAGGACTTCATGATAGCCCTGGCAGAATACACGATGAC
+CAGGACCCTTTAGTCGGAGATCCCCTTCTCCGTTATGGAGTATGTGGCTTTCCGGCCTTCCGGTATGCTGCGGTGCTTGACCATGATCGCGGCCCTGCGG
+CCGTTGCCTTTCTTCTCCAAGCGGATTATGGTCTTCGCGTTATGATGCATGGCGTGGCCTCCGAGGAACTCTATCGTACCGGCGCCTATGTTGGTGTATA
+
+>contig_4403
+CACCGGTCACCCGAAGGTCACGCGCGTATCGATGCGTGACATCGCAGACCTGGGAGAGAGGGGCCTGTACATCCTTCACGAGATCGGTACGGACCTCGTC
+GGCAAGATGGAGGGCTGCACCGGGTGCAAGAAGTGCGAGCACGAATGCCCCGAGAACGCGTTGACCGTAAGCAAGGACAAGACGATCACCGTGAAGACCA
+AGAACTGCCTCGGAACGGCATGCTACAGATGCCAGTACGTCTGTCCCGAGAAGGTCATGCAGTTCGACTCCCTAAGGCTGTCGTGATAAACGGTTTTGGG
+CGGGGCCGGCCCCGCCCTTTTTTCATTTACCGCCGTTCAGGGCCTCGGCGTGCACGGCAGGCCTGACATTCTCGTCCTCCAGCTCCGTAAGTATCTGCTT
+GCGCAGCCTGATGAACTCGGGCGAAGCGCGGTCCCTGGGCCGCGGAATGCCTATGTCCACGATGTCCTTGATGCTGGCAGGACGCTTGGTAAGGACGACT
+ATCCTGTCTGAAAGATAAACGGCCTCGTCGACCGAGTGGGTCACGAACAGGATCGTAGTGTCCGTCTTCTCGACTATCCTCAGCAGCTCGCCCTGCATGA
+TGTTGCGCGTCTGGGCGTCCAACGCGCCGAACGGCTCGTCCATGAGCAGCACGTCGGGCTTGGTAACAAGGGCCCTTGCGATGCCCACGCGCTGCTTCAT
+ACCTCCGCTGAGCTCGTGGACACGATGGTCCTCGAAACCTTCGAGGCCGACCGCCCTGATGTAGCGTTCGGCGGTCTTCCTGCGCTGCTCCGCCGGGACG
+CCGGCGATCTCCAGGCCGAACTCGACATTCTTCCTTACAGAACGCCAAGGGAACAGTGCGAACTCCTGGAACACCATGCCTCTGTCGGGGCCTGGCCCGG
+TGCACTTCTTCCCGCCTATCGACACTTCTCCGGAGGACGGCTCCATGAGCCCTGCTATAAGCCTGAGCAGAGTCGTCTTTCCGCATCCCGAGGGACCGAC
+TATGGATATAAGCTCGCCCTTCTGGATCTCCAGAGAGAAATCCTCCAGGGCCACGGTCTCCTGTTCATCGGTCTTGTAGACCTTCCTCAGATGATTGATA
+ACGATCTTCTCGCTCATTCTATCCCCATCCTTCTTGTTATGACCTTGTGCAGATAGTCGGCGAGGCTGGTCGTCAGTATTCCGAGGATTGCGATTATGAC
+TATGCCCGCGTAGACGTTGGGCCAGTACCCCATCTGCGCCTGTATGCTGATGAAGTATCCGACGCCTCCTCCGAACGATGCGTACAGCTCGGAGGCAACT
+ATGCACATCCACCCGACCCCCATGCCTATGCGGAGGCCGTTCATTATGTATGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+>contig_14302
+GATGTGGAATGCGGCCAAATGCCGCCTCGGTTTCAGCGGGGCAAGGCGTTCCATGTCCACGACCGCTGGAATACTGGTGAACGTTTTCATTACTTCTCTG
+GAGATAGCCGAACGTTCTCAGCCTGCGCTCGAAAGCAGGAACTACAACGGCAGTTTCCCCATTTATAGGATGCCCAACAAGATCGGTCTCTACTGGTTCG
+CCATCACGGCCGCCGCGTGCGCCTCGCTATACATCATCGGATACTACATCTCGACCCCGAACATGGCGGCAATACTCCTGGGGGCGGTCTGACGGTCTGC
+GACCTTCGTACGGATAAAGTGAGCTACAGGTACGGCAATTCGAATTACAATGCTATCGATTCTGTGGATTTCACAGCCTCCCATGGCAGAAGGACCGTCA
+TCCTGGGAGAGAACGGGTGCGGCAAGTCGACGCTGATATATCAGCTCAACGGAGTATACAAGCCTGTTTCCGGTACTGTGTTCTATGGAGATATGCCCAT
+ATCATACGACAAGGAGTTCCTTACGGAGCTGCGTTCCGACGTTTCCGTTGTTCTGCAGAATCCAGACGATCAGATTTTCTCTTCCACCGTCGAGGAGGAC
+GTGGCATTTGGACCGTTAAACTTGGGCCTTTCCCGGGAAGAGGTTGGAGAAAGAATCGGCCGGTCTTTGGAATGCGTGGGGATGTCGGGATTTGCCGAAG
+TGCCTGTTCAGCGCCTTTCATACGGTCAGAAAAAAAGAGTGTCACTCGCAGGCGCCCTAGCATCACATCCAAAGATACTGGTCCTGGACGAGCCTACCGC
+AGGCCTGGACCCGCAGATGTCCAGAGAGGTGATGGAGATCGCAAATTCTCTCATCAGAGAAGGGATCTCCGTCGTAGTATCCACCCATGACGTGAATCTG
+ATCTACAATTGGGTCGAGGACCTTTACGTGATGCGGAACGGACACATGGTCTTCTCCGGAGATGCGGACGAGTTCTTCTCCGACCGTCCGTCCGTTTATC
+TTTCAGGTCTGGAACAGCCCTCGATATTCAGCATAAACCACAATATGGAGACGTTAAGAGGGACGATTCCCGCGTCATATCCCAAGACCATGAGCCAGAT
+GGTCAGCAGATTATTCCCTTCAGGATCCTCGGCCGGAAGGATATTCATCTATCAGACTGAAGGCGAGCGCATCGACCAGGATGCGATCGAGGAGGCCGTG
+GGAAAGAAAGGGATGCCCATTGCAGTATACGGCCCCTCCGCGCGCAGGTCGGTGACCCGATCGAAGCTCAGGGTTGATTTCTATTTTAACGGCATAGAGT
+GTTGCATCAGGGAGGCCATGGTAAACCATGATTCCCTGATAATAGTCGACCGGGGCTTGAAGGGGATCGTCACGGAGGCGATTGAAGAGCTTAGGGCATA
+CGGAACCCGGATCAGTATCAGGGAGTTGGTTTTTTGAGCGCTCCCCTTTTCCGCACCGAAGGTCTTTTCTTCAGATACGAAGGCGGCCGGGGGGACGCGT
+TGGCAGACGTGAACATCACGATCAAAGAGGGTGCCAGAACTGTCATCATGGGAGCCAACGGAGCTGGAAAATCCACGTTCTTCTATCATCTTAACGGAGT
+CTTGAGGCCGTCGAAGGGCTCGGTGTTTTTCCGGGGAGAAAAAATACCGCACAGGGGAAAAGCTCTCAGGAAGCTGCGCTCGGAGGTCGCGGTGATGCTC
+CAAGACCCCAACGACCAGCTTTTTGCACCAAAAGTATCTGACGACATAGCATTCGGCCCGAAGAACCTGGGACTCGACGCTCAGACTGTAGGGGAGAGGG
+TCAGGGACGCCCTCTACATCACAGGCATCGAATCTCTGGAGGGTCGCAGCGTGATGCAGCTGTCGTTCGGCCAGAAGAAGAGGGTGGTGCTGGCCGGTGC
+CTTGGCGATGCATCCGAAGGTGCTTATAATGGACGAGCCCACCGCAGGTCTCGATCCCCAGATGTCCAAGGAGCTCATCGAGCTCGCGGACGAGCTGCAC
+CATCTTGGAACGACCGTTATTTTTTCAACCCATGACGTGGACCTCTCATATTCTTGGGCGGACGAGGTCCATGTCCTAAGAGGGGGCCGTAATGTATATT
+CGGGGAGCTCAGAAAGATTCTATGACGATACTTCGGAAGTTTATCTTTCGGGCCTTGTCGAACCGGCCATGTACGACATCAACGTCAGCATCTCCGAGCT
+TGCCGGATGCCCCGTTGAACCGTTTCCCAAAACCCTGCCTCAGCTTGTGGCCAAGGCAGTGCCGTCAGAGGGGCCGGGCACGGTTCACATCCTTCCCGTG
+GAAGGTCCGGTCGACCGGGAGCTGTTCTCCTCTCTGACGTCCGGGTCCGGGATGTCCGCAACAGGCGTCTACGGTACTAATGCAAGAAAATCTGCGGAGG
+CTTCCAAATTGCCGATAGATTATTTCTTCGGGGCCGACGAGGGATGCATAATAGAGGCTTTGCACGGCAAAGACACGCTGATATGCTGCGACAGGTCCCT
+TACAGATCTGCTGATATCGAAGATAGGCAGTATGTCCCGGTTCGGGACAGAGGTCCCTTATTCTCTGCACTGAACATTTCTTTTTTCCGGGGGTTCGAAC
b
diff -r 000000000000 -r b6c5e7343617 test-data/test_contig.contig2classification.names.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.names.txt Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,4 @@
+# contig classification reason lineage lineage scores superkingdom phylum class order family genus species
+contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli: 1.00 Bacillales: 1.00 Listeriaceae: 1.00 Listeria: 1.00 Listeria monocytogenes: 1.00
+contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Proteobacteria: 1.00 Gammaproteobacteria: 1.00 Vibrionales: 1.00 Vibrionaceae: 1.00 Vibrio: 1.00 Vibrio cholerae: 1.00
+contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli*: 1.00 not classified not classified not classified not classified
b
diff -r 000000000000 -r b6c5e7343617 test-data/test_contig.contig2classification.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.txt Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,4 @@
+# contig classification reason lineage lineage scores
+contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00
b
diff -r 000000000000 -r b6c5e7343617 tool-data/cat_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/cat_database.loc.sample Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,7 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+#  2019-07-19.CAT_prepare.fresh.log
+#  2019-07-19_CAT_database
+#  2019-07-19_taxonomy
+#value  name    database_folder taxonomy_folder
+#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
b
diff -r 000000000000 -r b6c5e7343617 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="tool-data/cat_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r b6c5e7343617 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Dec 10 16:07:39 2019 -0500
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of CAT databases -->
+    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, database_folder, taxonomy_folder</columns>
+        <file path="${__HERE__}/test-data/cached_locally/cat_database.loc" />
+    </table>
+</tables>