# HG changeset patch
# User jjohnson
# Date 1574650560 18000
# Node ID ad7507073c3fd1005fb62f9db1dd78ea89e0dfc9
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
diff -r 000000000000 -r ad7507073c3f cat_add_names.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_add_names.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,41 @@
+
+ annotate with taxonomic classification
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f cat_bins.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_bins.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,71 @@
+
+ annotate with taxonomic classification
+
+ macros.xml
+
+
+
+ 1:
+ #set $bin_dir = 'inputs'
+ mkdir -p $bin_dir &&
+ #for mag in $mags:
+ ln -s '$mag' $bin_dir/ &&
+ #end for
+ #end if
+ CAT
+ #if $bin_dir
+ bins -s '.dat' -b $bin_dir
+ #else
+ bin -b '$mags'
+ #end if
+ @CAT_DB@
+ @USE_INTERMEDIATES@
+ @CUSTOM_SETTINGS@
+ && @TXT2TSV@ *.ORF2LCA.txt *.bin2classification.txt
+ @ADD_NAMES@
+ @SUMMARISE@
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f cat_contigs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_contigs.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,48 @@
+
+ annotate with taxonomic classification
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f cat_prepare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_prepare.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,29 @@
+
+ database for CAT - Contig Annotation Tool
+
+ macros.xml
+
+
+
+ $cat_db &&
+ CAT prepare --fresh
+ --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")'
+ --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")'
+ ]]>
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f cat_summarise.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_summarise.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,40 @@
+
+ annotate with taxonomic classification
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff -r 000000000000 -r ad7507073c3f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,267 @@
+
+ 5.0.3
+
+
+ cat
+
+
+
+
+ CAT --version
+
+ CAT_database
+ taxonomy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 'log' in select_outputs or not select_outputs
+
+
+ 'predicted_proteins_faa' in select_outputs
+
+
+ 'predicted_proteins_gff' in select_outputs
+
+
+ 'alignment_diamond' in select_outputs
+
+
+
+
+
+
+ 'orf2lca' in select_outputs
+
+
+
+
+
+
+ 'contig2classification' in select_outputs
+
+
+
+
+
+
+ 'bin2classification' in select_outputs
+
+
+
+
+
+
+ 'orf2lca' in names.add_names
+
+
+
+
+
+
+ 'classification' in names.add_names
+
+
+
+
+
+
+ 'classification' in summarise
+
+
+
+
+
+
+
+
+
+ https://doi.org/10.1101/072868
+ https://doi.org/10.1186/s13059-019-1817-x
+
+
+
+
diff -r 000000000000 -r ad7507073c3f tabpad.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tabpad.py Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import argparse
+import re
+
+
+def padfile(infile, outfile, fieldcnt=None):
+    """Copy infile to outfile, tab-padding each line to at least fieldcnt fields."""
+    # A fieldcnt of None (or 0) falls back to the field count of the first line.
+    tabs = '\t' * fieldcnt if fieldcnt is not None else None
+    # One `with` owns both files, so outfile is closed even if a read/write raises.
+    with open(infile, 'r') as fh, open(outfile, 'w') as out:
+        for txtline in fh:
+            line = txtline.rstrip('\r\n')
+            nfields = line.count('\t') + 1
+            tabs = tabs or '\t' * nfields
+            out.write('%s%s\n' % (line, tabs[nfields:]))
+
+
+def fieldcount(infile):
+    """Return the largest tab-separated field count of any line (0 if no lines)."""
+    with open(infile, 'r') as src:
+        widths = [row.rstrip('\r\n').count('\t') + 1 for row in src]
+    # Seeding with 0 keeps max() defined for an empty file.
+    return max([0] + widths)
+
+
+def tsvname(infile):  # "x.txt" -> "x.tsv"; any other name just gains ".tsv"
+    return re.sub(r'\.txt$', '', infile) + '.tsv'
+
+
+def __main__():
+    """Command-line entry point.
+
+    Pads the -i/--input file (to -o/--output, else its .tsv name), then pads
+    every positional file to its .tsv name, each to its own widest row.
+    """
+    cli = argparse.ArgumentParser(
+        description='Pad a file with TABS for equal field size across lines')
+    cli.add_argument('-i', '--input', help='input file')
+    cli.add_argument('-o', '--output', help='output file')
+    cli.add_argument('files', nargs='*', help='.txt files')
+    opts = cli.parse_args()
+
+    # Queue (source, destination) pairs; the -i file, if any, goes first.
+    jobs = []
+    if opts.input:
+        jobs.append((opts.input, opts.output or tsvname(opts.input)))
+    jobs.extend((path, tsvname(path)) for path in opts.files)
+    for src, dest in jobs:
+        padfile(src, dest, fieldcnt=fieldcount(src))
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+ __main__()
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/CAT_database/protIDs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/protIDs Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,11 @@
+WP_000159554.1
+WP_000214552.1
+WP_000346214.1
+WP_000568619.1
+WP_000958804.1
+WP_000991933.1
+WP_000996146.1
+WP_003722398.1
+WP_005378126.1
+XP_961517.1
+
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.dmnd has changed
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.fastaid2LCAtaxid Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,10 @@
+WP_000159554.1 2
+WP_000214552.1 91061
+WP_000346214.1 91061
+WP_000568619.1 666
+WP_000958804.1 1301
+WP_000991933.1 666
+WP_000996146.1 666
+WP_003722398.1 1639
+WP_005378126.1 662
+XP_961517.1 5141
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz
Binary file test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.gz has changed
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/CAT_database/test.nr.taxids_with_multiple_offspring Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,17 @@
+91061
+1
+641
+1224
+1236
+131567
+1637
+1639
+1783272
+2
+662
+13562
+13562
+641
+662
+666
+91061
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/names.dmp Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,252 @@
+1 | all | | synonym |
+1 | root | | scientific name |
+2 | Bacteria | Bacteria | scientific name |
+2 | Monera | Monera | in-part |
+2 | Procaryotae | Procaryotae | in-part |
+2 | Prokaryota | Prokaryota | in-part |
+2 | Prokaryotae | Prokaryotae | in-part |
+2 | bacteria | bacteria | blast name |
+2 | eubacteria | | genbank common name |
+2 | prokaryote | prokaryote | in-part |
+2 | prokaryotes | prokaryotes | in-part |
+641 | Vibrionaceae | | scientific name |
+641 | Vibrionaceae Veron 1965 | | authority |
+641 | gamma-3 proteobacteria | gamma-3 proteobacteria <#3> | in-part |
+662 | "Microspira" Schroeter 1886 | | authority |
+662 | "Pacinia" Trevisan 1885 | | authority |
+662 | Beneckea | | synonym |
+662 | Beneckea Campbell 1957 | | authority |
+662 | Listonella | | synonym |
+662 | Listonella MacDonell and Colwell 1986 | | authority |
+662 | Microspira | | synonym |
+662 | Pacinia | | synonym |
+662 | Vibrio | | scientific name |
+662 | Vibrio Pacini 1854 | | authority |
+666 | "Bacillo virgola del Koch" Trevisan 1884 | | authority |
+666 | "Bacillus cholerae" (Pacini 1854) Trevisan 1884 | | authority |
+666 | "Bacillus cholerae-asiaticae" Trevisan 1884 | | authority |
+666 | "Kommabacillus" Koch 1884 | | authority |
+666 | "Liquidivibrio cholerae" (Pacini 1854) Orla-Jensen 1909 | | authority |
+666 | "Microspira comma" Schroeter 1886 | | authority |
+666 | "Pacinia cholerae-asiaticae" (Trevisan 1884) Trevisan 1885 | | authority |
+666 | "Spirillum cholerae" (Pacini 1854) Mac1889 | | authority |
+666 | "Spirillum cholerae-asiaticae" (Trevisan 1884) Zopf 1885 | | authority |
+666 | "Vibrio cholera" (sic) Pacini 1854 | | authority |
+666 | "Vibrio cholerae-asiaticae" (Trevisan 1884) Pfeiffer 1896 | | authority |
+666 | "Vibrio comma" (Schroeter 1886) Blanchard 1906 | | authority |
+666 | ATCC 14035 | ATCC 14035 | type material |
+666 | ATCC 14547 [[Vibrio albensis]] | ATCC 14547 [[Vibrio albensis]] | type material |
+666 | Bacillo virgola del Koch | | synonym |
+666 | Bacillus cholerae | | synonym |
+666 | Bacillus cholerae-asiaticae | | synonym |
+666 | CCUG 48664 [[Vibrio albensis]] | CCUG 48664 [[Vibrio albensis]] | type material |
+666 | CCUG 9118 A | CCUG 9118 A | type material |
+666 | CECT 514 | CECT 514 | type material |
+666 | CIP 62.13 | CIP 62.13 | type material |
+666 | Kommabacillus | | synonym |
+666 | LMG 4406 [[Vibrio albensis]] | LMG 4406 [[Vibrio albensis]] | type material |
+666 | LMG:4406 [[Vibrio albensis]] | LMG:4406 [[Vibrio albensis]] | type material |
+666 | Liquidivibrio cholerae | | synonym |
+666 | Microspira comma | | synonym |
+666 | NCIMB 41 [[Vibrio albensis]] | NCIMB 41 [[Vibrio albensis]] | type material |
+666 | NCTC 8021 | NCTC 8021 | type material |
+666 | Pacinia cholerae-asiaticae | | synonym |
+666 | Spirillum cholerae | | synonym |
+666 | Spirillum cholerae-asiaticae | | synonym |
+666 | Vibrio albensis | | synonym |
+666 | Vibrio albensis Lehmann and Neumann 1896 | | authority |
+666 | Vibrio cholera | | synonym |
+666 | Vibrio cholerae | | scientific name |
+666 | Vibrio cholerae Pacini 1854 | | authority |
+666 | Vibrio cholerae biovar albensis | | synonym |
+666 | Vibrio cholerae bv. albensis | | synonym |
+666 | Vibrio cholerae-asiaticae | | synonym |
+666 | Vibrio comma | | synonym |
+1224 | Alphaproteobacteraeota | | synonym |
+1224 | Alphaproteobacteraeota Oren et al. 2015 | | authority |
+1224 | Alphaproteobacteriota | | synonym |
+1224 | Proteobacteria | | scientific name |
+1224 | Proteobacteria Garrity et al. 2005 | | authority |
+1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority |
+1224 | proteobacteria | proteobacteria | blast name |
+1224 | purple bacteria | | common name |
+1224 | purple bacteria and relatives | | common name |
+1224 | purple non-sulfur bacteria | | common name |
+1224 | purple photosynthetic bacteria | | common name |
+1224 | purple photosynthetic bacteria and relatives | | common name |
+1236 | Gammaproteobacteria | | scientific name |
+1236 | Gammaproteobacteria Garrity et al. 2005 emend. Williams and Kelly 2013 | | authority |
+1236 | Proteobacteria gamma subdivision | | synonym |
+1236 | Purple bacteria, gamma subdivision | | synonym |
+1236 | g-proteobacteria | | blast name |
+1236 | gamma proteobacteria | | synonym |
+1236 | gamma subdivision | | synonym |
+1236 | gamma subgroup | | synonym |
+1239 | Bacillaeota | | synonym |
+1239 | Bacillaeota Oren et al. 2015 | | authority |
+1239 | Bacillota | | synonym |
+1239 | Bacillus/Clostridium group | | synonym |
+1239 | Clostridium group firmicutes | | synonym |
+1239 | Firmacutes | | synonym |
+1239 | Firmicutes | | scientific name |
+1239 | Firmicutes corrig. Gibbons and Murray 1978 | | authority |
+1239 | Low G+C firmicutes | | synonym |
+1239 | clostridial firmicutes | | synonym |
+1239 | firmicutes | firmicutes | blast name |
+1239 | low G+C Gram-positive bacteria | | common name |
+1239 | low GC Gram+ | | common name |
+1385 | Bacillales | | scientific name |
+1385 | Bacillales Prevot 1953 | | authority |
+1385 | Bacillus/Staphylococcus group | | synonym |
+1637 | "Listerella" Pirie 1927 (nom. rej. Opin. 14) | | authority |
+1637 | Listerella | | synonym |
+1637 | Listeria | | scientific name |
+1637 | Listeria Pirie 1940 | | authority |
+1639 | "Bacterium monocytogenes hominis" Nyfeldt 1932 | | authority |
+1639 | "Bacterium monocytogenes" Murray et al. 1926 | | authority |
+1639 | "Corynebacterium infantisepticum" Potel 1950 | | authority |
+1639 | "Corynebacterium parvulum" Schultz et al. 1934 | | authority |
+1639 | "Erysipelothrix monocytogenes" (Murray et al. 1926) Wilson and Miles 1946 | | authority |
+1639 | "Listerella hepatolytica" Pirie 1927 | | authority |
+1639 | ATCC 15313 | ATCC 15313 | type material |
+1639 | Bacterium monocytogenes | | synonym |
+1639 | Bacterium monocytogenes hominis | | synonym |
+1639 | CCUG 15526 | CCUG 15526 | type material |
+1639 | CIP 82.110 | CIP 82.110 | type material |
+1639 | Corynebacterium infantisepticum | | synonym |
+1639 | Corynebacterium parvulum | | synonym |
+1639 | DSM 20600 | DSM 20600 | type material |
+1639 | Erysipelothrix monocytogenes | | synonym |
+1639 | Listerella hepatolytica | | synonym |
+1639 | Listeria monocytogenes | | scientific name |
+1639 | Listeria monocytogenes (Murray et al. 1926) Pirie 1940 | | authority |
+1639 | Listeria sp. FDA00013359 | | includes |
+1639 | Listeria sp. FDA00013360 | | includes |
+1639 | Listeria sp. FDA00013361 | | includes |
+1639 | Listeria sp. FDA00013362 | | includes |
+1639 | Listeria sp. FDA00013363 | | includes |
+1639 | Listeria sp. FDA00013364 | | includes |
+1639 | Listeria sp. FDA00013365 | | includes |
+1639 | Listeria sp. FDA00013366 | | includes |
+1639 | Listeria sp. FDA00013367 | | includes |
+1639 | Listeria sp. FDA00013503 | | includes |
+1639 | Listeria sp. FDA00013504 | | includes |
+1639 | Listeria sp. FDA00013505 | | includes |
+1639 | Listeria sp. FDA00013506 | | includes |
+1639 | Listeria sp. FDA00013507 | | includes |
+1639 | Listeria sp. FDA00013508 | | includes |
+1639 | Listeria sp. FDA00013509 | | includes |
+1639 | Listeria sp. FDA00013510 | | includes |
+1639 | Listeria sp. FDA00013511 | | includes |
+1639 | Listeria sp. FDA00013512 | | includes |
+1639 | Listeria sp. FDA00013536 | | includes |
+1639 | Listeria sp. FDA00013537 | | includes |
+1639 | Listeria sp. FDA00013538 | | includes |
+1639 | Listeria sp. FDA00013539 | | includes |
+1639 | Listeria sp. FDA00013540 | | includes |
+1639 | Listeria sp. FDA00013541 | | includes |
+1639 | Listeria sp. FDA00013542 | | includes |
+1639 | Listeria sp. FDA00013543 | | includes |
+1639 | Listeria sp. FDA00013544 | | includes |
+1639 | Listeria sp. FDA00013545 | | includes |
+1639 | Listeria sp. FDA00013546 | | includes |
+1639 | Listeria sp. FDA00013547 | | includes |
+1639 | Listeria sp. FDA00013548 | | includes |
+1639 | Listeria sp. FDA00013549 | | includes |
+1639 | Listeria sp. FDA00013550 | | includes |
+1639 | Listeria sp. FDA00013551 | | includes |
+1639 | Listeria sp. FDA00013552 | | includes |
+1639 | Listeria sp. FDA00013553 | | includes |
+1639 | Listeria sp. FDA00013554 | | includes |
+1639 | Listeria sp. FDA00013555 | | includes |
+1639 | Listeria sp. FDA00013556 | | includes |
+1639 | Listeria sp. FDA00013557 | | includes |
+1639 | Listeria sp. FDA00013558 | | includes |
+1639 | Listeria sp. FDA00013559 | | includes |
+1639 | Listeria sp. FDA00013560 | | includes |
+1639 | Listeria sp. FDA00013561 | | includes |
+1639 | Listeria sp. FDA00013562 | | includes |
+1639 | Listeria sp. FDA00013563 | | includes |
+1639 | Listeria sp. FDA00013564 | | includes |
+1639 | Listeria sp. FDA00013565 | | includes |
+1639 | Listeria sp. FDA00013566 | | includes |
+1639 | Listeria sp. FDA00013567 | | includes |
+1639 | Listeria sp. FDA00013568 | | includes |
+1639 | Listeria sp. FDA00013570 | | includes |
+1639 | Listeria sp. FDA00013571 | | includes |
+1639 | Listeria sp. FDA00013572 | | includes |
+1639 | Listeria sp. FDA00013573 | | includes |
+1639 | Listeria sp. FDA00013574 | | includes |
+1639 | Listeria sp. FDA00013575 | | includes |
+1639 | Listeria sp. FDA00013576 | | includes |
+1639 | Listeria sp. FDA00013577 | | includes |
+1639 | Listeria sp. FDA00013578 | | includes |
+1639 | Listeria sp. FDA00013579 | | includes |
+1639 | Listeria sp. FDA00013607 | | includes |
+1639 | NCTC 10357 | NCTC 10357 | type material |
+1639 | SLCC 53 | SLCC 53 | type material |
+2157 | "Archaea" Woese et al. 1990 | | authority |
+2157 | "Archaebacteria" (sic) Woese and Fox 1977 | | authority |
+2157 | Archaea | | scientific name |
+2157 | Archaebacteria | | synonym |
+2157 | Mendosicutes | | synonym |
+2157 | Metabacteria | | synonym |
+2157 | Monera | Monera | in-part |
+2157 | Procaryotae | Procaryotae | in-part |
+2157 | Prokaryota | Prokaryota | in-part |
+2157 | Prokaryotae | Prokaryotae | in-part |
+2157 | archaea | archaea | blast name |
+2157 | prokaryote | prokaryote | in-part |
+2157 | prokaryotes | prokaryotes | in-part |
+2158 | Methanobacteriales | | scientific name |
+2158 | Methanobacteriales Balch and Wolfe 1981 | | authority |
+2159 | Methanobacteriaceae | | scientific name |
+2159 | Methanobacteriaceae Barker 1956 | | authority |
+2172 | Methanobrevibacter | | scientific name |
+2172 | Methanobrevibacter Balch and Wolfe 1981 | | authority |
+28890 | "Euryarchaeota" Woese et al. 1990 | | authority |
+28890 | Euryarchaeota | | scientific name |
+28890 | Euryarchaeota Garrity and Holt 2002 | | authority |
+28890 | Methanobacteraeota | | synonym |
+28890 | Methanobacteraeota Oren et al. 2015 | | authority |
+28890 | Methanobacteriota | | synonym |
+28890 | euryarchaeotes | euryarchaeotes | blast name |
+83816 | ATCC 35063 | ATCC 35063 | type material |
+83816 | DSM 1093 | DSM 1093 | type material |
+83816 | JCM 13430 | JCM 13430 | type material |
+83816 | Methanobacterium ruminantium | | synonym |
+83816 | Methanobacterium ruminantium Smith and Hungate 1958 (Approved Lists 1980) | | authority |
+83816 | Methanobrevibacter ruminantium | | scientific name |
+83816 | Methanobrevibacter ruminantium (Smith and Hungate 1958) Balch and Wolfe 1981 | | authority |
+83816 | OCM 146 | OCM 146 | type material |
+83816 | strain M1 | strain M1 | type material |
+91061 | Bacilli | | scientific name |
+91061 | Bacilli Ludwig et al. 2010 | | authority |
+91061 | Bacillus/Lactobacillus/Streptococcus group | | synonym |
+91061 | Firmibacteria | | synonym |
+91061 | Firmibacteria Murray 1988 | | authority |
+131567 | biota | | synonym |
+131567 | cellular organisms | | scientific name |
+135623 | 'Vibrionales' | | synonym |
+135623 | Vibrionaceae group | | synonym |
+135623 | Vibrionales | | scientific name |
+183925 | Archaeobacteria | | synonym |
+183925 | Archaeobacteria Murray 1988 | | authority |
+183925 | Methanobacteria | | scientific name |
+183925 | Methanobacteria Boone 2002 | | authority |
+183967 | Thermoplasmata | | scientific name |
+183967 | Thermoplasmata Reysenbach 2002 | | authority |
+186820 | Listeriaceae | | scientific name |
+186820 | Listeriaceae Ludwig et al. 2010 | | authority |
+1235850 | "Methanoplasmatales" Paul et al. 2012 | | authority |
+1235850 | Methanomassiliicoccales | | scientific name |
+1235850 | Methanomassiliicoccales Iino et al. 2013 | | authority |
+1235850 | Methanoplasmatales | | synonym |
+1783272 | Terrabacteria group | | scientific name |
+2283794 | "Methanomada" Petitjean et al. 2015 | | authority |
+2283794 | Methanogen Class I | | synonym |
+2283794 | Methanomada | | equivalent name |
+2283794 | Methanomada group | | scientific name |
+2283796 | Diaforarchaea | | equivalent name |
+2283796 | Diaforarchaea Petijean et al. 2015 | | authority |
+2283796 | Diaforarchaea group | | scientific name |
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/CAT_prepare_test/taxonomy/nodes.dmp Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,27 @@
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+641 | 135623 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+662 | 641 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+666 | 662 | species | VC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1239 | 1783272 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1385 | 91061 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1637 | 186820 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1639 | 1637 | species | LM | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+2157 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+2158 | 183925 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2159 | 2158 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2172 | 2159 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+28890 | 2157 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+83816 | 2172 | species | MR | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+91061 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+135623 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+183925 | 2283794 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+183967 | 2283796 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+186820 | 1385 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1235850 | 183967 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1783272 | 2 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+2283794 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2283796 | 28890 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz
Binary file test-data/cached_locally/CAT_prepare_test/taxonomy/test.prot.accession2taxid.gz has changed
diff -r 000000000000 -r ad7507073c3f test-data/cached_locally/cat_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/cat_database.loc Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,8 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+# 2019-07-19.CAT_prepare.fresh.log
+# 2019-07-19_CAT_database
+# 2019-07-19_taxonomy
+#value name database_folder taxonomy_folder
+#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
+CAT_database_test CAT_database_test ${__HERE__}/CAT_prepare_test/CAT_database ${__HERE__}/CAT_prepare_test/taxonomy
diff -r 000000000000 -r ad7507073c3f test-data/contigs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/contigs.fasta Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,94 @@
+
+>contig_9952
+TGGTTATGTACGCAGACAGCTTACTCCTCCTCGGAGACCACTCGCGGTACTCTATTCCCTCCACGGTTGCGAGGCGCTCGCCGTAGATCCGCTTCCCGGG
+ACAGGCAGACAGGGTGTATAGCCTCCCCCTCTCGGAAAAAACCCCGGGCACGCGGTCCAAAGACTCCATGCCCGTAACAACGCCGTGGTTTTAGAAATAA
+TCTGTGCCGTCGGTTGCAAACCCTAAATACAGGGGGATATCAATGCGGTTGCATGGATATCCACATCCTTCGTGAGATCGCCGACGCAGTTCAGGCGGCG
+GTATCTCTCATACCCGACCCCTGCAGCAGGGGCAACGAGATATGCATGGGCAACGACGGCACACCCACATCCGAGATAGACAAAGTGGCTGAGAACGCGG
+TGCTCGGGTACATAGAGTCCAACCGCCTGGCTCTGAACGTGCTCAGCGAAGAGATAGGCTTCGTGGACAACGGCGCGTCGGAGGTTCTGGTCCTCGATCC
+CATCGATGGGACAAGCAATTCAGTGGCCGAGATACCTTTCTATACGATATCCATGGCCGTCGGCAAGGATTCGCTCTGCGGCATGCACACGGCCTACATC
+AGGAACCTGGCGACAGGGGACGAGTTCTGGGCGCACAAAGGGGATGGCGCTTATTACAACGGAAGGAGGATAAACGTCAGGAAGCCGGATTTCTCCAAAC
+TGTTCGCCCTTATATACATGGGGAACGCCGCTGTCGATGAAGCGTTCGCCCTTGCAAAGAACGTCAAGACCTCCCGCTCCATGGGCTGCGCCTCCCTTGA
+GATGACGCTCGTGGCACTAGGACACGCCGATATCTATTACATGAACACCTACCGTTACAACCGTGCCGTCAGGACTGTGGACATCGCCGCCAGCGCCCTG
+ATACTCAGGGAGGCGGGCGGCGAGATATTCGATATCGGCGGCAACAAGCTGGATATGCCGCTGGACAACGCTTACCACGCAAGCTTCGTGGCGTGCTCCT
+GCAAAGAGGTATTCGACCACATCATGAGGGCCCACATCGAGGAGCACGGCGCTACGCGTTACGGGATATACGCCAACGAGACCGTCCCCGGGGCGGCCGA
+GTATGTGAGGAGGGCGTACGATGCTTTGAGGGGGGAGAAGGTAACCCTCGACACGGCGGCCGCCAGGCTGATCGGGGCGGAAGGCGTGCCTATTTCGGAG
+ATCGAGGCGGACATCGTCGTGGTGATAGGAGGGGACGGCACGATACTCAGGGCGCTCAAGAAGACGGATGCCGCCGTGATAGGGATCAACGCCGGAGGCG
+TGGGGTTCCTGGCCGAGGTCGAGCCGGACGAGATAGAGGAGAGCATATCCCGCATCAGGCGCGGAGAGTACTCGGTTGAGGAGAGGATCAAGCTCAGGAC
+TTTTTACGAGGGGGAATATCTCTCGGAGGCCGTGAACGAGACAGTGATACACACTGATTCCGTGGCGAAGATCAGGCAGTTCAAGATATATGTCAACGAA
+CACCTGGCAACGGAGGTCCGCGCGGACGGCATAATCATCTCGACGCCCACAGGCTCCACCTGCTACGCCATGAGCCTCGGCGCGCCCATAACCGACCCGG
+GGGTCGGAGCTTTCCTGATAGTCCCCATGGCGGCGTTCAAGTTCGCTTCCCGTCCGTTCGTCGTTCCCTATACGGCGAAGATAACCGTCGAGGCGGTCAT
+GGACAAGGGCTGCCTCATCGTGGTGGACGGCCAGCACGAGTACCCGATGAGGGGAGGGACGCGGGCGGAATTCTCGCTTTCCGACAACCTCGCCAAATTC
+TCGGCCCCGGCGTTCCTGGCATCGACGGGCATCTCGAAGTAGATCTCGCCGCCCATCATGTTGATGTCGGCCTTAAAAGGTAAAGAAAGCCAGATGGCGT
+TCGAGACATCGGAGTCGTCCAGCTCGGCGAAGTAATCGCCGCTCGTCGTGACAATCTTCATTCTGCTCAAAGCGCCACCCGGCCGTCGGTTCAGTTCTTT
+TTCTTCTTTTCGAAGAGCTTCCTGATCTCCCTGCCCGTGACCTCGATCTGGAGGTCCTTCTCCTTCTCCTCCATTGCCTTCAGGCCCTTCAGGTCGTTGG
+CCCATTCGGAGGTCCAGTCGGCTTTGAATTTTCCGGATTCGATGTCGTCCAGAATCTTCTTCATGCCCTTCTCGGACTCTTCGGTGATCACCAGGTCCCT
+CCTGGTAAGGCCTCCGTACTCGGCAGTGTTGGAAACGACGTGCCACATCTTCTCGAAACCGCCCTCGTTTATGAGGTCGACGATGAGCTTCGCCTCATGG
+CATACTTCGAAGTAGGCCATTTCGGGAGGGTATCCTCCCTCGACCAGGGTCTTGAATCCCGACTTTATGAGGCCGGTGGTCCCTCCGCACAGCACGGCCT
+GCTCTCCGAACAGGTCTGTGAGCGTCTCGTTGTCGAAAGTGGTCTCGAAGACGCCGGCGCGGGTGGCTCCGAGCCCCTTTGCAAGTGCAAGGGCGATCTT
+CTTGGCGTTGCCGGTATAGTCCTGGTGGACGCAAACGAGGGCCGGAACTCCGAATCCCTCGACGAACACATCCCTTTCTTTGTCCCCGGGGGCCTTGGGA
+GCCATCATTATGACGTCGATGTTTTTCGGAGGAACGATGGTCTTGTAGGTCACAGCGAAACCGTGGGCGAACTCAAGTGCGCAGCCCTCCCTGATGTTGG
+GCTCGACGAATTCTTTGTATACCTTTGGCTGGACCTCGTCGGGCAGAAGCATCATGATGACGTCCGCGGTCTTGGCGGCCTCGGCGAAATCTACGACCTT
+GAAGCCGTCCTCTTTCGCTTTGTTCCATGATCTTCCGTCTTTCCTGAGCCCGATCACTACGTTGAGGCCGGAGTCCCTGAAGCACAGGGCCTGCGCTCTC
+CCCTGGGATCCGTAGCCCATGACGGCGACCGTTTTTCCTTTAAGGACATCTATGTCCACATCTGCATCGTGGTAAATCTTCATTATATCCACCTGTTTAG
+AGGTCCAACTGCTTTATAGACTAAAAGGTATCGTTCCCGCTCCGACATATAGGTCAGTTCAGTACTGGCAGCGTCCTTTGACCAGGGCCTGATTCGGATT
+GGCAGGCAGCATGGGCAACACGTCCTCCTCGGGATCGATGTGGATGTCCAGCAGGCACGTCTCGCCGCTGTCTATTGCGGTCTTCAGGGCGTCGGCTATC
+TCTCCCGGCTTCTCGACCAGCATTCCTCTGGCCCCGTAGGCCTCGGCTATCTTGGAGAAGTCCGGGTCGGCGCCAAGCTCGGTCTCGCTGTACCTCTTGT
+TCCAGAACAGCTTCTGCCACTGTTTGACCATTCCCAGCCATCCGTTGTTCAGCAGGACTATGACGACCGGCAGGTCCTCGGCCACCGAGGTGGCCAGCTC
+CTGTTGGACCATCTGGAATCCCCCGTCCCCTGTTATGGTCAGGACGGTGCTGTCGGGCTTGGCGGCCTTCGCCCCTATGGCGGAGGGGAGCCCGAAACCC
+ATCGTGCCGAAGCTTCCCGAGGAGAGGAGCTGTCTGGGCCTGTGGACGTGCAGATGGTGCATGGCCCACATTTGGTTCTGTCCCACGTCGGTGGTGACTA
+TCATGTCGTCGTCCTTGTCGATCAGCCTGTTGATCTCGTATATGACCTTCTGAGGGACGATCGGTGTAAGGTCTATGTCGATCTTGCACCTGCAACGCCT
+CCTGTACTCCGCATAGGTGCTGTTCCAGTCGGCATGGGTATCCCTGTATCCGGAGAGCCCGTCGATGAGCGCCGCGGTACCCTTCTTAGCATCGCAGAGA
+AGGTTGACGTCGTTGTTCTTGTGCTTGTCGAACTCCGTCGCGTCTATGTCTATCTGGACGACCCTGCATGCGCCGTCGAACCTGGTGTGGGGGCTGAACG
+TCCTGTCCGAGAACTTCGTGCCTATGGCTATTACCAGGTCGGCGTTGCGGAAAGTATCGAGGGCGCACATCTTGCCGTGCATCCCCAAGGGGCCCAGGCT
+GAGCGGGTGCTCGGTGGACATGGCACCCAATCCCATGAGCGTGAAGACCGCCGGCGCGCCGATGAGCTCTGCGAGCCTCGTAACTTCCTCGGACGCGTTC
+GCGCTTATCGTTCCGCCGCCGATCAGCAGGACGGGCCTCTGCGCTTCCTTGATCCATTGGACCGCGGTGCCCAGTTCGGACATGTCCTCCCTGGGCTCCT
+TGATCCCGTACGAGATGCCCAGGAGGCTCTCGTCGATCTCCGAGTTCATCTGGTCTGAGGGGAGGTCGATGTGGACAGGCCCCGGTCGCCCGGTCTGGCA
+CATCTTCCATGCCTCGTCCACCGCATGGGGCAGCCTGTTGACGTCTAGGACCCTGAAGTTGTGCTTCGTTATAGGCATGAGGAGGCTGTACGCGTCCACT
+TCCTGGAAAGCGCCGAGCCCCAGGGACCCGGTTCCGACCTGTCCGGTAAGTGCCAGCATGGGAGTTGAGTCCGCATACGCCGTGCCTATGCCGGTGATCA
+TGTTGGTGGCACCGGGCCCGCTGGTGGCCATGCAGACGCCCGGCCTCCCGCTGGCCCTGGCATATCCGTCTGCCATGTGGGCGGCGCACTGCTCGTGGCG
+TACTAGGACATGGTTTATCGATGAGTTCATTATCTCGTCGTAGATCGAGATTACGCTTCCGCCCGGATATCCGAACATGGTCTCGACACCTCTGTCCTCC
+AGCATTTGGAGCAATGCTCTGTTTCCTTTCATGGTTGGTCTCCGGCGACGTATCGCGCTTGTTTTTTATAATTCTATTTGGAAAAGCGCGCCGAAACGCG
+CCAGCGGAAGAAGTTTATGTATACGGGGGCCATATGCCCACGCAGGTGTTTCATGGCTGTAATAAAGGTCGGTATCAACGGATTCGGAACCATAGGGAAA
+AGGGTCGCCTCCGCAGTGAGCGCACAGGATGACATGGAAGTCGTAGGTGTGACGAAGACCCGCCCGTCCTTCGAGTCGGAGGTCGCAAGGTACAGGGGAT
+TCGACCTGTACGCGCCTCAGAAAAGCGTCGAACTGTTCGACAAAGCGAACGTGCCGGTCGGGGGGACCGTCGAAGACCTCTGCGGCAAGGTAGACATCAT
+GGTCGACTGCACGCCCGGAAACGTAGGGCAGGAATACAAGGCGATGTACGCCAAAGCAGGCATAAAGGCGATATTCCAGGGAGGGGAGGACCACAGCCTG
+ACGGGGATATCCTTCAACTCCACCGCCAACTACAAGGAGTCCTGGGGCGCCCAGTTCTCCCGTGTCGTTTCTTGCAACACCACGGGGCTGCTGAGGACGC
+TCTACCCCATAGACCGCGAGTTCGGTATCGAGAAGGCGTACGTAACGTTGGTCAGAAGGGCCGCGGACCCCGGTGACAGCAAGAACGGGCCGATCAACGG
+GCTGGAGCCCACCGTCAAGCTGCCGACCCACCACGGGCCGGACGTCCAGAGCATCATGCCATGGGTCAACATCAACACCATGGCGATAAAGGCCTCCACT
+ACGTTGATGCACATGCACACGGTCACGCTGGAGCTGAAGAACTCCGCTTCCACCGAGGCCGCGGTCGAAGCGATAAGGAACTCCTCGCGCGTCAGGATGG
+TGGACGCGGCGTCCGGCATCAGGTCCACGGCGGAGGTCATGGAGCTGTCGAGGGACCTGGCCAGGGACAGGTCCGACATGTACGAGATCGTGGTATGGGA
+
+>contig_38063
+CTATCTCCTCAGGAGGTCTGGGAATCTCTGATCGGGAAGAACAGTAACTACCGCATCATAGTCGTGGACCTCAATCTGACCCGTGTGCTGTTCGGCATGA
+TAGTGGGCGCCGGCCTGGCGGTGGCCGGTGCGGTCATGCAGGCCCTGTTCAAGAACCCGATGGCCTCGCCTTATACTCTCGGGCTCTCGTCAGGCGCCGC
+ATTGGGCGCCGCATTGGGGATTCTCTTCCCTCTTTCGTTCGTACCTGAGGTCGCATCGGTCCCAATCCTGGCTTTCGTTTTCTGTCTGGGGACCATGTTC
+CTCGTGTACTCTATTGCCAGAGTGGGCAACCAGACGCACATGGAGACTCTTCTGCTGGCCGGAATAGCCGTAGCGGCATTGGCGCAGGCGGCGGTCTCCC
+TGCTCACGTACATAGCGGGCGAGAGCATCACGGAGATAGTCTTCTGGGGAATGGGCAGCCTGACCGTCAGCCTCCCATGGGTCAAGATCCCGATAGTGCT
+GGTCCTCAGCGCCGTGGGCATATTCGCAATGCTCTACTACGCCAAGGACCTGAACGCCATGATGCTGGGGGACGCCCACGCCATGGACCTTGGAATAGAC
+GTAAAAAAGACAAGGCTGGCACTGTTGATCGCCTCGTCTCTCGTCACCGCGGCTGCGGTATGTTTCGTGGGGACCATCGGCTTCGTAGGCCTTGTGATCC
+CGCACATACTCAGGATACTTCTTGGTCCGGACAACCGTCTGCTTCTGCCGATGTGCGTGCTGACCGGAGGGATATATCTTGTAGGATGCGACTATCTGGC
+ACATCTCTTCGCCCAATCTCTGGGCGTCATGCCCATAGGCATAGTGACATCTCTGATAGGCGCCCCGTATTTCATCTATCTGCTCAGGAGAAGAAAAAAG
+GAGGTGGGATGGGTATGAGCCTGGATATCCGTGACTTATTCTACAATTACGATGGGAAGCCTGTTCTCAAAGACGTTTCGTTCCTGGTCAAGGAAGGAGA
+GGTCCTGGGGATACTGGGGCCCAACGGATGCGGAAAGACGACCCTGCTGGGCAATCTGAACAGGAATCTGAGCCCCAAAGGCGGATGCGTGCTTCTGGAC
+GGGGAGGACCTTCACAATTACAAGAAAAAAGACATCGCGAAGGAGATAGCGGTGGTTCCGCAGGACAGTCGCGTAGGTTTCTCGTTCACCGTAAGAGAGA
+TCGTCTCCATGGGCAGGATGCCATTCCAGGACGCCTTCCAGGGAGACTCCTCGGAAGACCTCAGGATAATCGAAGACGCGATGAGGAAGACCAACGTACT
+GGATATGGCAGACCGTTACGTGAACACCATGAGCGGCGGGGAAAGGCAGAAGGTCATAATCGCCAGGGCCATGGCGCAGACGCCCAAGATACTGCTGATG
+GACGAGCCC
+
+>contig_44250
+GGTGATGTACTGGGGCTTGTAGGCTACTTTGACCTTTGCGTCTATCTTGCCGCCGTCTGGAGGGATCTCTCCGGCCAGCATCTTTACGAAAGTGGTCTTT
+CCTGTGGCGTTGGGACCGACGACCCCGACGGATTCCCCCATCTTTATGGAACCGCCGACGACATCCAAAGTGAACTCTCCGAAGTCCTTGGACAGGCCCT
+CGAAGGAAAGCAGGTCGGAAGTGACCCAGTCGCTCCTGGGAGGAGACGCGAAGAACTCTATCGGCCTATCCCTGAAACGGATATTCTCTTCGGGAAGGTA
+ACCGTCCAGATATACGTTTATGGCGGTCCTGACCTGTCTTGCAAGAGTGAACACGCCGTACGCCCCCTCGGTACCGTATACAACGCTGACGATGTCGGCG
+AGGAAATCGAGTATGGCAAGATCGTGTTCTATCACGACCACCTGCTTTTCTGCGCTGAGTTCTTTGATGATGCGTGCCATCCTGATCCTCTGGTAGATGT
+CAAGGTACGAGGTGGGCTCGTCGAAGAAGTATACGTCCGCGTCCTTCATGACCGTGGCAGCCATGGCGACCCTCTGAAGCTCTCCTCCTGAAAGTTTCTT
+TATATCCCTGTCCAGAAGCTCGGTCAGCTCGAACATGATGGCGGCCTCCTCGAGTGTCAGGCGGCCTTTTATGCCGGAAAGCAGGTCCTTCACGGGCCCC
+GATGCGGCTTTGGGTATGAGGTCCACGTACTGTGGCTTTATGGCCGTCCTCACCTTGCCGGCGTAGACGTCCGTGAGATAGGATTTGACCTCGGTACCGT
+CGTAGTGCTGCAGCACTTCCTCTTTGGATGGAGGTTTCTCATAGTTGCCCAGGTTGGGGACGAGTTCCCCGGAAAGTATCTTGATCGCCGTGGATTTTCC
+GATCCCGTTCGGTCCAAGTATGCCCGTGACCATGCCTTTCTTCGGCACCGGGAGCCTATAGAGGCGGAAGGCGTTCTCGCCGTACTGGTGGACCATCTCC
+GTCTTCAGCTCGTCGGCCAGGCCTATGATCTTTATGGCGTCGAACTGGCATTTGTTGACGCATATCCCGCATCCCTGGCACAGGGATTCGGATATGATGG
+GCTTGCCCCTCTCGCCGAACACTATGCATTCCACGCCCGTTCTGACCAACGGGCAGAACTTATAGCATTCCTTGTTGCATTTTCTGTTCTGGCATCTGTC
+CTGCAGGACGGCCGCAATACGCATGTCCCCGCTTAGACCGATTTAAGATATAACCTTTAAGGATGGTATCGCAGATAAGCTGATAAGGGAAGACGGAGAC
+AGATGGGCATGGCCGAAGCGGATGGGACCACCGAGGACGTCAGGATACTTACGGGCGACTACAGGAGGGCGATAAGGCATCTCTCCATACCGATAGCCGT
+GGCTCTTGCGATACAGCATATCAACATACTCGTAGACACGTTCTGGGTCGCGGGCCTGGGGGCGGACCCGATGGCTTCAATAAGCATAGTATACCCGGTT
+TTCGCCACGGTCATGGGCATCGGAAGCGGGCTGGGGATCGGTGCTTCTTCCGCGATAGCCAGAAGCATCGGGCATAACAGGAGGAAGGAAGCCGGCACGA
diff -r 000000000000 -r ad7507073c3f test-data/genome2.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome2.fna Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,89 @@
+>contig_11394
+GCTTTTTACGCCCAACGGGCTTGTTCTTGCTCAGACAGTCCAAGGCTTTCCAGATATGATAATCGCTGAATTTCGGTATCGGGCCGCCTGCACCCCTGTG
+CTGCTGATTGGACATATGGATGTATCATTCATCCATTAAATAAAGTAATTGATTGTCATGTCGCAAGATACGGTATTAACGCCGTCAATAGCTGTTAAGC
+CAGTGCCCGCCGTCGCTTTCCATCCGTAATACTCGCGCGTTTTTTATATCGAAGGAACTCTTCACATCCATGGTTCAAAAGTCGGTGGACTTTGTTATTA
+TTGAGAATGTTTCCAAGAGGTTCGGGAACAAGACGGTCCTTAATAATGTGAGCGCCACTATACAGACCGGTAAGATACTGGGCCTGATAGGCAAGAGCGC
+CGCGGGCAAGAGCGTTTTGATAATGATGCTGAGGGGAAGCGAAGATTACGCACCCGACTCCGGAAGGGTGCTGTACAGGGTCAATAGGTGTTCCGGATGC
+GGAAACCTCGACCTCCCCCATGAAGGAACGCCCTGCTCGAAATGCGGGTGTGAAACAGGAACGATCACCGTGGATTTCTGGTCTTTGAAAGATGACGACC
+CTTTAAGACGCCAGCTCAAGAGCCGCATCGCCATAATGCTGCAGAGGACGTTCGCCCTTTTCGGGGATAAGACCGTGATCGAGAACATCTTCGAGGCCAT
+AGGCGACCGTGCAGAGGGCAAGGCCAGGACGGACATGGCGCTCCAGCTGCTGGAATTCGTGGGGATGACTCACAGGACCACACACATAGCCAGGGACCTG
+TCCGGAGGAGAGAAGCAGAGGATAGTCCTCGCAAGGCAGATAGCCAGGGATCCTCTCTTCTTCTTGGCGGACGAGCCGACGGGAACGCTTGACCCGTACA
+CGGCGGAATTGATGCACGAGCGTCTTGTGGACTACGTCGGGAAGAGAGGGATCTCGATGGTCTTCGCGTCCCATTGGCCCGAGGCCGTGGATAAGATGGC
+CGACGAGGCCATATGGCTGGATTCCGGCAACGTGCTGATGCAGGGCGACCCGAAGGAGATCGCCGATAAATTTATGGAAGGATACTCGTTCGAAAGGACA
+AAGGCCGCCGACCTGGGAGAGCCGATAATATCGCTCAAGGATGCGGAGAAGCACTTTTTCTCTGTCGTCAGAGGAGTCGTCAAGGCAGTGGACGGTGTAA
+CCTTCGATATAATGGAGCGCGAGGTGTTCGGCCTTGTGGGAAAGTCGGGCGCCGGCAAGACCACGACGTCAAGAATGGTCGCCGGCATGACGCCCGCCAC
+CCGCGGGTCCGTGAAGATAAGGATCGGCGACGACTGGGTTGATATGTCAGAGATGGGGCCGAGCGGGAAAGGCCGCGCCACCCCCTATATCGGGTTCCTC
+CATCAGGAATACACGCTCTATCCCTTCGACAACATACTCAGCAACCTTACGACCAGCATAGGCACCAGGATGCCAGCGGAACTTGCAAAGTTCAAGGCCA
+TACAAGTGCTTCAGAGCGTAGGGTTCGACAAGAAGAACATGGAGAGTCTTCTCTACTCTTACCCCGACACACTGAGCGTCGGAGAGTGCCAGAGGATAGC
+CTTCGCACAGGTCCTGATAAGGGAGCCCCGCATCATAGTGCTGGATGAGCCTACAGGGACAATGGACCCGATAACAAAGACCATCATAGCAAAATCCGTC
+ATCCGGGCGAGGGAGACCCTGGGCGAGACCTTCGTCGTGGTGAGCCACGACATGGATTTTGTCGAGAACGTCTGCGACCGCGTAGCGTTCATAAGGAACG
+GCGTCGTGGAAGACATGGGAACTCCCGAGTCGGTCATCCAGAGGTTCGGTCTGAAAGAGCTTCAGGATGACGACTCCGAGGGTGAATGAATGAAGCAGCA
+GATCGGGCGCCACCTCAGCTTCGTTGAATGCAGAGAGGCCATGGGGCTCGGCGTGGGCGGTGCCCTGGCACAGAGGGCGACCATCTCTGACAGCGGAAGG
+GACGTCGTTGCGGTGGCCATGGGCCCGGGCAAGAGGCACATAACCAAACCGGTATGCGAGATAACATATGCCCTCAGAGAAGAGGGCATAGATACCAGCG
+TCGCCTGAGCGCCTGGCACTTATTGCGGAGTATGTCAAGGACATGATGACCGAACTCGAACCGGACAACGCGGCCGTCTTCGAAGCGGGATGCGCCAGCT
+ACCGGGCCAAGGTAGATGTGCTGATAGGGCTTGAACAAGAATATCTGACAGGCAAGGCGACTACCGAGATCATCGTCTGGCACCCTTCCTGGGCGTATCT
+TCTTCCGGATAATGTGACCGAGGCAGAGCTCATGGAAGCAGCCGAGGCGGCATCCACGCCCTCATCGATCGCGATGCTGCAGGGAGGGACGCCGGAAAAT
+CCTATCAACGTGTTCCTTTCGGAACCCGAAGAGATCAACGGTCTTACCCAGCAGGGGCTTTGTGAAATGGGAATATATGTAAACATAATAGTGATTAACA
+TACTCGCCGGGGACTGGGTCGAATATCTGGGCCAGGTCATCGAGATACTGGGAGATAATATTCCGGATGCGGGGACATGAATTGATGATACCAATAGAAA
+TTAAGGACCTTACCGCTGGATATGACGGCCGAGCCGTTTTCAGCAACGTCGACCTGGAGCTCAGGGACAAAGACTTCCTGGCGGTCATAGGGCCCAACGG
+CGGCGGGAAGACAACGCTCTTCAGGGCGATCCTGGGCCTAATAAAACCCATGGGGGGGACCGTAAAAGTGTTCGGCAAGGAGCCGGCAGGTTCGCCCCCG
+GGCATAGGATACGTTCCGCAGAACGAGAATCTGGACTCAGAATATCCAATAAGTGCCAGGGAAGTCGTCCTTATGGGAATGAGGTGCAAGAAGGGCCTTA
+GGCCGTTCTATTCCAGTGAGGAGAAGGAGTCCGCAGAGAGGGCCATGGAGTACGCCGAGGTCTCGGATTTCGCAGACAGCCGAATAAGCAACCTGTCGGG
+AGGGCAGAGACAGAGAGTATACCTCGCAAGGGCTCTTGCCCCGGAACCGAAGATACTCATGCTGGACGAACCCACCGCGAGCCTGGACCCGTCGATGAAG
+GACTGCACCTACGACATACTCAGGAAGCTGAACAGGGACGGGATAGCCATAATGGTGATAACTCACGATATGAGCAGCATCTCTCATGATGTCAAACGTG
+TAGCATGCATGAACCGCAGGCTGATAGTCAACGATGCGCCCGAGATAACCCAGGAGATGATCGCATTGGGATTCCACTGCATCCCCGAGCTAGTGCACAT
+AGGTCCCTGCGATTGCGGAGGTCACAACGATGGTTGATTGGGTCGCGGCATTCTCGATGCCTCTGATTCAGAACATGTTCATGGTCGCGGCCATAGCATG
+CGTTCTTTGCGGAGTCGTGGGAACCCTGGTGGTCGTGAAACGGATGGTGTTCGTAACGGGTGGCATAGCACACACCACTTTCGGAGGTGTGGGTCTTGCA
+TATTATGTTATGTCCGTCGTCGCAGTCTCATGGTTCACCCCCATGATCGGCGCCGCACTGTTCGCGGTCGTTTCGGCGGTCATAATGGCGCTTCCCGCGG
+
+>contig_5089
+TGCCGAAAGGTACGAGGAGATCATGGAGGCGCTCCGGGAGCTGGAGGAGATGTCTTGGGAACGGGTCATACTCGTGGAGGGCCGAAGGGACGTCACCGCG
+CTGGAGCATCTCGGCATATTCGGGGACGTTTTCACCGTCCAGGCGTCGGGCGGCCCGGTAAAGGCCGCAGAATATGTGGCCGGCAGACGGAAGAAGGCGG
+TCATACTGACCGATTGGGACAGGAAGGGCGACATAATAGCCTCGGACCTGGAGGTCCACCTGAGCGCTCTCGATGTCCAGTACGACACGGCGGTAAGAAG
+CAGACTGGCGGGCCTTTGCAGGATCGACATCAAGGACGTGCAGTCCCTGGACGAACTGGTACACCGACTCGAGACGGCATGAAGTAATATATTCAGTAAA
+TCATATTGAACCGTTAAGGATGGCAGGCCGTTTCATTGTTTTCGAAGGCATCGACGGTGCGGGCAAAAGCACTTTGATAGATGAAGTTTCAAAAAAATTG
+GAGTCGGCGGGCATAAGGACCGTCGTAACCGCAGAGCCCACTGAAGGGCCGATAGGAATGCTGATACGGAGCGGGGCGGTCAAATGCATATCTCCGAACG
+CGGAAGCTCTGCTGTTCACCGCCGACCGTGCCTGCCACACCTCCGAGATAGTCGGATGGATGGAGGAGGGGACGACCGTCCTCTGCGACCGTTACTACGC
+CTCCACCATAGCGTACCAGTCCGCAGGACTCGACGGAACGGTGTCGGGCAAGGAATGGCTCATGGACATCAACCGTCCCGTCACCGTAGAACCCGACACG
+ACGATACTTCTGGACATCGACCCCGAGGCGGGGATGCGCCGGGTGGGGGAACGCGGAGCGAGGAGCAAGTACGAGGTCACCGAGTACCTCGGCAGAGTGC
+GCTCCAATTATCTGGAAATAGCGGAGGAGAAAGGATTCCGCATAATAGACGCTTCTCGTCCGAAGGACGAGGTGCTGAGAGAAACGATGAAAATCTTAGG
+TGAGTGAAATGCATCCGTCGGAAGAGATCTATTGTGAGAAGAGCAACAGGCTGAAGGGAAAGACAGTGGTACTCGGGATAACGGGAAGCATCGCCGCAAC
+GGAATGTTTCTCCACGATACGCGAGCTGATACGCCACGGCGCTACGGTTATACCTGTCATGACAAGGGCGGCCTGCGACATAGTGACCGAGCAGAGCATA
+GAGTTCGCATCCGGAAAAAAACCCATAACCGAGCTCACGGGCCAGACCGAGCATGTCAAGCTGATGGGCGACTCCCGCACCGCGGACCTTCTTATGATCT
+AGAATGGACCACAGATGCGGATACAATGCATGCGGACACGAGGATCGGCACTGTCTCGGGCTCCGCGAACCCCGAAACGAAGGACGAGACCGAGGGATAC
+GAGACCAGCATCAAAGCGGCCAGCGTGTTGACGTGAATGCCAGGCACCAGTCCGGTCACCGCGCCCATCAGGGCCCCCGCCATGCTCATCAGCGATACAA
+GAAGCAGGACGTCCGTACCCATGGCCTAAGATCTGTCGCCGTGCGGTATATATGCGGATATCTACAGTTAGCAATCCAGGAACGGTTTGCCGTCCCTCAC
+TCCCACGCGCAGCATCGCCTTCACACGGACACCCGGCACGGCATCTGCGAGGTCCCCGGTCTTATCGAAGACCGCCAATACCTCCACGAGCCTGATGCCG
+TGGGAGGCCAGCGCAAGGGCGAGGGCCCTCACGGTCCCGCCGGTGCTGAGGGTGTCCTCGACTATCACGGCCCTGTCGCCTCTTCCCGGTCCGTTTATAT
+ACAAACTTCCTGAAGAATAGCCGGTGCTCCTGTCTATGATTATTTCTCCGGGAAGCCCGTAACCGCGTTTCCTTACTATGCTGTAGGGGATGCCCAGCCT
+TAGGGATATTGGCACGGCCAGCGGGATGCCCATAGCCTCCGGCGCGAGTATGACGTCGCAGTCGAAATCTCCCAGGTCAATCAGCCCCTCCACGACTTCG
+TTCAGAAGCGCGGGGTCCACGCGGGGAACGCCGTCGCTTATGGGATTGACGAAATACGGGTAGCCGTTCCTGTCGATCACAGGGCTGGCCATAAAGCTCT
+TTCTCAGAAGCTCGTACACTGAACGTGCACGGCGATGTGGTTATTTACCTGTTCTTAAAAGGAGACATATTTAAGGGAAGGTACGATAACAGCAACGATA
+TCCGTCGAAACGGCTCCGTTGACAACGGTCACGAACACCTGGACGGAATCGGAATCGATCCCTCCGAACGCACCGGTGTTTGCGGATATGTAGTAAGCTT
+TGATCTTACCCTGTGTCCCGTTATCGATATTGTCGCTGAAGTCAGGGCTCACCACCAGAAGGTTAGAGAGGCCCTTCTCCTGCATCAGCGAGATCGTCTC
+CTCGTCGGTCAGTATGGAGGACGGGTACAGCTTGATCAGGTACTGGTCCCTGGTGGCGGGATCGTACATCCCGTCCAACGCGGAATTGACAGTGTCGTAA
+TATGTGGAAGAACTCTGATCGTCGTAGTTTATCACGCCCATCCTGACGGGCTCCATGGCCTGTTCGGTCTGGTCGCTGATCATATATCCCAGACCTACGA
+ACAGTATCATGACGACCGCTATCGAGATTAGGGACTCGAGGGTCATCAGCTCCCTGAGCTCTTTCTTGACCAGGTTAAACAGATTGTTCAACGGACTTCA
+CCCCCTTGATGAAGACCTCTTCAAGGTTTTTAGCGTCGTATTTCGCCTTGAGCTCCGCGGGAGTCCCCTGCATGATGATCTCTCCCTTGTTGATCATGGC
+GACGCGGTCGCACAGGGATTCGACCTCATACATGTTGTGCGAGGAGAGGAGAACCGTAACTCCGGACCCGGCTATCTCCCGGATCAGCTCCCTGATGTCG
+TGCGCGTTCATCACGTCCAATCCGGATGTTACCTCGTCCATGATGGCGAAACGCGGTGAGGTCATTATCGCCCTTGCGATGAGCAGGCGTCTCATCATGC
+CCTTGCTGTAAGTGTTGACCTTGCTGTCGATGCGGTCTCCCAGGTTGGCGATGTCCATCCCCCTCTGGGTCATTTTTTCTGTCTCCTCGCCGTCGGTGAA
+GAAGCCCGCGATGAAGCGCAGATAGGTGCGTCCCGTCAGGTCCTTGTAGGCGCCCGCGTCCTCGGGCAGATAGCTTATGGATTTTCTTACGTCGTCCCCC
+TGGGCGGCGACATCGTATCCGCAGACGGTTATCTTTCCGGAAGTTATGGTTATGAGCGTCGAGATCATCCTAAGCGCCGTGGTCTTCCCGGCGCCGTTCG
+GTCCGATGAGGCCAAAGATCTCTCCCTCTTTGACAGAGAAGTTTATTCCCTTGACGGCCTCGATGTTCCCGTATATTTTATGCACGTTCTCTACGCGCAG
+GGCATCCATGAAGCATTGAGGTATATCTCGCTTTTAAATTTATTGGAGTGCTGTTAAGTGTCTAAAAAAGTGTGTATTCGGGCCGGGGCCCGTAATGTGT
+TTACTGTTCCGCGCCGGCCGAGGCGTTGCCCATGGCCTTATTTATGGTCTCTTGGAGGGACTGGTATTTCTCGCGGAGACTTTTCTCCTGGCGGTCCAGG
+
+>contig_159
+TATAGCTCAGCTCGTTGGCGGAGACGCTGCTTCCGTACATCTGGCCGCCGCCGTTGATGCCGCCGCCCCAGGCGGCCGTCCCGATCCCCACGGGAGAAAT
+GTCCGTACCTCGGAACCTTATGTTTCTCACGGATTCCCGTATATGTTCCTGGATTATAACTGATACGCAATCCTGTTTCCGACGTCCGCCATGTTTAGAT
+AAATTGACGGTATAGCCGAAGGCATGGATATGGCAATGGAGCTGAGGAACGTCTCCGTAGTGAGGGACGGGAAGCGGATACTGGATTCCGTCTGCCTCGA
+TATCGGCGCCTCCGAGAACGTTGCCGTCATAGGGCCGAACGGTTCGGGGAAGACGACGCTCATCAAACTGCTGAGGGGCGATATTTATCCCTACTACGAC
+GAGGACCGCCCCGCGGAGATGAGGATCTTCGGTGAGAAGATATGGTCCATCTACGACATACGGAGCCGCATGGGCGTGGTCTCCATGGACCTCCAGGGCA
+TGTTCGGCGGCGAAACGCTGGTCGGAGACGTCATAATGTCGGGATACTTCAGCAGCCTGGACATTTTCCGCAACCATGAGGTCACCGACAACATGCGCTC
+CGGGGCCTCGCGAGCGGCCGGGTACATGGGAGTGGAACATCTCGTCGGCAGAGATCTGTCCGGCCTTTCTCTGGGAGAGATGAGGCGGACGCTGATCGCC
+CGGGCGCTGGTCACCGCCCCCGAGATGCTCGTCCTCGACGAACCGATGACGGGCCTCGATATTGTAATGAAATCCAAATTCAGGAAGATGTTCGACATCA
+TGACGGAAACGGGAGTGAGCATCGTCATGATAACCCACGACCTCACCGACATCCCCGTTTCCTTGAACCGCATAATAATGATCAAGGATGGGAAAGTGTT
+CGCGGACGGTCCTAAAAAAGACGTCCTGACGTCCGAGGTCGTCAGCGGGCTTTTCGATGAACCTATTAATGTACAATGCGTTAACGGGATATATTCAATG
+AGGATGGATGAGTGACAAGGTATATCTGTTCCGAATGCGGGAACGAGATTCCGTACGTTTCGGATTTCTGCTACCAGTGCGGTAGCCTGAAGAGCAAGGC
+GTTCAAGATAGACGAGGGCGGCGAGATGGAGGGCGGGGAGGTCCCGTGCCCCAACTGCGGAAAGCCCATAGAGGAGGACGCCCGGTACTGCAGGCACTGC
diff -r 000000000000 -r ad7507073c3f test-data/genome3.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome3.fna Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,60 @@
+>contig_4003
+ATCAAAGAGGAACTGAAGGCGGCGATGCTGCTGACCGGTTCCTCTGACATAAGAGAGCTCTCTGATGCAGAGTATATCGTCATGGGAGAGACACGCAAAT
+GGATGGAAGGCCTGAAATGACCGACGTCAAGAAGATATTGAAACAGATGTCCGACGAGCTGAGCAAGCCGATCGAATCATACATAGAAGACGAACTGCCC
+GCCAATCTCATCGAAGCGGCAAGACAGTACCCCTATGCCGGCGGAAAGAGGATGAGACCGGCCATGGTCATCGCCGCGTGCAGGGCGGTGGGAGGGGATG
+GCAGGAAGGCCGTTCCCCTTGCGGTTGCCATAGAGTACATACACAATTTCACGCTGATCCATGATGACCTCATGGACGGGGACGAGAAGNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCATGGACGGGGACGAGAAGCGCC
+GGGGCATGACCACATCCCATGTGAAGTACGGCATGCCCACAGCGGTGTTGGCGGGAGATGCCCTGTTCGCTAAGGCCTTCCAGATAATCGCCGACCTCGA
+TGCGGACGGCGAAACGGTCAGAGATGTCCTTAGAGTGGTCTCCCAATCCGTCTGGGACCTGGCCAGAGGTCAGCAGATGGATATAAACAACGAGAACGGG
+ACCGAGGTCACCATGGATGAGTACATCGAGACCATCAGACTGAAGACCAGCGTTCTGTTCGCCGCCGGTGCCGCCGGAGGCGCTATGATCGGAGGTGCGA
+GCAAGGAGGTCGTGGACGCCGTCCACGGCTACGCCATGAACCTGGGCGTCGCCTTCCAGATGTATGACGATATACTTGGGATAGTCGGGGACCCGGCCGT
+TACCGGTAAATCGTCCGGTAATGACATTCGCAAAGGGAAGAGCACCGTCATCGTGTGCCACGCCCTGAAGAACATAGCTGACAGGGCGGACCTGCTGGTC
+TTCCGCGATATCCTCGGCAAGACAGACGCCACCGATGCGGAGATAGACGAGGTCAGGAGCATACTCCGAAGAGCCTGCAGCCTGGATTACGCCATAGAGA
+CCGCAGAGGATTACATCAACAAAGCCGTCGACTGCCTGGATGCGCTGGAGCCCTCAAAGGACAAGGACTTCATGATAGCCCTGGCAGAATACACGATGAC
+CAGGACCCTTTAGTCGGAGATCCCCTTCTCCGTTATGGAGTATGTGGCTTTCCGGCCTTCCGGTATGCTGCGGTGCTTGACCATGATCGCGGCCCTGCGG
+CCGTTGCCTTTCTTCTCCAAGCGGATTATGGTCTTCGCGTTATGATGCATGGCGTGGCCTCCGAGGAACTCTATCGTACCGGCGCCTATGTTGGTGTATA
+
+>contig_4403
+CACCGGTCACCCGAAGGTCACGCGCGTATCGATGCGTGACATCGCAGACCTGGGAGAGAGGGGCCTGTACATCCTTCACGAGATCGGTACGGACCTCGTC
+GGCAAGATGGAGGGCTGCACCGGGTGCAAGAAGTGCGAGCACGAATGCCCCGAGAACGCGTTGACCGTAAGCAAGGACAAGACGATCACCGTGAAGACCA
+AGAACTGCCTCGGAACGGCATGCTACAGATGCCAGTACGTCTGTCCCGAGAAGGTCATGCAGTTCGACTCCCTAAGGCTGTCGTGATAAACGGTTTTGGG
+CGGGGCCGGCCCCGCCCTTTTTTCATTTACCGCCGTTCAGGGCCTCGGCGTGCACGGCAGGCCTGACATTCTCGTCCTCCAGCTCCGTAAGTATCTGCTT
+GCGCAGCCTGATGAACTCGGGCGAAGCGCGGTCCCTGGGCCGCGGAATGCCTATGTCCACGATGTCCTTGATGCTGGCAGGACGCTTGGTAAGGACGACT
+ATCCTGTCTGAAAGATAAACGGCCTCGTCGACCGAGTGGGTCACGAACAGGATCGTAGTGTCCGTCTTCTCGACTATCCTCAGCAGCTCGCCCTGCATGA
+TGTTGCGCGTCTGGGCGTCCAACGCGCCGAACGGCTCGTCCATGAGCAGCACGTCGGGCTTGGTAACAAGGGCCCTTGCGATGCCCACGCGCTGCTTCAT
+ACCTCCGCTGAGCTCGTGGACACGATGGTCCTCGAAACCTTCGAGGCCGACCGCCCTGATGTAGCGTTCGGCGGTCTTCCTGCGCTGCTCCGCCGGGACG
+CCGGCGATCTCCAGGCCGAACTCGACATTCTTCCTTACAGAACGCCAAGGGAACAGTGCGAACTCCTGGAACACCATGCCTCTGTCGGGGCCTGGCCCGG
+TGCACTTCTTCCCGCCTATCGACACTTCTCCGGAGGACGGCTCCATGAGCCCTGCTATAAGCCTGAGCAGAGTCGTCTTTCCGCATCCCGAGGGACCGAC
+TATGGATATAAGCTCGCCCTTCTGGATCTCCAGAGAGAAATCCTCCAGGGCCACGGTCTCCTGTTCATCGGTCTTGTAGACCTTCCTCAGATGATTGATA
+ACGATCTTCTCGCTCATTCTATCCCCATCCTTCTTGTTATGACCTTGTGCAGATAGTCGGCGAGGCTGGTCGTCAGTATTCCGAGGATTGCGATTATGAC
+TATGCCCGCGTAGACGTTGGGCCAGTACCCCATCTGCGCCTGTATGCTGATGAAGTATCCGACGCCTCCTCCGAACGATGCGTACAGCTCGGAGGCAACT
+ATGCACATCCACCCGACCCCCATGCCTATGCGGAGGCCGTTCATTATGTATGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+>contig_14302
+GATGTGGAATGCGGCCAAATGCCGCCTCGGTTTCAGCGGGGCAAGGCGTTCCATGTCCACGACCGCTGGAATACTGGTGAACGTTTTCATTACTTCTCTG
+GAGATAGCCGAACGTTCTCAGCCTGCGCTCGAAAGCAGGAACTACAACGGCAGTTTCCCCATTTATAGGATGCCCAACAAGATCGGTCTCTACTGGTTCG
+CCATCACGGCCGCCGCGTGCGCCTCGCTATACATCATCGGATACTACATCTCGACCCCGAACATGGCGGCAATACTCCTGGGGGCGGTCTGACGGTCTGC
+GACCTTCGTACGGATAAAGTGAGCTACAGGTACGGCAATTCGAATTACAATGCTATCGATTCTGTGGATTTCACAGCCTCCCATGGCAGAAGGACCGTCA
+TCCTGGGAGAGAACGGGTGCGGCAAGTCGACGCTGATATATCAGCTCAACGGAGTATACAAGCCTGTTTCCGGTACTGTGTTCTATGGAGATATGCCCAT
+ATCATACGACAAGGAGTTCCTTACGGAGCTGCGTTCCGACGTTTCCGTTGTTCTGCAGAATCCAGACGATCAGATTTTCTCTTCCACCGTCGAGGAGGAC
+GTGGCATTTGGACCGTTAAACTTGGGCCTTTCCCGGGAAGAGGTTGGAGAAAGAATCGGCCGGTCTTTGGAATGCGTGGGGATGTCGGGATTTGCCGAAG
+TGCCTGTTCAGCGCCTTTCATACGGTCAGAAAAAAAGAGTGTCACTCGCAGGCGCCCTAGCATCACATCCAAAGATACTGGTCCTGGACGAGCCTACCGC
+AGGCCTGGACCCGCAGATGTCCAGAGAGGTGATGGAGATCGCAAATTCTCTCATCAGAGAAGGGATCTCCGTCGTAGTATCCACCCATGACGTGAATCTG
+ATCTACAATTGGGTCGAGGACCTTTACGTGATGCGGAACGGACACATGGTCTTCTCCGGAGATGCGGACGAGTTCTTCTCCGACCGTCCGTCCGTTTATC
+TTTCAGGTCTGGAACAGCCCTCGATATTCAGCATAAACCACAATATGGAGACGTTAAGAGGGACGATTCCCGCGTCATATCCCAAGACCATGAGCCAGAT
+GGTCAGCAGATTATTCCCTTCAGGATCCTCGGCCGGAAGGATATTCATCTATCAGACTGAAGGCGAGCGCATCGACCAGGATGCGATCGAGGAGGCCGTG
+GGAAAGAAAGGGATGCCCATTGCAGTATACGGCCCCTCCGCGCGCAGGTCGGTGACCCGATCGAAGCTCAGGGTTGATTTCTATTTTAACGGCATAGAGT
+GTTGCATCAGGGAGGCCATGGTAAACCATGATTCCCTGATAATAGTCGACCGGGGCTTGAAGGGGATCGTCACGGAGGCGATTGAAGAGCTTAGGGCATA
+CGGAACCCGGATCAGTATCAGGGAGTTGGTTTTTTGAGCGCTCCCCTTTTCCGCACCGAAGGTCTTTTCTTCAGATACGAAGGCGGCCGGGGGGACGCGT
+TGGCAGACGTGAACATCACGATCAAAGAGGGTGCCAGAACTGTCATCATGGGAGCCAACGGAGCTGGAAAATCCACGTTCTTCTATCATCTTAACGGAGT
+CTTGAGGCCGTCGAAGGGCTCGGTGTTTTTCCGGGGAGAAAAAATACCGCACAGGGGAAAAGCTCTCAGGAAGCTGCGCTCGGAGGTCGCGGTGATGCTC
+CAAGACCCCAACGACCAGCTTTTTGCACCAAAAGTATCTGACGACATAGCATTCGGCCCGAAGAACCTGGGACTCGACGCTCAGACTGTAGGGGAGAGGG
+TCAGGGACGCCCTCTACATCACAGGCATCGAATCTCTGGAGGGTCGCAGCGTGATGCAGCTGTCGTTCGGCCAGAAGAAGAGGGTGGTGCTGGCCGGTGC
+CTTGGCGATGCATCCGAAGGTGCTTATAATGGACGAGCCCACCGCAGGTCTCGATCCCCAGATGTCCAAGGAGCTCATCGAGCTCGCGGACGAGCTGCAC
+CATCTTGGAACGACCGTTATTTTTTCAACCCATGACGTGGACCTCTCATATTCTTGGGCGGACGAGGTCCATGTCCTAAGAGGGGGCCGTAATGTATATT
+CGGGGAGCTCAGAAAGATTCTATGACGATACTTCGGAAGTTTATCTTTCGGGCCTTGTCGAACCGGCCATGTACGACATCAACGTCAGCATCTCCGAGCT
+TGCCGGATGCCCCGTTGAACCGTTTCCCAAAACCCTGCCTCAGCTTGTGGCCAAGGCAGTGCCGTCAGAGGGGCCGGGCACGGTTCACATCCTTCCCGTG
+GAAGGTCCGGTCGACCGGGAGCTGTTCTCCTCTCTGACGTCCGGGTCCGGGATGTCCGCAACAGGCGTCTACGGTACTAATGCAAGAAAATCTGCGGAGG
+CTTCCAAATTGCCGATAGATTATTTCTTCGGGGCCGACGAGGGATGCATAATAGAGGCTTTGCACGGCAAAGACACGCTGATATGCTGCGACAGGTCCCT
+TACAGATCTGCTGATATCGAAGATAGGCAGTATGTCCCGGTTCGGGACAGAGGTCCCTTATTCTCTGCACTGAACATTTCTTTTTTCCGGGGGTTCGAAC
diff -r 000000000000 -r ad7507073c3f test-data/test_contig.contig2classification.names.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.names.txt Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,4 @@
+# contig classification reason lineage lineage scores superkingdom phylum class order family genus species
+contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli: 1.00 Bacillales: 1.00 Listeriaceae: 1.00 Listeria: 1.00 Listeria monocytogenes: 1.00
+contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Proteobacteria: 1.00 Gammaproteobacteria: 1.00 Vibrionales: 1.00 Vibrionaceae: 1.00 Vibrio: 1.00 Vibrio cholerae: 1.00
+contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00 Bacteria: 1.00 Firmicutes: 1.00 Bacilli*: 1.00 not classified not classified not classified not classified
diff -r 000000000000 -r ad7507073c3f test-data/test_contig.contig2classification.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_contig.contig2classification.txt Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,4 @@
+# contig classification reason lineage lineage scores
+contig_38063 classified based on 1/2 ORFs 1;131567;2;1783272;1239;91061;1385;186820;1637;1639 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_44250 classified based on 1/2 ORFs 1;131567;2;1224;1236;135623;641;662;666 1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00;1.00
+contig_9952 classified based on 1/5 ORFs 1;131567;2;1783272;1239;91061* 1.00;1.00;1.00;1.00;1.00;1.00
diff -r 000000000000 -r ad7507073c3f tool-data/cat_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/cat_database.loc.sample Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz
+# ls CAT_prepare_20190719/
+# 2019-07-19.CAT_prepare.fresh.log
+# 2019-07-19_CAT_database
+# 2019-07-19_taxonomy
+#value name database_folder taxonomy_folder
+#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
diff -r 000000000000 -r ad7507073c3f tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+
+
+
+ value, name, database_folder, taxonomy_folder
+
+
+
diff -r 000000000000 -r ad7507073c3f tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Sun Nov 24 21:56:00 2019 -0500
@@ -0,0 +1,7 @@
+
+
+
+ value, name, database_folder, taxonomy_folder
+
+
+