Repository 'kraken_biom'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/kraken_biom

Changeset 0:3ff4712dc111 (2022-09-03)
Next changeset 1:65eb9962d272 (2022-09-09)
Commit message:
planemo upload for repository https://github.com/smdabdoub/kraken-biom commit a7d3ae6f4ff1a49b1b329532379f5ccfafd573fa
added:
kraken_biom.xml
macros.xml
test-data/krakenReport1.tabular
test-data/krakenReport2.tabular
test-data/krakenReport3.tabular
b
diff -r 000000000000 -r 3ff4712dc111 kraken_biom.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_biom.xml Sat Sep 03 22:13:08 2022 +0000
[
@@ -0,0 +1,128 @@
+<tool id="kraken_biom" name="Kraken-biom" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Create BIOM-format tables from kraken output</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>kraken-biom --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## Link every supplied report into the working directory under an
+## index-based name so that each input gets a distinct filename.
+#for $idx, $report in enumerate($kraken_reports)
+    #if $report
+        ln -s '${report}' './${idx}-kraken_report.tabular' &&
+    #end if
+#end for
+## Run kraken-biom on the links created above.
+kraken-biom
+#for $idx, $report in enumerate($kraken_reports)
+    #if $report
+        '${idx}-kraken_report.tabular'
+    #end if
+#end for
+## Every option below is optional in the form and is only emitted
+## when its parameter carries a value.
+#if $max
+    --max ${max}
+#end if
+#if $min
+    --min ${min}
+#end if
+#if $otu_fp
+    --otu_fp '${otu_fp}'
+#end if
+#if $fmt
+    --fmt ${fmt}
+#end if
+    ]]></command>
+    <inputs> <!-- Apart from kraken_reports, every parameter is optional; the command only adds an option when its parameter has a value. -->
+        <param name="kraken_reports" type="data" multiple="true" format="tabular" label="Input files to Kraken-biom: Kraken report output file(s)"/>
+        <param argument="--max" type="select" optional="true" label="Max" help="Assigned reads will be recorded only if they are at or below max rank. Default: O">
+            <option value="O" selected="true">O</option>
+            <option value="D">D</option>
+            <option value="P">P</option>
+            <option value="C">C</option>
+            <option value="F">F</option>
+            <option value="G">G</option>
+            <option value="S">S</option>
+        </param> <!-- The one-letter values appear to be the rank codes found in the Kraken reports; the help section names O as Order and S as Species. -->
+        <param argument="--min" type="select" optional="true" label="Min" help="Reads assigned at and below min rank will be recorded as being assigned to the min rank level. Default: S">
+            <option value="O">O</option>
+            <option value="D">D</option>
+            <option value="P">P</option>
+            <option value="C">C</option>
+            <option value="F">F</option>
+            <option value="G">G</option>
+            <option value="S" selected="true">S</option>
+        </param> <!-- Same option list as max; only the pre-selected default differs (S instead of O). -->
+        <param argument="--otu_fp" type="data" optional="true" format="tabular" label="OTU FP" help="Create a file containing just the (NCBI) OTU IDs for use with a service such as phyloT (http://phylot.biobyte.de/) to generate a phylogenetic tree for use in downstream analysis such as UniFrac, iTol (itol.embl.de), or PhyloToAST (phylotoast.org)"/> <!-- NOTE(review): the help text describes a file that the program creates, yet this is declared as an input dataset whose path is handed to the option; confirm whether it should be modelled as an output instead. -->
+        <param argument="--fmt" type="select" optional="true" label="Output Format" help="Set the output format of the BIOM table. Default is HDF5">
+            <option value="tsv">TSV</option>
+            <option value="json">JSON</option>
+            <option value="hdf5" selected="true">Biom2 (HDF5)</option>
+        </param> <!-- The tsv and json choices also switch the datatype of the output dataset (see change_format under outputs). -->
+    </inputs>
+    <outputs>
+        <!-- The command passes no output option, so the result is collected from ./table.biom in the job working directory. -->
+        <data name="biomOutput" format="biom2" from_work_dir="./table.biom" label="Kraken biom output file">
+            <!-- Datatype stays biom2 unless the fmt parameter selects json or tsv; all switches live in one change_format. -->
+            <change_format>
+                <when input="fmt" value="json" format="json" />
+                <when input="fmt" value="tsv" format="tsv" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- HDF5 output (the pre-selected format) built from the three bundled reports -->
+        <test expect_num_outputs="1">
+            <param name="kraken_reports" value="krakenReport1.tabular,krakenReport2.tabular,krakenReport3.tabular"/>
+            <param name="fmt" value="hdf5"/>
+            <output name="biomOutput" ftype="biom2">
+                <assert_contents>
+                    <has_text text="creation-date"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Tab-separated output: checks the line count and the header text -->
+        <test expect_num_outputs="1">
+            <param name="kraken_reports" value="krakenReport1.tabular,krakenReport2.tabular,krakenReport3.tabular"/>
+            <param name="fmt" value="tsv"/>
+            <output name="biomOutput" ftype="tsv">
+                <assert_contents>
+                    <has_n_lines n="8246"/>
+                    <has_text text="Constructed from biom file"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- JSON output -->
+        <test expect_num_outputs="1">
+            <param name="kraken_reports" value="krakenReport1.tabular,krakenReport2.tabular,krakenReport3.tabular"/>
+            <param name="fmt" value="json"/>
+            <output name="biomOutput" ftype="json">
+                <assert_contents>
+                    <has_text text="Biological Observation"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Kraken-biom
+===========
+Create BIOM-format tables (http://biom-format.org) from Kraken output (http://ccb.jhu.edu/software/kraken/).
+
+Input
+=====
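+The program takes as input one or more files produced by the kraken-report tool. Each file is parsed and the counts for each OTU (operational taxonomic unit) are recorded, along with database ID (e.g. NCBI), and lineage. The extracted data are then stored in a BIOM table where each count is linked to the Sample and OTU it belongs to. Sample IDs are extracted from the input filenames (everything up to the '.'). Note that this wrapper links each input as N-kraken_report.tabular (N = 0, 1, 2, ... in input order) before running kraken-biom, so the sample IDs are derived from those names rather than from the Galaxy dataset names.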
+
+OTUs are defined by the --max and --min arguments. By default these are set to Order and Species respectively. This means that counts assigned directly to an Order, Family, or Genus are recorded under the associated OTU ID, and counts assigned at or below the Species level are assigned to the OTU ID for the species. Setting a minimum rank below Species is not yet available.
+
+Output
+======
+The BIOM format currently has two major versions. Version 1.0 uses the JSON (JavaScript Object Notation) format as a base. Version 2.x uses the HDF5 (Hierarchical Data Format v5) as a base. The output format can be specified with the --fmt option. Note that a tab-separated (tsv) output format is also available. The resulting file will not contain most of the metadata, but can be opened by spreadsheet programs.
+
+Version 2 of the BIOM format is used by default for output, but requires the Python library 'h5py'. If the library is not installed, kraken-biom will automatically switch to using version 1.0. Note that the output can optionally be compressed with gzip (--gzip) for version 1.0 and TSV files. Version 2 files are automatically compressed.
+    ]]></help> 
+    <citations>
+        <!-- The software is cited through its GitHub repository as a BibTeX misc entry; the key names the tool the entry describes. -->
+        <citation type="bibtex">
+@misc{githubkrakenbiom,
+  author = {Dabdoub, SM},
+  year = {2016},
+  title = {kraken-biom},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/smdabdoub/kraken-biom},
+}</citation>
+    </citations>
+ </tool>
b
diff -r 000000000000 -r 3ff4712dc111 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Sep 03 22:13:08 2022 +0000
b
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<macros> <!-- Shared definitions imported by kraken_biom.xml. -->
+    <token name="@TOOL_VERSION@">1.0.1</token> <!-- Used in the tool version string and as the version of the kraken-biom package requirement below. -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- Appended after "+galaxy" in the tool version string. -->
+    <token name="@PROFILE@">20.01</token> <!-- Value of the tool element's profile attribute. -->
+    <xml name="requirements"> <!-- Expanded by kraken_biom.xml without any child elements. -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">kraken-biom</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+ </macros>
\ No newline at end of file
b
diff -r 000000000000 -r 3ff4712dc111 test-data/krakenReport1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/krakenReport1.tabular Sat Sep 03 22:13:08 2022 +0000
b
b'@@ -0,0 +1,12471 @@\n+ 91.15\t14195379\t14195379\tU\t0\tunclassified\n+  8.85\t1379004\t401\tR\t1\troot\n+  8.84\t1376958\t10737\tR1\t131567\t  cellular organisms\n+  7.20\t1121868\t43514\tD\t2\t    Bacteria\n+  3.57\t556613\t24838\tP\t1224\t      Proteobacteria\n+  1.47\t229058\t7482\tC\t28211\t        Alphaproteobacteria\n+  0.93\t144510\t6453\tO\t356\t          Hyphomicrobiales\n+  0.52\t80503\t1900\tF\t41294\t            Bradyrhizobiaceae\n+  0.44\t69011\t10554\tG\t374\t              Bradyrhizobium\n+  0.14\t22423\t4040\tG1\t2631580\t                unclassified Bradyrhizobium\n+  0.03\t5066\t5066\tS\t858422\t                  Bradyrhizobium sp. CCBAU 051011\n+  0.01\t1865\t1865\tS\t1197460\t                  Bradyrhizobium sp. 6(2017)\n+  0.01\t1549\t1549\tS\t1325100\t                  Bradyrhizobium sp. CCBAU 51753\n+  0.01\t1005\t1005\tS\t376\t                  Bradyrhizobium sp.\n+  0.01\t959\t959\tS\t2057741\t                  Bradyrhizobium sp. SK17\n+  0.01\t947\t947\tS\t1325120\t                  Bradyrhizobium sp. CCBAU 53421\n+  0.01\t895\t895\tS\t288000\t                  Bradyrhizobium sp. BTAi1\n+  0.01\t858\t858\tS\t2715960\t                  Bradyrhizobium sp. PSBB068\n+  0.00\t748\t748\tS\t115808\t                  Bradyrhizobium sp. ORS 285\n+  0.00\t631\t631\tS\t114615\t                  Bradyrhizobium sp. ORS 278\n+  0.00\t591\t591\tS\t1404888\t                  Bradyrhizobium sp. 1(2017)\n+  0.00\t585\t585\tS\t1325102\t                  Bradyrhizobium sp. CCBAU 51765\n+  0.00\t548\t548\tS\t1325112\t                  Bradyrhizobium sp. CCBAU 53340\n+  0.00\t532\t532\tS\t1325111\t                  Bradyrhizobium sp. CCBAU 53338\n+  0.00\t468\t468\tS\t1325114\t                  Bradyrhizobium sp. CCBAU 53351\n+  0.00\t458\t458\tS\t1223566\t                  Bradyrhizobium sp. CCGE-LA001\n+  0.00\t253\t253\tS\t319017\t                  Bradyrhizobium sp. WSM471\n+  0.00\t205\t205\tS\t2493093\t                  Bradyrhizobium sp. 
LCT2\n+  0.00\t82\t82\tS\t2599805\t                  Bradyrhizobium sp. SG09\n+  0.00\t73\t73\tS\t1404649\t                  Bradyrhizobium sp. 323S2\n+  0.00\t54\t54\tS\t2599819\t                  Bradyrhizobium sp. TM102\n+  0.00\t10\t10\tS\t1325083\t                  Bradyrhizobium sp. CCBAU 21365\n+  0.00\t1\t1\tS\t2578114\t                  Bradyrhizobium sp. KBS0727\n+  0.07\t11646\t11646\tS\t1437360\t                Bradyrhizobium erythrophlei\n+  0.02\t3838\t3838\tS\t1274631\t                Bradyrhizobium icense\n+  0.02\t3704\t3704\tS\t722472\t                Bradyrhizobium lablabi\n+  0.02\t3445\t3445\tS\t190148\t                Bradyrhizobium paxllaeri\n+  0.02\t2527\t2525\tS\t1355477\t                Bradyrhizobium diazoefficiens\n+  0.00\t2\t2\tS1\t224911\t                  Bradyrhizobium diazoefficiens USDA 110\n+  0.01\t2286\t2286\tS\t255045\t                Bradyrhizobium canariense\n+  0.01\t1081\t865\tS\t375\t                Bradyrhizobium japonicum\n+  0.00\t213\t213\tS1\t476282\t                  Bradyrhizobium japonicum SEMIA 5079\n+  0.00\t3\t3\tS1\t1037409\t                  Bradyrhizobium japonicum USDA 6\n+  0.01\t1054\t1054\tS\t83637\t                Bradyrhizobium genosp. L\n+  0.00\t694\t694\tS\t1325107\t                Bradyrhizobium zhanjiangense\n+  0.00\t661\t0\tS\t44255\t                Bradyrhizobium oligotrophicum\n+  0.00\t661\t661\tS1\t1245469\t                  Bradyrhizobium oligotrophicum S58\n+  0.00\t576\t576\tS\t1549949\t                Bradyrhizobium vignae\n+  0.00\t562\t562\tS\t83627\t                Bradyrhizobium genosp. 
B\n+  0.00\t556\t556\tS\t1404367\t                Bradyrhizobium symbiodeficiens\n+  0.00\t547\t547\tS\t858423\t                Bradyrhizobium arachidis\n+  0.00\t480\t480\tS\t1404864\t                Bradyrhizobium cosmicum\n+  0.00\t459\t459\tS\t1325090\t                Bradyrhizobium guangdongense\n+  0.00\t450\t450\tS\t1325095\t                Bradyrhizobium guangzhouense\n+  0.00\t425\t425\tS\t931866\t                Bradyrhizobium ottawaense\n+  0.00\t362\t362\tS\t1325115\t                Bradyrhizobium guangxiense\n+  0.00\t320\t320\tS\t1404768\t                Bradyrhizobium amphicarpaeae\n+  0.00\t305\t305\tS\t244734\t                Bradyrhizobium betae\n+  0.00\t56\t0\tS\t29448\t                Bradyrhizobium elkanii\n+  0.00\t56\t56\tS1\t1275962\t           '..b'      Oxyplax ochracea nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1850906\t        Catopsilia pomona nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1307956\t        Leucania separata nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1642929\t        Lambdina fiscellaria nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t74660\t        Choristoneura fumiferana DEF multiple nucleopolyhedrovirus\n+  0.00\t3\t0\tG\t558018\t      Gammabaculovirus\n+  0.00\t2\t2\tS\t111874\t        Neodiprion sertifer nucleopolyhedrovirus\n+  0.00\t1\t0\tG1\t1233523\t        unclassified Gammabaculovirus\n+  0.00\t1\t1\tS\t204507\t          Neodiprion abietis NPV\n+  0.00\t95\t0\tD1\t12429\t    unclassified viruses\n+  0.00\t88\t0\tD2\t2204151\t      unclassified DNA viruses\n+  0.00\t86\t0\tD3\t51368\t        unclassified dsDNA viruses\n+  0.00\t82\t7\tG\t2060084\t          Pandoravirus\n+  0.00\t30\t30\tS\t2107707\t            Pandoravirus macleodensis\n+  0.00\t11\t11\tS\t1349409\t            Pandoravirus dulcis\n+  0.00\t10\t10\tS\t1349410\t            Pandoravirus salinus\n+  0.00\t9\t9\tS\t1605721\t            Pandoravirus inopinatum\n+  0.00\t9\t9\tS\t2107708\t            Pandoravirus neocaledonia\n+  0.00\t6\t6\tS\t2107709\t            
Pandoravirus quercus\n+  0.00\t2\t2\tS\t1100043\t          Apis mellifera filamentous virus\n+  0.00\t2\t0\tF\t2023203\t          Pithoviridae\n+  0.00\t1\t0\tG\t1805626\t            Pithovirus\n+  0.00\t1\t1\tS\t1450746\t              Pithovirus sibericum\n+  0.00\t1\t1\tS\t1903266\t            Cedratvirus A11\n+  0.00\t2\t0\tD3\t2136008\t        unclassified archaeal dsDNA viruses\n+  0.00\t2\t0\tD4\t128706\t          Haloviruses\n+  0.00\t2\t2\tS\t1262530\t            Halovirus HSTV-1\n+  0.00\t4\t4\tS\t1904876\t      Human DNA virus\n+  0.00\t3\t3\tS\t1678078\t      Mollivirus sibericum\n+  0.00\t11\t0\tD1\t2731342\t    Monodnaviria\n+  0.00\t11\t0\tK\t2732092\t      Shotokuvirae\n+  0.00\t8\t0\tP\t2732416\t        Cressdnaviricota\n+  0.00\t8\t0\tC\t2732424\t          Repensiviricetes\n+  0.00\t8\t0\tO\t2732539\t            Geplafuvirales\n+  0.00\t8\t0\tF\t10811\t              Geminiviridae\n+  0.00\t8\t0\tG\t10814\t                Begomovirus\n+  0.00\t4\t4\tS\t437063\t                  Ageratum yellow vein Hualian virus\n+  0.00\t2\t2\tS\t1915203\t                  Common bean severe mosaic virus\n+  0.00\t1\t1\tS\t1982675\t                  Jacquemontia yellow vein virus\n+  0.00\t1\t1\tS\t85752\t                  Tomato yellow leaf curl Thailand virus\n+  0.00\t3\t0\tP\t2732415\t        Cossaviricota\n+  0.00\t3\t0\tC\t2732421\t          Papovaviricetes\n+  0.00\t2\t0\tO\t2732532\t            Sepolyvirales\n+  0.00\t2\t0\tF\t151341\t              Polyomaviridae\n+  0.00\t2\t0\tG\t1891714\t                Betapolyomavirus\n+  0.00\t2\t2\tS\t1891762\t                  Human polyomavirus 1\n+  0.00\t1\t0\tO\t2732533\t            Zurhausenvirales\n+  0.00\t1\t0\tF\t151340\t              Papillomaviridae\n+  0.00\t1\t0\tF1\t2169595\t                Firstpapillomavirinae\n+  0.00\t1\t0\tG\t325455\t                  Gammapapillomavirus\n+  0.00\t1\t0\tG1\t735504\t                    unclassified Gammapapillomavirus\n+  0.00\t1\t1\tS\t2049444\t                    
  Gammapapillomavirus sp.\n+  0.00\t5\t0\tF\t10482\t    Polydnaviridae\n+  0.00\t3\t0\tG\t10485\t      Bracovirus\n+  0.00\t3\t3\tS\t39640\t        Cotesia congregata bracovirus\n+  0.00\t2\t0\tG\t10483\t      Ichnovirus\n+  0.00\t2\t2\tS\t265522\t        Hyposoter fugitivus ichnovirus\n+  0.00\t3\t0\tF\t1511852\t    Nudiviridae\n+  0.00\t2\t0\tF1\t1110703\t      unclassified Nudiviridae\n+  0.00\t2\t2\tS\t1654582\t        Kallithea virus\n+  0.00\t1\t0\tG\t1511853\t      Alphanudivirus\n+  0.00\t1\t1\tS\t92521\t        Oryctes rhinoceros nudivirus\n+  0.00\t2\t0\tF\t1285590\t    Hytrosaviridae\n+  0.00\t1\t0\tG\t1285591\t      Glossinavirus\n+  0.00\t1\t0\tS\t2747309\t        Glossina hytrosavirus\n+  0.00\t1\t1\tS1\t379529\t          Glossina pallidipes salivary gland hypertrophy virus\n+  0.00\t1\t0\tG\t1285593\t      Muscavirus\n+  0.00\t1\t0\tS\t2747498\t        Musca hytrosavirus\n+  0.00\t1\t1\tS1\t523909\t          Musca domestica salivary gland hypertrophy virus\n+  0.00\t1\t0\tF\t196937\t    Nimaviridae\n+  0.00\t1\t0\tG\t249585\t      Whispovirus\n+  0.00\t1\t1\tS\t342409\t        White spot syndrome virus\n+  0.00\t1\t0\tR1\t2787854\t  other entries\n+  0.00\t1\t1\tR2\t28384\t    other sequences\n'
b
diff -r 000000000000 -r 3ff4712dc111 test-data/krakenReport2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/krakenReport2.tabular Sat Sep 03 22:13:08 2022 +0000
b
b'@@ -0,0 +1,11152 @@\n+ 92.06\t8810061\t8810061\tU\t0\tunclassified\n+  7.94\t759690\t240\tR\t1\troot\n+  7.93\t758772\t5483\tR1\t131567\t  cellular organisms\n+  6.64\t635165\t26878\tD\t2\t    Bacteria\n+  3.06\t293202\t14752\tP\t1224\t      Proteobacteria\n+  1.27\t121724\t4181\tC\t28211\t        Alphaproteobacteria\n+  0.72\t68949\t3483\tO\t356\t          Hyphomicrobiales\n+  0.35\t33460\t958\tF\t41294\t            Bradyrhizobiaceae\n+  0.29\t28150\t4256\tG\t374\t              Bradyrhizobium\n+  0.10\t9951\t1601\tG1\t2631580\t                unclassified Bradyrhizobium\n+  0.02\t2350\t2350\tS\t858422\t                  Bradyrhizobium sp. CCBAU 051011\n+  0.01\t786\t786\tS\t1325100\t                  Bradyrhizobium sp. CCBAU 51753\n+  0.01\t727\t727\tS\t1197460\t                  Bradyrhizobium sp. 6(2017)\n+  0.01\t505\t505\tS\t376\t                  Bradyrhizobium sp.\n+  0.00\t446\t446\tS\t2057741\t                  Bradyrhizobium sp. SK17\n+  0.00\t396\t396\tS\t2715960\t                  Bradyrhizobium sp. PSBB068\n+  0.00\t389\t389\tS\t288000\t                  Bradyrhizobium sp. BTAi1\n+  0.00\t364\t364\tS\t1325120\t                  Bradyrhizobium sp. CCBAU 53421\n+  0.00\t342\t342\tS\t115808\t                  Bradyrhizobium sp. ORS 285\n+  0.00\t317\t317\tS\t114615\t                  Bradyrhizobium sp. ORS 278\n+  0.00\t278\t278\tS\t1404888\t                  Bradyrhizobium sp. 1(2017)\n+  0.00\t241\t241\tS\t1325112\t                  Bradyrhizobium sp. CCBAU 53340\n+  0.00\t237\t237\tS\t1325102\t                  Bradyrhizobium sp. CCBAU 51765\n+  0.00\t222\t222\tS\t1325111\t                  Bradyrhizobium sp. CCBAU 53338\n+  0.00\t222\t222\tS\t1223566\t                  Bradyrhizobium sp. CCGE-LA001\n+  0.00\t201\t201\tS\t1325114\t                  Bradyrhizobium sp. CCBAU 53351\n+  0.00\t133\t133\tS\t319017\t                  Bradyrhizobium sp. WSM471\n+  0.00\t104\t104\tS\t2493093\t                  Bradyrhizobium sp. 
LCT2\n+  0.00\t49\t49\tS\t1404649\t                  Bradyrhizobium sp. 323S2\n+  0.00\t16\t16\tS\t2599805\t                  Bradyrhizobium sp. SG09\n+  0.00\t15\t15\tS\t1325083\t                  Bradyrhizobium sp. CCBAU 21365\n+  0.00\t10\t10\tS\t2599819\t                  Bradyrhizobium sp. TM102\n+  0.04\t3785\t3785\tS\t1437360\t                Bradyrhizobium erythrophlei\n+  0.02\t1883\t1883\tS\t1274631\t                Bradyrhizobium icense\n+  0.02\t1629\t1629\tS\t190148\t                Bradyrhizobium paxllaeri\n+  0.01\t1149\t1149\tS\t1355477\t                Bradyrhizobium diazoefficiens\n+  0.01\t1138\t1138\tS\t722472\t                Bradyrhizobium lablabi\n+  0.01\t819\t819\tS\t255045\t                Bradyrhizobium canariense\n+  0.01\t510\t397\tS\t375\t                Bradyrhizobium japonicum\n+  0.00\t113\t113\tS1\t476282\t                  Bradyrhizobium japonicum SEMIA 5079\n+  0.00\t366\t366\tS\t83637\t                Bradyrhizobium genosp. L\n+  0.00\t295\t295\tS\t1325107\t                Bradyrhizobium zhanjiangense\n+  0.00\t294\t0\tS\t44255\t                Bradyrhizobium oligotrophicum\n+  0.00\t294\t294\tS1\t1245469\t                  Bradyrhizobium oligotrophicum S58\n+  0.00\t260\t260\tS\t83627\t                Bradyrhizobium genosp. 
B\n+  0.00\t249\t249\tS\t858423\t                Bradyrhizobium arachidis\n+  0.00\t213\t213\tS\t1325095\t                Bradyrhizobium guangzhouense\n+  0.00\t206\t206\tS\t1404367\t                Bradyrhizobium symbiodeficiens\n+  0.00\t198\t198\tS\t1325115\t                Bradyrhizobium guangxiense\n+  0.00\t193\t193\tS\t1549949\t                Bradyrhizobium vignae\n+  0.00\t188\t188\tS\t1404864\t                Bradyrhizobium cosmicum\n+  0.00\t179\t179\tS\t931866\t                Bradyrhizobium ottawaense\n+  0.00\t133\t133\tS\t1404768\t                Bradyrhizobium amphicarpaeae\n+  0.00\t116\t116\tS\t1325090\t                Bradyrhizobium guangdongense\n+  0.00\t102\t102\tS\t244734\t                Bradyrhizobium betae\n+  0.00\t38\t0\tS\t29448\t                Bradyrhizobium elkanii\n+  0.00\t38\t38\tS1\t1275962\t                  Bradyrhizobium elkanii USDA 61\n+  0.01\t1313\t44\tG\t85413\t              Bosea\n+  0.01\t1095\t83\tG1\t2653178\t                unclassified Bosea\n+  0.00\t221\t221\tS\t2599640\t                  Bosea sp. 
F3-2\n+  0.00\t191\t191\tS\t1842539\t             '..b'40\t1\tG\t558017\t      Betabaculovirus\n+  0.00\t23\t23\tS\t28289\t        Cydia pomonella granulovirus\n+  0.00\t11\t11\tS\t56947\t        Choristoneura fumiferana granulovirus\n+  0.00\t2\t2\tS\t192584\t        Phthorimaea operculella granulovirus\n+  0.00\t2\t2\tS\t489830\t        Helicoverpa armigera granulovirus\n+  0.00\t1\t1\tS\t51677\t        Xestia c-nigrum granulovirus\n+  0.00\t9\t0\tG\t558016\t      Alphabaculovirus\n+  0.00\t3\t3\tS\t224399\t        Adoxophyes honmai nucleopolyhedrovirus\n+  0.00\t2\t2\tS\t28288\t        Hyphantria cunea nucleopolyhedrovirus\n+  0.00\t2\t2\tS\t1987479\t        Choristoneura murinana nucleopolyhedrovirus\n+  0.00\t1\t0\tS\t2560642\t        Perigonia lusca nucleopolyhedrovirus\n+  0.00\t1\t1\tS1\t1675865\t          Perigonia lusca single nucleopolyhedrovirus\n+  0.00\t1\t0\tG1\t745176\t        unclassified Alphabaculovirus\n+  0.00\t1\t1\tS\t1346829\t          Peridroma alphabaculovirus\n+  0.00\t43\t0\tD1\t12429\t    unclassified viruses\n+  0.00\t41\t0\tD2\t2204151\t      unclassified DNA viruses\n+  0.00\t38\t0\tD3\t51368\t        unclassified dsDNA viruses\n+  0.00\t34\t1\tG\t2060084\t          Pandoravirus\n+  0.00\t10\t10\tS\t2107709\t            Pandoravirus quercus\n+  0.00\t9\t9\tS\t1349409\t            Pandoravirus dulcis\n+  0.00\t6\t6\tS\t1349410\t            Pandoravirus salinus\n+  0.00\t4\t4\tS\t1605721\t            Pandoravirus inopinatum\n+  0.00\t3\t3\tS\t2107708\t            Pandoravirus neocaledonia\n+  0.00\t1\t1\tS\t2107707\t            Pandoravirus macleodensis\n+  0.00\t3\t3\tS\t1100043\t          Apis mellifera filamentous virus\n+  0.00\t1\t0\tF\t2023203\t          Pithoviridae\n+  0.00\t1\t1\tS\t1903266\t            Cedratvirus A11\n+  0.00\t3\t0\tD3\t2136008\t        unclassified archaeal dsDNA viruses\n+  0.00\t3\t0\tD4\t128706\t          Haloviruses\n+  0.00\t3\t3\tS\t1273746\t            Halovirus HCTV-1\n+  0.00\t2\t2\tS\t1678078\t    
  Mollivirus sibericum\n+  0.00\t8\t0\tD1\t2731342\t    Monodnaviria\n+  0.00\t8\t0\tK\t2732092\t      Shotokuvirae\n+  0.00\t7\t0\tP\t2732415\t        Cossaviricota\n+  0.00\t7\t0\tC\t2732421\t          Papovaviricetes\n+  0.00\t7\t0\tO\t2732533\t            Zurhausenvirales\n+  0.00\t7\t0\tF\t151340\t              Papillomaviridae\n+  0.00\t7\t0\tF1\t2169595\t                Firstpapillomavirinae\n+  0.00\t4\t0\tG\t1513238\t                  Dyokappapapillomavirus\n+  0.00\t4\t0\tS\t1513247\t                    Dyokappapapillomavirus 1\n+  0.00\t4\t4\tS1\t634772\t                      Ovis aries papillomavirus 3\n+  0.00\t2\t0\tG\t934803\t                  Dyozetapapillomavirus\n+  0.00\t2\t0\tS\t1177766\t                    Dyozetapapillomavirus 1\n+  0.00\t2\t2\tS1\t485241\t                      Caretta caretta papillomavirus 1\n+  0.00\t1\t0\tG\t325455\t                  Gammapapillomavirus\n+  0.00\t1\t0\tG1\t735504\t                    unclassified Gammapapillomavirus\n+  0.00\t1\t1\tS\t2049444\t                      Gammapapillomavirus sp.\n+  0.00\t1\t0\tP\t2732416\t        Cressdnaviricota\n+  0.00\t1\t0\tC\t2732424\t          Repensiviricetes\n+  0.00\t1\t0\tO\t2732539\t            Geplafuvirales\n+  0.00\t1\t0\tF\t10811\t              Geminiviridae\n+  0.00\t1\t0\tG\t2022857\t                Capulavirus\n+  0.00\t1\t1\tS\t1830242\t                  Plantago lanceolata latent virus\n+  0.00\t5\t0\tF\t1511852\t    Nudiviridae\n+  0.00\t3\t0\tG\t1511854\t      Betanudivirus\n+  0.00\t3\t0\tS\t29250\t        Heliothis zea nudivirus\n+  0.00\t3\t3\tS1\t1128424\t          Helicoverpa zea nudivirus 2\n+  0.00\t2\t0\tG\t1511853\t      Alphanudivirus\n+  0.00\t2\t2\tS\t432587\t        Gryllus bimaculatus nudivirus\n+  0.00\t3\t0\tF\t10482\t    Polydnaviridae\n+  0.00\t2\t0\tG\t10483\t      Ichnovirus\n+  0.00\t1\t1\tS\t10484\t        Campoletis sonorensis ichnovirus\n+  0.00\t1\t1\tS\t419435\t        Glypta fumiferanae ichnovirus\n+  0.00\t1\t0\tG\t10485\t      
Bracovirus\n+  0.00\t1\t1\tS\t39640\t        Cotesia congregata bracovirus\n+  0.00\t1\t0\tF\t1285590\t    Hytrosaviridae\n+  0.00\t1\t0\tG\t1285593\t      Muscavirus\n+  0.00\t1\t0\tS\t2747498\t        Musca hytrosavirus\n+  0.00\t1\t1\tS1\t523909\t          Musca domestica salivary gland hypertrophy virus\n+  0.00\t1\t0\tF\t423358\t    Bicaudaviridae\n+  0.00\t1\t0\tF1\t1123963\t      unclassified Bicaudaviridae\n+  0.00\t1\t1\tS\t1732177\t        Sulfolobus monocaudavirus SMV3\n'
b
diff -r 000000000000 -r 3ff4712dc111 test-data/krakenReport3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/krakenReport3.tabular Sat Sep 03 22:13:08 2022 +0000
b
b"@@ -0,0 +1,12317 @@\n+ 91.75\t14211438\t14211438\tU\t0\tunclassified\n+  8.25\t1277434\t386\tR\t1\troot\n+  8.24\t1276166\t6094\tR1\t131567\t  cellular organisms\n+  7.70\t1192581\t48540\tD\t2\t    Bacteria\n+  3.62\t559951\t3486\tD1\t1783272\t      Terrabacteria group\n+  3.25\t503885\t2455\tP\t201174\t        Actinobacteria\n+  3.15\t487919\t42867\tC\t1760\t          Actinomycetia\n+  0.75\t116898\t5584\tO\t85006\t            Micrococcales\n+  0.34\t52165\t3276\tF\t85023\t              Microbacteriaceae\n+  0.10\t16078\t1589\tG\t33882\t                Microbacterium\n+  0.05\t7231\t591\tG1\t2609290\t                  unclassified Microbacterium\n+  0.00\t641\t641\tS\t1906742\t                    Microbacterium sp. BH-3-3-3\n+  0.00\t467\t467\tS\t2567934\t                    Microbacterium sp. 4R-513\n+  0.00\t424\t424\tS\t2782168\t                    Microbacterium sp. WY121\n+  0.00\t411\t411\tS\t2782169\t                    Microbacterium sp. NY27\n+  0.00\t356\t356\tS\t1714373\t                    Microbacterium sp. No. 7\n+  0.00\t314\t314\tS\t2483401\t                    Microbacterium sp. 10M-3C3\n+  0.00\t305\t305\tS\t2763257\t                    Microbacterium sp. YJN-G\n+  0.00\t286\t286\tS\t1906274\t                    Microbacterium sp. JZ31\n+  0.00\t279\t279\tS\t912630\t                    Microbacterium sp. LKL04\n+  0.00\t271\t271\tS\t2268461\t                    Microbacterium sp. ABRD_28\n+  0.00\t260\t260\tS\t2782167\t                    Microbacterium sp. A18JL200\n+  0.00\t255\t255\tS\t367477\t                    Microbacterium sp. XT11\n+  0.00\t244\t244\tS\t2709304\t                    Microbacterium sp. Se63.02b\n+  0.00\t243\t243\tS\t2048898\t                    Microbacterium sp. Y-01\n+  0.00\t240\t240\tS\t2489212\t                    Microbacterium sp. RG1\n+  0.00\t233\t233\tS\t2603598\t                    Microbacterium sp. CBA3102\n+  0.00\t225\t225\tS\t2782166\t                    Microbacterium sp. 
A18JL241\n+  0.00\t224\t224\tS\t2014534\t                    Microbacterium sp. PM5\n+  0.00\t176\t176\tS\t2606451\t                    Microbacterium sp. 1S1\n+  0.00\t171\t171\tS\t1938334\t                    Microbacterium sp. TPU 3598\n+  0.00\t114\t114\tS\t2708079\t                    Microbacterium sp. HY82\n+  0.00\t114\t114\tS\t2766784\t                    Microbacterium sp. Nx66\n+  0.00\t104\t104\tS\t1795053\t                    Microbacterium sp. PAMC 28756\n+  0.00\t87\t87\tS\t2769067\t                    Microbacterium sp. HY60\n+  0.00\t75\t75\tS\t1696072\t                    Microbacterium sp. CGR1\n+  0.00\t68\t68\tS\t1916917\t                    Microbacterium sp. 1.5R\n+  0.00\t33\t33\tS\t2070348\t                    Microbacterium sp. SGAir0570\n+  0.00\t20\t20\tS\t2103230\t                    Microbacterium sp. str. 'China'\n+  0.01\t1187\t1187\tS\t1072463\t                  Microbacterium lemovicicum\n+  0.01\t940\t940\tS\t104336\t                  Microbacterium foliorum\n+  0.00\t770\t770\tS\t162426\t                  Microbacterium hominis\n+  0.00\t527\t527\tS\t273677\t                  Microbacterium oleivorans\n+  0.00\t443\t443\tS\t370764\t                  Microbacterium pygmaeum\n+  0.00\t384\t384\tS\t82380\t                  Microbacterium oxydans\n+  0.00\t290\t0\tS\t2033\t                  Microbacterium testaceum\n+  0.00\t290\t290\tS1\t979556\t                    Microbacterium testaceum StLB037\n+  0.00\t269\t269\tS\t36805\t                  Microbacterium aurum\n+  0.00\t262\t262\tS\t2541726\t                  Microbacterium wangchenii\n+  0.00\t262\t262\tS\t743009\t                  Microbacterium oryzae\n+  0.00\t246\t246\tS\t904291\t                  Microbacterium sediminis\n+  0.00\t240\t240\tS\t2614638\t                  Microbacterium caowuchunii\n+  0.00\t235\t235\tS\t273678\t                  Microbacterium hydrocarbonoxydans\n+  0.00\t229\t229\tS\t84292\t                  Microbacterium chocolatum\n+  
0.00\t216\t216\tS\t2614639\t                  Microbacterium lushaniae\n+  0.00\t214\t214\tS\t57043\t                  Microbacterium esteraromaticum\n+  0.00\t205\t205\tS\t2509458\t                  Microbacterium protaetiae\n+  0.00\t134\t134\tS\t69362\t                  Microbacterium schleiferi\n+  0.00\t77\t77\tS\t1526412\t                  Microbacterium endophyticum\n+  0.00\t76\t76\tS\t936337\t      "..b'ithovirus sibericum\n+  0.00\t2\t2\tS\t1678078\t      Mollivirus sibericum\n+  0.00\t1\t1\tS\t1904876\t      Human DNA virus\n+  0.00\t47\t0\tF\t10442\t    Baculoviridae\n+  0.00\t25\t0\tG\t558017\t      Betabaculovirus\n+  0.00\t9\t9\tS\t28289\t        Cydia pomonella granulovirus\n+  0.00\t9\t9\tS\t56947\t        Choristoneura fumiferana granulovirus\n+  0.00\t4\t4\tS\t51677\t        Xestia c-nigrum granulovirus\n+  0.00\t2\t2\tS\t307454\t        Spodoptera frugiperda granulovirus\n+  0.00\t1\t1\tS\t2169746\t        Mythimna unipuncta granulovirus B\n+  0.00\t22\t0\tG\t558016\t      Alphabaculovirus\n+  0.00\t4\t4\tS\t1307956\t        Leucania separata nucleopolyhedrovirus\n+  0.00\t4\t0\tG1\t745176\t        unclassified Alphabaculovirus\n+  0.00\t3\t3\tS\t1346829\t          Peridroma alphabaculovirus\n+  0.00\t1\t1\tS\t38012\t          Malacosoma neustria nucleopolyhedrovirus\n+  0.00\t3\t3\tS\t10449\t        Lymantria dispar multiple nucleopolyhedrovirus\n+  0.00\t2\t2\tS\t447897\t        Mythimna unipuncta nucleopolyhedrovirus\n+  0.00\t2\t2\tS\t1850906\t        Catopsilia pomona nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t262177\t        Orgyia pseudotsugata multiple nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1675866\t        Urbanus proteus nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1642929\t        Lambdina fiscellaria nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1580580\t        Agrotis segetum nucleopolyhedrovirus B\n+  0.00\t1\t1\tS\t208013\t        Agrotis ipsilon multiple nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t490711\t        Orgyia leucostigma 
nucleopolyhedrovirus\n+  0.00\t1\t1\tS\t1307954\t        Maruca vitrata nucleopolyhedrovirus\n+  0.00\t8\t0\tD1\t2731342\t    Monodnaviria\n+  0.00\t8\t0\tK\t2732092\t      Shotokuvirae\n+  0.00\t5\t0\tP\t2732416\t        Cressdnaviricota\n+  0.00\t3\t0\tC\t2732424\t          Repensiviricetes\n+  0.00\t3\t0\tO\t2732539\t            Geplafuvirales\n+  0.00\t3\t0\tF\t10811\t              Geminiviridae\n+  0.00\t2\t0\tG\t10814\t                Begomovirus\n+  0.00\t2\t2\tS\t1915203\t                  Common bean severe mosaic virus\n+  0.00\t1\t0\tG\t2022857\t                Capulavirus\n+  0.00\t1\t1\tS\t1830242\t                  Plantago lanceolata latent virus\n+  0.00\t2\t0\tC\t2732423\t          Arfiviricetes\n+  0.00\t2\t0\tO\t2732536\t            Cirlivirales\n+  0.00\t2\t0\tF\t39724\t              Circoviridae\n+  0.00\t2\t0\tG\t742914\t                Cyclovirus\n+  0.00\t2\t0\tS\t2038725\t                  Human associated cyclovirus 7\n+  0.00\t2\t2\tS1\t742923\t                    Cyclovirus NG14\n+  0.00\t3\t0\tP\t2732415\t        Cossaviricota\n+  0.00\t3\t0\tC\t2732421\t          Papovaviricetes\n+  0.00\t2\t0\tO\t2732532\t            Sepolyvirales\n+  0.00\t2\t0\tF\t151341\t              Polyomaviridae\n+  0.00\t2\t0\tG\t1891713\t                Alphapolyomavirus\n+  0.00\t2\t2\tS\t1236407\t                  Piliocolobus rufomitratus polyomavirus 1\n+  0.00\t1\t0\tO\t2732533\t            Zurhausenvirales\n+  0.00\t1\t0\tF\t151340\t              Papillomaviridae\n+  0.00\t1\t0\tF1\t2169595\t                Firstpapillomavirinae\n+  0.00\t1\t0\tG\t325455\t                  Gammapapillomavirus\n+  0.00\t1\t0\tG1\t735504\t                    unclassified Gammapapillomavirus\n+  0.00\t1\t1\tS\t2049444\t                      Gammapapillomavirus sp.\n+  0.00\t4\t0\tF\t1285590\t    Hytrosaviridae\n+  0.00\t2\t0\tG\t1285591\t      Glossinavirus\n+  0.00\t2\t0\tS\t2747309\t        Glossina hytrosavirus\n+  0.00\t2\t2\tS1\t379529\t          Glossina pallidipes 
salivary gland hypertrophy virus\n+  0.00\t2\t0\tG\t1285593\t      Muscavirus\n+  0.00\t2\t0\tS\t2747498\t        Musca hytrosavirus\n+  0.00\t2\t2\tS1\t523909\t          Musca domestica salivary gland hypertrophy virus\n+  0.00\t3\t0\tF\t1511852\t    Nudiviridae\n+  0.00\t3\t0\tF1\t1110703\t      unclassified Nudiviridae\n+  0.00\t2\t2\tS\t2072209\t        Esparto virus\n+  0.00\t1\t1\tS\t2057187\t        Drosophila innubila nudivirus\n+  0.00\t2\t0\tF\t196937\t    Nimaviridae\n+  0.00\t2\t0\tG\t249585\t      Whispovirus\n+  0.00\t2\t2\tS\t342409\t        White spot syndrome virus\n+  0.00\t1\t0\tF\t423358\t    Bicaudaviridae\n+  0.00\t1\t0\tF1\t1123963\t      unclassified Bicaudaviridae\n+  0.00\t1\t1\tS\t1797140\t        Acidianus tailed spindle virus\n+  0.00\t1\t0\tR1\t2787854\t  other entries\n+  0.00\t1\t1\tR2\t28384\t    other sequences\n'