Repository 'phyloseq_from_biom'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/phyloseq_from_biom

Changeset 0:c0101c72b8af (2024-03-16)
Next changeset 1:1feea247d08a (2024-12-03)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
added:
macros.xml
phyloseq_from_biom.R
phyloseq_from_biom.xml
phyloseq_from_dada2.R
phyloseq_plot_ordination.R
phyloseq_plot_richness.R
test-data/biom-refseq.fasta
test-data/biom-tree.phy
test-data/output.phyloseq
test-data/rich_dense_otu_table.biom
test-data/rich_dense_otu_table.biom2
test-data/sample_data.tabular
test-data/sequence_table.dada2_sequencetable
test-data/taxonomy_table.tabular
b
diff -r 000000000000 -r c0101c72b8af macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Mar 16 07:56:17 2024 +0000
b
@@ -0,0 +1,32 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.46.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.01</token>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">phyloseq</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-phyloseq</requirement>
+            <requirement type="package" version="1.7.3">r-optparse</requirement>
+            <requirement type="package" version="2.0.0">r-tidyverse</requirement>
+        </requirements>
+    </xml>
+    <xml name="phyloseq_input">
+        <param name="input" type="data" format="phyloseq" label="File containing a phyloseq object"/>
+    </xml>
+    <xml name="outputs">
+        <outputs>
+            <data name="output" format="pdf"/>
+        </outputs>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.18129/B9.bioc.phyloseq</citation>
+            <citation type="doi">10.1371/journal.pone.0061217</citation>
+        </citations>
+    </xml>
+</macros>
+
b
diff -r 000000000000 -r c0101c72b8af phyloseq_from_biom.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_from_biom.R Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("tidyverse"))
+
+option_list <- list(
+    make_option(c("--BIOMfilename"), action = "store", dest = "biom", help = "Input BIOM file"),
+    make_option(c("--treefilename"), action = "store", dest = "tree", default = NULL, help = "Input Tree newick/nexus file"),
+    make_option(c("--parseFunction"), action = "store", dest = "parsefoo", default = "parse_taxonomy_default", help = "Taxonomy parse function parse_taxonomy_default/parse_taxonomy_greengenes/parse_taxonomy_qiime"),
+    make_option(c("--refseqfilename"), action = "store", dest = "sequences", default = NULL, help = "Input Sequence fasta file"),
+    make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
+)
+# Parse the command line; only the named options above are read below.
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+# Resolve the taxonomy parser by name; only the documented choices are accepted so a typo fails with a clear error.
+parsefoo <- get(match.arg(opt$parsefoo, c("parse_taxonomy_default", "parse_taxonomy_greengenes", "parse_taxonomy_qiime")), mode = "function")
+phyloseq_obj <- import_biom(
+    BIOMfilename = opt$biom,
+    treefilename = opt$tree,
+    refseqfilename = opt$sequences,
+    parseFunction = parsefoo
+)
+# Print a summary of the imported object to stdout.
+print(phyloseq_obj)
+
+# save R object to file
+saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)
b
diff -r 000000000000 -r c0101c72b8af phyloseq_from_biom.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_from_biom.xml Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,92 @@
+<tool id="phyloseq_from_biom" name="Create phyloseq object" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+  <description>from a BIOM file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '${__tool_directory__}/phyloseq_from_biom.R'
+    --BIOMfilename '$BIOMfilename'
+    --parseFunction '$parseFunction'
+    #if $treefilename
+        --treefilename '$treefilename'
+    #end if
+    #if $refseqfilename
+        --refseqfilename '$refseqfilename'
+    #end if
+    --output '$output'
+    ]]></command>
+    <inputs>
+        <param argument="BIOMfilename" type="data" format="biom1,biom2" label="BIOM file" help=""/>
+        <param argument="treefilename" type="data" format="nex,newick" optional="true" label="Phylogeny" help=""/>
+        <param name="parseFunction" type="select" label="Taxonomy parser" help="Function used to parse the taxonomy annotation stored in the BIOM file">
+            <option value="parse_taxonomy_default">Default</option>
+            <option value="parse_taxonomy_greengenes">Greengenes</option>
+            <option value="parse_taxonomy_qiime">Qiime</option>
+        </param>
+        <param argument="refseqfilename" type="data" format="fasta" optional="true" label="Sequences" help=""/>
+    </inputs>
+    <outputs>
+        <data name="output" format="phyloseq"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="BIOMfilename" value="rich_dense_otu_table.biom" ftype="biom1"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="880" delta="100"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_line line="phyloseq-class experiment-level object"/>
+                <has_line line="otu_table()   OTU Table:         [ 5 taxa and 6 samples ]"/>
+                <has_line line="sample_data() Sample Data:       [ 6 samples by 4 sample variables ]"/>
+                <has_line line="tax_table()   Taxonomy Table:    [ 5 taxa by 7 taxonomic ranks ]"/>
+                <has_n_lines n="4"/>
+            </assert_stdout>
+        </test>
+        <test>
+            <param name="BIOMfilename" value="rich_dense_otu_table.biom2" ftype="biom2"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="880" delta="100"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_line line="phyloseq-class experiment-level object"/>
+                <has_line line="otu_table()   OTU Table:         [ 5 taxa and 6 samples ]"/>
+                <has_line line="sample_data() Sample Data:       [ 6 samples by 4 sample variables ]"/>
+                <has_line line="tax_table()   Taxonomy Table:    [ 5 taxa by 7 taxonomic ranks ]"/>
+                <has_n_lines n="4"/>
+            </assert_stdout>
+        </test>
+        <test>
+            <param name="BIOMfilename" value="rich_dense_otu_table.biom" ftype="biom1"/>
+            <param name="treefilename" value="biom-tree.phy"/>
+            <param name="parseFunction" value="parse_taxonomy_greengenes"/>
+            <param name="refseqfilename" value="biom-refseq.fasta"/>
+            <output name="output">
+                <assert_contents>
+                    <has_size value="1880" delta="200"/>
+                </assert_contents>
+            </output>
+            <assert_stdout>
+                <has_line line="phyloseq-class experiment-level object"/>
+                <has_line line="otu_table()   OTU Table:         [ 5 taxa and 6 samples ]"/>
+                <has_line line="sample_data() Sample Data:       [ 6 samples by 4 sample variables ]"/>
+                <has_line line="tax_table()   Taxonomy Table:    [ 5 taxa by 7 taxonomic ranks ]"/>
+                <has_line line="phy_tree()    Phylogenetic Tree: [ 5 tips and 4 internal nodes ]"/>
+                <has_line line="refseq()      DNAStringSet:      [ 5 reference sequences ]"/>
+                <has_n_lines n="6"/>
+            </assert_stdout>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Read a BIOM file (and optionally sequences and a phylogeny) and store them in a phyloseq object.
+    </help>
+    <expand macro="citations"/>
+</tool>
+
b
diff -r 000000000000 -r c0101c72b8af phyloseq_from_dada2.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_from_dada2.R Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,55 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("tidyverse"))
+
+option_list <- list(
+    make_option(c("--sequence_table"), action = "store", dest = "sequence_table", help = "Input sequence table"),
+    make_option(c("--taxonomy_table"), action = "store", dest = "taxonomy_table", help = "Input taxonomy table"),
+    make_option(c("--sample_table"), action = "store", default = NULL, dest = "sample_table", help = "Input sample table"),
+    make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+# The input sequence_table is an integer matrix stored as
+# tabular (rows = ASV sequences, columns = samples).
+seq_table_numeric_matrix <- data.matrix(read.table(opt$sequence_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE))
+# The input taxonomy_table is a table containing
+# the assigned taxonomies exceeding the minBoot
+# level of bootstrapping confidence. Rows correspond
+# to sequences, columns to taxonomic levels. NA
+# indicates that the sequence was not consistently
+# classified at that level at the minBoot threshold.
+tax_table_matrix <- as.matrix(read.table(opt$taxonomy_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE))
+# Construct a tax_table object.  The rownames of
+# tax_tab must match the OTU names (taxa_names)
+# of the otu_table defined below.
+tax_tab <- tax_table(tax_table_matrix)
+
+# Construct an otu_table object (taxa are the rows, see above).
+otu_tab <- otu_table(seq_table_numeric_matrix, taxa_are_rows = TRUE)
+
+# Construct a phyloseq object.
+phyloseq_obj <- phyloseq(otu_tab, tax_tab)
+if (!is.null(opt$sample_table)) {
+    sample_tab <- sample_data(
+        read.table(opt$sample_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
+    )
+    phyloseq_obj <- merge_phyloseq(phyloseq_obj, sample_tab)
+}
+
+# use short names for our ASVs and save the ASV sequences in the
+# refseq slot of the phyloseq object as described in
+# https://benjjneb.github.io/dada2/tutorial.html
+dna <- Biostrings::DNAStringSet(taxa_names(phyloseq_obj))
+names(dna) <- taxa_names(phyloseq_obj)
+phyloseq_obj <- merge_phyloseq(phyloseq_obj, dna)
+taxa_names(phyloseq_obj) <- paste0("ASV", seq_len(ntaxa(phyloseq_obj)))
+
+print(phyloseq_obj)
+
+# save R object to file
+saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)
b
diff -r 000000000000 -r c0101c72b8af phyloseq_plot_ordination.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_plot_ordination.R Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,27 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+
+option_list <- list(
+    make_option(c("--input"), action = "store", dest = "input", help = "Input file containing a phyloseq object"),
+    make_option(c("--method"), action = "store", dest = "method", help = "Ordination method"),
+    make_option(c("--distance"), action = "store", dest = "distance", help = "Distance method"),
+    make_option(c("--type"), action = "store", dest = "type", help = "Plot type"),
+    make_option(c("--output"), action = "store", dest = "output", help = "Output")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+# Load the phyloseq object from the RDS input file.
+phyloseq_obj <- readRDS(opt$input)
+# Transform counts to per-sample proportions before ordination
+# (appropriate e.g. for Bray-Curtis distances).
+proportions_obj <- transform_sample_counts(phyloseq_obj, function(otu) otu / sum(otu))
+ordination_obj <- ordinate(proportions_obj, method = opt$method, distance = opt$distance)
+# Open the output PDF device directly (a preceding dev.new() would leave
+# a stray Rplots.pdf behind) and print the plot into it explicitly.
+pdf(file = opt$output)
+print(plot_ordination(proportions_obj, ordination_obj, type = opt$type))
+dev.off()
b
diff -r 000000000000 -r c0101c72b8af phyloseq_plot_richness.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_plot_richness.R Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,19 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+
+option_list <- list(
+    make_option(c("--input"), action = "store", dest = "input", help = "Input RDS file containing a phyloseq object"),
+    make_option(c("--output"), action = "store", dest = "output", help = "Output PDF")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+phyloseq_obj <- readRDS(opt$input)
+# Open the output PDF device directly (a preceding dev.new() would leave
+# a stray Rplots.pdf behind) and print the plot into it explicitly.
+pdf(file = opt$output)
+print(plot_richness(phyloseq_obj, x = "samples", color = "samples"))
+dev.off()
b
diff -r 000000000000 -r c0101c72b8af test-data/biom-refseq.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/biom-refseq.fasta Sat Mar 16 07:56:17 2024 +0000
b
@@ -0,0 +1,29 @@
+>GG_OTU_1
+AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCA
+TGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCCGTGTAGCGGTG
+AAATGCGTAGAGATGGGGAGGAACACCAGTGGCGAAGGCGGCCTGCTGGGCTTTAACTGACGCTGAGGCACGAAAGCGTG
+GGTAGCAAACAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGATTACTAGGTGTGGGGGTCTGACCCCTTCCGT
+GCCGGAGTTAACAC
+>GG_OTU_2
+TACGTAGGGAGCAAGCGTTATCCGGATTTATTGGGTGTAAAGGGTGCGTAGACGGGAGAACAAGTTAGTTGTGAAAGCCC
+TCGGCTTAACTGAGGAACTGCAACTAAAACTATTTTTCTTGAGTGCAGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTG
+AAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACTGTAACTGACGTTGAGGCACGAAAGTGTG
+GGGAGCAAACAGGATTAGATACCCTGGTAGTCCACACCGTAAACGATGGATACTAGGTGTAGGAGATGATTTCATCATCT
+GTGCCGAAAGCAAACGCAATAAGTATCCCACCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGATTGACGGGGCCCG
+CACAAGCAGTGGAGTATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGGCTTGACATA
+>GG_OTU_3
+TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATCACAAGTCAGAAGTGAAAAATC
+CGGGGGCTCCAACCCCGGAACTGCTTTTGAAACTGTGGAGCTGGAGTGCAGGAGAGGTAAGCGGAATTCCTAGTGTAGCG
+GTAGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGCTTACTGGACTGTAACTGACGTTGAGGCTCGAAAGC
+GTGGGGAGC
+>GG_OTU_4
+TACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGCAGGCGGTGCGGCAAGTCTGATGTGAAAGCCC
+GGGGCTCAACCCCGGTACTGCATTGGAAACTGTCGTACTAGAGTGTCGGAGGGGTAAGCGGAATTCCTAGTGTAGCGGTG
+AAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATAACTGACGCTGAGGCTCGAAAGCGTG
+GGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAATACTAGGTGTTGGGAAGCATTGCTTCTCGGT
+GCCGTCGCAAACGCAGTAAGTATTCCACCTGGGGGATACGTTTCGACAAGAATAGAAACTACAAAAGGAATTAGGACGGG
+GACCCGCACAAGCGGTGAGCATGTGGTTAATCGAAGCAACGCGAAGAACCTTA
+>GG_OTU_5
+AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAGACAAGTTGGAAGTGAAACCATG
+GGCTCAACCCATGAATTGCTTTCAAAACTGTTTTTCTTGAGTTAGTGCAGAGGTAGATGGAATTCCCGGTGTAGCGGTGG
+AATGCGTAGATATCGGGA
b
diff -r 000000000000 -r c0101c72b8af test-data/biom-tree.phy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/biom-tree.phy Sat Mar 16 07:56:17 2024 +0000
b
@@ -0,0 +1,1 @@
+(((GG_OTU_1:0.00892,GG_OTU_2:0.01408)1.000.2:0.12196,GG_OTU_3:0.16022)0.995.2:0.01869,(GG_OTU_4:0.08976,GG_OTU_5:0.0665)0.766:0.09714)0.764.3;
b
diff -r 000000000000 -r c0101c72b8af test-data/output.phyloseq
b
Binary file test-data/output.phyloseq has changed
b
diff -r 000000000000 -r c0101c72b8af test-data/rich_dense_otu_table.biom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rich_dense_otu_table.biom Sat Mar 16 07:56:17 2024 +0000
[
@@ -0,0 +1,56 @@
+{
+     "id":null,
+     "format": "Biological Observation Matrix 1.0.0-dev",
+     "format_url": "http://biom-format.org",
+     "type": "OTU table",
+     "generated_by": "QIIME revision XYZ",
+     "date": "2011-12-19T19:00:00",  
+     "rows":[
+        {"id":"GG_OTU_1", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}},
+        {"id":"GG_OTU_2", "metadata":{"taxonomy":["k__Bacteria", "p__Cyanobacteria", "c__Nostocophycideae", "o__Nostocales", "f__Nostocaceae", "g__Dolichospermum", "s__"]}},
+        {"id":"GG_OTU_3", "metadata":{"taxonomy":["k__Archaea", "p__Euryarchaeota", "c__Methanomicrobia", "o__Methanosarcinales", "f__Methanosarcinaceae", "g__Methanosarcina", "s__"]}},
+        {"id":"GG_OTU_4", "metadata":{"taxonomy":["k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Halanaerobiales", "f__Halanaerobiaceae", "g__Halanaerobium", "s__Halanaerobiumsaccharolyticum"]}},
+        {"id":"GG_OTU_5", "metadata":{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}}
+        ],  
+     "columns":[
+        {"id":"Sample1", "metadata":{
+                                 "BarcodeSequence":"CGCTTATCGAGA",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample2", "metadata":{
+                                 "BarcodeSequence":"CATACCAGTAGC",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample3", "metadata":{
+                                 "BarcodeSequence":"CTCTCTACCTGT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"gut",
+                                 "Description":"human gut"}},
+        {"id":"Sample4", "metadata":{
+                                 "BarcodeSequence":"CTCTCGGCCTGT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}},
+        {"id":"Sample5", "metadata":{
+                                 "BarcodeSequence":"CTCTCTACCAAT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}},
+        {"id":"Sample6", "metadata":{
+                                 "BarcodeSequence":"CTAACTACCAAT",
+                                 "LinkerPrimerSequence":"CATGCTGCCTCCCGTAGGAGT",
+                                 "BODY_SITE":"skin",
+                                 "Description":"human skin"}}
+                ],
+     "matrix_type": "dense",
+     "matrix_element_type": "int",
+     "shape": [5,6],
+     "data":  [[0,0,1,0,0,0], 
+               [5,1,0,2,3,1],
+               [0,0,1,4,2,0],
+               [2,1,1,0,0,1],
+               [0,1,1,0,0,0]]
+    }
+
b
diff -r 000000000000 -r c0101c72b8af test-data/rich_dense_otu_table.biom2
b
Binary file test-data/rich_dense_otu_table.biom2 has changed
b
diff -r 000000000000 -r c0101c72b8af test-data/sample_data.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_data.tabular Sat Mar 16 07:56:17 2024 +0000
b
@@ -0,0 +1,3 @@
+ Property Number
+SRR14190457 Early 1
+SRR14190458 Late 2
\ No newline at end of file
b
diff -r 000000000000 -r c0101c72b8af test-data/sequence_table.dada2_sequencetable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence_table.dada2_sequencetable Sat Mar 16 07:56:17 2024 +0000
b
b'@@ -0,0 +1,65 @@\n+\tSRR14190457\tSRR14190458\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t178\t11\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGGATTAGAAACCCCCGTAGTCC\t136\t15\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t129\t16\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCCCGTAGTCC\t128\t22\n+GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTGTGTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTATGTAACTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t110\t22\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t104\t22\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGA
AACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCCGTAGTCC\t97\t24\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGTCAGGCTAGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGATGACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t90\t25\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t88\t26\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t86\t26\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t84\t27\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAAATAAGTCTAATGTGAAAGCCCTCGGCTTAACCGAGGAACTGCATCGGAAACTGTTTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t83\t27\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTGCTTAGGTCTGATGTGAAAGCCTTCGGCTTAACCGAAGAAGTGCATCGGAAACCGGGCGACTTGAGTGCAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCAGTAGTCC\t71\t28\n+GTGTC
AGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTA'..b'CAGTGGCGAAGGCGGATAACTGGACGGCAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t28\t71\n+GTGTCAGCAGCCGCGGTAAAACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAAGCAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATTGGGAGGAACACCAGTGGCGAAGGCGCCTTTCTGGACTGTGTCTGACGCTGAGATGCGAAAGCCAGGGTAGCGAACGGGATTAGATACCCCCGTAGTCC\t28\t71\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGATGGACAAGTCTGATGTGAAAGGCTGGGGCTCAACCCCGGGACTGCATTGGAAACTGCCCGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t27\t83\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGCTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCGATGGCGAAGGCAGGTCTCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCCGTAGTCC\t27\t84\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCTGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGTAACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTTACTGGGCAGTTACTGACGCTGAGGAGCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCAGTAGTCC\t26\t86\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTAGTAGTCC\t26\t88\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t25\t90\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAG
CGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t24\t97\n+GTGTCAGCAGCCGCGGTAATACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t22\t104\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t22\t110\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t22\t128\n+GTGTCAGCAGCCGCGGTGATACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGGTGGAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAGTGGCGAAGGCGGTTCTCTGGGCCTTTCCTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGCTTAGATACCCCTGTAGTCC\t16\t129\n+GTGTCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGTCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGCGGTCTTGAGTGAGCGAGAAGTACGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCGTACCGGCGCTCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGGATTAGATACCCCCGTAGTCC\t15\t136\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGCGTATAAGTCAGTGGTGAAATCCTGGAGCTTAACTCCAGAACTGCCATTGATACTATATGTCTTGAATATGGTGGAGGTAAGCGGAATATGTCATGTAGCGGTGAAATGCATAGATATGACATAGAACACCTATTGCG
AAGGCAGCTTACTACGCCTATATTGACGCTGAGGCACGAAAGCGTGGGGATCAAACAGGATTAGAAACCCGAGTAGTCC\t11\t178\n'
b
diff -r 000000000000 -r c0101c72b8af test-data/taxonomy_table.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomy_table.tabular Sat Mar 16 07:56:17 2024 +0000
b
b'@@ -0,0 +1,65 @@\n+\tKingdom\tPhylum\tClass\tOrder\tFamily\tGenus\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tBacteroidaceae\tBacteroides\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tBifidobacteriales\tBifidobacteriaceae\tBifidobacterium\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tBacteroidaceae\tBacteroides\n+GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTGTGTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTATGTAACTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tLachnospirales\tLachnospiraceae\tTyzzerella\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAA
GCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGTCAGGCTAGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGATGACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tLachnospirales\tLachnospiraceae\tNA\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\tBacteria\tFirmicutes\tBacilli\tBacillales\tBacillaceae\tBacillus\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGA
AACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACAC'..b'CTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tBifidobacteriales\tBifidobacteriaceae\tBifidobacterium\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCTGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGTAACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTTACTGGGCAGTTACTGACGCTGAGGAGCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCAGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tCorynebacteriales\tCorynebacteriaceae\tCorynebacterium\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTAGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGA
AATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tPseudomonadales\tPseudomonadaceae\tAzorhizophilus\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tPeptostreptococcales-Tissierellales\tPeptostreptococcaceae\tRomboutsia\n+GTGTCAGCAGCCGCGGTGATACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGGTGGAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAGTGGCGAAGGCGGTTCTCTGGGCCTTTCCTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGCTTAGATACCCCTGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tPropionibacteriales\tPropionibacteriaceae\tCutibacterium\n+GTGTCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGTCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGCGGTCTTGAGTGAGCGAGAAGTACGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCGTACCGGCGCTCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tMuribaculaceae\tNA\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGCGTATAAGTCAGTGGTGAAATCCTGGAGCTTAACTCCAGAACTGCCATTGATACTATATGTCTTGAATATGGTGGAGGTAAGCGGAATATGTCATGTAGCGGTGAAATGCATAGATATGACATAGAACACCTATTGCGAAGGCAGCT
TACTACGCCTATATTGACGCTGAGGCACGAAAGCGTGGGGATCAAACAGGATTAGAAACCCGAGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tChitinophagales\tChitinophagaceae\tAsinibacterium\n'