Repository 'phyloseq_plot_ordination'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/phyloseq_plot_ordination

Changeset 0:11d43fa12aab (2022-03-03)
Next changeset 1:92e77800ef2c (2024-02-09)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
added:
macros.xml
phyloseq_from_dada2.R
phyloseq_plot_ordination.R
phyloseq_plot_ordination.xml
phyloseq_plot_richness.R
test-data/output.phyloseq
test-data/sequence_table.dada2_sequencetable
test-data/taxonomy_table.tabular
b
diff -r 000000000000 -r 11d43fa12aab macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Mar 03 13:28:30 2022 +0000
b
@@ -0,0 +1,27 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.38.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.01</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bioconductor-phyloseq</requirement>
+            <requirement type="package" version="1.7.1">r-optparse</requirement>
+            <requirement type="package" version="1.3.1">r-tidyverse</requirement>
+        </requirements>
+    </xml>
+    <xml name="phyloseq_input">
+        <param name="input" type="data" format="phyloseq" label="File containing a phyloseq object"/>
+    </xml>
+    <xml name="outputs">
+        <outputs>
+            <data name="output" format="pdf"/>
+        </outputs>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.18129/B9.bioc.phyloseq</citation>
+            <citation type="doi">10.1371/journal.pone.0061217</citation>
+        </citations>
+    </xml>
+</macros>
+
b
diff -r 000000000000 -r 11d43fa12aab phyloseq_from_dada2.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_from_dada2.R Thu Mar 03 13:28:30 2022 +0000
[
@@ -0,0 +1,39 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("tidyverse"))
+
+# Command-line interface: two tabular inputs and the path of the RDS file to write.
+option_list <- list(
+    make_option(c("--sequence_table"), action = "store", dest = "sequence_table", help = "Input sequence table"),
+    make_option(c("--taxonomy_table"), action = "store", dest = "taxonomy_table", help = "Input taxonomy table"),
+    make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+
+# The sequence table is a tab-separated count matrix: header row = sample names,
+# first column = ASV sequences (rows = ASVs, columns = samples). header/row.names
+# keep those labels out of the counts; check.names = FALSE keeps names verbatim.
+seq_table_numeric_matrix <- data.matrix(
+    read.table(opt$sequence_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
+)
+
+# The taxonomy table uses the same layout: a header row of taxonomic levels
+# and the ASV sequences in the first column. NA marks a level at which the
+# sequence was not consistently classified at the minBoot bootstrap threshold.
+tax_table_matrix <- as.matrix(
+    read.table(opt$taxonomy_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
+)
+
+# Both matrices now carry the ASV sequences as row names, so the taxa names of
+# the otu_table and the tax_table match when phyloseq() combines them.
+otu_tab <- otu_table(seq_table_numeric_matrix, taxa_are_rows = TRUE)
+tax_tab <- tax_table(tax_table_matrix)
+
+# Construct the phyloseq object and serialize it as a compressed RDS file.
+phyloseq_obj <- phyloseq(otu_tab, tax_tab)
+saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)
b
diff -r 000000000000 -r 11d43fa12aab phyloseq_plot_ordination.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_plot_ordination.R Thu Mar 03 13:28:30 2022 +0000
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+
+# Command-line interface: input phyloseq RDS, ordination settings and the PDF to write.
+option_list <- list(
+    make_option(c("--input"), action = "store", dest = "input", help = "Input file containing a phyloseq object"),
+    make_option(c("--method"), action = "store", dest = "method", help = "Ordination method"),
+    make_option(c("--distance"), action = "store", dest = "distance", help = "Distance method"),
+    make_option(c("--type"), action = "store", dest = "type", help = "Plot type"),
+    make_option(c("--output"), action = "store", dest = "output", help = "Output")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+
+# Load the serialized phyloseq object.
+phyloseq_obj <- readRDS(opt$input)
+
+# Transform counts to per-sample proportions as appropriate for Bray-Curtis distances.
+proportions_obj <- transform_sample_counts(phyloseq_obj, function(otu) otu / sum(otu))
+ordination_obj <- ordinate(proportions_obj, method = opt$method, distance = opt$distance)
+
+# Open only the PDF device for the output: a preceding dev.new() would start an
+# extra default device. print() renders the plot without relying on auto-printing.
+pdf(file = opt$output)
+print(plot_ordination(proportions_obj, ordination_obj, type = opt$type))
+invisible(dev.off())
b
diff -r 000000000000 -r 11d43fa12aab phyloseq_plot_ordination.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_plot_ordination.xml Thu Mar 03 13:28:30 2022 +0000
[
@@ -0,0 +1,89 @@
+<tool id="phyloseq_plot_ordination" name="Phyloseq: plot ordination" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+  <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '${__tool_directory__}/phyloseq_plot_ordination.R' 
+--input '$input' 
+--method '$method'
+--distance '$distance'
+--type '$type'
+--output '$output'
+    ]]></command>
+    <inputs>
+        <expand macro="phyloseq_input"/>
+        <param name="method" type="select" label="Ordination method">
+            <option value="DCA" selected="true">DCA</option>
+            <option value="CCA">CCA</option>
+            <option value="RDA">RDA</option>
+            <option value="CAP">CAP</option>
+            <option value="NMDS">NMDS</option>
+            <option value="MDS">MDS</option>
+            <option value="PCoA">PCoA</option>
+        </param>
+        <param name="distance" type="select" label="Distance method" help="Utilized only if a distance matrix is required by the Ordination method selected above">
+            <option value="bray" selected="true">bray</option>
+            <option value="canberra">canberra</option>
+            <option value="euclidean">euclidean</option>
+            <option value="gower">gower</option>
+            <option value="horn">horn</option>
+            <option value="jaccard">jaccard</option>
+            <option value="kulczynski">kulczynski</option>
+            <option value="manhattan">manhattan</option>
+            <option value="maximum">maximum</option>
+            <option value="minkowski">minkowski</option>
+            <option value="morisita">morisita</option>
+            <option value="mountford">mountford</option>
+        </param>
+        <param name="type" type="select" label="Plot type">
+            <option value="biplot" selected="true">biplot</option>
+            <option value="samples">samples</option>
+            <option value="scree">scree</option>
+            <option value="species">species</option>
+            <option value="split">split</option>
+        </param>
+    </inputs>
+    <expand macro="outputs"/>
+    <tests>
+        <test>
+            <param name="input" value="output.phyloseq" ftype="phyloseq"/>
+            <output name="output" ftype="pdf">
+                <assert_contents>
+                    <has_text text="%PDF"/>
+                    <has_text text="%%EOF"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts a dataset containing a phyloseq object created from a dada2 taxonomy table and a dada2 sequence table,
+and generates an ordination plot of the samples.
+
+**Options**
+
+ **Ordination method**
+
+  * **DCA** - Performs detrended correspondence analysis using decorana.
+  * **CCA** - Performs correspondence analysis, or optionally, constrained correspondence analysis (a.k.a. canonical correspondence analysis) via vegan cca.
+  * **RDA** - Performs redundancy analysis, or optionally principal components analysis, via vegan rda.
+  * **CAP** - [Partial] Constrained Analysis of Principal Coordinates or distance-based RDA, via vegan capscale.
+  * **NMDS** - Performs Non-metric MultiDimensional Scaling of a sample-wise ecological distance matrix onto a user-specified number of axes (k).
+  * **MDS/PCoA** - Performs principal coordinate analysis (also called principal coordinate decomposition, multidimensional scaling (MDS), or classical scaling) of a distance matrix, including two correction methods for negative eigenvalues.
+
+ **Distance method** - Utilized only if a distance matrix is required by the Ordination method documented above.
+
+ **Plot type**
+
+  * **biplot** - Produces a combined plot with both taxa and samples.
+  * **samples** - Produces a single plot of just the samples of the ordination.
+  * **scree** - Produces an ordered bar plot of the normalized eigenvalues associated with each ordination axis.
+  * **species** - Produces a single plot of just the species of the ordination.
+  * **split** - Produces a plot with both taxa and samples separated in two facet panels respectively.
+    </help>
+    <expand macro="citations"/>
+</tool>
+
b
diff -r 000000000000 -r 11d43fa12aab phyloseq_plot_richness.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_plot_richness.R Thu Mar 03 13:28:30 2022 +0000
[
@@ -0,0 +1,21 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages(library("phyloseq"))
+
+# Command-line interface: input phyloseq RDS and the PDF to write.
+option_list <- list(
+    make_option(c("--input"), action = "store", dest = "input", help = "Input RDS file containing a phyloseq object"),
+    make_option(c("--output"), action = "store", dest = "output", help = "Output PDF")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+
+phyloseq_obj <- readRDS(opt$input)
+
+# Open only the PDF device (no extra dev.new() device) and print the plot explicitly.
+pdf(file = opt$output)
+print(plot_richness(phyloseq_obj, x = "samples", color = "samples"))
+invisible(dev.off())
b
diff -r 000000000000 -r 11d43fa12aab test-data/output.phyloseq
b
Binary file test-data/output.phyloseq has changed
b
diff -r 000000000000 -r 11d43fa12aab test-data/sequence_table.dada2_sequencetable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence_table.dada2_sequencetable Thu Mar 03 13:28:30 2022 +0000
b
b'@@ -0,0 +1,65 @@\n+\tSRR14190457\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t178\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGGATTAGAAACCCCCGTAGTCC\t136\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t129\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCCCGTAGTCC\t128\n+GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTGTGTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTATGTAACTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t110\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t104\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAAC
TCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCCGTAGTCC\t97\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGTCAGGCTAGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGATGACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t90\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\t88\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t86\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t84\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAAATAAGTCTAATGTGAAAGCCCTCGGCTTAACCGAGGAACTGCATCGGAAACTGTTTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\t83\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTGCTTAGGTCTGATGTGAAAGCCTTCGGCTTAACCGAAGAAGTGCATCGGAAACCGGGCGACTTGAGTGCAGAAGAGGACAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTGTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCAGTAGTCC\t71\n+GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAG
ACGGAGTGGCAAGTCTGATGTGAAAACCCG'..b'CGGAATTCCTTGTGTAGCGGTGAAATGCGTAGATATAAGGAAGAACACCAGTGGCGAAGGCGGATAACTGGACGGCAACTGACGGTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t28\n+GTGTCAGCAGCCGCGGTAAAACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAAGCAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGGTGTAGCGGTGAAATGCGTAGATATTGGGAGGAACACCAGTGGCGAAGGCGCCTTTCTGGACTGTGTCTGACGCTGAGATGCGAAAGCCAGGGTAGCGAACGGGATTAGATACCCCCGTAGTCC\t28\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGATGGACAAGTCTGATGTGAAAGGCTGGGGCTCAACCCCGGGACTGCATTGGAAACTGCCCGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t27\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGCTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCGATGGCGAAGGCAGGTCTCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCCGTAGTCC\t27\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCTGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGTAACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTTACTGGGCAGTTACTGACGCTGAGGAGCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCAGTAGTCC\t26\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTAGTAGTCC\t26\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\t25\n+GTGCCAGCAGCCGCGGTAATACGGAGGG
TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\t24\n+GTGTCAGCAGCCGCGGTAATACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\t22\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\t22\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\t22\n+GTGTCAGCAGCCGCGGTGATACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGGTGGAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAGTGGCGAAGGCGGTTCTCTGGGCCTTTCCTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGCTTAGATACCCCTGTAGTCC\t16\n+GTGTCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGTCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGCGGTCTTGAGTGAGCGAGAAGTACGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCGTACCGGCGCTCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGGATTAGATACCCCCGTAGTCC\t15\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGCGTATAAGTCAGTGGTGAAATCCTGGAGCTTAACTCCAGAACTGCCATTGATACTATATGTCTTGAATATGGTGGAGGTAAGCGGAATATGTCATGTAGCGGTGAAATGCATAGATATGACATAGAACACCTATTGCGAAGGCAGCTTACTACGCCTATAT
TGACGCTGAGGCACGAAAGCGTGGGGATCAAACAGGATTAGAAACCCGAGTAGTCC\t11\n'
b
diff -r 000000000000 -r 11d43fa12aab test-data/taxonomy_table.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomy_table.tabular Thu Mar 03 13:28:30 2022 +0000
b
b'@@ -0,0 +1,65 @@\n+\tKingdom\tPhylum\tClass\tOrder\tFamily\tGenus\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGTGCAGGCGGTTCAATAAGTCTGATGTGAAAGCCTTCGGCTCAACCGGAGAATTGCATCAGAAACTGTTGAACTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGCAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGACTGGTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGTCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tBacteroidaceae\tBacteroides\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tBifidobacteriales\tBifidobacteriaceae\tBifidobacterium\n+GTGTCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCCTGCTAAGCTGCAACTGACATTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tBacteroidaceae\tBacteroides\n+GTGTCAGCAGCCGCGGTAATACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTGTGTAAGTCTGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTGGAAACTATGTAACTAGAGTGTCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGATCACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tLachnospirales\tLachnospiraceae\tTyzzerella\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAA
GCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCATGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCATTTGGAACTGTCAGGCTAGAGTGTCGGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGACGATGACTGACGTTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tLachnospirales\tLachnospiraceae\tNA\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCAGTAGTCC\tBacteria\tFirmicutes\tBacilli\tBacillales\tBacillaceae\tBacillus\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGA
AACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACAC'..b'CTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tBifidobacteriales\tBifidobacteriaceae\tBifidobacterium\n+GTGTCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCTGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGTAACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTTACTGGGCAGTTACTGACGCTGAGGAGCGAAAGCATGGGTAGCGAACAGGATTAGATACCCCAGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tCorynebacteriales\tCorynebacteriaceae\tCorynebacterium\n+GTGTCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGAAAGATAAGTCTGATGTGAAAGCCCCCGGCTTAACCGAGGAATTGCATCGGAAACTGTGTTTCTTGAGTGCAGAAGAGGAGAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCATGGGTAGCGAACAGGATTAGATACCCTAGTAGTCC\tBacteria\tFirmicutes\tBacilli\tLactobacillales\tLactobacillaceae\tLactobacillus\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCTTGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCAGCAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTACTGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGA
AATGCGTAGATATGTGGAGGAACACCAGTGGCGAAGGCGACTCTCTGGTCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGAAACCCGCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tPseudomonadales\tPseudomonadaceae\tAzorhizophilus\n+GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tProteobacteria\tGammaproteobacteria\tEnterobacterales\tEnterobacteriaceae\tEscherichia/Shigella\n+GTGTCAGCAGCCGCGGTAATACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGGATTAGATACCCGCGTAGTCC\tBacteria\tFirmicutes\tClostridia\tPeptostreptococcales-Tissierellales\tPeptostreptococcaceae\tRomboutsia\n+GTGTCAGCAGCCGCGGTGATACGTAGGGTGCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGCTCGTAGGTGGTTGATCGCGTCGGAAGTGTAATCTTGGGGCTTAACCCTGAGCGTGCTTTCGATACGGGTTGACTTGAGGAAGGTAGGGGAGAATGGAATTCCTGGTGGAGCGGTGGAATGCGCAGATATCAGGAGGAACACCAGTGGCGAAGGCGGTTCTCTGGGCCTTTCCTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGCTTAGATACCCCTGTAGTCC\tBacteria\tActinobacteriota\tActinobacteria\tPropionibacteriales\tPropionibacteriaceae\tCutibacterium\n+GTGTCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGACTGTCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGCGGTCTTGAGTGAGCGAGAAGTACGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCGTACCGGCGCTCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGGATTAGATACCCCCGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tBacteroidales\tMuribaculaceae\tNA\n+GTGTCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGCGTATAAGTCAGTGGTGAAATCCTGGAGCTTAACTCCAGAACTGCCATTGATACTATATGTCTTGAATATGGTGGAGGTAAGCGGAATATGTCATGTAGCGGTGAAATGCATAGATATGACATAGAACACCTATTGCGAAGGCAGCT
TACTACGCCTATATTGACGCTGAGGCACGAAAGCGTGGGGATCAAACAGGATTAGAAACCCGAGTAGTCC\tBacteria\tBacteroidota\tBacteroidia\tChitinophagales\tChitinophagaceae\tAsinibacterium\n'