Repository 'humann_barplot'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/humann_barplot

Changeset 0:5240d62d864d (2021-05-12)
Next changeset 1:79698158a59c (2021-05-19)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
added:
customizemapping.py
customizemetadata.py
humann_barplot.xml
humann_genefamilies_genus_level
humann_infer_taxonomy
macros.xml
static/images/731303924-page_DENITRIFICATION-PWY.png
test-data/barplot1.png
test-data/barplot2.pdf
test-data/barplot3.svg
test-data/barplot4.png
test-data/cpm_community_renormalized_pathway_abundance.tsv
test-data/demo-taxonomic-profile.tabular
test-data/demo.fasta.gz
test-data/demo.fastq.gz
test-data/demo.sam
test-data/demo_genefamilies.tsv
test-data/demo_joined_pathabundance_pathcoverage.tsv
test-data/demo_pathabundance.tsv
test-data/demo_pathcoverage.tsv
test-data/genus-level-gene-families.tsv
test-data/hmp_pathabund.txt
test-data/humann_nucleotide_database.loc
test-data/humann_protein_database.loc
test-data/humann_utility_mapping.loc
test-data/metaphlan_database.loc
test-data/regrouped_gene_families_to_infogo1000.tsv
test-data/relab_levelwise_renormalized_pathway_abundance.tsv
test-data/rna_dna_norm-dna.txt
test-data/rna_dna_norm-rna.txt
test-data/strain_profiler-input.txt
test-data/test-db/metaphlan-db/demo-db-v30.1.bt2
test-data/test-db/metaphlan-db/demo-db-v30.2.bt2
test-data/test-db/metaphlan-db/demo-db-v30.3.bt2
test-data/test-db/metaphlan-db/demo-db-v30.4.bt2
test-data/test-db/metaphlan-db/demo-db-v30.fasta
test-data/test-db/metaphlan-db/demo-db-v30.json
test-data/test-db/metaphlan-db/demo-db-v30.pkl
test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2
test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2
test-data/test-db/metaphlan-db/humann_markers.tabular
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz
test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz
test-data/test-db/protein-db/uniref90_demo_prots_v201901b.dmnd
test-data/test-db/protein-db/uniref90_demo_prots_v201901b.fasta
test-data/test-db/utility_mapping/map_go_uniref90.txt
test-data/test-db/utility_mapping/map_uniref90_name.txt
tool-data/humann_nucleotide_database.loc.sample
tool-data/humann_protein_database.loc.sample
tool-data/humann_utility_mapping.loc.sample
tool-data/metaphlan_database.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
transform_json_to_pkl.py
b
diff -r 000000000000 -r 5240d62d864d customizemapping.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/customizemapping.py Wed May 12 09:00:09 2021 +0000
[
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+from pathlib import Path
+
+
+if __name__ == '__main__':
+    # Read command line
+    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
+    parser.add_argument('--in_mapping', help="Path to mapping file to reduce")
+    parser.add_argument('--features', help="Path to tabular file with features to keep in first column")
+    parser.add_argument('--elements', help="Path to tabular file with elements to keep in other columns")
+    parser.add_argument('--out_mapping', help="Path to reduced mapping file")
+    args = parser.parse_args()
+
+    in_mapping_fp = Path(args.in_mapping)
+    feature_fp = Path(args.features)
+    element_fp = Path(args.elements)
+    out_mapping_fp = Path(args.out_mapping)
+
+    # extract features to keep
+    features = set()
+    with open(feature_fp, 'r') as feature_f:
+        for line in feature_f.readlines():
+            features.add(line.split("\t")[0])
+    print(features)
+
+    # extract elements to keep
+    elements = set()
+    with open(element_fp, 'r') as element_f:
+        for line in element_f.readlines():
+            elements.add(line.split("\t")[0])
+    print(elements)
+
+    # write mapping for features to keep while keeping only elements
+    with open(in_mapping_fp, 'r') as in_mapping_f:
+        with open(out_mapping_fp, 'w') as out_mapping_f:
+            for line in in_mapping_f.readlines():
+                l_split = line.split("\t")
+                feat = l_split[0]
+                if feat in features:
+                    to_write = [feat]
+                    for e in l_split[1:]:
+                        if e in elements:
+                            to_write.append(e)
+                    out_mapping_f.write("%s\n" % '\t'.join(to_write))
b
diff -r 000000000000 -r 5240d62d864d customizemetadata.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/customizemetadata.py Wed May 12 09:00:09 2021 +0000
[
b"@@ -0,0 +1,480 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+\n+import argparse\n+import bz2\n+import json\n+import pickle\n+import re\n+from pathlib import Path\n+\n+\n+def load_from_json(json_fp):\n+    '''\n+    Read JSON file with marker metadata\n+\n+    :param json_fp: Path to JSON file\n+    '''\n+    with open(json_fp, 'r') as json_f:\n+        data = json.load(json_f)\n+\n+    for m in data['markers']:\n+        data['markers'][m]['ext'] = set(data['markers'][m]['ext'])\n+\n+    for t in data['taxonomy']:\n+        if isinstance(data['taxonomy'][t], list):\n+            data['taxonomy'][t] = tuple(data['taxonomy'][t])\n+    return data\n+\n+\n+def dump_to_json(data, json_fp):\n+    '''\n+    Dump marker metadata to JSON file\n+\n+    :param json_fp: Path to JSON file\n+    '''\n+    for m in data['markers']:\n+        data['markers'][m]['ext'] = list(data['markers'][m]['ext'])\n+\n+    with open(json_fp, 'w') as json_f:\n+        json.dump(data, json_f)\n+\n+\n+def transform_pkl_to_json(pkl_fp, json_fp):\n+    '''\n+    Read Pickle file and drop it to a JSON file\n+\n+    :param pkl_fp: Path to input Pickle file\n+    :param json_fp: Path to output JSON file\n+    '''\n+    # load metadata from Pickle file\n+    with bz2.BZ2File(pkl_fp, 'r') as pkl_f:\n+        in_metadata = pickle.load(pkl_f)\n+\n+    out_metadata = {\n+        'markers': in_metadata['markers'],\n+        'taxonomy': in_metadata['taxonomy'],\n+        'merged_taxon': {}\n+    }\n+    # transform merged_taxons tuple keys to string\n+    for k in in_metadata['merged_taxon']:\n+        n = ' , '.join(k)\n+        out_metadata[n] = in_metadata['merged_taxon'][k]\n+\n+    # dump metadata to JSON file\n+    dump_to_json(out_metadata, json_fp)\n+\n+\n+def transform_json_to_pkl(json_fp, pkl_fp):\n+    '''\n+    Read JSON file and drop it to a Pickle file\n+\n+    :param json_fp: Path to input JSON file\n+    :param pkl_fp: Path to output Pickle file\n+    '''\n+    # load metadata from 
JSON file\n+    in_metadata = load_from_json(json_fp)\n+\n+    out_metadata = {\n+        'markers': in_metadata['markers'],\n+        'taxonomy': in_metadata['taxonomy'],\n+        'merged_taxon': {}\n+    }\n+    # transform merged_taxons keys to tuple\n+    for k in in_metadata['merged_taxon']:\n+        n = ' , '.split(k)\n+        out_metadata[n] = in_metadata['merged_taxon'][k]\n+\n+    # dump metadata to Pickle file\n+    with bz2.BZ2File(pkl_fp, 'w') as pkl_f:\n+        pickle.dump(out_metadata, pkl_f)\n+\n+\n+def add_marker(in_json_fp, out_json_fp, name, m_length, g_length, gca, k_name, k_id, p_name, p_id, c_name, c_id, o_name, o_id, f_name, f_id, g_name, g_id, s_name, s_id, t_name):\n+    '''\n+    Add marker to JSON file\n+\n+    :param in_json_fp: Path to input JSON file\n+    :param out_json_fp: Path to output JSON file\n+    :param name: Name of new marker\n+    :param m_length: Length of new marker\n+    :param g_length: List with lengths of genomes from which the new marker has been extracted\n+    :param gca: List with GCA of genomes from which the new marker has been extracted\n+    :param k_name: List with Name of Kingdom for genomes from which the new marker has been extracted\n+    :param k_id: List with NCBI id of Kingdom for genomes from which the new marker has been extracted\n+    :param p_name: List with Name of Phylum for genomes from which the new marker has been extracted\n+    :param p_id: List with NCBI id of Phylum for genomes from which the new marker has been extracted\n+    :param c_name: List with Name of Class for genomes from which the new marker has been extracted\n+    :param c_id: List with NCBI id of Class for genomes from which the new marker has been extracted\n+    :param o_name: List with Name of Order for genomes from which the new marker has been extracted\n+    :param o_id: List with NCBI id of Order for genomes from which the new marker has been extracted\n+    :param f_name: List with Name of Family for genomes 
from which the new marker has been extracted\n+    :param f_id: List with NCBI id of Family for genomes from which th"..b'for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--p_name\', help="Name of Phylum for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--p_id\', help="NCBI id of Phylum for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--c_name\', help="Name of Class for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--c_id\', help="NCBI id of Class for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--o_name\', help="Name of Order for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--o_id\', help="NCBI id of Order for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--f_name\', help="Name of Family for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--f_id\', help="NCBI id of Family for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--g_name\', help="Name of Genus for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--g_id\', help="NCBI id of Genus for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--s_name\', help="Name of Species for genome from which the new marker has been extracted", action="append")\n+    add_marker_parser.add_argument(\'--s_id\', help="NCBI id of Species for genome from which the new marker has been extracted", action="append")\n+    
add_marker_parser.add_argument(\'--t_name\', help="Name of Strain for genome from which the new marker has been extracted", action="append")\n+    # remove_markers subcommand\n+    remove_markers_parser = subparsers.add_parser(\'remove_markers\', help=\'Remove markers from JSON file\')\n+    remove_markers_parser.add_argument(\'--in_json\', help="Path to input JSON file")\n+    remove_markers_parser.add_argument(\'--markers\', help="Path to file with markers to remove (1 per line)")\n+    remove_markers_parser.add_argument(\'--out_json\', help="Path to output JSON file")\n+    remove_markers_parser.add_argument(\'--kept_markers\', help="Path to file with kept markers")\n+    # keep_markers subcommand\n+    keep_markers_parser = subparsers.add_parser(\'keep_markers\', help=\'Keep markers from JSON file, others will be removed\')\n+    keep_markers_parser.add_argument(\'--in_json\', help="Path to input JSON file")\n+    keep_markers_parser.add_argument(\'--markers\', help="Path to file with markers to keep (1 per line)")\n+    keep_markers_parser.add_argument(\'--out_json\', help="Path to output JSON file")\n+\n+    args = parser.parse_args()\n+\n+    if args.function == \'transform_pkl_to_json\':\n+        transform_pkl_to_json(Path(args.pkl), Path(args.json))\n+    elif args.function == \'transform_json_to_pkl\':\n+        transform_json_to_pkl(Path(args.json), Path(args.pkl))\n+    elif args.function == \'add_marker\':\n+        add_marker(\n+            args.in_json,\n+            args.out_json,\n+            args.name,\n+            args.m_length,\n+            args.g_length,\n+            args.gca,\n+            args.k_name,\n+            args.k_id,\n+            args.p_name,\n+            args.p_id,\n+            args.c_name,\n+            args.c_id,\n+            args.o_name,\n+            args.o_id,\n+            args.f_name,\n+            args.f_id,\n+            args.g_name,\n+            args.g_id,\n+            args.s_name,\n+            args.s_id,\n+    
        args.t_name)\n+    elif args.function == \'remove_markers\':\n+        remove_markers(args.in_json, args.markers, args.out_json, args.kept_markers)\n+    elif args.function == \'keep_markers\':\n+        keep_markers(args.in_json, args.markers, args.out_json)\n'
b
diff -r 000000000000 -r 5240d62d864d humann_barplot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann_barplot.xml Wed May 12 09:00:09 2021 +0000
[
b'@@ -0,0 +1,295 @@\n+<tool id="humann_barplot" name="Barplot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>stratified HUMAnN features</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="edam_ontology"/>\n+    <expand macro="requirements"/>\n+    <expand macro="version"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+humann_barplot\n+    --input \'$input\'\n+#if str($last_metadata) != \'\'\n+    --last-metadata \'$last_metadata\'\n+#end if\n+    --focal-feature \'$focal_feature\'\n+    --top-taxa $species.top_taxa\n+    $species.as_genera \n+    $species.exclude_unclassified\n+    $species.remove_zeros\n+    --sort $species.sort\n+#if str($species.taxa_colormap) != \'\'\n+    --taxa-colormap \'$species.taxa_colormap\'\n+#end if\n+\n+#if str($sample.focal_metadata) != \'\'\n+    --focal-metadata \'$sample.focal_metadata\'\n+#end if\n+#if str($sample.meta_colormap) != \'\'\n+    --meta-colormap \'$sample.meta_colormap\'\n+#end if\n+    --max-metalevels $sample.max_metalevels\n+    --scaling \'$graphical.scaling\'\n+    #if str($graphical.ymin)  and str($graphical.ymax) != \'\'\n+    --ylims $graphical.ymin $graphical.ymax\n+#end if\n+    $graphical.no_grid\n+    --dimensions $graphical.height $graphical.width\n+#if str($graphical.units) != \'\'\n+    --units \'$graphical.units\'\n+#end if\n+    --legend-cols $legend.legend_cols\n+    --legend-rows $legend.legend_rows\n+    --legend-height $legend.legend_height\n+#if $out.format == \'png\'\n+    --output \'output.png\'\n+#elif $out.format == \'pdf\'\n+    --output \'output.pdf\'\n+#elif $out.format == \'svg\'\n+    --output \'output.svg\'\n+#end if\n+    ]]></command>\n+    <inputs>\n+        <param argument="--input" type="data" format="tsv,tabular" label="HUMAnN table with optional metadata"/>\n+        <param argument="--last-metadata" type="text" value="" optional="true" label="The name (header) of the last row 
containing metadata, if any">\n+            <sanitizer invalid_char="">\n+                <valid initial="string.ascii_letters,string.digits">\n+                    <add value="_" />\n+                    <add value="-" />\n+                </valid>\n+            </sanitizer>\n+        </param>\n+        <param argument="--focal-feature" type="text" value="" label="Feature ID of interest" help="Give ID not full name">\n+            <sanitizer invalid_char="">\n+                <valid initial="string.ascii_letters,string.digits">\n+                    <add value="_" />\n+                    <add value="-" />\n+                </valid>\n+            </sanitizer>\n+            <validator type="empty_field" />\n+        </param>\n+        <section name="species" title="Manipulation of species contributions" expanded="true">\n+            <param argument="--top-taxa" type="integer" value="18" min="0" label="Max taxon stratifications"/>\n+            <param argument="--as-genera" type="boolean" truevalue="--as-genera" falsevalue="" checked="false" label="Collapse species to genera?"/>\n+            <param argument="--exclude-unclassified" type="boolean" truevalue="--exclude-unclassified" falsevalue="" checked="false" label="Do not include the \'unclassified\' stratum?"/>\n+            <param argument="--remove-zeros" type="boolean" truevalue="--remove-zeros" falsevalue="" checked="false" label="Do not plot samples with zero sum for this feature?"/>\n+            <param argument="--sort" type="select" multiple="true" label="Sample sorting methods">\n+                <option value="none">Default</option>\n+                <option value="sum">Sum of stratified values</option>\n+                <option value="dominant">Value of the most dominant stratification</option>\n+                <option value="brawcurtis">Bray-Curtis agreement of relative stratifications</option>\n+                <option value="braycurtis_w">Bray-Curtis agreement of raw stratifications</option>\n+   
             <option value="metadata">Given metadata label</option>\n+            </param>\n+            <param argument="--taxa-c'..b'value="1.0"/>\n+            </section>\n+            <section name="out">\n+                <param name="format" value="pdf"/>\n+            </section>\n+            <output name="barplot_svg" file="barplot2.pdf" ftype="pdf" compare="sim_size"/>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <param name="input" value="hmp_pathabund.txt"/>\n+            <param name="last_metadata" value="STSite"/>\n+            <param name="focal_feature" value="ANAGLYCOLYSIS-PWY"/>\n+            <section name="species">\n+                <param name="top_taxa" value="18"/>\n+                <param name="as_genera" value="true"/>\n+                <param name="exclude_unclassified" value="true"/>\n+                <param name="remove_zeros" value="true"/>\n+                <param name="sort" value="sum"/>\n+            </section>\n+            <section name="sample">\n+                <param name="focal_metadata" value="STSite"/>\n+                <param name="max_metalevels" value="7"/>\n+            </section>\n+            <section name="graphical">\n+                <param name="scaling" value="original"/>\n+                <param name="no_grid" value="false"/>\n+                <param name="height" value="8"/>\n+                <param name="width" value="4"/>\n+            </section>\n+            <section name="legend">\n+                <param name="legend_cols" value="3"/>\n+                <param name="legend_rows" value="10"/>\n+                <param name="legend_height" value="1.0"/>\n+            </section>\n+            <section name="out">\n+                <param name="format" value="svg"/>\n+                <param argument="write_taxa_colors" value="false"/>\n+                <param argument="write_sample_order" value="false"/>\n+            </section>\n+            <output name="barplot_svg" 
file="barplot3.svg" ftype="svg" compare="sim_size"/>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <param name="input" value="hmp_pathabund.txt"/>\n+            <param name="last_metadata" value="STSite"/>\n+            <param name="focal_feature" value="ANAGLYCOLYSIS-PWY"/>\n+            <section name="species">\n+                <param name="top_taxa" value="12"/>\n+                <param name="as_genera" value="false"/>\n+                <param name="exclude_unclassified" value="false"/>\n+                <param name="remove_zeros" value="false"/>\n+                <param name="sort" value="sum"/>\n+            </section>\n+            <section name="sample">\n+                <param name="focal_metadata" value="STSite"/>\n+                <param name="max_metalevels" value="7"/>\n+            </section>\n+            <section name="graphical">\n+                <param name="scaling" value="original"/>\n+                <param name="no_grid" value="true"/>\n+                <param name="height" value="8"/>\n+                <param name="width" value="4"/>\n+            </section>\n+            <section name="legend">\n+                <param name="legend_cols" value="3"/>\n+                <param name="legend_rows" value="10"/>\n+                <param name="legend_height" value="1.0"/>\n+            </section>\n+            <section name="out">\n+                <param name="format" value="png"/>\n+            </section>\n+            <output name="barplot_png" file="barplot4.png" ftype="png" compare="sim_size"/>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+@HELP_HEADER@\n+\n+This tool produces plots of stratified HUMAnN features and includes many options for sorting and scaling data\n+\n+Here is an example of a HUMAnN barplot for a pathway (denitrification) that was preferentially enriched in Human Microbiome Project oral samples relative to other body sites.\n+This figure uses many options from humann_barplot, including 
regrouping by genus, pseudolog scaling, and sorting samples by similarity and metadata:\n+\n+.. image:: $PATH_TO_IMAGES/731303924-page_DENITRIFICATION-PWY.png\n+   :width: 800\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 5240d62d864d humann_genefamilies_genus_level
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann_genefamilies_genus_level Wed May 12 09:00:09 2021 +0000
[
@@ -0,0 +1,52 @@
+<!-- The tool is broken with the current version of HUMAnN. Once it is fixed (PR merged), we can update this wrapper and add the XML extension to enable it -->
+<tool id="humann_genefamilies_genus_level" name="Create a genus level" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>gene families and pathways from HUMAnN species level gene families</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+humann_genefamilies_genus_level 
+    --input '$input'
+    --output '$gene_families'
+&&
+humann
+    --input '$gene_families'
+    --output '$pathways'
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tsv,tabular" label="Species level gene families" help="Direct output from HUMAnN"/>
+    </inputs>
+    <outputs>
+        <data name="gene_families" format="tabular" label="${tool.name} on ${on_string}: Genus level gene families"/>
+        <data name="pathways" format="tabular" label="${tool.name} on ${on_string}: Genus level pathways"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <output name="gene_families" ftype="tabular" value="genus-level-gene-families.tsv" compare="sim_size">
+                <assert_contents>
+                    <has_text text="humann_Abundance"/>
+                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+                    <has_n_columns n="2"/>
+                </assert_contents>
+            </output>
+            <output name="pathways" ftype="tabular" value="genus-level-pathways.tsv" compare="sim_size">
+                <assert_contents>
+                    <has_text text="humann_Abundance"/>
+                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
+                    <has_n_columns n="2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+By default, the gene families and pathways output files from HUMAnN are species level.
+This tool generates genus level gene families and pathways
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 5240d62d864d humann_infer_taxonomy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann_infer_taxonomy Wed May 12 09:00:09 2021 +0000
[
@@ -0,0 +1,81 @@
+<!-- The tool is broken with the current version of HUMAnN. Once it is fixed, we can update this wrapper and add the XML extension to enable it -->
+<tool id="humann_infer_taxonomy" name="Infer taxonomy" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>for "unclassified" taxonomy in HUMAnN generated gene families</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+humann_infer_taxonomy
+    --input '$input'
+    --output '$output'
+    --level '$level'
+    --database '$database.fields.path'
+    --mode '$mode'
+    --lca-choice '$lca_choice'
+    --threshold $threshold
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tsv,tabular" label="Gene family table"/>
+        <param argument="--level" type="select" label="Desired level for taxonomic estimation/summation">
+            <option value="Kingdom">Kingdom</option>
+            <option value="Phylum">Phylum</option>
+            <option value="Class">Class</option>
+            <option value="Order">Order</option>
+            <option value="Family" selected="true">Family</option>
+            <option value="Genus">Genus</option>
+        </param>
+        <param argument="--database" type="select" label="UniRef-specific taxonomy database">
+            <options from_data_table="humann_utility_mapping">
+                <validator message="No utility mapping is available" type="no_options" />
+                <filter type="regexp" column="2" value="tol-lca" />
+            </options>
+        </param>
+        <param argument="--mode" type="select" label="Rows to include in the estimation/summation">
+            <option value="c_tmode" selected="true">Totals</option>
+            <option value="c_umode">Unclassified</option>
+            <option value="c_smode">Stratified</option>
+        </param>
+        <param argument="--lca-choice" type="select" label="Per-gene taxonomic annotation to consider">
+            <option value="source_tax">Source taxonomy</option>
+            <option value="uniref_lca">UniRef lowest common ancestor (LCA)</option>
+            <option value="humann_lca" selected="true">HUMAnN lowest common ancestor (LCA)</option>
+        </param>
+        <param argument="--threshold" type="float" value="1e-3" label="Minimum frequency for a new taxon to be included"/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value=""/>
+            <param name="level" value="Kingdom"/>
+            <param name="database" value=""/>
+            <param name="mode" value="c_tmode"/>
+            <param name="lca_choice" value="source_tax"/>
+            <param name="threshold" value="1e-3"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text=""/>
+                    <has_line line=""/>
+                    <has_line_matching expression=""/>
+                    <has_n_columns n=""/>
+                    <has_size value="" delta=""/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Based on the lowest common ancestor (LCA) annotation
+of each UniRef50/90 cluster, this tool infers approximate taxonomy 
+for unclassified features at a target level of resolution.
+
+It will modify features of known genus/species to match 
+target level.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 5240d62d864d macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,38 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">3.0.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.01</token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_3174</edam_topic>
+            <edam_topic>topic_0194</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_2478</edam_operation>
+            <edam_operation>operation_0324</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">humann</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>humann --version</version_command>
+    </xml>
+    <token name="@HELP_HEADER@">
+What it does
+============
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data.
+
+Read more about the tool: http://huttenhower.sph.harvard.edu/humann
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1002358</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 5240d62d864d static/images/731303924-page_DENITRIFICATION-PWY.png
b
Binary file static/images/731303924-page_DENITRIFICATION-PWY.png has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/barplot1.png
b
Binary file test-data/barplot1.png has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/barplot2.pdf
b
Binary file test-data/barplot2.pdf has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/barplot3.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/barplot3.svg Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,28212 @@\n+<?xml version="1.0" encoding="utf-8" standalone="no"?>\n+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"\n+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n+<svg height="288pt" version="1.1" viewBox="0 0 576 288" width="576pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">\n+ <metadata>\n+  <rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n+   <cc:Work>\n+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>\n+    <dc:date>2021-05-07T16:19:03.354536</dc:date>\n+    <dc:format>image/svg+xml</dc:format>\n+    <dc:creator>\n+     <cc:Agent>\n+      <dc:title>Matplotlib v3.4.1, https://matplotlib.org/</dc:title>\n+     </cc:Agent>\n+    </dc:creator>\n+   </cc:Work>\n+  </rdf:RDF>\n+ </metadata>\n+ <defs>\n+  <style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n+ </defs>\n+ <g id="figure_1">\n+  <g id="patch_1">\n+   <path d="M 0 288 \n+L 576 288 \n+L 576 0 \n+L 0 0 \n+z\n+" style="fill:#ffffff;"/>\n+  </g>\n+  <g id="axes_1">\n+   <g id="patch_2">\n+    <path d="M 72 132.1344 \n+L 518.4 132.1344 \n+L 518.4 34.56 \n+L 72 34.56 \n+z\n+" style="fill:#ffffff;"/>\n+   </g>\n+   <g id="line2d_1">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 132.1344 \n+L 518.4 132.1344 \n+" style="fill:none;stroke:#bfbfbf;stroke-dasharray:3.7,1.6;stroke-dashoffset:0;"/>\n+   </g>\n+   <g id="line2d_2">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 102.346556 \n+L 518.4 102.346556 \n+" style="fill:none;stroke:#bfbfbf;stroke-dasharray:3.7,1.6;stroke-dashoffset:0;"/>\n+   </g>\n+   <g id="line2d_3">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 72.558713 \n+L 518.4 72.558713 \n+" style="fill:none;stroke:#bfbfbf;stroke-dasharray:3.7,1.6;stroke-dashoffset:0;"/>\n+   </g>\n+   <g id="line2d_4">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 42.770869 \n+L 518.4 42.770869 \n+" 
style="fill:none;stroke:#bfbfbf;stroke-dasharray:3.7,1.6;stroke-dashoffset:0;"/>\n+   </g>\n+   <g id="line2d_5">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 12.983026 \n+L 518.4 12.983026 \n+" style="fill:none;stroke:#bfbfbf;stroke-dasharray:3.7,1.6;stroke-dashoffset:0;"/>\n+   </g>\n+   <g id="patch_3">\n+    <path clip-path="url(#p144fb674d8)" d="M 72 132.1344 \n+L 73.703817 132.1344 \n+L 73.703817 132.1344 \n+L 72 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_4">\n+    <path clip-path="url(#p144fb674d8)" d="M 73.703817 132.1344 \n+L 75.407634 132.1344 \n+L 75.407634 132.1344 \n+L 73.703817 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_5">\n+    <path clip-path="url(#p144fb674d8)" d="M 75.407634 132.1344 \n+L 77.11145 132.1344 \n+L 77.11145 132.1344 \n+L 75.407634 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_6">\n+    <path clip-path="url(#p144fb674d8)" d="M 77.11145 132.1344 \n+L 78.815267 132.1344 \n+L 78.815267 132.1344 \n+L 77.11145 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_7">\n+    <path clip-path="url(#p144fb674d8)" d="M 78.815267 132.1344 \n+L 80.519084 132.1344 \n+L 80.519084 132.1344 \n+L 78.815267 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_8">\n+    <path clip-path="url(#p144fb674d8)" d="M 80.519084 132.1344 \n+L 82.222901 132.1344 \n+L 82.222901 132.1344 \n+L 80.519084 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_9">\n+    <path clip-path="url(#p144fb674d8)" d="M 82.222901 132.1344 \n+L 83.926718 132.1344 \n+L 83.926718 132.1344 \n+L 82.222901 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_10">\n+    <path clip-path="url(#p144fb674d8)" d="M 83.926718 132.1344 \n+L 85.630534 132.1344 \n+L 85.630534 132.1344 \n+L 83.926718 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_11">\n+    <path clip-path="url(#p144fb674d8)" d="M 85.630534 132.1344 \n+L 
87.334351 132.1344 \n+L 87.334351 132.1344 \n+L 85.630534 132.1344 \n+z\n+" style="fill:#8c564b;"/>\n+   </g>\n+   <g id="patch_12">\n+    <path clip-pa'..b'685547" xlink:href="#DejaVuSans-6f"/>\n+     <use x="163.867188" xlink:href="#DejaVuSans-6f"/>\n+     <use x="225.048828" xlink:href="#DejaVuSans-6c"/>\n+    </g>\n+   </g>\n+   <g id="text_24">\n+    <!-- Supragingival_plaque -->\n+    <g transform="translate(382.992 202.021913)scale(0.1 -0.1)">\n+     <defs>\n+      <path d="M 2906 1791 \n+Q 2906 2416 2648 2759 \n+Q 2391 3103 1925 3103 \n+Q 1463 3103 1205 2759 \n+Q 947 2416 947 1791 \n+Q 947 1169 1205 825 \n+Q 1463 481 1925 481 \n+Q 2391 481 2648 825 \n+Q 2906 1169 2906 1791 \n+z\n+M 3481 434 \n+Q 3481 -459 3084 -895 \n+Q 2688 -1331 1869 -1331 \n+Q 1566 -1331 1297 -1286 \n+Q 1028 -1241 775 -1147 \n+L 775 -588 \n+Q 1028 -725 1275 -790 \n+Q 1522 -856 1778 -856 \n+Q 2344 -856 2625 -561 \n+Q 2906 -266 2906 331 \n+L 2906 616 \n+Q 2728 306 2450 153 \n+Q 2172 0 1784 0 \n+Q 1141 0 747 490 \n+Q 353 981 353 1791 \n+Q 353 2603 747 3093 \n+Q 1141 3584 1784 3584 \n+Q 2172 3584 2450 3431 \n+Q 2728 3278 2906 2969 \n+L 2906 3500 \n+L 3481 3500 \n+L 3481 434 \n+z\n+" id="DejaVuSans-67" transform="scale(0.015625)"/>\n+      <path d="M 947 1747 \n+Q 947 1113 1208 752 \n+Q 1469 391 1925 391 \n+Q 2381 391 2643 752 \n+Q 2906 1113 2906 1747 \n+Q 2906 2381 2643 2742 \n+Q 2381 3103 1925 3103 \n+Q 1469 3103 1208 2742 \n+Q 947 2381 947 1747 \n+z\n+M 2906 525 \n+Q 2725 213 2448 61 \n+Q 2172 -91 1784 -91 \n+Q 1150 -91 751 415 \n+Q 353 922 353 1747 \n+Q 353 2572 751 3078 \n+Q 1150 3584 1784 3584 \n+Q 2172 3584 2448 3432 \n+Q 2725 3281 2906 2969 \n+L 2906 3500 \n+L 3481 3500 \n+L 3481 -1331 \n+L 2906 -1331 \n+L 2906 525 \n+z\n+" id="DejaVuSans-71" transform="scale(0.015625)"/>\n+     </defs>\n+     <use xlink:href="#DejaVuSans-53"/>\n+     <use x="63.476562" xlink:href="#DejaVuSans-75"/>\n+     <use x="126.855469" xlink:href="#DejaVuSans-70"/>\n+     <use x="190.332031" 
xlink:href="#DejaVuSans-72"/>\n+     <use x="231.445312" xlink:href="#DejaVuSans-61"/>\n+     <use x="292.724609" xlink:href="#DejaVuSans-67"/>\n+     <use x="356.201172" xlink:href="#DejaVuSans-69"/>\n+     <use x="383.984375" xlink:href="#DejaVuSans-6e"/>\n+     <use x="447.363281" xlink:href="#DejaVuSans-67"/>\n+     <use x="510.839844" xlink:href="#DejaVuSans-69"/>\n+     <use x="538.623047" xlink:href="#DejaVuSans-76"/>\n+     <use x="597.802734" xlink:href="#DejaVuSans-61"/>\n+     <use x="659.082031" xlink:href="#DejaVuSans-6c"/>\n+     <use x="686.865234" xlink:href="#DejaVuSans-5f"/>\n+     <use x="736.865234" xlink:href="#DejaVuSans-70"/>\n+     <use x="800.341797" xlink:href="#DejaVuSans-6c"/>\n+     <use x="828.125" xlink:href="#DejaVuSans-61"/>\n+     <use x="889.404297" xlink:href="#DejaVuSans-71"/>\n+     <use x="952.880859" xlink:href="#DejaVuSans-75"/>\n+     <use x="1016.259766" xlink:href="#DejaVuSans-65"/>\n+    </g>\n+   </g>\n+   <g id="text_25">\n+    <!-- Tongue_dorsum -->\n+    <g transform="translate(382.992 210.153113)scale(0.1 -0.1)">\n+     <defs>\n+      <path d="M -19 4666 \n+L 3928 4666 \n+L 3928 4134 \n+L 2272 4134 \n+L 2272 0 \n+L 1638 0 \n+L 1638 4134 \n+L -19 4134 \n+L -19 4666 \n+z\n+" id="DejaVuSans-54" transform="scale(0.015625)"/>\n+     </defs>\n+     <use xlink:href="#DejaVuSans-54"/>\n+     <use x="44.083984" xlink:href="#DejaVuSans-6f"/>\n+     <use x="105.265625" xlink:href="#DejaVuSans-6e"/>\n+     <use x="168.644531" xlink:href="#DejaVuSans-67"/>\n+     <use x="232.121094" xlink:href="#DejaVuSans-75"/>\n+     <use x="295.5" xlink:href="#DejaVuSans-65"/>\n+     <use x="357.023438" xlink:href="#DejaVuSans-5f"/>\n+     <use x="407.023438" xlink:href="#DejaVuSans-64"/>\n+     <use x="470.5" xlink:href="#DejaVuSans-6f"/>\n+     <use x="531.681641" xlink:href="#DejaVuSans-72"/>\n+     <use x="572.794922" xlink:href="#DejaVuSans-73"/>\n+     <use x="624.894531" xlink:href="#DejaVuSans-75"/>\n+     <use x="688.273438" 
xlink:href="#DejaVuSans-6d"/>\n+    </g>\n+   </g>\n+  </g>\n+ </g>\n+ <defs>\n+  <clipPath id="p144fb674d8">\n+   <rect height="97.5744" width="446.4" x="72" y="34.56"/>\n+  </clipPath>\n+  <clipPath id="pe31b1f08dc">\n+   <rect height="8.8704" width="446.4" x="72" y="141.0048"/>\n+  </clipPath>\n+ </defs>\n+</svg>\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/barplot4.png
b
Binary file test-data/barplot4.png has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/cpm_community_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cpm_community_renormalized_pathway_abundance.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,5 @@
+# Pathway humann_Abundance
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 578694
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 578694
+PWY-4203: volatile benzenoid biosynthesis I (ester formation) 421306
+PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified 421306
b
diff -r 000000000000 -r 5240d62d864d test-data/demo-taxonomic-profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo-taxonomic-profile.tabular Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,12 @@
+#full-db
+#metaphlan in --input_type fasta --read_min_len 70 --bt2_ps very-sensitive --min_mapq_val 5 --bowtie2db test-data/test-db/metaphlan-db --index demo-db-v30 -t rel_ab --tax_lev a --min_cu_len 2000 --add_viruses --stat_q 0.2 --perc_nonzero 0.33 --avoid_disqm --sample_id_key SampleID --sample_id Metaphlan_Analysis -o /tmp/tmp7hcectnz/files/000/dataset_2.dat --bowtie2out bowtie2out -s /tmp/tmp7hcectnz/files/000/dataset_4.dat --biom /tmp/tmp7hcectnz/files/000/dataset_5.dat --nproc 1
+#SampleID Metaphlan_Analysis
+#clade_name NCBI_tax_id relative_abundance additional_species
+k__Bacteria 2 100.0
+k__Bacteria|p__Bacteroidetes 2|976 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 2|976|200643 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 2|976|200643|171549 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 2|976|200643|171549|815 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 2|976|200643|171549|815|816 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus 2|976|200643|171549|815|816|821 50.80292
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei 2|976|200643|171549|815|816|357276 49.19708
b
diff -r 000000000000 -r 5240d62d864d test-data/demo.fasta.gz
b
Binary file test-data/demo.fasta.gz has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/demo.fastq.gz
b
Binary file test-data/demo.fastq.gz has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/demo.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo.sam Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,5000 @@\n+s__Bacteroides_dorei_read000001\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_R7NX76|UniRef50_R5GDB8|927\t423\t11\t100M\t*\t0\t0\tTTTCGTATTAGTGGAGTATGGCATTGAAAGTACGGATGATGACACGTTGCGCAGGATAAACCGGGGACATACTTTTGCCGTTTCTGCAGAGGCTGTTCGG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXS:i:-30\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:3T38A57\tYT:Z:UU\n+s__Bacteroides_dorei_read000002\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6L5K0|UniRef50_A6L5K0|468\t88\t25\t100M\t*\t0\t0\tTTTACTGTAGTCGCCCTATTGGGATTACAGGATTCAGACTCTAACATAACCATCGGAAACAACTTCGAGAGCAAAAAGCTAGGTAAAAAAGGAATTTTCA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-6\tXS:i:-42\tXN:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tNM:i:1\tMD:Z:96A3\tYT:Z:UU\n+s__Bacteroides_dorei_read000003\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6L5D4|UniRef50_C6X591|1338\t1159\t0\t100M\t*\t0\t0\tAATACACGACAGGAAGATTATTCCGGCAAGAATTTTTCGGAGTTTGAACTGAGCGCTATGGAAAAACGTCATATTGCTAAAGTTCTGCAACATACTAAAG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-24\tXS:i:-36\tXN:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tNM:i:4\tMD:Z:5C28A16G0C47\tYT:Z:UU\n+s__Bacteroides_dorei_read000004\t16\t821|g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L7A5|UniRef50_R7NQK6|948\t802\t1\t100M\t*\t0\t0\tCAACACTCACGGGAAATCCCCTTCAATGAATTATGGGACAGAATGCTAAGAATCAGAAAAAACAATTATATACATCTTCCACTTTACCTAAAAATAAAGT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:85C0T13\tYT:Z:UU\n+s__Bacteroides_dorei_read000005\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6L5C5|UniRef50_R6HHA5|1347\t637\t23\t100M\t*\t0\t0\tATAACATCCTTACATTCTGATGAAATTTGTTTCCGAGGTATGGAACATACTGGACCTCTTACATACGGTGAGGCCGAGAATTTTTTTGATAGCGGCATTG\tIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-24\tXN:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tNM:i:4\tMD:Z:51C2A16A12A15\tYT:Z:UU\n+s__Bacteroides_dorei_read000006\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_unknown|UniRef50_D5EYI2|2565\t184\t7\t100M\t*\t0\t0\tAATCCCGTTGCTTTATTGGAACGTTTGAGCTATGAAAAGCAGGAGGCATTAACCCAGGATAAGGTTATTGTAAAGCGCCTGAACGATGTATATGCTAAAT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXS:i:-30\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:40T24G12A21\tYT:Z:UU\n+s__Bacteroides_dorei_read000007\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_R7P3C8|UniRef50_X5DCD9|1692\t629\t1\t100M\t*\t0\t0\tTGGCAGGAGAAGCCTATCTTCGGATGGGTATGCGTGATGCTTCCTAATTCAAGAAGGCGGAAGATGCGGTGACGCCTATTATACCAGGTGGCAAATATGA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXS:i:-18\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:46T36A7T8\tYT:Z:UU\n+s__Bacteroides_dorei_read000009\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_unknown|UniRef50_R7DSH7|2871\t804\t2\t100M\t*\t0\t0\tAGGTACAGCCGGCTTTGCCTCCACCGAGAAAAAACCAGAAAAACCCTTTGCACAATGGATTCCCCGTATTCCCGAAACAGGAAAATATGCCGTCGATGTG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-24\tXS:i:-30\tXN:i:0\tXM:i:4\tXO:i:0\tXG:i:0\tNM:i:4\tMD:Z:10C25G6G50T5\tYT:Z:UU\n+s__Bacteroides_dorei_read000012\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_R6HWU5|UniRef50_R5JSB1|459\t11\t40\t100M\t*\t0\t0\tTATCTTATAAAATGTCCTACTATGCTTTATATGTCTGCTTTGCCGTTATTTTGGTAGTATTGGGCATGTTCTTCTTGGTAGGTTATAATAATCCGGTGGG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:4A77A17\tYT:Z:UU\n+s__Bacteroides_dorei_read000013\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_R6AN66|UniRef50_P9WQK2|1689\t13
94\t12\t100M\t*\t0\t0\tTGGCACTGAAGGAAGAAGGCAACGTGCTGCTGCTGGACGAGCCTACCAATGATATTGACGTGAACTCGCTGCGCGCGCTGGAGGAAGGTCTGGACGATTT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'..b'I8WRK2|UniRef50_D6CZZ3|396\t242\t40\t100M\t*\t0\t0\tTCTTCGAGGATGAAATCATGCAGTGGATTGAACAGGGTGGAACATCGGGGACAGGCTGCGAGGAAGATTTCAACAGGCGGCTCGAAGCGTTGCGCAGCGG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:37C18A43\tYT:Z:UU\n+s__Bacteroides_dorei_read006087\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_I9F3X8|UniRef50_R6FQJ3|1026\t106\t40\t100M\t*\t0\t0\tGTTCCGGTAAACCTCACCGCCACGTGGGAGCGGGAATGGGAACGCAGCTACCAGACCGACTGGCAGTCCGTGTGGAACCCTGCATGGCCCCAACGCTATG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:4A26A68\tYT:Z:UU\n+s__Bacteroides_dorei_read006089\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_unknown|UniRef50_unknown|2841\t2127\t7\t100M\t*\t0\t0\tGGGCTCCGGTCTGGCTACAAGTTCTATTCTTGCATCGACTGTGCTGGGGGCAATCTCAGATTTTTGCGGGCTGAACTGGGATAAGAACGAGATATGTAAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXS:i:-24\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:14G42T42\tYT:Z:UU\n+s__Bacteroides_dorei_read006092\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6KXU4|UniRef50_A6KXU4|1158\t387\t6\t100M\t*\t0\t0\tCCTGCTCATCTACCCATGCATTCATAAGCTTGTACGTGTCAATAAATCATTCATCGTGAAACGCGGCGTCAACATGCGGCAGAAACTGGAAGCATCTGCT\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXS:i:-24\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:18G11G36C32\tYT:Z:UU\n+s__Bacteroides_dorei_read006093\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_unknown|UniRef50_S0F6Q6|2250\t708\t23\t100M\t*\t0\t0\tCGTGAGTGCTACCGGACACCAACAACGTTTAGCGCGTGAAGGTATCG
TGATGAAGAACGTTGTGGCTGCTACGGAGGATGCTGTCAGACAGGAACGGTTG\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-30\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:27G17G13G7C2T29\tYT:Z:UU\n+s__Bacteroides_dorei_read006094\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6KX21|UniRef50_A6KX21|1848\t788\t1\t100M\t*\t0\t0\tACAGAATTAGAATGATGAACAAAAAAAGAACCCGCACCACAGGGCATATCAAATATGCCTTGTCTGCACCGCTGACTGCTGCTCTACTCTTGGTCAGCAA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:9A53T36\tYT:Z:UU\n+s__Bacteroides_dorei_read006095\t16\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_R6HWC5|UniRef50_R6HYK5|1164\t333\t5\t100M\t*\t0\t0\tCGAAACTTTGATAAACGAGAAGGGTAGTCCTGTCTTGGGGTTGCATGTAGAAGGCCCTTACCTGAATTCGCAAAGCGCAGGTCAGCAGTTTGCCGGGAAA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-30\tXS:i:-60\tXN:i:0\tXM:i:5\tXO:i:0\tXG:i:0\tNM:i:5\tMD:Z:12G10A46A4A6G17\tYT:Z:UU\n+s__Bacteroides_dorei_read006096\t16\t821|g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_C3Q3D8|UniRef50_C3Q3D8|1362\t50\t1\t100M\t*\t0\t0\tCATATTTTTATCCTACAGCAGATATTAGGGTAATAAATAAGGAAACATATGATATATTATCGGCATTGAAAGAATATAAAGATATTCGTGAAGCCTTAGA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-12\tXS:i:-12\tXN:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tNM:i:2\tMD:Z:6A80A12\tYT:Z:UU\n+s__Bacteroides_dorei_read006097\t0\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_A6L3W8|UniRef50_C6Y1M5|1341\t1021\t16\t100M\t*\t0\t0\tATTCTGGAGGATATCCGTGTAAGCAATATTGTGATGAGTAAAATCAAGAAAGAAGCTATTGTTCTCAATCTAAAATATAGCAAGATGCCTGCCGAACCGA\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:-18\tXS:i:-48\tXN:i:0\tXM:i:3\tXO:i:0\tXG:i:0\tNM:i:3\tMD:Z:3G37T29G28\tYT:Z:UU\n+s__Bacteroides_dorei_read006098\t16
\t357276|g__Bacteroides.s__Bacteroides_dorei|UniRef90_unknown|UniRef50_E6SR52|3096\t960\t32\t100M\t*\t0\t0\tCAATGAAGTAATTCCTGTCAAAGTAGGTTTCCGCAAGATTGAATTGAAAGGTGACCAGATCTTAGTTAACGGTAAAGCAGTCCTGTTCAAGGGGGCCGAC\tIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\tAS:i:0\tXS:i:-24\tXN:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tNM:i:0\tMD:Z:100\tYT:Z:UU\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/demo_genefamilies.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_genefamilies.tsv Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,866 @@\n+# Gene Family\thumann_Abundance-RPKs\n+UNMAPPED\t18900.0000000000\n+UniRef90_A0A174QBF2\t200.0000000000\n+UniRef90_A0A174QBF2|g__Bacteroides.s__Bacteroides_vulgatus\t200.0000000000\n+UniRef90_A0A078RDY6\t166.6666666667\n+UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus\t166.6666666667\n+UniRef90_G1ULL9\t166.6666666667\n+UniRef90_G1ULL9|g__Bacteroides.s__Bacteroides_vulgatus\t166.6666666667\n+UniRef90_A0A015QIN1\t66.6666666667\n+UniRef90_A0A015QIN1|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_A0A069SBX4\t66.6666666667\n+UniRef90_A0A069SBX4|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_A0A069SHG5\t66.6666666667\n+UniRef90_A0A069SHG5|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_A0A078RD64\t66.6666666667\n+UniRef90_A0A078RD64|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_A0A174PVG2\t66.6666666667\n+UniRef90_A0A174PVG2|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_A0A173XK40\t56.3492063492\n+UniRef90_A0A173XK40|g__Bacteroides.s__Bacteroides_vulgatus\t56.3492063492\n+UniRef90_A0A078REY8\t55.5555555556\n+UniRef90_A0A078REY8|g__Bacteroides.s__Bacteroides_vulgatus\t55.5555555556\n+UniRef90_A0A174ILE1\t47.6190476190\n+UniRef90_A0A174ILE1|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_D4V7T1\t47.6190476190\n+UniRef90_D4V7T1|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_D4VBJ0\t47.6190476190\n+UniRef90_D4VBJ0|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_I3YNY4\t47.6190476190\n+UniRef90_I3YNY4|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_A0A078QZV1\t44.4444444444\n+UniRef90_A0A078QZV1|g__Bacteroides.s__Bacteroides_vulgatus\t44.4444444444\n+UniRef90_A0A078R670\t41.6666666667\n+UniRef90_A0A078R670|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_A0A078R6L3\t41.6666666667\n+UniRef90_A0A078R6L3|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_A0A078
R911\t41.6666666667\n+UniRef90_A0A078R911|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_D4V4V0\t41.6666666667\n+UniRef90_D4V4V0|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_R9HIC6\t41.6666666667\n+UniRef90_R9HIC6|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_A0A078RFS7\t38.4615384615\n+UniRef90_A0A078RFS7|g__Bacteroides.s__Bacteroides_vulgatus\t38.4615384615\n+UniRef90_A7AE97\t37.0370370370\n+UniRef90_A7AE97|g__Bacteroides.s__Bacteroides_vulgatus\t37.0370370370\n+UniRef90_I9GA88\t36.3447559709\n+UniRef90_I9GA88|g__Bacteroides.s__Bacteroides_vulgatus\t36.3447559709\n+UniRef90_A0A078R120\t33.3333333333\n+UniRef90_A0A078R120|g__Bacteroides.s__Bacteroides_vulgatus\t33.3333333333\n+UniRef90_D4V9Q7\t33.3333333333\n+UniRef90_D4V9Q7|g__Bacteroides.s__Bacteroides_vulgatus\t33.3333333333\n+UniRef90_A0A3E4HP53\t31.2500000000\n+UniRef90_A0A3E4HP53|g__Bacteroides.s__Bacteroides_vulgatus\t31.2500000000\n+UniRef90_A0A069SKV1\t30.3030303030\n+UniRef90_A0A069SKV1|g__Bacteroides.s__Bacteroides_vulgatus\t30.3030303030\n+UniRef90_A0A173XQ94\t30.3030303030\n+UniRef90_A0A173XQ94|g__Bacteroides.s__Bacteroides_vulgatus\t30.3030303030\n+UniRef90_D4VA46\t30.3030303030\n+UniRef90_D4VA46|g__Bacteroides.s__Bacteroides_vulgatus\t30.3030303030\n+UniRef90_A0A078RD25\t28.5714285714\n+UniRef90_A0A078RD25|g__Bacteroides.s__Bacteroides_vulgatus\t28.5714285714\n+UniRef90_A0A069SV61\t27.7777777778\n+UniRef90_A0A069SV61|g__Bacteroides.s__Bacteroides_vulgatus\t27.7777777778\n+UniRef90_D4VC99\t27.7777777778\n+UniRef90_D4VC99|g__Bacteroides.s__Bacteroides_vulgatus\t27.7777777778\n+UniRef90_E5UU61\t26.1437908497\n+UniRef90_E5UU61|g__Bacteroides.s__Bacteroides_vulgatus\t26.1437908497\n+UniRef90_A0A078RDC9\t25.6410256410\n+UniRef90_A0A078RDC9|g__Bacteroides.s__Bacteroides_vulgatus\t25.6410256410\n+UniRef90_A0A174NIB7\t24.6913580247\n+UniRef90_A0A174NIB7|g__Bacteroides.s__Bacteroides_vulgatus\t24.6913580247\n+UniRef90_A0A078QYW1\t24.3902439024\n+UniRef90
_A0A078QYW1|g__Bacteroides.s__Bacteroides_vulgatus\t24.3902439024\n+UniRef90_I0PXX6\t23.8095238095\n+UniRef90_I0PXX6|g__Bacteroides.s__Bacteroides_vulga'..b'Ref90_Q8A488|g__Bacteroides.s__Bacteroides_vulgatus\t6.5359477124\n+UniRef90_A0A0P0LLZ6\t6.5359477124\n+UniRef90_A0A0P0LLZ6|g__Bacteroides.s__Bacteroides_vulgatus\t6.5359477124\n+UniRef90_R7J8P5\t6.5146579805\n+UniRef90_R7J8P5|g__Bacteroides.s__Bacteroides_vulgatus\t6.5146579805\n+UniRef90_D4IQJ2\t6.4882400649\n+UniRef90_D4IQJ2|g__Bacteroides.s__Bacteroides_vulgatus\t6.4882400649\n+UniRef90_P94598\t6.4724919094\n+UniRef90_P94598|g__Bacteroides.s__Bacteroides_vulgatus\t6.4724919094\n+UniRef90_I8W803\t6.4279155188\n+UniRef90_I8W803|g__Bacteroides.s__Bacteroides_vulgatus\t6.4279155188\n+UniRef90_A0A081U579\t6.4214827788\n+UniRef90_A0A081U579|g__Bacteroides.s__Bacteroides_vulgatus\t6.4214827788\n+UniRef90_B5A7G1\t6.4102564103\n+UniRef90_B5A7G1|g__Bacteroides.s__Bacteroides_vulgatus\t6.4102564103\n+UniRef90_B6VTH0\t6.4102564103\n+UniRef90_B6VTH0|g__Bacteroides.s__Bacteroides_vulgatus\t6.4102564103\n+UniRef90_A0A076IVE7\t6.3291139241\n+UniRef90_A0A076IVE7|g__Bacteroides.s__Bacteroides_vulgatus\t6.3291139241\n+UniRef90_I9R4C5\t6.3168124393\n+UniRef90_I9R4C5|g__Bacteroides.s__Bacteroides_vulgatus\t6.3168124393\n+UniRef90_R6JK25\t6.3091482650\n+UniRef90_R6JK25|g__Bacteroides.s__Bacteroides_vulgatus\t6.3091482650\n+UniRef90_A0A0P0M475\t6.2893081761\n+UniRef90_A0A0P0M475|g__Bacteroides.s__Bacteroides_vulgatus\t6.2893081761\n+UniRef90_D1JXS4\t6.2695924765\n+UniRef90_D1JXS4|g__Bacteroides.s__Bacteroides_vulgatus\t6.2695924765\n+UniRef90_A6L3D5\t6.2597809077\n+UniRef90_A6L3D5|g__Bacteroides.s__Bacteroides_vulgatus\t6.2597809077\n+UniRef90_F7LXA9\t6.2597809077\n+UniRef90_F7LXA9|g__Bacteroides.s__Bacteroides_vulgatus\t6.2597809077\n+UniRef90_A0A0P0L9E5\t6.2500000000\n+UniRef90_A0A0P0L9E5|g__Bacteroides.s__Bacteroides_vulgatus\t6.2500000000\n+UniRef90_A0A1Y3ZKK1\t6.2111801242\n+UniRef90_A0A1Y3ZKK1|g__Bacteroides.s__Bacteroides_v
ulgatus\t6.2111801242\n+UniRef90_A0A076J5J6\t6.1919504644\n+UniRef90_A0A076J5J6|g__Bacteroides.s__Bacteroides_vulgatus\t6.1919504644\n+UniRef90_A6L1K4\t6.1892130858\n+UniRef90_A6L1K4|g__Bacteroides.s__Bacteroides_vulgatus\t6.1892130858\n+UniRef90_A0A076IIX7\t6.1871616396\n+UniRef90_A0A076IIX7|g__Bacteroides.s__Bacteroides_vulgatus\t6.1871616396\n+UniRef90_A0A076IUR5\t6.1728395062\n+UniRef90_A0A076IUR5|g__Bacteroides.s__Bacteroides_vulgatus\t6.1728395062\n+UniRef90_A0A0P0M3I1\t6.1728395062\n+UniRef90_A0A0P0M3I1|g__Bacteroides.s__Bacteroides_vulgatus\t6.1728395062\n+UniRef90_A6L3D0\t6.1349693252\n+UniRef90_A6L3D0|g__Bacteroides.s__Bacteroides_vulgatus\t6.1349693252\n+UniRef90_A0A0K2HGU4\t6.1162079511\n+UniRef90_A0A0K2HGU4|g__Bacteroides.s__Bacteroides_vulgatus\t6.1162079511\n+UniRef90_A0A076J0R4\t6.1162079511\n+UniRef90_A0A076J0R4|g__Bacteroides.s__Bacteroides_vulgatus\t6.1162079511\n+UniRef90_A0A076INL3\t6.0975609756\n+UniRef90_A0A076INL3|g__Bacteroides.s__Bacteroides_vulgatus\t6.0975609756\n+UniRef90_A0A0M1W2F8\t6.0882800609\n+UniRef90_A0A0M1W2F8|g__Bacteroides.s__Bacteroides_vulgatus\t6.0882800609\n+UniRef90_I8WL14\t6.0468631897\n+UniRef90_I8WL14|g__Bacteroides.s__Bacteroides_vulgatus\t6.0468631897\n+UniRef90_D4V591\t6.0122179883\n+UniRef90_D4V591|g__Bacteroides.s__Bacteroides_vulgatus\t6.0122179883\n+UniRef90_A0A0P0LFD2\t5.9880239521\n+UniRef90_A0A0P0LFD2|g__Bacteroides.s__Bacteroides_vulgatus\t5.9880239521\n+UniRef90_I8WHI1\t5.9523809524\n+UniRef90_I8WHI1|g__Bacteroides.s__Bacteroides_vulgatus\t5.9523809524\n+UniRef90_A6L013\t5.9259259259\n+UniRef90_A6L013|g__Bacteroides.s__Bacteroides_vulgatus\t5.9259259259\n+UniRef90_D4VC11\t5.9259259259\n+UniRef90_D4VC11|g__Bacteroides.s__Bacteroides_vulgatus\t5.9259259259\n+UniRef90_A0A3E4HLD6\t5.7870370370\n+UniRef90_A0A3E4HLD6|g__Bacteroides.s__Bacteroides_vulgatus\t5.7870370370\n+UniRef90_A0A0M1W7B1\t5.5741360089\n+UniRef90_A0A0M1W7B1|g__Bacteroides.s__Bacteroides_vulgatus\t5.5741360089\n+UniRef90_A6L100\t5.5248618785\n+Un
iRef90_A6L100|g__Bacteroides.s__Bacteroides_vulgatus\t5.5248618785\n+UniRef90_U6RK63\t5.4869684499\n+UniRef90_U6RK63|g__Bacteroides.s__Bacteroides_vulgatus\t5.4869684499\n+UniRef90_A0A174ILV5\t5.4137664346\n+UniRef90_A0A174ILV5|g__Bacteroides.s__Bacteroides_vulgatus\t5.4137664346\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/demo_joined_pathabundance_pathcoverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_joined_pathabundance_pathcoverage.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+# Pathway humann_Abundance humann_Coverage
+UNMAPPED 9825.8289872883 1.0000000000
+UNINTEGRATED 3058.2089939573 1.0000000000
+UNINTEGRATED|unclassified 54.1292645042 1.0000000000
+PWY-4203: volatile benzenoid biosynthesis I (ester formation) 13.3772872602 0.8589609470
+PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified 13.3772872602 0.6438577695
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 18.3746362785 0.9781854342
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 18.3746362785 0.9117549729
b
diff -r 000000000000 -r 5240d62d864d test-data/demo_pathabundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathabundance.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+# Pathway humann_Abundance
+UNMAPPED 9825.8289872883
+UNINTEGRATED 3058.2089939573
+UNINTEGRATED|unclassified 54.1292645042
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 18.3746362785
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 18.3746362785
+PWY-4203: volatile benzenoid biosynthesis I (ester formation) 13.3772872602
+PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified 13.3772872602
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/demo_pathcoverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathcoverage.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+# Pathway humann_Coverage
+UNMAPPED 1.0000000000
+UNINTEGRATED 1.0000000000
+UNINTEGRATED|unclassified 1.0000000000
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.9781854342
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.9117549729
+PWY-4203: volatile benzenoid biosynthesis I (ester formation) 0.8589609470
+PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified 0.6438577695
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/genus-level-gene-families.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genus-level-gene-families.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,2 @@
+# Gene Family humann_Abundance-RPKs
+UNMAPPED 17559.0000000000
b
diff -r 000000000000 -r 5240d62d864d test-data/hmp_pathabund.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hmp_pathabund.txt Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,500 @@\n+FEATURE \\ SAMPLE\tSRS011084\tSRS011086\tSRS011090\tSRS011098\tSRS011111\tSRS011115\tSRS011126\tSRS011132\tSRS011134\tSRS011140\tSRS011144\tSRS011152\tSRS011239\tSRS011243\tSRS011247\tSRS011255\tSRS011263\tSRS011269\tSRS011302\tSRS011306\tSRS011310\tSRS011343\tSRS011355\tSRS011397\tSRS011405\tSRS011584\tSRS012273\tSRS012279\tSRS012281\tSRS012285\tSRS012291\tSRS012294\tSRS012663\tSRS012902\tSRS013155\tSRS013158\tSRS013164\tSRS013170\tSRS013215\tSRS013234\tSRS013239\tSRS013252\tSRS013269\tSRS013476\tSRS013502\tSRS013506\tSRS013521\tSRS013533\tSRS013542\tSRS013687\tSRS013705\tSRS013711\tSRS013723\tSRS013800\tSRS013818\tSRS013825\tSRS013836\tSRS013876\tSRS013879\tSRS013881\tSRS013945\tSRS013949\tSRS013951\tSRS013956\tSRS014124\tSRS014126\tSRS014235\tSRS014271\tSRS014287\tSRS014313\tSRS014343\tSRS014459\tSRS014464\tSRS014470\tSRS014472\tSRS014476\tSRS014494\tSRS014573\tSRS014575\tSRS014578\tSRS014613\tSRS014629\tSRS014682\tSRS014683\tSRS014684\tSRS014686\tSRS014690\tSRS014888\tSRS014890\tSRS014894\tSRS014901\tSRS014923\tSRS014979\tSRS015038\tSRS015040\tSRS015044\tSRS015051\tSRS015054\tSRS015133\tSRS015154\tSRS015158\tSRS015168\tSRS015174\tSRS015190\tSRS015209\tSRS015215\tSRS015217\tSRS015225\tSRS015264\tSRS015269\tSRS015272\tSRS015274\tSRS015278\tSRS015369\tSRS015374\tSRS015378\tSRS015395\tSRS015425\tSRS015430\tSRS015431\tSRS015434\tSRS015436\tSRS015440\tSRS015540\tSRS015574\tSRS015578\tSRS015640\tSRS015644\tSRS015646\tSRS015650\tSRS015663\tSRS015745\tSRS015752\tSRS015755\tSRS015762\tSRS015782\tSRS015890\tSRS015893\tSRS015895\tSRS015899\tSRS015937\tSRS015941\tSRS015960\tSRS015985\tSRS015989\tSRS015996\tSRS016002\tSRS016018\tSRS016033\tSRS016037\tSRS016039\tSRS016043\tSRS016056\tSRS016086\tSRS016092\tSRS016095\tSRS016111\tSRS016188\tSRS016191\tSRS016196\tSRS016200\tSRS016203\tSRS016225\tSRS016267\tSRS016292\tSRS016297\tSRS016319\tSRS016331\tSRS016335\tSRS016342\tSRS016349\tSRS016360\tSRS016434\tSRS016495\tSRS016501\tSRS016503\tSRS016513\tSRS016516\tS
RS016541\tSRS016569\tSRS016575\tSRS016581\tSRS016600\tSRS016740\tSRS016746\tSRS016752\tSRS016753\tSRS016954\tSRS016989\tSRS017007\tSRS017013\tSRS017025\tSRS017044\tSRS017076\tSRS017080\tSRS017103\tSRS017120\tSRS017127\tSRS017139\tSRS017156\tSRS017191\tSRS017209\tSRS017215\tSRS017227\tSRS017244\tSRS017247\tSRS017304\tSRS017307\tSRS017433\tSRS017439\tSRS017441\tSRS017445\tSRS017451\tSRS017497\tSRS017520\tSRS017521\tSRS017687\tSRS017697\tSRS017700\tSRS017701\tSRS017713\tSRS017808\tSRS017810\tSRS017814\tSRS017820\tSRS017821\tSRS018133\tSRS018145\tSRS018149\tSRS018157\tSRS018300\tSRS018312\tSRS018313\tSRS018329\tSRS018337\tSRS018351\tSRS018357\tSRS018359\tSRS018369\tSRS018394\tSRS018427\tSRS018439\tSRS018463\tSRS018569\tSRS018573\tSRS018575\tSRS018585\tSRS018591\tSRS018656\tSRS018661\tSRS018665\tSRS018671\tSRS018739\tSRS018769\tSRS018774\tSRS018784\tSRS018791\tSRS018817\tSRS019215\tSRS019219\tSRS019221\tSRS019225\tSRS019245\tSRS019267\tSRS019327\tSRS019329\tSRS019333\tSRS019339\tSRS019379\tSRS019381\tSRS019386\tSRS019387\tSRS019389\tSRS019391\tSRS019397\tSRS019587\tSRS019591\tSRS019597\tSRS019600\tSRS019601\tSRS019607\tSRS019968\tSRS019974\tSRS019976\tSRS019980\tSRS019986\tSRS019989\tSRS020220\tSRS020222\tSRS020226\tSRS020232\tSRS020233\tSRS020328\tSRS020334\tSRS020336\tSRS020340\tSRS020349\tSRS020386\tSRS020856\tSRS020858\tSRS020862\tSRS020869\tSRS022137\tSRS022143\tSRS022145\tSRS022149\tSRS022158\tSRS022524\tSRS022530\tSRS022532\tSRS022536\tSRS022545\tSRS022713\tSRS023346\tSRS023352\tSRS023358\tSRS042428\tSRS042457\tSRS042643\tSRS043001\tSRS043239\tSRS043663\tSRS043755\tSRS044366\tSRS044373\tSRS045004\tSRS045049\tSRS045254\tSRS045262\tSRS045313\tSRS045713\tSRS046344\tSRS046973\tSRS047824\tSRS048164\tSRS049389\tSRS049712\tSRS049900\tSRS049959\tSRS050007\tSRS050025\tSRS050029\tSRS050184\tSRS050244\tSRS050628\tSRS050752\tSRS051244\tSRS051505\tSRS051613\tSRS051941\tSRS052227\tSRS052330\tSRS052590\tSRS052604\tSRS052697\tSRS052876\tSRS053335\tSRS053398\tSRS053437\tSRS053630\
tSRS053854\tSRS054061\tSRS054590\tSRS054653\tSRS054687\tSRS054956\tSRS055118\tSRS055401\tSRS056323\tSRS056695\tSRS056796\tSRS056906\tSRS057539\tSRS057791\tSRS057807\tSRS058186\tSRS058213\tSRS058808\r\n+STSite\tStool\tTongue_dorsum\tBuccal_mucosa\tSupragingival_plaque\tPosterior_fornix\tTongue_dorsum\tSupragingival_plaque\tAnterior_nares\tStool\tTongue_dorsum\tBuccal_mucosa\tSupragingival_plaq'..b'5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.000110565\t0\t0\t0\t0\t0\t0\t0\t0\t0.000105739\t0\t0\t0\t0\t2.33738e-05\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t9.88647e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.39106e-05\t2.42838e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.000295775\t0\t0\t0\t0\t0\t6.81845e-06\t0\t0\t0\t0\t8.23749e-05\t0\t0\t0\t4.76472e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.12389e-06\t0\t0\t3.645e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\r\n+COA-PWY: coenzyme A biosynthesis 
I|g__Bulleidia.s__Bulleidia_extructa\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t5.73523e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\r\n+COA-PWY: coenzyme A biosynthesis 
I|g__Campylobacter.s__Campylobacter_hominis\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t7.29465e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t3.14158e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\r\n+COA-PWY: coenzyme A biosynthesis 
I|g__Capnocytophaga.s__Capnocytophaga_sp_CM59\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t3.57568e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t4.79523e-05\t0\t5.46675e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t3.45956e-05\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.52358e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.65151e-05\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\r\n+COA-PWY: coenzyme A biosynthesis 
I|g__Capnocytophaga.s__Capnocytophaga_sp_oral_taxon_338\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t5.52837e-07\t0\t0\t0\t1.86027e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t9.5765e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t9.11797e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2.40718e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t8.57993e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.324e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t4.34332e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.88895e-06\t0\t0\t0\t0\t0\t4.18485e-07\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1.51934e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t2.32256e-06\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\r\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/humann_nucleotide_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann_nucleotide_database.loc Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,6 @@
+# Tab separated with 4 columns:
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+chocophlan-DEMO-20210421 Demo ChocoPhlAn for HUManN humann_nucleotide_database ${__HERE__}/test-db/nucleotide-db/
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/humann_protein_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann_protein_database.loc Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,6 @@
+# Tab separated with 4 columns:
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+uniref-DEMO_diamond-20210421 Demo UniRef for HUManN DEMO_diamond ${__HERE__}/test-db/protein-db/
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/humann_utility_mapping.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann_utility_mapping.loc Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,7 @@
+# Tab separated with 4 columns:
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+utility_mapping-full-map_uniref90_name-3.0.0-29042021 Full mapping: UniRef90 from protein names full-map_uniref90_name ${__HERE__}/test-db/utility_mapping/map_uniref90_name.txt
+utility_mapping-full-map_go_uniref90-3.0.0-29042021 Full mapping: GO from UniRef90 full-map_go_uniref90 ${__HERE__}/test-db/utility_mapping/map_go_uniref90.txt
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/metaphlan_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metaphlan_database.loc Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,6 @@
+# Tab separated with 4 columns:
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+metaphlan-demo-db-20210421 MetaPhlan Test Database demo-db-v30 ${__HERE__}/test-db/metaphlan-db
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d test-data/regrouped_gene_families_to_infogo1000.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_infogo1000.tsv Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,1393 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t15490.72\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t8949.382\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t6476.05\n+UNGROUPED|unclassified\t65.288\n+GO:0000015\t4.177\n+GO:0000015|g__Bacteroides.s__Bacteroides_dorei\t2.506\n+GO:0000015|g__Bacteroides.s__Bacteroides_vulgatus\t1.671\n+GO:0000027\t7.937\n+GO:0000027|g__Bacteroides.s__Bacteroides_dorei\t7.937\n+GO:0000049\t29.232\n+GO:0000049|g__Bacteroides.s__Bacteroides_dorei\t7.205\n+GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus\t22.027\n+GO:0000105\t22.302\n+GO:0000105|g__Bacteroides.s__Bacteroides_dorei\t9.093\n+GO:0000105|g__Bacteroides.s__Bacteroides_vulgatus\t13.209\n+GO:0000107\t5.077\n+GO:0000107|g__Bacteroides.s__Bacteroides_vulgatus\t5.077\n+GO:0000150\t29.481\n+GO:0000150|g__Bacteroides.s__Bacteroides_dorei\t14.86\n+GO:0000150|g__Bacteroides.s__Bacteroides_vulgatus\t14.62\n+GO:0000155\t98.896\n+GO:0000155|g__Bacteroides.s__Bacteroides_dorei\t45.55\n+GO:0000155|g__Bacteroides.s__Bacteroides_vulgatus\t53.346\n+GO:0000160\t71.811\n+GO:0000160|g__Bacteroides.s__Bacteroides_dorei\t43.683\n+GO:0000160|g__Bacteroides.s__Bacteroides_vulgatus\t28.128\n+GO:0000162\t6.418\n+GO:0000162|g__Bacteroides.s__Bacteroides_dorei\t1.115\n+GO:0000162|g__Bacteroides.s__Bacteroides_vulgatus\t5.303\n+GO:0000179\t5.376\n+GO:0000179|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0000179|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0000287\t144.517\n+GO:0000287|g__Bacteroides.s__Bacteroides_dorei\t69.932\n+GO:0000287|g__Bacteroides.s__Bacteroides_vulgatus\t60.803\n+GO:0000287|unclassified\t13.782\n+GO:0000453\t4.823\n+GO:0000453|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0000453|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0000723\t4.363\n+GO:0000723|g__Bacteroides.s__Bacteroides_dorei\t1.992\n+GO:0000723|g__Bacteroides.s__Bacteroides_vulgatus\t2.371\n+GO:0000917\t7.611\n+GO:0000917|g__Bacteroides.s__Bacteroides
_dorei\t5.952\n+GO:0000917|g__Bacteroides.s__Bacteroides_vulgatus\t1.658\n+GO:0000967\t3.115\n+GO:0000967|g__Bacteroides.s__Bacteroides_vulgatus\t3.115\n+GO:0002094\t2.525\n+GO:0002094|g__Bacteroides.s__Bacteroides_vulgatus\t2.525\n+GO:0002100\t5.952\n+GO:0002100|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0002161\t0.731\n+GO:0002161|g__Bacteroides.s__Bacteroides_vulgatus\t0.731\n+GO:0002935\t7.361\n+GO:0002935|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0002935|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0002949\t4.47\n+GO:0002949|g__Bacteroides.s__Bacteroides_dorei\t2.778\n+GO:0002949|g__Bacteroides.s__Bacteroides_vulgatus\t1.692\n+GO:0003684\t9.246\n+GO:0003684|g__Bacteroides.s__Bacteroides_dorei\t5.554\n+GO:0003684|g__Bacteroides.s__Bacteroides_vulgatus\t3.692\n+GO:0003697\t24.977\n+GO:0003697|g__Bacteroides.s__Bacteroides_dorei\t16.618\n+GO:0003697|g__Bacteroides.s__Bacteroides_vulgatus\t8.358\n+GO:0003725\t7.91\n+GO:0003725|g__Bacteroides.s__Bacteroides_dorei\t5.972\n+GO:0003725|g__Bacteroides.s__Bacteroides_vulgatus\t1.938\n+GO:0003727\t12.121\n+GO:0003727|g__Bacteroides.s__Bacteroides_dorei\t6.061\n+GO:0003727|g__Bacteroides.s__Bacteroides_vulgatus\t6.061\n+GO:0003729\t4.739\n+GO:0003729|g__Bacteroides.s__Bacteroides_vulgatus\t4.739\n+GO:0003735\t156.347\n+GO:0003735|g__Bacteroides.s__Bacteroides_dorei\t76.023\n+GO:0003735|g__Bacteroides.s__Bacteroides_vulgatus\t80.324\n+GO:0003743\t6.762\n+GO:0003743|g__Bacteroides.s__Bacteroides_dorei\t4.016\n+GO:0003743|g__Bacteroides.s__Bacteroides_vulgatus\t2.746\n+GO:0003746\t16.907\n+GO:0003746|g__Bacteroides.s__Bacteroides_dorei\t13.849\n+GO:0003746|g__Bacteroides.s__Bacteroides_vulgatus\t3.058\n+GO:0003755\t33.832\n+GO:0003755|g__Bacteroides.s__Bacteroides_dorei\t14.287\n+GO:0003755|g__Bacteroides.s__Bacteroides_vulgatus\t19.544\n+GO:0003796\t3.906\n+GO:0003796|g__Bacteroides.s__Bacteroides_dorei\t3.906\n+GO:0003848\t9.37\n+GO:0003848|g__Bacteroides.s__Bacteroides_vulgatus\t9.37\n+GO:0003852\t3.462
\n+GO:0003852|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+GO:0003852|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+GO:0003856\t4.154\n+GO:0003856|g__Bacteroides.s__Bacteroides_dorei\t4.154\n+G'..b's__Bacteroides_vulgatus\t13.172\n+GO:0048472\t4.796\n+GO:0048472|g__Bacteroides.s__Bacteroides_dorei\t2.398\n+GO:0048472|g__Bacteroides.s__Bacteroides_vulgatus\t2.398\n+GO:0048500\t7.353\n+GO:0048500|g__Bacteroides.s__Bacteroides_dorei\t3.268\n+GO:0048500|g__Bacteroides.s__Bacteroides_vulgatus\t4.085\n+GO:0050380\t1.425\n+GO:0050380|g__Bacteroides.s__Bacteroides_dorei\t1.425\n+GO:0050480\t3.49\n+GO:0050480|g__Bacteroides.s__Bacteroides_vulgatus\t3.49\n+GO:0050511\t8.721\n+GO:0050511|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0050511|g__Bacteroides.s__Bacteroides_vulgatus\t2.907\n+GO:0050570\t5.988\n+GO:0050570|g__Bacteroides.s__Bacteroides_dorei\t5.988\n+GO:0050577\t2.045\n+GO:0050577|g__Bacteroides.s__Bacteroides_vulgatus\t2.045\n+GO:0051073\t6.144\n+GO:0051073|g__Bacteroides.s__Bacteroides_dorei\t3.072\n+GO:0051073|g__Bacteroides.s__Bacteroides_vulgatus\t3.072\n+GO:0051205\t4.522\n+GO:0051205|g__Bacteroides.s__Bacteroides_dorei\t3.39\n+GO:0051205|g__Bacteroides.s__Bacteroides_vulgatus\t1.132\n+GO:0051537\t26.424\n+GO:0051537|g__Bacteroides.s__Bacteroides_dorei\t15.914\n+GO:0051537|g__Bacteroides.s__Bacteroides_vulgatus\t10.51\n+GO:0051539\t133.729\n+GO:0051539|g__Bacteroides.s__Bacteroides_dorei\t41.322\n+GO:0051539|g__Bacteroides.s__Bacteroides_vulgatus\t54.67\n+GO:0051539|unclassified\t37.738\n+GO:0051607\t2.165\n+GO:0051607|g__Bacteroides.s__Bacteroides_vulgatus\t2.165\n+GO:0051775\t3.623\n+GO:0051775|g__Bacteroides.s__Bacteroides_dorei\t3.623\n+GO:0051920\t25.421\n+GO:0051920|g__Bacteroides.s__Bacteroides_dorei\t12.187\n+GO:0051920|g__Bacteroides.s__Bacteroides_vulgatus\t13.234\n+GO:0051989\t7.465\n+GO:0051989|unclassified\t7.465\n+GO:0051991\t8.721\n+GO:0051991|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0051991|g__Bacteroides.s__Bacteroides_vulgatus\t2.9
07\n+GO:0052381\t4.848\n+GO:0052381|g__Bacteroides.s__Bacteroides_dorei\t4.848\n+GO:0052692\t3.49\n+GO:0052692|g__Bacteroides.s__Bacteroides_dorei\t2.327\n+GO:0052692|g__Bacteroides.s__Bacteroides_vulgatus\t1.163\n+GO:0052717\t5.952\n+GO:0052717|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0052865\t5.301\n+GO:0052865|g__Bacteroides.s__Bacteroides_dorei\t4.699\n+GO:0052865|g__Bacteroides.s__Bacteroides_vulgatus\t0.602\n+GO:0052908\t5.376\n+GO:0052908|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0052908|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0061711\t1.086\n+GO:0061711|g__Bacteroides.s__Bacteroides_dorei\t1.086\n+GO:0070006\t5.507\n+GO:0070006|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070040\t7.361\n+GO:0070040|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070040|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070084\t5.507\n+GO:0070084|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070181\t3.831\n+GO:0070181|g__Bacteroides.s__Bacteroides_dorei\t2.554\n+GO:0070181|g__Bacteroides.s__Bacteroides_vulgatus\t1.277\n+GO:0070204\t2.54\n+GO:0070204|g__Bacteroides.s__Bacteroides_vulgatus\t2.54\n+GO:0070401\t11.207\n+GO:0070401|g__Bacteroides.s__Bacteroides_dorei\t2.039\n+GO:0070401|g__Bacteroides.s__Bacteroides_vulgatus\t9.168\n+GO:0070402\t3.81\n+GO:0070402|g__Bacteroides.s__Bacteroides_dorei\t1.905\n+GO:0070402|g__Bacteroides.s__Bacteroides_vulgatus\t1.905\n+GO:0070403\t5.0\n+GO:0070403|g__Bacteroides.s__Bacteroides_vulgatus\t5.0\n+GO:0070475\t7.361\n+GO:0070475|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070475|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070626\t2.404\n+GO:0070626|g__Bacteroides.s__Bacteroides_dorei\t0.801\n+GO:0070626|g__Bacteroides.s__Bacteroides_vulgatus\t1.603\n+GO:0070677\t4.823\n+GO:0070677|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0070677|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0070814\t15.615\n+GO:0070814|g__Bacteroides.s__Bacteroides_dorei\t9.504\n+GO:0070814|g__Bacteroides.s_
_Bacteroides_vulgatus\t6.111\n+GO:0071436\t2.407\n+GO:0071436|g__Bacteroides.s__Bacteroides_vulgatus\t2.407\n+GO:0090071\t3.831\n+GO:0090071|g__Bacteroides.s__Bacteroides_dorei\t3.831\n+GO:0097264\t3.745\n+GO:0097264|g__Bacteroides.s__Bacteroides_dorei\t3.745\n+GO:1990077\t8.016\n+GO:1990077|g__Bacteroides.s__Bacteroides_dorei\t1.517\n+GO:1990077|g__Bacteroides.s__Bacteroides_vulgatus\t6.499\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/relab_levelwise_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/relab_levelwise_renormalized_pathway_abundance.tsv Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+# Pathway humann_Abundance
+UNMAPPED 0.760761
+UNINTEGRATED 0.236781
+UNINTEGRATED|unclassified 0.630281
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.00142265
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.213954
+PWY-4203: volatile benzenoid biosynthesis I (ester formation) 0.00103573
+PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified 0.155765
b
diff -r 000000000000 -r 5240d62d864d test-data/rna_dna_norm-dna.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rna_dna_norm-dna.txt Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,7 @@
+# 1 2
+A 11 11
+A|1 10 10
+A|2 1 1
+D 5 10
+D|1 5 5
+D|2 0 5
b
diff -r 000000000000 -r 5240d62d864d test-data/rna_dna_norm-rna.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rna_dna_norm-rna.txt Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,7 @@
+# 1 2
+A 22 22
+A|1 20 20
+A|2 2 22
+R 10 20
+R|1 10 10
+R|2 0 10
b
diff -r 000000000000 -r 5240d62d864d test-data/strain_profiler-input.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain_profiler-input.txt Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,11 @@
+HEADERS 1 2 3
+A 10 10 10
+A|g1.s1 10 10 10
+A|g1.s2 0 10 10
+B 10 10 10
+B|g1.s1 10 10 10
+B|g1.s2 10 0 10
+C 10 10 10
+C|g1.s1 10 10 0
+C|g1.s2 10 10 0
+C|g1.s3 10 10 10
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.1.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.1.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.2.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.2.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.3.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.3.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.4.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.4.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/metaphlan-db/demo-db-v30.fasta Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,226 @@\n+>1262743__R5C054__BN727_01980 UniRef90_R5C054;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_CAG_598;GCA_000431055\n+TTGATACATCAGCATCACTTGGCTTTTAGCGGTGGTTCGGAAAACTCAAACTATCGAGCATCGTTCGGTTTTATGGATCATAATACAATTGTCAAAGTTAACGATTACCGAAATTTAGTGGTAAAACTTGATGCTACACAAAAGGCTTTCGATGGTCGCTTAGTTGGCGATTTTGGTGTATATGGCTACTCGTCAAAGATACACGACATTTTCGATACACGAATACTGTTTTACTCGGCAGCTGCACAGAATCCTACATATCCAGCAGGAACTGATGTTAATGGCAACTGGGTGAAGAACTCGGCTGCATCACACATCAACCACCCCGGAGCACTCCTCTATGAGAAAAATGACTCCGAAGAACGGAATTTCAATACACATTTGGGGCTGAAATTTAATATCCTTGACAATTTGATATTGTCGGCTTTCGGCTCTTATTCATATTCATCTACGGGAAATGCTCAATTTTGTCCTACATGGGTGTGGGCGCAAGGCAATGTTTATCGTGGAGAGTTCAAGGGTGAAGACTACTTTACAAATGTGTCCCTCTCATATAACAATGCTTGGGGAGACTCACACCTTGATGCTGTTGTTGGCGCAGAATATCTTAAACAGGTAAGGACTGGTTTATGGGTGCAGGCAAAAGGAATAACAACAAATGATTTCTCCTATAATAACATCGGAGCAACATCATCGCGTCCTTTCGGTGGTACGAGCAGTAGCTATGAAGACCCGTCACTTGCTTCAATAATGGGTAGTGTCACATATAGTTACAAGGATAGATATTCTATTGCGGCAGCACTCCGTGGAGATGGCTCTTCAATGGTAAGCGATAACAATACTTTCGGATTCTTCCCATCAGTATCACTGGGTTGGGATGTAAAAAAAGAAGGCTTCCTCTCTGATACTGACTTTATAACAATGTTGAAACTAAGAACCGGATATGGTCGGTCAGGAAATCTTGGAGGTATAACATCCTATACAACACTTAATACCGTAAAGGAGAATGGTATCGTATCCATCAACGGTGCACCTACCGTAACAATGGGAAGTATACGCAACACGAATCCGGACCTTAAGTGGGAGACTCGTTCAACATTTAATATCGGTTTTGACTTAGGTATATGGGATAATCGGTTGATGCTTACCTCGGAATTATATTACTCAAAGACAACGGATATGCTCTATGAGTATGATGTTCCCGTTCCGACCTTTGCGTTCGATAAACTGATGGCAAATATCGGCTCGATGTCTAACCAGGGTGTCGAACTAGGAATCTCGGTAGTTCCCATTCAACGAAAGGATATGGAGATGAATATCAACTTCAATATGTCCTACCAGAAGAATAAGTTACTTTCGCTTAGTGGAGAGTATAATGGTATGCATATGACAGCTTCAGATATTACTCCGATTGGCTCGCTTTATGGTGCAGGTCAGAACGGTGGAGACAATAATGTAGTATATCAGATTGTAGGTCAGCCATTGGGGGTATTCTATCTACCTCACTGCAAAGGGCTTAAAGAAAATGAACTTGGTGGCTACAGTTACGATATTGAAGATTTGAATGATGATGGCGAGATTGATTTTAGCGATGGCGGAGACAGGTATATAGCAGGTCAGGCAACCCCCAAGGTAACTATTGGATCAAACATCAGTTTCCGTTACAAGTCCTTTGATATTGCCATGCAGATAAATGGTGCTTTCGGTCATAAGATATTCAATGGCACGGGCTTGGCTTATACCAATATGTCTATATTCCCTGACTACAATGTATTGAAGGGTGCTCCTGAAAAAAA
TATTATTGATCAGAATGTTTCAGACTATTGGTTGGAAAAAGGTGACTATGTAAATATTGAACATATTACCATAGGCTACAATATTCCAATGAAATCCAAGGCTGTGAAGTCATTGCGTCTTTCGGCAGGCATTAGCAACCTTGCAACAATCACAGGCTATAGCGGTCTTACTCCAATGATAAACAGTTATGTAGTAAGCAACACTTTGGGCATTGACGACAAACGCACCTATCCTTTATATCGTACCTATTCGTTAGGTCTTAGTATTCAATTCTAA\n+>189722__E0NQU7__HMPREF0658_0548 UniRef90_E0NQU7;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Prevotellaceae|g__Prevotella|s__Prevotella_marshii;GCA_000146675\n+ATGGAATCATCAATCAAGGACAAATACATCATCTTGGGCTTTGTCGGCTTCGCCATCGTCCTAATATCTTCCATTGCCACGCTGGTAATAGCGGACAGCTTCAACCAAGACAACTTTGTCAGGTGGATAGTATTCGTATGCTGTAACCTGTTGGGATGGTTGCTCTATCTCTCCTTTCAGACACTTATCTTTGATACATACGAAATCTACAAAATCAAGTTCGGCAAGAAAGAAACGATTGCCGAAGCCATAGAGGTGCAGGAAGAACTGTCACAAAATACACTTGAAGAAGCCACATCTGTGCCTGGACCTACATCAGTCCCTGAGCCTGTACCCGAATCATCCCCGACAAAAGAAGAGACACTTATCCAAACACAACCGATAGAGCTTACTATCGCCCCGGATCTTCACGAAAAGAACCGTGCCAATTACGCCAAGCAGAGAGCAACGGGAAAAGGAAGAGCGCATCCGCATGGTCATGGAGTATTGCCATTATTACCTGCCTCGCATTGCCGACCAAGAAACCGTGAACCACATCTGTACTGA\n+>357276__A0A076II57__GV66_07710 UniRef90_A0A076II57;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei;GCA_000738065\n+ATGGATACTAAAAAAGAGTTGAAAATCCTCTTTTGGATGATTGTGGTTTATGCAGCCGTATTTTTCATGCCACTCGGCAATGAGAGGTTCATGACGGCTGTCGATGCAACCCTCGACCTTGCTAAATGGTATGCGCAGGAACACGTTATGTTATGTCTGCTTCCCACCTTTTTCATCGCAGGTGTGATTGCCGTTTTCGTCAGTCAGGGTTCGGTTCTCAAATATTTCGGAGCAAATGCCAAGAAGTGGTTATCCTATACGGCAGCTGCTGTTTCGGGTAGTATATTGGCAGTCTGTTCCTGCACGATTCTGCCGTTGTTTACGAGTATTTACAAACGGGGCGCAGGGTTAGGCCCTGCTATAGCCTTTCTCTATTCCGGCCCAGCCATCAGTATTTTATCCATCATTTTGACAGCCCGTATTTTGGGTGTAGAGATGGGTGTTGCACGAACGGTTGGAGCTATCGGATTCTCCGTAGTGATAGCCTTGCTGTGGCATTTATTTTCCGTAAGGAAGAGAAAGCCAAACAAGAGGAACAGATGA\n+>357276__A0A076IN30__DXD47_09350 
UniRef90_A0A076IN30;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei;GCA_003466465\n+ATGAATCAGGTCAAAATGGTATTAATTTTTGCCATATTATTATTTTTATTTTGTGTGAATGTAAGTGCCCAAGTAATAAAGGGAAGA'..b'GACTTATATTGCTGATATGGCAAAGGCAAGTTTCTCACTTGTTTCAGGTAATGCCGAAGATGCTAACAGCGATAAAGTATTCTATGCAGAAAAGGCTGATTCTAAAGCAGGAATTGTCAACTTCTATAAGGTAACTAATGCAGGAGGTAAATTTGTTGCTGATGATAAAGTGGTTATTGTAGGATTTGATGCATTGGTAGATGGTACTGACAAGACATTTGCTTCTGACTTGATTAAGGTTGAAGTTTCTGAGAAGAATGATATTGTAGTTACAGCTCAGAAGAGAATCACAGTAGATACAAATGTCGTTGTTCCTGTTAAGTTGACTGTAACTAATGCAGATGGCACAAATGCAACTGTATACGCTTCATTCACAGTAACAGTAAAAGCATACCCGATTCAGTAA\n+>821__R7P1Q7__M099_2990 UniRef90_R7P1Q7;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus;GCA_000699865\n+ATGAAACGTAAAAATAAAACAACTAAATATATTCTGGCCGGGTTATTTCTTTCTTCGATGATGGGAGTAACAACTGCATGTGATCCGTTAGGAATAGAACCTACAACAAAAGTGGATGAAGAACGCTTTTGGGAAAATCCTCAATTGGCTCGTTCTTATGTGAACAATTTCTATTTCATATCCCAATCGGCATCTGGTGACACCTTCCAGTCAGAACAGTGGTCAGATAACTGTCAGGGAAACTATGAACAGGACTGGGATACGTATCGACAGTACAATTTTAACAAACGTACTTATGATGAAAATAATGGTATCACTTGTTTCAGCGCCCCATGGAGTGGAGCTTATAAAAACATTCGTGCTGTAAATCTTGGAATTGAAAAAATTTCTTCCAGCAGCATATTGACAGAAGCTCAGAAGAACCAATTCTTGGGTGAATGTTACTTTTTCCGTGCTTTTATTTATTTTGACATGGAAAAGTTTTGGGGAAGCGTTCCTTATGTAGACAAAGCGTTAACGATTGAAGATGAAACCTATTTGCCTCGTACCAAACGCGAAACTATTTTCGACAATATTCTAGACGATTTGCAGAAATCCGTAGATTATTTTAAAGCTTATGGTGGAACGCATACGCTTGGTATGGTAAATGAAGATGTAGCTAACGCATACATTTCGCGCGTGGCTTTATATGCAGCCAATGCGGCCGATGCTTCAGCCAAAGGTCTATATTCGGATGACGCTGAAGGCTTGTTCAAGTTTGAGAAGAATGCAAATCATTATTATGAATTGGCTTATAATGCAGCTAAAGGATTGATAGGCAAGTACAGTTTGGAACCGAATTATGAGGATCTGTTCACAAAAACAGAGTCACATACAAGCGTTGAGTCAATTTGGCCGGTCATGTTTAAAGAAAACCAACGTAGTGGTTTCAATCCAACTGCAAAGAACGGACCTGACGGGAATTATTATGGAGCAACCGAAGATGCGACTTATTCTTACGGGCGTCGTTCAGGTTTATTTCCTACACAGGATTTGGTAGACTGTTATTTACAGAAAGATGACGCTGATGGCAAATGGAAAAATTGGTGGGAAACTTCACAGGCAAAAGCCATGGGAATCCATAGAAATGCAGAAGGAGAATTAGAAGGAGAATCTGCTAATTATCGCGATATGT
TCAAGAATCGCGATAGCCGCTTCTATTCCACAGTTACTTACGATGGAGCATATATGGGACCGGAAGAAGAACGTTATATTATTCAAACTTGGATTGACAATACTACACTAGATGAAAAGACCTTAAAATACAGTGCTTTACATTCTGGGTATAGAGTGATGGAAAATTTAAATTCCGCACCAATCAACAGAGCTTCCGCACAAACAATAACTGGTTATTATTCAAGGAAGTACTCTCAGTTTAATAAAATCAATACGGACGGGACTTTGGATTTTGACACCCAGCGTCAGACTTGCTATTTCAATGTACGGTATGCGGAAGTGTTGCTGAATTGTGCCGAGGCAAGTATTAAACTAGGCAAGACCGATGCGGCAGGCTATATCAATGAAATCCGTAATCGTGCCGGATTGCCTAATTATGACGGCAATGATTTGTGGAATGAAATGAAACTGCAACGTCGTTTGGAATTTGCATTTGAATGTCCGGGCTTCCGTTATTTCGATTTATTGCGTTGGGGGGAAGCAGAAGGCAAAACGACTATTGAAGAGTTGAATACTCCTTCCAGAGGATTATGGATTTTCCGTAAAGGTATGGAAAGCGAAAAGGCTGGTGAGAATGGTTATCCTGTAGAGCCGGGTGGTGAAGGATATTTTACTCCTAAATTCCAGACTTTTGAGATGCCGTATTCTTATTATGAAAGAAAGTTTGATGATGCAAGATACTACTTTGTACCATTTTCACAGTCCATGTTGAGAGACTATACACAATTGCAACAGAATCCGGGATGGAAAAACTTCAACTATAACAATTAA\n+>821__U6RDC2__DXD46_15675 UniRef90_U6RDC2;k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus;GCA_003437415\n+ATGGAAGAATGCAAAGCAGCTGATTATTCCAATATTCTGTACATCAGAATGCTGTGGAAAGATTTGGAACCCGAGGAGGGCAAATATGCATGGATTTACAATGAACGGTATAAATGGTATATACAAAAAGCCAAAGACAAAGGGCTTAAACTGGCCTTCAGGGTGTTCTTTCATGGTGTAGACGGAGTACCGTCCTATGTGTACGAAGCCGGAGCCACAGAAAGCCCAATAGACGATGAAGGCAAAACCCAGCCTTATTATGATAATCCAGTATTCCTTGAAAAGCTGGACAAGTTCATAGAGGCTTTTGCAAAGGAATATGACAATCCGGATGAGGTAGATTATATTGATGCATATGGATTGGGAAGATGGGGAGAAGGACATGGACTGGTACTCGAAAAGCAAGATAATCTGGAAAGCGTTATCCGACAGATAACCGAATCGTATGCAAGACACTTCAAAAAAGTGCTTACGGTAATGAATCTTTCGCAGAGCGACTACAGGTTTTCCAAGCCGCTAGTATATGACAAGCTGGGGTTTCTTCCTCGCAGGGATGGTATAGGCAGTTTTGGGTTTTCTAATGAAGAACGTGCGATGGTGCATGACGAACTTTTCCCAAAAAGAGCTCTTATTGGTGAGGGATGCTGGTGGTTTAACGCACAAGATGGTGATAACTCAAAATACAAGCATTTCCAAGGAGACAAACGTTTTGCCATGAACGATTTCAAAGAAGCTTTTACCGTTTCTGTGACTGATGCTTTGGACAGCCATTGTAACACGCTGGATTTGCGTATGCCTTTACAGTGCAAATTCTGGATAGAAGAGCTGCCGGACCAAGTTCAGCGTTTTATAACTTTAGGCGGTTATCGTCTTTATCCGGACTATATAAAGGTGGAGCAAGACCACAAAACGTTGACTTTGTTTCATTCATGGAAAAACTATGGTGTGGGTGTATTGCCTAATAATCATCCCAATTGGAATTATAAATATCAGGTTAGTTTTGTTTTGA
TGAATGAAAAAAAGGAAATTGTATTTCTTTATACAGAACCGGAAGCAGAACCTTCCGAATGGTTGAAGGGAATATCATACAATTATTTGAGTCGGTTTAATATTCCGGCAGAATTGCAGGGAAAGTATACCTTATGTGTCGGCTTGACTGACAAGACAAAAAATAACGAAGCGGCTATTGATCTGGCTGTGTCTGGGAATTTAAAAATAGGGAAATGGATATTTGTGGTTGAACTGGAGTTGTAA\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/metaphlan-db/demo-db-v30.json Wed May 12 09:00:09 2021 +0000
[
b'@@ -0,0 +1,1 @@\n+{"markers": {"189722__E0NQU7__HMPREF0658_0548": {"clade": "s__Prevotella_marshii", "ext": ["GCA_002161435", "GCA_000598585", "GCA_000699765", "GCA_900445525", "GCA_000178195", "GCA_000273725", "GCA_002811025", "GCA_000598905", "GCA_000598245", "GCA_900114865", "GCA_000699705", "GCA_000157015", "GCA_000155815", "GCA_000601115", "GCA_002811085", "GCA_000699785", "GCA_002894165", "GCA_000185605", "GCA_000273035", "GCA_001405475", "GCA_001548195", "GCA_001693695", "GCA_000273095", "GCA_001274835", "GCA_000759305", "GCA_000307435", "GCA_002160605", "GCA_000598305", "GCA_003439865", "GCA_001405955", "GCA_000598185", "GCA_000273055", "GCA_002884635", "GCA_002529225", "GCA_002811035", "GCA_000273155", "GCA_000759045", "GCA_000273315", "GCA_001405935", "GCA_001682215", "GCA_900167355", "GCA_000599065", "GCA_000577955", "GCA_000177075", "GCA_001406015", "GCA_002161135", "GCA_000598745", "GCA_000598885", "GCA_001314995", "GCA_000598425", "GCA_002222615", "GCA_000598785", "GCA_003438465", "GCA_000403175", "GCA_000154205", "GCA_900102645", "GCA_001398115", "GCA_900129535", "GCA_900454945", "GCA_003515045", "GCA_000599245", "GCA_000210495", "GCA_000156075", "GCA_003439285", "GCA_002959625", "GCA_000710365", "GCA_003437875", "GCA_000759245", "GCA_002797185", "GCA_000144405", "GCA_001405055", "GCA_002959715", "GCA_001574405", "GCA_000599365", "GCA_000177355", "GCA_001412315", "GCA_000759315", "GCA_000068585", "GCA_000699725", "GCA_001406095", "GCA_001552775", "GCA_001398395", "GCA_900095495", "GCA_001578575", "GCA_003437415", "GCA_000177055", "GCA_000598325", "GCA_000599305", "GCA_001552765", "GCA_001398375", "GCA_003539055", "GCA_003438835", "GCA_000025985", "GCA_003526655", "GCA_000955645", "GCA_900454835", "GCA_000273275", "GCA_001406715", "GCA_001217505", "GCA_002763975", "GCA_000699865", "GCA_000598665", "GCA_003466465", "GCA_000218325", "GCA_000598805", "GCA_002762425", "GCA_000599225", "GCA_001953935", "GCA_000599345", "GCA_002871515", "GCA_002763745", 
"GCA_001546595", "GCA_000699845", "GCA_001406315", "GCA_000158335", "GCA_003438705", "GCA_002204405", "GCA_002763715", "GCA_000297735", "GCA_002529435", "GCA_002753835", "GCA_000273135", "GCA_002763575", "GCA_000178295", "GCA_003439685", "GCA_000599205", "GCA_000273215", "GCA_001406635", "GCA_000218345", "GCA_002810995", "GCA_003438235", "GCA_000382445", "GCA_001405735", "GCA_000599285", "GCA_003386475", "GCA_001553225", "GCA_000766005", "GCA_001405155", "GCA_003438895", "GCA_003437605", "GCA_003437205", "GCA_003438205", "GCA_000599105", "GCA_002160595", "GCA_003436935", "GCA_003436175", "GCA_000598545", "GCA_001405515", "GCA_000261025", "GCA_003363235", "GCA_000307455", "GCA_000598285", "GCA_003439415", "GCA_003503335", "GCA_000169015", "GCA_000598165", "GCA_003436085", "GCA_900107315", "GCA_000601055", "GCA_000273785", "GCA_003437005", "GCA_000210835", "GCA_000598825", "GCA_002959635", "GCA_000738045", "GCA_000297755", "GCA_002161115", "GCA_000759165", "GCA_000273115", "GCA_000403235", "GCA_003439505", "GCA_000185845", "GCA_003438765", "GCA_000193395", "GCA_000210075", "GCA_002797155", "GCA_002161715", "GCA_001699865", "GCA_000273295", "GCA_000026905", "GCA_000738065", "GCA_003265025", "GCA_003436855", "GCA_000273075", "GCA_002206325", "GCA_002763535", "GCA_002794335", "GCA_003437545", "GCA_000759185", "GCA_001546565", "GCA_000012825", "GCA_001406135", "GCA_000598925", "GCA_000699665", "GCA_900445515", "GCA_000068525", "GCA_000598445", "GCA_000577295", "GCA_000699885", "GCA_003201715", "GCA_000759265", "GCA_900107825", "GCA_900107475", "GCA_003466305", "GCA_900100465", "GCA_001404375", "GCA_000598565", "GCA_001405775", "GCA_000191765", "GCA_003439225", "GCA_000177315", "GCA_001404395", "GCA_000598505", "GCA_000699905", "GCA_001915605", "GCA_001580095", "GCA_000163035", "GCA_000154125", "GCA_001405595", "GCA_000178275", "GCA_003438645", "GCA_001953955", "GCA_000598985", "GCA_000012845", "GCA_000762405", "GCA_900109635", "GCA_003436285", "GCA_000598945", 
"GCA_000724815"'..b'oidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577295": ["2|976|200643|171549|815|816|371601", 6484037], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_002161115": ["2|976|200643|171549|815|816|371601", 5692802], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000178215": ["2|976|200643|171549|815|816|371601", 6059812], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000577955": ["2|976|200643|171549|815|816|371601", 6228594], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000273315": ["2|976|200643|171549|815|816|371601", 6067695], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900107825": ["2|976|200643|171549|815|816|371601", 6131743], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_000210075": ["2|976|200643|171549|815|816|371601", 5976145], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_xylanisolvens|t__GCA_900114865": ["2|976|200643|171549|815|816|371601", 5867942], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_salanitronis|t__GCA_000190575": ["2|976|200643|171549|815|816|376805", 4308663], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_coprophilus|t__GCA_000157915": ["2|976|200643|171549|815|816|387090", 4041504], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_D2|t__GCA_000159075": ["2|976|200643|171549|815|816|556259", 6920457], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_001915515": ["2|976|200643|171549|815|816|626931", 6012549], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_000315485": ["2|976|200643|171549|815|816|626931", 7087734], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_oleiciplenus|t__GCA_003438445": ["2|976|200643|171549|815|816|626931", 6457077], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sartorii|t__GCA_000403195": ["2|976|200643|171549|815|816|671267", 5464209], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_reticulotermitis|t__GCA_000517545": ["2|976|200643|171549|815|816|1133319", 5365278], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_002221665": ["2|976|200643|171549|815|816|1796613", 4800416], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_caecimuris|t__GCA_001688725": ["2|976|200643|171549|815|816|1796613", 4839927], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_43_108|t__GCA_001915545": ["2|976|200643|171549|815|816|1896974", 5012994], "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_sp_OM05_12|t__GCA_003438995": ["2|976|200643|171549|815|816|2292283", 4475735], 
"k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Rikenellaceae|g__Rikenella|s__Rikenella_microfusus|t__GCA_900455755": ["2|976|200643|171549|171550|28138|28139", 2945869]}, "merged_taxon": {}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.pkl
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.pkl has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.1.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2
b
Binary file test-data/test-db/metaphlan-db/demo-db-v30.rev.2.bt2 has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/metaphlan-db/humann_markers.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/metaphlan-db/humann_markers.tabular Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,113 @@
+1262743__R5C054__BN727_01980
+189722__E0NQU7__HMPREF0658_0548
+357276__A0A076II57__GV66_07710
+357276__A0A076IN30__DXD47_09350
+357276__A0A076INK2__BSEG_04225
+357276__A0A076INR8__EL88_12300
+357276__A0A076IRX4__ABI39_13585
+357276__A0A076IS46__BACDOR_03265
+357276__A0A076IS73__GV66_00410
+357276__A0A076ITH2__BSEG_03273
+357276__A0A076IXE0__HMPREF1064_04364
+357276__A0A076IYE6__IY41_04275
+357276__A0A076J726__IY41_15030
+357276__A0A076J8D4__HMPREF1064_00553
+357276__A0A076J9V8__BACDOR_01395
+357276__A0A0K2HF58__IY41_03550
+357276__A0A0K2HFA1__HMPREF1065_02962
+357276__A0A0K2HG61__BACDOR_02028
+357276__A0A0K2HGB7__HMPREF1064_01580
+357276__A0A0K2HHN3__GV66_08845
+357276__A0A0K2HI25__DXD47_04465
+357276__A0A0K2HI42__ABI39_06975
+357276__A0A0K2HJA6__DXD47_07320
+357276__A0A0K2HMX2__HMPREF1063_03979
+357276__A0A0K2HNG3__HMPREF1064_03004
+357276__A0A0K2HP62__HMPREF1063_01560
+357276__A0A0M1VXQ9__BSEG_00942
+357276__A0A0M1VYB8__HMPREF1063_02104
+357276__A0A0M1W1I8__ABI39_13620
+357276__A0A0M1W657__HMPREF1064_04629
+357276__A0A0M1W6X7__HMPREF1065_03898
+357276__A0A0M1W9N4__GV66_05135
+357276__A0A174L570__GV66_05720
+357276__A0A1Y3Z6X3__GV66_07715
+357276__A0A3E4JJ65__HMPREF1064_00497
+357276__B6VVS0__HMPREF1064_03951
+357276__B6VXX7__HMPREF1063_02683
+357276__B6VYT9__HMPREF1063_05030
+357276__B6W1J2__BACDOR_03380
+357276__B6W1Y1__BSEG_03960
+357276__B6W1Y5__IY41_11405
+357276__B6W5Q7__DXD47_13945
+357276__B6W5Q9__HMPREF1063_03125
+357276__C3R9Y0__HMPREF1064_01889
+357276__C3RA36__HMPREF1065_03270
+357276__C3RDQ5__HMPREF1065_03827
+357276__C3REQ8__GV66_09740
+357276__D1JY58__BACDOR_04277
+357276__D1JYA0__EL88_06560
+357276__D1JZM4__HMPREF1065_01278
+357276__D1K3K9__EL88_23995
+357276__D1K526__GV66_17250
+357276__E5UTL1__HMPREF1065_03551
+357276__I8VN48__ABI39_07855
+357276__I8VQA2__HMPREF1065_04183
+357276__I8WBQ9__HMPREF1065_03212
+357276__I8WDF1__B5F00_02455
+357276__I8WFU9__BACDOR_02006
+357276__I9F327__DXD47_22495
+357276__I9FGI3__EL88_12385
+357276__I9FRW5__B5F95_21685
+357276__I9FU68__HMPREF1063_00557
+357276__I9QBT4__GV66_09000
+357276__I9QBT7__BSEG_03340
+357276__I9QPN0__B5F95_09195
+357276__I9QXD4__IY41_00345
+357276__I9R1V6__DXD47_04125
+357276__I9R201__B5F00_01910
+357276__I9R994__BACDOR_04044
+357276__I9RAC4__BSEG_00561
+357276__I9RCD3__IY41_15070
+671267__R9I8L9__C802_02304
+671267__R9IB12__C802_00696
+821__A0A0N7J7X2__SAMN04487923_101621
+821__A0A0P0L8F8__ERS852509_03705
+821__A0A0P0LFP6__C5Z04_13890
+821__A0A0P0LH09__ERS852457_03390
+821__A0A0P0LJ37__DXB82_14270
+821__A0A0P0LKG6__HMPREF1058_02188
+821__A0A0P0M2J4__DXD46_10295
+821__A0A0P0M3U7__DXB90_14325
+821__A0A0P0M405__DXC02_15075
+821__A0A0P0M4G3__M099_3092
+821__A0A173XDD3__DXB82_17180
+821__A0A174ALF2__BvMPK_3016
+821__A0A174AYT0__DXC02_03530
+821__A0A174JYD3__ERS852457_02610
+821__A0A174MBW9__SAMN04487923_10272
+821__A0A174NM17__lacZ_6
+821__A0A174NSU0__BVU_3144
+821__A0A174NVL6__NCTC10583_01782
+821__A0A174T998__DXB82_10860
+821__A0A395UNT8__DXD64_15275
+821__A0A395VIK7__BVU_1225
+821__A0A395VLM8__BvMPK_3650
+821__A0A396ER29__ERS852457_02298
+821__A0A396F1N0__M099_1553
+821__A0A396F598__CUU_3259
+821__A0A396F9T7__M097_3427
+821__A0A3E4HP25__SAMN04487923_102450
+821__A0A3E4KIM1__M097_3660
+821__A0A3E4WAR6__DXD64_10735
+821__A0A3E4XY72__NCTC10583_01067
+821__A6L0C7__ERS852556_03520
+821__C3RB94__DXC03_15585
+821__C6ZAA5__ERS852556_00403
+821__D4V4X2__DXC16_18525
+821__D4VC51__M099_1508
+821__D4VD59__BHV80_04260
+821__E5UPR6__SAMN04487923_101768
+821__I9R3W6__DXC16_12455
+821__R7P1Q7__M099_2990
+821__U6RDC2__DXD46_15675
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v296_201901b.ffn.gz has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz
b
Binary file test-data/test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v296_201901b.ffn.gz has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/protein-db/uniref90_demo_prots_v201901b.dmnd
b
Binary file test-data/test-db/protein-db/uniref90_demo_prots_v201901b.dmnd has changed
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/protein-db/uniref90_demo_prots_v201901b.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/protein-db/uniref90_demo_prots_v201901b.fasta Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,88 @@\n+>UniRef90_W7T1N9|1098\n+MPYKCQCPSQRIKTHNLKVRSMFKACDGRGTTKIFLSIIYTKIPHQICQHSQHSTPICRSGKCEIHRKCSLMSSLALTPFKAGLFVGKSIIVTGGGTGLGYAIAKELVSLGAKVVIAARRIEVLEAAASEMNKSSQRGGKIYVCQCNVRNEDDIQKLVTFALETMGGIDGLVNNAGGQFVSPVENISARGFKAVVETNLLSCFMLSKEIYNRWWSQQEDRKRSGSIVNIILANKNGFPMMAHSGAARAGVENLTKSMALEWIGRGVRVNCVAPGIIYTESGFANYGEMAGAFLSSVLPCIPAHRCGTAEEVSSLVVFLLSDAAVYITGQNMGVDGGMGQGTIPLRPQDHGASLLPVYGELPMKARL\n+>UniRef90_U3KI10|2454\n+MRTDSNNENSVPKDFETIDNSNFAPRTQRQKPQSELVKKPLSKQKEHLRKKLEEERMKENLLLGKNSNEVVQFSDPLGKNSSSSNTLKEIDRFPTEHLLQKLEISSPEVKYEQPPKCEVTGKEAISAMSRAKSPQCRQEIADVYCQHKLGTLMPEQVTRFCALEGKANTNVQWDEDSVEFMPAQPVRIAFVLVVHGRASRQLQRMFKAIYHKDHFYYIHVDKRSNYLHRQVLQFASQYPNVRVTSWRMATIWGGASLLTTYLQTMKDLMEMSDWPWDFFINLSAADYPIRTNEQLVAFLSRYRDMNFLKSHGRDNARFIRKQGLDRLFLECDTHMWRLGDRKIPEGITVDGGSDWFLLNRKFVEYVTFSNDDLVTKMKRFYSYTLLPAESFFHTVLENSPFCDSMVDNNLRITNWNRKLGCKCQYKHIVDWCGCSPNDFKPADFHRFQQTARPTFFARKFEAVVNQEIIGQLDYHLYGNYPPGTPGLRSYWENVWEEPDGLGALSDAALTLFHSFSRLGLRRAQSALHAAGDSCRYYPMGHPVSIHLYFLADRFQGFLIRHHATNLAGSKLETLETWVMPKKVFKIASPPSDFGRLQFSEIGTEWDAKERLFRNFGGLLGPTDEPVGMQKWGKGPNVTVTVIWVDPINVIAATYDILIESSAEFTHYKPPLNLPLRPGVWTIKILHHWVQVAETKFLVTPLAFSNQQPIKQEDAIKLHSGPPKNAYMEQSFQGLNPVLNMPVSAARAEQARRNAALVGPRLDAWVDSLAGGVWSAVEVCSVGPSGCPALQGCAQTAWSSLSPDPKAELGPVKPDGRLR\n+>UniRef90_Z9JLB1|1683\n+MSQTRPWLQHYPSGIPTEIDADAFRTIVDVFNTSVIKYRDCPAYTNFGKTLTYGEIDLLTKQFASYLLNELKLKKGERIALMMLNCLQYPVATFGALRAGLTVVNVNPLYTARELKHQLVDAGASVLVVIDNFCTTVQHIIADTSVKQVITTGLGDLLGFPKRSLVNFAVKHIKKLVPEYRLPGSIRFREALARGGKHAMPPIHIASDDLAFLQYTGGTTGTAKGAMLTHRNMVANMQQTSHWLNNNLKESCETVITALPLYHIFALTANNLLFMKIGGCNHLITNPRDIPGFVKELKRVRFTAITGVNTLFNKLLNTPGVAEIDFSSLKIALGGGMAVQRSVAERWKQVTHVPLIEAYGLTEASPGVCINPLDLKEYNGSIGLPIPSTDVCIKDDTDTPLPTGEIGELCIKGPQVMKGYWQHPEETSNVFDADGWLHTGDIAKMDEQGFFYIIDRKKEIILVSGFNVYPNEIEEVIAMMPGVDEVAAFGVPDEKYGEVVKVVIVKKDPMLTAEEVKAHASANLTRYKLPRIIEFRTKLPKTDVGKILRHELRHAAPTSTT\n+>UniRef90_Z9JRB3|1773\n+MQTHTYDVVIVGAGGAGMRAALESSKRARTAVLTKLYPTRSHTGAAQGGMCAALANVEEDNWEWHTYDTVKGGDYLVDQDAAEVMAKEAIDAVLDLEKMGLPFNRTPEGKIDQRRFGGHTREHGEAPVR
RSCYAADRTGHMILQTLYQNCVKQNVEFFNEFYVLDVLMTGDPRDEAEVRASGVVAYELATGEVHIFRAKSVVFASGGFGKMFKTTSNAHTLTGDGPAMALRRGIPLEDMEFFQFHPTGLAGLGILLSEAARGEGGILRNAQMERFMERYAPTLKDLAPRDVVARAMANEVREGRGCGPNKDYVLLDLTHLEPAHIDAKLPDITEFARTYLGVEPYTEPVPVFPTAHYGMGGMPTNIKGEVLRNETDVVPGLYAAGEVACVSVHGGNRLGTNSLLDINVFGRRAGIYAAEHAAGVELAEIEPGLEAPTVELLERLRDRPATTDRIADIRRDLQETMDANVQVFRTEETCRTALADIHQLKKRYETVAVQDKGRRYNLDLLEAVELGFLLDLAEVVTVGALNRKESRGGHFREDFEKRDDVNYLWHTMAYRTREGEEGFEGTDIRLGTKPVVITRYEPKERTF\n+>UniRef90_X8JRH1|978\n+MSTANGATGQKGFLASPKTIAIVGCPFSGGQPKAGVDRGPIHLINAGLEDQLSGLGWKVKFDGHHQFEEIDAQNDTPLGILKRPRLVSQVCEAVAKVVGDHARNGELPLTLGGDHSLAMGTISGVLSAHPEACVVWVDAHADINTPSTSTSGNIHGMPLSFLLGIAGEVPQAEFGPQPFSWIKPTLRPERLVYIGLRDIDDGERKILREHGIRAYSMHEVDRYGIGRVVELALAHVNPDGKRPIHLSFDVDALDPTVAPSTGTPVRGGLTFREGHYICEAIAETGLLVGLDIMEVNPSLGSTDADVSATVAVGCSLARSALGETLL\n+>UniRef90_Q8GT21|1368\n+MAHDQSLSFEVCRRKPELIRPAKQTPHEFKKLSDVEDQEGLRFQIPVIQFYKHNNESMQERDPVQVIREGIARALVYYYPFAGRLREVDGRKLVVECTGEGVMFIEADADVTLEQFGDALQPPFPCFDQLLFDVPGSGGILDSPLLLIQVTRLKCGSFIFALRLNHTMADAAGIVLFMKAVGEMARGAATPSTLPVWDRHILNARVPPQVTFNHREYEEVKGTIFTPFDDLAHRSFFFGSTEISAMRKQIPPHLRSCSTTIEVLTACLWRCRTLAIKPNPDEEVRMICIVNARSKFNPPLPDGYYGNAFAIPAAVTTAGKLCNNPLGFALELIRKAKREVTEEYMHSVADLMVATGRPHFTVVNTYLVSDVTRAGFGEVDFGWGEAVYGGPAKGGVGVIPGVTSFYIPLRNRQGEKGIVLPICLPSAAMEIFAEALNNTLNGKEIEIAKHFTQSSL\n+>UniRef90_X6L320|957\n+MSLEDAKRQIDHAFTRDDLKGPSFENVFAGAASFLRRKYTKDLTGVDIAVTGLPFDQAVTNRPGTRLGPRAIREASLLQTMDKPYGWDFDVLSDFAIADYGDMAFDYAMPSAVPARIEEHVRGILDAGAACVALGGDHSVTLGALRAHAAVHGPLSVIQFDAHTDTWADDDPGRVDHGTFLYTAAKEGIVVPERSVQIGIRTDNPDTMGFHILDAREVHAAGPERIAEQVHQIVGRSACYITFDIDALDPAFAPGTGTPVWGGLTSHQAAVMLRRLAGVNMVGGDVVEVSPPFDTTGATAIAAAHVATELLSIWASTRR\n+>UniRef90_W7DVV5|549\n+MPIGGGKGGSDFDPKGKSDAEIMRFCQSFMSELAMHIGPDTDVPAGDIGVSGREIGYLFGQYKKMTGRFDAGTITGKGLTYGGSLTRTEATGYGLVYFTKEMLAATNQSLKNKTVVVSGSGNVAIYAIEKAEELGAKVVACSDSSGYIFDQEGINLKTLKQIKEVERRRIHTYLEKHPTAEIF\n+>UniRef90_W8YTG4|1614\n+MAYEMPWTADTSKLNKMELWKIEKDGLDVIRTIIEKYALEGYDSIPEDDMNRFKWAGVYEQKPRDGYFMMRIRINSGVMTTAQARALASIGRDYGRDLIDVTTRQAIQYHWLRIENMPDIFK
RLEEVGLYSYESCGDCPRT'..b'SGADLYKVEMPLFGKGSRKELLCASQKLNDHIAMPWVILSSGVDEKLFPRAVRVAMEAGASGFLAGRAVWSSVIGLPDTEMMLRDISVPKLHRLGEIVDEVMARR\n+>UniRef90_Z9JUT2|1308\n+MTASPAPADLCAPLEESTVSSPRERRALPRPYDAILLASFGGPEREEDVLPFLRNVTRGRGIPDERLEEVGAHYRALGGRSPINDQNRALIRALRAELDERGIDLPIHWGNRNWQPTMAEAVRGLHADGHREVLAIATSAYSSYSSCRQYREDFGRALVETGLLGTVRLDKVRPYFSHPGFLAPMADGILDALAQLQEEGHDGARVRILFSTHSIPTAMADASGPAEERTGGPARWYVRQHEAACRYLMDAVAQRRAAEGAGTELPGWELVYQSRSGAPHTPWLEPDVNDVIARIAEEGEHDAVVVVPVGFVSDHVEVIWDLDTEARESAEQHSLAFRRVATPGTDPRFVAALADLVEERIRPDAPRRAVTEFGPTADVCGTACCVSGSPRARIVPTTSALDSGEDLRAARTqaardegGSTRPAGAGTTGEAGGR\n+>UniRef90_U3KP22|1143\n+MFPSRRKAAQLPWEDGRRDTIPYLSPPPsprpekeprgrgRLCRASSLPRPLAGLASAPVRRKPppqddGGSRLLPSGLSRKCSVFHLFVACLLLGFFPLLWLQLSCSGDVARTAGGQGQETPGPPRACPPEPPPELWEEDASWGPHRLAVLVPFRERFEELLVFVPHMHRFLSRKKIQHHIYVLNQVDHFRFNRAALINVGFLESSNSTDYIAMHDVDLLPLNEELDYGFPEAGPFHVASPELHPLYHYKTYVGGILLLSKQHYQLCNGMSNRFWGWGREDDEFYRRIKGAGLQLFRPSGITTGYKTFRHLHDPAWRKRDQKRIAAQKQEQFKVDREGGLSTVRYRVDSRTALSVAGAPCTVLNIMLDCDKAATPWCTFS\n+>UniRef90_Z9JWW5|1404\n+MADTSTDQSYDVVILGGGSGGYAAALRGAQLGLKIALVEKDKLGGTCLHRGCVPTKALLHVGEVADSAAEGAEMGVKMSLEGIDIATTLEFKDKIIGRLYKGLQGLVKSRKVEYVEGFGRLTGKNTVSvetsegvreltgknvvlaSGSYSKTLPGLELGGRILDSEAALQLPEVPKNPIILGGGVIGVEFASVWKSFGAESVTIVEGLPHLAANEDESLSKALERSFKKRGIKFSLGTFFEKAVQTDTGVTVTLVDGTTFEGDYLLVAVGRGPATAGLGYEEQGITMDRGFVLADKNTLETNVPGIYAVGDIVPGLQLAHRGFQQGILVAERIAGQDPAPIVESGIPRVTYCDPQLGSVGITEKQAKEQFGEDGVETYEYNLGGNGKSQILGTTGFIKLVREKNGPIVGVHMIGRNLAEQIGEAQLIVNWEAYPEDVASLVHAHPTQNEAIGEAALALAGKPLHAHA\n+>UniRef90_Q9VTG7|1437\n+MKGGNYTSLGTCSGINVSGNVAGTRKMSLGKSIKMYLTIFILTTCIYMALYQYHISREPFAASEVVKHQEKSSSYIASYLWSPISLLMAnsssntnnnstttstttttapttptttttttvgsvgQKLGASSISSIRMVSLAATIPSFKSTLSESRSVSLGGHQKTATVKTSTtittrttasglattklsattrttaktsaklsaattpTASHMENGYKTRPTFVAASLPPPLYIITPTYRRPEQLAELTRLGYTLKHVVNLLWLVIEDANKTNPLVGHTLDRIGVPYEYMVAPMPEKYKQTKKAKPRGVSNRNRGLEYLREHATEGVLYFADDDNTYDISIFEQMRYISKVAMWPVGLVTKTGVSSPIIQAGKLVGYYDGWIGGRKYPVDMAGFAVSVKFLKERPNAQMPFKPGYEEDGFLRSLAPLDDAEIELLADECRDILTWHTQTKKNAPAQALNRTRYKNTNLEHIDRLLVR
P\n+>UniRef90_X7F2P9|852\n+MSESDESVSRAKRVDIGKVGVGNDRPLALIAGPCQLENLDHARMLAHRIAEAADAAGLPWIFKASYDKANRSSLSGRRGLGIDEGLGILARIREEFGVPVLTDVHAPDQCARAAEAVDVLQIPAFLSRQTDLLLAAGETGAAINVKKGQFLAPWDMANVADKIASTGNARILLCERGASFGYNMLVSDMRSLPIMARTGWPVVFDATHSVQLPGGQGGSSGGQREFVEPLARAAVAVGCAAVFIETHEAPDTAPSDGPNMVPIDRLPALLDGLAALDRLTKGRG\n+>UniRef90_X8DN03|1056\n+MPPSRPGGPACATPCWLRWPTSSPPAARSTCAGQAPTLPPRSCRDSSVAASACARPSHCWAGCAGRCRAAALRAAASLELLHAFALLQDDVMDDSPVRRGRASAHVQFACWHRDRGLSGSSSRFGESAAMLLGDLCLVWAEQMLRGSGLARDALDRAWPRYDTMRTELAVGQLGDIVVDAASLPSLVEVLDVARRKSGNYTVRWPLEIGAVLAGCDDDVLTLLSGYGEAVGEAFQMRDDLLGIYGSPRVTGKPAGADLSERKATTVVVAAYQLADNGLRRRLSELMGSAELDQSAVDQWRTLIAATGAVELIEQMIAERVAAALELLSSDRIDHGVRDALADMAVACSQRAA\n+>UniRef90_Z9JWA2|2748\n+MTAHETPSPIGINLPSHAQDPDPEETREWLDSLDQLVDERGDERATAIVQNVIQRARDKKLHLPDSLTTDYVNTIDVESQPEYPGDLELEKEIRNALRWNAAMVVHRAQRPGVGVGGHLSSYASISTMYEVGFNHFFRGREHPGGGDHvffqghaspgiyarafmqgrlSQEDLDGFRQEFSSEHGMPSYPHPRAMPDFWEFPTVSMGIGPVNAIEQASFDKYLHNRGIKDTSDQHTWAFLGDGEMDEVESRGALHIAAKEHLDNLTFVVNCNLQRLDGPVRGNGKIVQELESQFRGAGWNVIKVIWGSGWDPLFAADSDGALIDLMNATPDGDFQTYRTENGGFIRDNFFGRDPRTKALVEDMSDDDIWWKLNRGGHDTKKIYAAFKAAMEHKGQPTVILAHTIKGYRLGKNFAGRNATHQMKKFTPEDLKALRDTLQIPISDEVLESSSVYDAPFYVPDADSPAMRYFHERRSELGGQVPSRSREHKPLNLPGEEAYKVVKKGSGKQEIATTMALVRLLKDLMRDKETGKLWVPIIPDEARTFGMDSLFPTAKIYNPDGQNYISVDRDLLLAYKESTSGQIKHMGINEISSTSAFTAAGTSYATHGQPMIPLYIFYSMFGFQRTGDFFWAAGDQLAKGFVIGATAGKTTLAGEGLQHMDGHSPILAYTNPGAVIYDPAYGYEIGHIMRDGLQRMYGADEKRLQEVFYYITVYNEPMVQPAEPENLDVEGLLKGMYLLAPAPEGEGPEVQLMASGVGVPWALHAQKLLAEDWGVRGAVWSVTSWTEMRKDALEAEKDAFLHPEQEARVPFISERLQGVEGPFIATSDYDFLVPDLIRPWIPGPYGVLGADGWGFSDTRPAARRHLHIDAHSMAVKALQMLARDGRVDASLPAQAVKKYDLLNPNAGQSGSFGGDS\n+>UniRef90_X5M7Z0|1134\n+MVTDRIKDFLRTTRSDEPFVVVDLDVVRDNYNRLARALPESRVYYAVKANPAPEILALLAKLGSSFDAASIAEVEMVLAAGASADRISFGNTIKKERDIARAYTLGVRMFAVDCPEEVEKVARVAPASRVFCRILTDGVGAEWPLSRKFGCVPDMASNVLRHAHDLGLEAHGVSFHVGSQQPDTGAWDRALEDASTIFRTLADQGIHLKMVNLGGGFPTRYLKEVPTSEDYGKAIFGALRRHFGNQIPETIIEPGRGLVGDAGVIRAEVVLVSRKTQEPDADRWVYLDIGKFGGLAETMDEAIRYPIRTPHDGERTTPCVVAGPTCDSADVLYEKTPYDL
PVSLTIGDEILIEATGAYTTTYASNGFNGFAPLKSYII\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/utility_mapping/map_go_uniref90.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/utility_mapping/map_go_uniref90.txt Wed May 12 09:00:09 2021 +0000
b
b'@@ -0,0 +1,326 @@\n+GO:0000027\tUniRef90_Q8A488\tUniRef90_Q8AAN9\n+GO:0000049\tUniRef90_A0A0M1W4C3\tUniRef90_Q8A488\n+GO:0000103\tUniRef90_A6KXG9\n+GO:0000139\tUniRef90_Q9VTG7\tUniRef90_U3KI10\tUniRef90_U5FT06\n+GO:0000150\tUniRef90_A0A0M1VZE5\tUniRef90_I8ZWC4\n+GO:0000155\tUniRef90_A0A174W082\n+GO:0000160\tUniRef90_A0A076J7B8\tUniRef90_E5UWP0\tUniRef90_I9R2L8\n+GO:0000287\tUniRef90_A0A076IRU0\tUniRef90_A0A0M1W4C3\tUniRef90_A6KYP1\tUniRef90_A6L011\tUniRef90_A6L100\tUniRef90_Q9ZUH4\n+GO:0000902\tUniRef90_R6MNH0\n+GO:0001681\tUniRef90_A0A076IRW6\n+GO:0002949\tUniRef90_A0A0K2HI27\n+GO:0003677\tUniRef90_A0A069DBZ5\tUniRef90_A0A069SUC4\tUniRef90_A0A076IQ08\tUniRef90_A0A076ISA0\tUniRef90_A0A076J4W7\tUniRef90_A0A0K2HLI9\tUniRef90_A0A0K2HND2\tUniRef90_A0A0M1VZE5\tUniRef90_A0A0P0LAS9\tUniRef90_A0A0P0LHR4\tUniRef90_A0A0P0M475\tUniRef90_A0A0P0M5D9\tUniRef90_A0A133PU47\tUniRef90_A0A1Q6HT91\tUniRef90_A6L330\tUniRef90_C3R5V5\tUniRef90_C3RGR6\tUniRef90_E5UWP0\tUniRef90_F7LXA9\tUniRef90_I8ZWC4\tUniRef90_I9GA88\tUniRef90_I9R2L8\tUniRef90_K6ASU4\tUniRef90_R9H1R7\tUniRef90_R9HNX0\tUniRef90_R9HQP6\tUniRef90_U6RGQ3\tUniRef90_U6RK63\tUniRef90_W4PUR5\n+GO:0003678\tUniRef90_U6RK63\n+GO:0003700\tUniRef90_A0A076IWE5\tUniRef90_A0A076J170\tUniRef90_A0A0K2HLI9\tUniRef90_A0A0P0M4Y9\tUniRef90_B6VTH0\tUniRef90_I9FBU4\tUniRef90_I9QPN0\tUniRef90_W4PUR5\n+GO:0003723\tUniRef90_A0A076IRA9\tUniRef90_A0A0M1W2F8\tUniRef90_A6KZJ9\n+GO:0003727\tUniRef90_A0A0K2HND2\n+GO:0003735\tUniRef90_A6KYH2\tUniRef90_A6KYH6\tUniRef90_A6KYL2\tUniRef90_A6KYS7\tUniRef90_A6L5E4\tUniRef90_A6L5E5\tUniRef90_B2RH62\tUniRef90_Q8A488\tUniRef90_Q8A4A1\tUniRef90_Q8AAN9\n+GO:0003746\tUniRef90_Q8A1F7\n+GO:0003755\tUniRef90_A0A076II47\tUniRef90_A0A076INM5\tUniRef90_A0A0P0L9E5\n+GO:0003824\tUniRef90_A0A1H7FY75\tUniRef90_B6VZF0\tUniRef90_D1JZ97\n+GO:0003866\tUniRef90_A6L1K4\n+GO:0003878\tUniRef90_W6EAW5\n+GO:0003896\tUniRef90_K6ASU4\tUniRef90_R9HNX0\n+GO:0003899\tUniRef90_A0A069DBZ5\tUniRef90_F7LXA9\n+GO:0003917\tUniRef90_A0A133PU47\n+
GO:0003924\tUniRef90_A6L100\n+GO:0003951\tUniRef90_A0A0P0M3N9\n+GO:0003991\tUniRef90_A6L013\n+GO:0004020\tUniRef90_A6KXG9\n+GO:0004029\n+GO:0004034\tUniRef90_D4V472\n+GO:0004053\tUniRef90_A0A0P0M2Y9\n+GO:0004077\tUniRef90_I9QUC3\n+GO:0004107\tUniRef90_A6L3D0\n+GO:0004129\tUniRef90_Z9JY21\n+GO:0004148\n+GO:0004177\tUniRef90_A0A0P0LNL6\n+GO:0004252\tUniRef90_E5UPC0\n+GO:0004308\tUniRef90_A0A076ITY2\n+GO:0004316\tUniRef90_A0A076IUR5\n+GO:0004325\n+GO:0004354\tUniRef90_P94598\n+GO:0004356\n+GO:0004359\tUniRef90_B6VYI2\n+GO:0004467\n+GO:0004475\tUniRef90_A0A174JI66\n+GO:0004515\tUniRef90_A6L0W0\n+GO:0004519\tUniRef90_A0A076J0R4\tUniRef90_C3R5V5\n+GO:0004527\tUniRef90_E6SQG4\n+GO:0004553\tUniRef90_A0A0M1W0W7\tUniRef90_A0A1Y3ZH75\n+GO:0004563\tUniRef90_A0A076JBC9\n+GO:0004586\n+GO:0004588\tUniRef90_A6KYP1\n+GO:0004594\tUniRef90_A6L3F8\n+GO:0004601\tUniRef90_B0NSG7\n+GO:0004644\tUniRef90_D4VAI3\n+GO:0004650\tUniRef90_D1K1V0\n+GO:0004665\tUniRef90_A0A076J1R5\n+GO:0004739\n+GO:0004742\n+GO:0004765\tUniRef90_A6L011\n+GO:0004791\tUniRef90_A0A0K2HN45\n+GO:0004794\tUniRef90_A0A076ISH7\n+GO:0004803\tUniRef90_C3RGR6\tUniRef90_I9GA88\n+GO:0004807\tUniRef90_A6KXL2\n+GO:0004826\tUniRef90_A0A0M1W4C3\n+GO:0004853\n+GO:0005215\tUniRef90_A0A076ISY7\n+GO:0005249\tUniRef90_B6VV56\n+GO:0005524\tUniRef90_A0A076IZ32\tUniRef90_A0A076J147\tUniRef90_A0A081UAC7\tUniRef90_A0A0K2HE86\tUniRef90_A0A0K2HMJ5\tUniRef90_A0A0M1W4C3\tUniRef90_A0A0M1W874\tUniRef90_A0A0P0M668\tUniRef90_A6KXG9\tUniRef90_A6L011\tUniRef90_A6L013\tUniRef90_A6L0W0\tUniRef90_A6L3F8\tUniRef90_A6L4L7\tUniRef90_U2CVX4\tUniRef90_U6RK63\tUniRef90_W0L5S6\tUniRef90_W6EAW5\tUniRef90_Z9JXD8\n+GO:0005525\tUniRef90_A0A1Y4PC68\tUniRef90_A6L100\n+GO:0005615\tUniRef90_U3KI10\n+GO:0005618\tUniRef90_C3RCP9\n+GO:0005623\tUniRef90_B0NSG7\tUniRef90_D1K632\tUniRef90_Z9JWW5\n+GO:0005737\tUniRef90_A0A076IPF6\tUniRef90_A0A076IQP1\tUniRef90_A0A076IVM9\tUniRef90_A0A076J092\tUniRef90_A0A076J7U9\tUniRef90_A0A0K2HN45\tUniRef90_A0A0M1W4C3\tUniRef90_A0A0P0M668\
tUniRef90_A6KXL2\tUniRef90_A6L011\tUniRef90_A6L013\tUniRef90_A6L100\tUniRef90_A6L1K4\tUniRef90_A6L2N4\tUniRef90_A6L3F8\tUniRef90_I9QYH8\tUniRef90_Q8A1F7\tUniRef90_X7F2P9\tUniRef90_Z9JNJ6\tUniRef90_Z9JUT2\tUniRef90_Z9JXD8\tUniRef90_Z9K4C5\n+GO:0005783\tU'..b'D4V7H1\tUniRef90_I9IVH0\n+GO:0016788\tUniRef90_I8WIS4\n+GO:0016798\tUniRef90_A0A076ITM0\n+GO:0016805\tUniRef90_C3RCP9\tUniRef90_U6RES5\n+GO:0016810\tUniRef90_A0A174LZM2\n+GO:0016829\tUniRef90_A0A076IL73\tUniRef90_A0A076J7U9\tUniRef90_R6JK25\tUniRef90_W6EAW5\n+GO:0016830\tUniRef90_R6CPE5\n+GO:0016831\tUniRef90_C3RBF3\n+GO:0016857\tUniRef90_I9QYH8\n+GO:0016887\tUniRef90_A0A0K2HMJ5\n+GO:0016903\tUniRef90_A0A1Y3ZJ45\n+GO:0016987\tUniRef90_A0A0K2HLI9\tUniRef90_W4PUR5\n+GO:0019143\tUniRef90_Z4WZ87\n+GO:0019288\tUniRef90_A6L0V6\n+GO:0019294\n+GO:0019305\tUniRef90_C3R6D2\n+GO:0019318\tUniRef90_D4V472\n+GO:0019430\tUniRef90_A0A0K2HN45\n+GO:0019518\tUniRef90_R7AEJ7\n+GO:0019843\tUniRef90_A6KYH2\tUniRef90_A6KYH6\tUniRef90_A6KYS7\tUniRef90_Q8A488\tUniRef90_Q8A4A1\tUniRef90_Q8AAN9\n+GO:0019867\tUniRef90_I8W803\n+GO:0020037\tUniRef90_W8YTG4\n+GO:0022625\tUniRef90_Q8A488\tUniRef90_Q8AAN9\n+GO:0022857\tUniRef90_A0A0M1VYB5\tUniRef90_A0A174PV06\tUniRef90_R7NWM3\n+GO:0022900\n+GO:0030158\tUniRef90_U3KI10\n+GO:0030163\tUniRef90_A0A0P0LJQ9\n+GO:0030166\tUniRef90_Q9VTG7\n+GO:0030170\tUniRef90_B6W3Z6\tUniRef90_R7AEJ7\n+GO:0030246\tUniRef90_A0A0M1W0Z2\tUniRef90_D4V472\n+GO:0030259\tUniRef90_A6L071\n+GO:0031119\tUniRef90_A0A076IRA9\tUniRef90_A6KZJ9\n+GO:0031460\tUniRef90_A0A0K2HMJ5\n+GO:0034605\tUniRef90_A0A0K2HND2\n+GO:0034768\n+GO:0036361\tUniRef90_A0A068NXI7\n+GO:0042254\tUniRef90_A6L100\n+GO:0042450\tUniRef90_A6L013\n+GO:0042586\tUniRef90_A6L7J9\n+GO:0042773\tUniRef90_Y0KIL9\n+GO:0042834\tUniRef90_A0A076JAA0\tUniRef90_A0A1Y4PDH7\n+GO:0042972\tUniRef90_A0A174R4F2\n+GO:0043023\tUniRef90_A0A0K2HND2\n+GO:0043365\tUniRef90_A0A076J7U9\n+GO:0043546\tUniRef90_V5V2L3\n+GO:0043565\tUniRef90_A0A076IWE5\tUniRef90_A0A0P0LJ44\tUniRef90_A0A0P0M4Y9\
tUniRef90_B6VTH0\tUniRef90_I9FBU4\tUniRef90_I9QPN0\tUniRef90_I9R0Z5\n+GO:0043878\n+GO:0044205\tUniRef90_A6KYP1\n+GO:0045254\n+GO:0045261\tUniRef90_A6L4L7\n+GO:0045263\tUniRef90_A6L4M2\n+GO:0045312\tUniRef90_C3RBF3\n+GO:0045454\tUniRef90_B0NSG7\tUniRef90_D1K632\tUniRef90_Z9JWW5\n+GO:0045493\tUniRef90_A0A076ITM0\n+GO:0045903\tUniRef90_Q8A4A1\n+GO:0046373\tUniRef90_A0A076IQ71\n+GO:0046417\tUniRef90_A0A076J5J6\n+GO:0046538\tUniRef90_D4V4U7\n+GO:0046556\tUniRef90_A0A076IQ71\n+GO:0046872\tUniRef90_A0A076J7U9\tUniRef90_A0A0M1VYU8\tUniRef90_A0A0P0M2Y9\tUniRef90_A6L0V6\tUniRef90_A6L3F8\tUniRef90_A6L7J9\tUniRef90_C3R800\tUniRef90_D4V7H1\tUniRef90_E6SQG4\tUniRef90_I8WIS4\tUniRef90_Q6XMI3\tUniRef90_Q9SPV4\tUniRef90_Q9VTG7\tUniRef90_U6RCW1\tUniRef90_V5V2L3\tUniRef90_W8YTG4\tUniRef90_X6L320\tUniRef90_Y0KIL9\tUniRef90_Z4WXV3\tUniRef90_Z4WZ87\tUniRef90_Z9JUT2\tUniRef90_Z9JWA2\n+GO:0046873\tUniRef90_D4VAV8\n+GO:0046933\tUniRef90_A6L4L7\tUniRef90_A6L4M2\n+GO:0046983\tUniRef90_A0A076J147\n+GO:0046987\n+GO:0046988\n+GO:0046989\n+GO:0047419\tUniRef90_C3R800\n+GO:0047480\tUniRef90_A0A0P0M668\n+GO:0047889\n+GO:0048038\tUniRef90_A6L168\n+GO:0048307\n+GO:0050066\tUniRef90_A0A0N7J7F4\n+GO:0050511\tUniRef90_A6L071\n+GO:0050551\n+GO:0050650\tUniRef90_Q9VTG7\tUniRef90_U3KI10\n+GO:0050660\tUniRef90_A0A076IHX2\tUniRef90_R7J8P5\tUniRef90_Z9JRB3\tUniRef90_Z9JWW5\n+GO:0051082\tUniRef90_A0A076J147\tUniRef90_A0A076J532\n+GO:0051287\tUniRef90_A0A076IUR5\n+GO:0051301\tUniRef90_A0A076JAA0\tUniRef90_A0A0P0M668\tUniRef90_A0A1Y4PDH7\tUniRef90_A6L071\n+GO:0051536\tUniRef90_E6SQG4\n+GO:0051537\tUniRef90_A0A076INU9\tUniRef90_Y0KEF3\n+GO:0051539\tUniRef90_A0A076J7U9\tUniRef90_A0A0K2HDD4\tUniRef90_R6CPE5\tUniRef90_R9HLL9\tUniRef90_V5V2L3\tUniRef90_W8YTG4\tUniRef90_Y0KIL9\n+GO:0051607\tUniRef90_E6SQG4\n+GO:0051707\tUniRef90_Q6XMI3\n+GO:0051920\tUniRef90_B0NSG7\n+GO:0051991\tUniRef90_A6L071\n+GO:0052624\tUniRef90_Q6XMI3\n+GO:0052695\tUniRef90_Q9VTG7\n+GO:0055085\tUniRef90_A0A076ISY7\tUniRef90_A0A0M1W1W1\tUniRef90_
U6REJ0\n+GO:0055114\tUniRef90_D4VEG6\n+GO:0061593\n+GO:0061594\n+GO:0061720\n+GO:0070009\tUniRef90_C6Z4I1\n+GO:0070814\tUniRef90_A6KXG9\n+GO:0071555\tUniRef90_A0A0P0M668\tUniRef90_A6L071\tUniRef90_C3RCP9\n+GO:0080027\tUniRef90_Q9ZUH4\n+GO:0080150\n+GO:0102387\n+GO:0102391\n+GO:0102701\n+GO:0102710\tUniRef90_A0A380ZS86\n+GO:0102720\n+GO:0102955\tUniRef90_A6L3D5\n+GO:0106029\tUniRef90_A0A076IRA9\tUniRef90_A6KZJ9\n+GO:1902604\tUniRef90_A0A0K2HLX7\n+GO:1902777\n+GO:1990077\tUniRef90_U6RK63\n+GO:1990397\tUniRef90_R7NWM3\n'
b
diff -r 000000000000 -r 5240d62d864d test-data/test-db/utility_mapping/map_uniref90_name.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-db/utility_mapping/map_uniref90_name.txt Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+UniRef90_A0A0K2HIA2 Acetyltransferase
+UniRef90_Q8GT21 Benzyl alcohol O-benzoyltransferase
+UniRef90_Q9ZUH4 Tricyclene synthase, chloroplastic
+UniRef90_U5FT06 Hexosyltransferase
+UniRef90_W7DVV5 Glutamate dehydrogenase
+UniRef90_W8YTG4 Ferredoxin--nitrite reductase
+UniRef90_Z9JRB3 Succinate dehydrogenase flavoprotein subunit
+UniRef90_Z9JUT2 Ferrochelatase
b
diff -r 000000000000 -r 5240d62d864d tool-data/humann_nucleotide_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann_nucleotide_database.loc.sample Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+#02_16_2014 ChocoPhlAn chocophlan /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d tool-data/humann_protein_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann_protein_database.loc.sample Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+#02_16_2014 EC-filtered UniRef90 uniref90_ec_filtered_diamond /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d tool-data/humann_utility_mapping.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann_utility_mapping.loc.sample Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,9 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+# Tab separated with 4 columns:
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+#utility_mapping-full-20210421 Full mapping full /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d tool-data/metaphlan_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan_database.loc.sample Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+# - db-build-version-date
+# - db-name
+# - build
+# - /path/to/data 
+#02_16_2014  MetaPhlAn2 clade-specific marker genes db_v20 /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,18 @@
+<tables>
+    <table name="metaphlan_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/metaphlan_database.loc" />
+    </table>
+    <table name="humann_nucleotide_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann_nucleotide_database.loc" />
+    </table>
+    <table name="humann_protein_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann_protein_database.loc" />
+    </table>
+    <table name="humann_utility_mapping" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann_utility_mapping.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 5240d62d864d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed May 12 09:00:09 2021 +0000
b
@@ -0,0 +1,18 @@
+<tables>
+    <table name="metaphlan_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="${__HERE__}/test-data/metaphlan_database.loc" />
+    </table>
+    <table name="humann_nucleotide_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="${__HERE__}/test-data/humann_nucleotide_database.loc" />
+    </table>
+    <table name="humann_protein_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="${__HERE__}/test-data/humann_protein_database.loc" />
+    </table>
+    <table name="humann_utility_mapping" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="${__HERE__}/test-data/humann_utility_mapping.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 5240d62d864d transform_json_to_pkl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py Wed May 12 09:00:09 2021 +0000
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import bz2
+import json
+
+try:
+    import cPickle as pickle  # Python 2: C-accelerated pickle module
+except ImportError:
+    import pickle  # Python 3: cPickle no longer exists, pickle replaces it
+
+
+def transform_json_to_pkl(args):
+    """Convert a JSON metadata file into a bz2-compressed pickle.
+
+    The JSON document must have a top-level "markers" mapping in which
+    every entry carries an "ext" list. JSON has no set type, so each
+    "ext" list is converted to a ``set`` before the whole structure is
+    pickled.
+
+    Args:
+        args: object exposing ``json_input`` (path of the JSON file to
+            read) and ``pkl_output`` (path of the bz2 pickle to write),
+            e.g. the namespace returned by ``argparse``.
+
+    Raises:
+        KeyError: if "markers" or a marker's "ext" entry is missing.
+        ValueError: if the input is not valid JSON.
+    """
+    with open(args.json_input, 'r') as json_file:
+        metadata = json.load(json_file)
+
+    # Replace every "ext" list by a set, in place.
+    for marker_info in metadata["markers"].values():
+        marker_info["ext"] = set(marker_info["ext"])
+
+    # Use a context manager so the compressed stream is always flushed and
+    # closed, even when pickling fails part-way through.
+    with bz2.BZ2File(args.pkl_output, 'w') as pkl_output:
+        pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: both file paths are mandatory options.
+    cli = argparse.ArgumentParser()
+    for option in ('--json_input', '--pkl_output'):
+        cli.add_argument(option, required=True)
+
+    transform_json_to_pkl(cli.parse_args())