Repository 'humann2'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/humann2

Changeset 0:1ab06263e083 (2017-03-13)
Next changeset 1:1d6d855c10d8 (2017-08-01)
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit b46aa969c01b7e5f4f133192899fa4da286ecf89-dirty
added:
README.rst
generate_test_data
humann2.xml
humann2_macros.xml
repository_dependencies.xml
test-data/cpm_community_renormalized_pathway_abundance.tsv
test-data/demo_Abundance.tsv
test-data/demo_Coverage.tsv
test-data/demo_genefamilies.tsv
test-data/demo_pathabundance.tsv
test-data/demo_pathcoverage.tsv
test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
test-data/input_sequences.fasta
test-data/joined_pathway_coverage_abundance.tsv
test-data/marker_metadata.json
test-data/marker_sequences.fasta
test-data/max_reduced_gene_family_abundance.tsv
test-data/reduced_uniref50.fasta
test-data/regrouped_gene_families_to_infogo1000.tsv
test-data/regrouped_gene_families_to_ko.tsv
test-data/regrouped_gene_families_to_rxn.tsv
test-data/relab_levelwise_renormalized_pathway_abundance.tsv
test-data/taxonomic_profile.tabular
tool-data/humann2_nucleotide_database.loc.sample
tool-data/humann2_protein_database.loc.sample
tool-data/metaphlan2_database.loc.sample
tool_data_table_conf.xml.sample
transform_json_to_pkl.py
b
diff -r 000000000000 -r 1ab06263e083 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,12 @@
+Galaxy wrappers for HUMAnN2
+===========================
+
+Galaxy should be able to automatically install the dependencies, i.e. the
+HUMAnN2 binaries and its dependencies.
+
+After installation, you must tell Galaxy about the default databases for MetaPhlAn2 and HUMAnN2.
+To do so, use the data managers to install:
+
+- MetaPhlAn2 database
+- HUMAnN2 nucleotide database
+- HUMAnN2 protein database
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 generate_test_data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_test_data Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,124 @@
+#/usr/bin/env bash
+
+# humann2
+humann2 \
+    --input 'test-data/input_sequences.fasta' \
+    -o 'humann2_output' \
+    --annotation-gene-index 8 \
+    --taxonomic-profile 'test-data/taxonomic_profile.tabular' \
+    --metaphlan-options="-t rel_ab" \
+    --nucleotide-database 'test-data/' \
+    --protein-database 'test-data/' \
+    --evalue '1' \
+    --search-mode 'uniref50' \
+    --prescreen-threshold '0.01' \
+    --identity-threshold '50' \
+    --translated-subject-coverage-threshold '50' \
+    --translated-query-coverage-threshold '50' \
+    --translated-alignment 'diamond' \
+    --xipe 'off' \
+    --minpath 'on' \
+    --pick-frames 'on' \
+    --gap-fill 'off' \
+    --output-format 'tsv' \
+    --output-max-decimals '10' \
+    --output-basename 'humann2' \
+    --pathways 'metacyc'
+
+humann2 \
+    --input 'test-data/input_sequences.fasta' \
+    -o 'humann2_output' \
+    --annotation-gene-index 8 \
+    --metaphlan-options="-t rel_ab" \
+    --nucleotide-database 'test-data/' \
+    --protein-database 'test-data/' \
+    --evalue '1' \
+    --search-mode 'uniref90' \
+    --prescreen-threshold '0.01' \
+    --identity-threshold '50' \
+    --translated-subject-coverage-threshold '50' \
+    --translated-query-coverage-threshold '50' \
+    --translated-alignment 'rapsearch' \
+    --xipe 'off' \
+    --minpath 'on' \
+    --pick-frames 'on' \
+    --gap-fill 'off' \
+    --output-format 'tsv' \
+    --output-max-decimals '10' \
+    --output-basename 'humann2' \
+    --pathways 'unipathway'
+
+# humann2_regroup_table
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_infogo1000.tsv' \
+    --function 'sum' \
+    --groups 'uniref90_infogo1000' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_ko.tsv' \
+    --function 'mean' \
+    --groups 'uniref90_ko' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_rxn.tsv' \
+    --function 'sum' \
+    --groups 'uniref90_rxn' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+# humann2_renorm_table
+humann2_renorm_table \
+    --input 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/cpm_community_renormalized_pathway_abundance.tsv' \
+    --units 'cpm' \
+    --mode 'community' \
+    --special 'n'
+
+humann2_renorm_table \
+    --input 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/relab_levelwise_renormalized_pathway_abundance.tsv' \
+    --units 'relab' \
+    --mode 'levelwise' \
+    --special 'y'
+
+# humann2_join_tables
+mkdir join_table_tmp_dir
+cp 'test-data/demo_pathabundance.tsv' join_table_tmp_dir
+cp 'test-data/demo_pathcoverage.tsv' join_table_tmp_dir
+humann2_join_tables \
+    -i 'join_table_tmp_dir' \
+    -o 'test-data/joined_pathway_coverage_abundance.tsv'
+
+# humann2_merge_abundance_tables
+humann2_merge_abundance_tables \
+    --input-genes 'test-data/demo_genefamilies.tsv' \
+    --input-pathways 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/merged_gene_families_pathways_abundances.tsv' 
+
+# humann2_reduce_table
+humann2_reduce_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/max_reduced_gene_family_abundance.tsv' \
+    --function 'max' \
+    --sort-by 'name'
+
+# humann2_rename_table
+humann2_rename_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/renamed_genefamilies.tsv' \
+    --names 'metacyc-pwy'
+
+# humann2_split_table
+mkdir split_table_tmp_dir
+humann2_split_table \
+    --input 'test-data/joined_pathway_coverage_abundance.tsv' \
+    -o 'split_table_tmp_dir'
+cp 'split_table_tmp_dir/demo_Abundance.tsv' 'test-data/'
+cp 'split_table_tmp_dir/demo_Coverage.tsv' 'test-data/'
b
diff -r 000000000000 -r 1ab06263e083 humann2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2.xml Mon Mar 13 12:39:25 2017 -0400
[
b'@@ -0,0 +1,383 @@\n+<tool id="humann2" name="HUMAnN2" version="@WRAPPER_VERSION@.0">\n+    <description>to profile presence/absence and abundance of microbial pathways and gene families</description>\n+    <macros>\n+        <import>humann2_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements">        \n+        <requirement type="package" version="2.3.0">bowtie2</requirement>\n+        <requirement type="package" version="2.6.0">metaphlan2</requirement>\n+        <requirement type="package" version="0.8.24">diamond</requirement>\n+        <requirement type="package" version="2.24">rapsearch</requirement>\n+        <requirement type="package" version="2.7.10">python</requirement>\n+    </expand>\n+    <expand macro="version"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+        #if $nucleotide_db.nucleotide_db_selector == "history"\n+            mkdir nucleotide_db\n+            &&\n+            #for $file in $nucleotide_db.nucleotide_database:\n+                cp \'$file\' \'nucleotide_db/$file.name\' &&\n+            #end for\n+        #end if\n+\n+        #if $protein_db.protein_db_selector == "history"\n+            mkdir protein_db\n+            &&\n+            #if $translated_alignment == "diamond"\n+                diamond makedb\n+                    --in \'$protein_db.protein_database\'\n+                    --db \'protein_db/protein_db\'\n+                    --threads "\\${GALAXY_SLOTS:-4}"\n+            #else\n+                prerapsearch \n+                    -d \'$protein_db.protein_database\'\n+                    -n \'protein_db/protein_db\'\n+            #end if\n+            &&\n+        #end if\n+\n+        #if $taxo_profile.taxonomic_profile_test == "false"\n+            #if $taxo_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"\n+                mkdir ref_db\n+                &&\n+                bowtie2-build \'$taxo_profile.metaphlan2_db_choice.bowtie2db\' \'ref_db/ref_db\'\n+                
&&\n+                python \'$__tool_directory__/transform_json_to_pkl.py\'\n+                    --json_input \'$taxo_profile.metaphlan2_db_choice.mpa_pkl\'\n+                    --pkl_output \'ref_db/metadata.pkl\'\n+                &&\n+            #end if\n+        #end if\n+\n+        humann2\n+            --input \'$input\'\n+            -o \'output\'\n+            $bypass.bypass_prescreen\n+            $bypass.bypass_nucleotide_index\n+            $bypass.bypass_translated_search\n+            $bypass.bypass_nucleotide_search\n+            \n+            #set $metaphlan_option = "-t rel_ab"\n+            #if $taxo_profile.taxonomic_profile_test == "true":\n+                --taxonomic-profile \'$taxo_profile.taxonomic_profile\'\n+            #else\n+                #if $taxo_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"\n+                    #set $metaphlan_option += " --bowtie2db " + \'ref_db/ref_db\'\n+                    #set $metaphlan_option += " --mpa_pkl " + \'ref_db/metadata.pkl\'\n+                #else\n+                    #set $path = $taxo_profile.metaphlan2_db_choice.cached_db.fields.path\n+                    #set $value = $taxo_profile.metaphlan2_db_choice.cached_db.fields.value\n+                    #set $metaphlan_option += " --bowtie2db " + $path + "/" + $value\n+                    #set $metaphlan_option += " --mpa_pkl " + $path + "/" + $value + ".pkl"\n+                #end if\n+            #end if\n+            --metaphlan-options="$metaphlan_option"\n+\n+            #if $nucleotide_db.nucleotide_db_selector == "cached"\n+                --nucleotide-database \'$nucleotide_db.nucleotide_database.fields.path\'\n+            #else\n+                --nucleotide-database nucleotide_db\n+            #end if\n+\n+            --translated-alignment \'$translated_alignment\'\n+            #if $protein_db.protein_db_selector == "cached"\n+                --protein-database \'$protein_db.protein_database.fields.path\'\n+      
      #else\n+                --protein-database protein_db\n+            #end if\n+            --search-mode \'$search_mode\'\n+      '..b'</param>\n+            <param name="annotation_gene_index" value="8" />\n+            <param name="protein_db_selector" value="history"/>\n+            <param name="protein_database" value="reduced_uniref50.fasta"/>\n+            <param name="evalue" value="1"/>\n+            <param name="search_mode" value="uniref90"/>\n+            <param name="prescreen_threshold" value="0.01"/>\n+            <param name="identity_threshold" value="50"/>\n+            <param name="translated_subject_coverage_threshold" value="50"/>\n+            <param name="translated_query_coverage_threshold" value="50"/>\n+            <param name="taxonomic_profile_test" value="false"/>\n+            <param name="metaphlan2_db_selector" value="history"/>\n+            <param name="bowtie2db" value="marker_sequences.fasta"/>\n+            <param name="mpa_pkl" value="marker_metadata.json"/>\n+            <param name="translated_alignment" value="rapsearch"/>\n+            <param name="xipe" value="off"/>\n+            <param name="minpath" value="on"/>\n+            <param name="pick_frames" value="on"/>\n+            <param name="gap_fill" value="off"/>\n+            <param name="output_format" value="tsv"/>\n+            <param name="output_max_dec" value="10"/>\n+            <param name="remove_statified_output" value=""/>\n+            <param name="remove_column_description_output" value=""/>\n+            <param name="pathways" value="unipathway"/>\n+            <output name="gene_families_tsv">\n+                <assert_contents>\n+                    <has_text text="UNMAPPED" />\n+                    <has_text text="# Gene Family" />\n+                </assert_contents>\n+            </output>\n+            <output name="pathcoverage_tsv">\n+                <assert_contents>\n+                    <has_text text="UNMAPPED" />\n+                    
<has_text text="UNINTEGRATED" />\n+                </assert_contents>\n+            </output>\n+            <output name="pathabundance_tsv">\n+                <assert_contents>\n+                    <has_text text="UNMAPPED" />\n+                    <has_text text="UNINTEGRATED" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+@HELP_HEADER@\n+\n+This tool corresponds to the main tool in HUMAnN2 pipeline.\n+\n+**Inputs**\n+\n+The input is a single file corresponding either to filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), alignment file (sam, bam or blastm8 format) or gene table file (tsv or biom format).\n+\n+A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. Otherwise, default MetaPhlAn2 or custom databases can be used for taxonomic profiling. For custom databases, a fasta file with marker gene sequences is required and also a json file containing metadata:\n+\n+::\n+\n+  {\n+    "taxonomy": {\n+            "taxonomy of genome1": genome1_length,\n+            "taxonomy of genome2": genome2_length,\n+            ...\n+        }\n+    "markers": {\n+            "marker1_name": {\n+                "clade": the clade that the marker belongs to,\n+                "ext": [list of external genomes where the marker appears],\n+                "len": length of the marker,\n+                "score": score of the marker,\n+                "taxon": the taxon of the marker\n+            }\n+            ...\n+        }\n+  }\n+\n+For functional profiling, HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). Custom databases can also be used after upload. 
Nucleotide databases have to be provided as a dataset.\n+\n+**Outputs**\n+\n+HUMAnN creates three output files:\n+\n+ - A file with gene families and their abundance\n+ - A file with pathways and their abundance\n+ - A file with pathways and their coverage\n+\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 1ab06263e083 humann2_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_macros.xml Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@WRAPPER_VERSION@">0.9.9</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@WRAPPER_VERSION@">humann2</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>humann2 --version</version_command>
+    </xml>
+    <token name="@HELP_HEADER@">
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data.
+
+Read more about the tool: http://huttenhower.sph.harvard.edu/humann2/manual.
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1002358</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 1ab06263e083 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<repositories description="This requires the HUMAnN2 data manager definition to install all required databases.">
+    <repository changeset_revision="9c4ad82be5bd" name="data_manager_metaphlan2_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="1316375a8cbb" name="data_manager_humann2_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r 1ab06263e083 test-data/cpm_community_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cpm_community_renormalized_pathway_abundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,24 @@
+# Pathway demo_Abundance
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 180020
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 180020
+PWY-6305: putrescine biosynthesis IV 161410
+PWY-6305: putrescine biosynthesis IV|unclassified 161410
+PWY490-3: nitrate reduction VI (assimilatory) 143637
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 143637
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 139077
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 117223
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 83200
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 62649.4
+PWY-6700: queuosine biosynthesis 72792.1
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 44461.5
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 18957.3
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 41366.2
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 30998.5
+VALSYN-PWY: L-valine biosynthesis 41366.2
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 30998.5
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 37515.4
+PWY-5097: L-lysine biosynthesis VI 34829.8
+PWY-2942: L-lysine biosynthesis III 33055.4
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 23325.7
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 31729.7
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 16294.6
b
diff -r 000000000000 -r 1ab06263e083 test-data/demo_Abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_Abundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774
+PWY-6305: putrescine biosynthesis IV 12.3568523173
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417
+PWY-2942: L-lysine biosynthesis III 2.5305778153
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173
+PWY-5097: L-lysine biosynthesis VI 2.6664127226
+UNINTEGRATED|unclassified 46.5907339353
+UNINTEGRATED 6378.3363291102
+VALSYN-PWY: L-valine biosynthesis 3.1668118122
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261
+PWY-6700: queuosine biosynthesis 5.5726434156
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863
+UNMAPPED 1548.1865983490
b
diff -r 000000000000 -r 1ab06263e083 test-data/demo_Coverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_Coverage.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Coverage
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.7609346153
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.2181895299
+PWY-6305: putrescine biosynthesis IV 0.9518375285
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.4389950257
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.8805252394
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1.0000000000
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.3826032161
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.5431316496
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.3040136356
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.3826190602
+PWY-2942: L-lysine biosynthesis III 0.2651956314
+PWY-6305: putrescine biosynthesis IV|unclassified 0.7234100116
+PWY-5097: L-lysine biosynthesis VI 0.2856160532
+UNINTEGRATED|unclassified 1.0000000000
+UNINTEGRATED 1.0000000000
+VALSYN-PWY: L-valine biosynthesis 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.8557878872
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 1.0000000000
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.9874950891
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.7183078347
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.3368670323
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.7290147297
+PWY-6700: queuosine biosynthesis 0.6918160989
+PWY490-3: nitrate reduction VI (assimilatory) 0.9568614047
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.9392767343
+UNMAPPED 1.0000000000
b
diff -r 000000000000 -r 1ab06263e083 test-data/demo_genefamilies.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_genefamilies.tsv Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,8420 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0000000000\n+UniRef90_unknown\t1635.4482975639\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_dorei\t883.2117957343\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus\t752.2365018295\n+UniRef90_R6HHA8\t333.3333333333\n+UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei\t333.3333333333\n+UniRef90_R7NYS9\t166.6666666667\n+UniRef90_R7NYS9|g__Bacteroides.s__Bacteroides_vulgatus\t166.6666666667\n+UniRef90_D1K9F5\t66.6666666667\n+UniRef90_D1K9F5|g__Bacteroides.s__Bacteroides_dorei\t66.6666666667\n+UniRef90_G1UMF5\t66.6666666667\n+UniRef90_G1UMF5|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_R7NVD8\t56.3492063492\n+UniRef90_R7NVD8|g__Bacteroides.s__Bacteroides_vulgatus\t56.3492063492\n+UniRef90_R7P3A5\t55.5555555556\n+UniRef90_R7P3A5|g__Bacteroides.s__Bacteroides_dorei\t55.5555555556\n+UniRef90_D1K046\t47.6190476190\n+UniRef90_D1K046|g__Bacteroides.s__Bacteroides_dorei\t47.6190476190\n+UniRef90_D4VBJ0\t47.6190476190\n+UniRef90_D4VBJ0|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_R6HVY6\t47.6190476190\n+UniRef90_R6HVY6|g__Bacteroides.s__Bacteroides_dorei\t47.6190476190\n+UniRef90_R6I0Z3\t47.6190476190\n+UniRef90_R6I0Z3|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_C3R7K2\t45.7650273224\n+UniRef90_C3R7K2|g__Bacteroides.s__Bacteroides_dorei\t45.7650273224\n+UniRef90_B6VX47\t41.6666666667\n+UniRef90_B6VX47|g__Bacteroides.s__Bacteroides_dorei\t41.6666666667\n+UniRef90_R9HIC6\t41.6666666667\n+UniRef90_R9HIC6|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_A7AE97\t37.0370370370\n+UniRef90_A7AE97|g__Bacteroides.s__Bacteroides_dorei\t37.0370370370\n+UniRef90_I9GA88\t35.3063343718\n+UniRef90_I9GA88|g__Bacteroides.s__Bacteroides_vulgatus\t19.7300103842\n+UniRef90_I9GA88|g__Bacteroides.s__Bacteroides_dorei\t15.5763239875\n+UniRef90_A6KZ97\t33.3333333333\n+UniRef90_A6KZ97|g__Bacteroides.s__Bacteroides_dorei\t33.3333333333\n+UniRe
f90_B6VTL0\t33.3333333333\n+UniRef90_B6VTL0|g__Bacteroides.s__Bacteroides_dorei\t33.3333333333\n+UniRef90_B6VXB0\t30.3030303030\n+UniRef90_B6VXB0|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_B6VZR2\t30.3030303030\n+UniRef90_B6VZR2|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_C3Q4T2\t30.3030303030\n+UniRef90_C3Q4T2|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_D1K8B6\t30.3030303030\n+UniRef90_D1K8B6|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_R6HMF6\t30.3030303030\n+UniRef90_R6HMF6|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_B6VVE6\t28.9855072464\n+UniRef90_B6VVE6|g__Bacteroides.s__Bacteroides_dorei\t28.9855072464\n+UniRef90_D1JZ48\t27.7777777778\n+UniRef90_D1JZ48|g__Bacteroides.s__Bacteroides_dorei\t27.7777777778\n+UniRef90_D4VC99\t27.7777777778\n+UniRef90_D4VC99|g__Bacteroides.s__Bacteroides_vulgatus\t27.7777777778\n+UniRef90_G1ULF8\t27.7777777778\n+UniRef90_G1ULF8|g__Bacteroides.s__Bacteroides_dorei\t27.7777777778\n+UniRef90_A6KXM7\t26.1437908497\n+UniRef90_A6KXM7|g__Bacteroides.s__Bacteroides_vulgatus\t26.1437908497\n+UniRef90_B6W1W2\t25.6410256410\n+UniRef90_B6W1W2|g__Bacteroides.s__Bacteroides_dorei\t25.6410256410\n+UniRef90_A6KXY0\t23.8095238095\n+UniRef90_A6KXY0|g__Bacteroides.s__Bacteroides_vulgatus\t23.8095238095\n+UniRef90_D4V534\t23.8095238095\n+UniRef90_D4V534|g__Bacteroides.s__Bacteroides_vulgatus\t23.8095238095\n+UniRef90_I0PXX6\t23.8095238095\n+UniRef90_I0PXX6|g__Bacteroides.s__Bacteroides_dorei\t23.8095238095\n+UniRef90_D1JXM3\t22.9885057471\n+UniRef90_D1JXM3|g__Bacteroides.s__Bacteroides_dorei\t22.9885057471\n+UniRef90_R6HVW3\t22.7272727273\n+UniRef90_R6HVW3|g__Bacteroides.s__Bacteroides_dorei\t22.7272727273\n+UniRef90_D1JXH6\t22.2222222222\n+UniRef90_D1JXH6|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_D1JXW7\t22.2222222222\n+UniRef90_D1JXW7|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_I8VUS4\t22.2222222222\n+UniRef90_I8VUS4|g__Bac
teroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_R6HFI1\t22.2222222222\n+UniRef90_R6HFI1|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_R6HV'..b'6872852234\n+UniRef90_A6KZC6|g__Bacteroides.s__Bacteroides_dorei\t0.6872852234\n+UniRef90_R6HWZ1\t0.6788866259\n+UniRef90_R6HWZ1|g__Bacteroides.s__Bacteroides_vulgatus\t0.6788866259\n+UniRef90_R9H303\t0.6788866259\n+UniRef90_R9H303|g__Bacteroides.s__Bacteroides_dorei\t0.6788866259\n+UniRef90_C3RF19\t0.6740815639\n+UniRef90_C3RF19|g__Bacteroides.s__Bacteroides_vulgatus\t0.6740815639\n+UniRef90_R9H4S0\t0.6734006734\n+UniRef90_R9H4S0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6734006734\n+UniRef90_C3RFH8\t0.6706908115\n+UniRef90_C3RFH8|g__Bacteroides.s__Bacteroides_dorei\t0.6706908115\n+UniRef90_R7NVK1\t0.6680026720\n+UniRef90_R7NVK1|g__Bacteroides.s__Bacteroides_dorei\t0.6680026720\n+UniRef90_C3RCI2\t0.6626905235\n+UniRef90_C3RCI2|g__Bacteroides.s__Bacteroides_dorei\t0.6626905235\n+UniRef90_A6KZV6\t0.6613756614\n+UniRef90_A6KZV6|g__Bacteroides.s__Bacteroides_vulgatus\t0.6613756614\n+UniRef90_R6HUT5\t0.6600660066\n+UniRef90_R6HUT5|g__Bacteroides.s__Bacteroides_vulgatus\t0.6600660066\n+UniRef90_R6HZ28\t0.6600660066\n+UniRef90_R6HZ28|g__Bacteroides.s__Bacteroides_dorei\t0.6600660066\n+UniRef90_R6HSW5\t0.6472491909\n+UniRef90_R6HSW5|g__Bacteroides.s__Bacteroides_dorei\t0.6472491909\n+UniRef90_R6HHG0\t0.6447453256\n+UniRef90_R6HHG0|g__Bacteroides.s__Bacteroides_dorei\t0.6447453256\n+UniRef90_I9IJN0\t0.6422607579\n+UniRef90_I9IJN0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6422607579\n+UniRef90_A6L1H2\t0.6410256410\n+UniRef90_A6L1H2|g__Bacteroides.s__Bacteroides_vulgatus\t0.6410256410\n+UniRef90_R6HHZ5\t0.6410256410\n+UniRef90_R6HHZ5|g__Bacteroides.s__Bacteroides_dorei\t0.6410256410\n+UniRef90_R7P064\t0.6283380459\n+UniRef90_R7P064|g__Bacteroides.s__Bacteroides_vulgatus\t0.6283380459\n+UniRef90_R6HY39\t0.6277463905\n+UniRef90_R6HY39|g__Bacteroides.s__Bacteroides_dorei\t0.6277463905\n+UniRef90_R9H3B8\t0.626566416
0\n+UniRef90_R9H3B8|g__Bacteroides.s__Bacteroides_vulgatus\t0.6265664160\n+UniRef90_D4VAA3\t0.6178560395\n+UniRef90_D4VAA3|g__Bacteroides.s__Bacteroides_vulgatus\t0.6178560395\n+UniRef90_I8VJA2\t0.6138735421\n+UniRef90_I8VJA2|g__Bacteroides.s__Bacteroides_dorei\t0.6138735421\n+UniRef90_R7P010\t0.5957700328\n+UniRef90_R7P010|g__Bacteroides.s__Bacteroides_dorei\t0.5957700328\n+UniRef90_R7NZU3\t0.5889281508\n+UniRef90_R7NZU3|g__Bacteroides.s__Bacteroides_vulgatus\t0.5889281508\n+UniRef90_R6HLC4\t0.5712653528\n+UniRef90_R6HLC4|g__Bacteroides.s__Bacteroides_dorei\t0.5712653528\n+UniRef90_C3RCJ1\t0.5688282139\n+UniRef90_C3RCJ1|g__Bacteroides.s__Bacteroides_dorei\t0.5688282139\n+UniRef90_C3REY0\t0.5621135469\n+UniRef90_C3REY0|g__Bacteroides.s__Bacteroides_dorei\t0.5621135469\n+UniRef90_R9HNV3\t0.5574136009\n+UniRef90_R9HNV3|g__Bacteroides.s__Bacteroides_vulgatus\t0.5574136009\n+UniRef90_R9ILG9\t0.5420054201\n+UniRef90_R9ILG9|g__Bacteroides.s__Bacteroides_vulgatus\t0.5420054201\n+UniRef90_A6KY14\t0.5216484090\n+UniRef90_A6KY14|g__Bacteroides.s__Bacteroides_dorei\t0.5216484090\n+UniRef90_I8WHD1\t0.5167958656\n+UniRef90_I8WHD1|g__Bacteroides.s__Bacteroides_dorei\t0.5167958656\n+UniRef90_A6KZH7\t0.5148005148\n+UniRef90_A6KZH7|g__Bacteroides.s__Bacteroides_dorei\t0.5148005148\n+UniRef90_E5UXW6\t0.5120327701\n+UniRef90_E5UXW6|g__Bacteroides.s__Bacteroides_vulgatus\t0.5120327701\n+UniRef90_A6KXQ8\t0.5096839959\n+UniRef90_A6KXQ8|g__Bacteroides.s__Bacteroides_vulgatus\t0.5096839959\n+UniRef90_A6L536\t0.4642525534\n+UniRef90_A6L536|g__Bacteroides.s__Bacteroides_vulgatus\t0.4642525534\n+UniRef90_J9F9P5\t0.4547521601\n+UniRef90_J9F9P5|g__Bacteroides.s__Bacteroides_dorei\t0.4547521601\n+UniRef90_R7P1T3\t0.4438526409\n+UniRef90_R7P1T3|g__Bacteroides.s__Bacteroides_vulgatus\t0.4438526409\n+UniRef90_R6IJZ8\t0.4251700680\n+UniRef90_R6IJZ8|g__Bacteroides.s__Bacteroides_vulgatus\t0.4251700680\n+UniRef90_C3RF26\t0.4145936982\n+UniRef90_C3RF26|g__Bacteroides.s__Bacteroides_dorei\t0.4145936982\
n+UniRef90_A6L3C1\t0.3766478343\n+UniRef90_A6L3C1|g__Bacteroides.s__Bacteroides_dorei\t0.3766478343\n+UniRef90_C6Z3L0\t0.3144654088\n+UniRef90_C6Z3L0|g__Bacteroides.s__Bacteroides_dorei\t0.3144654088\n+UniRef90_I9RGY5\t0.2213368747\n+UniRef90_I9RGY5|g__Bacteroides.s__Bacteroides_dorei\t0.2213368747\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/demo_pathabundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathabundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+UNMAPPED 1548.1865983490
+UNINTEGRATED 6378.3363291102
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460
+UNINTEGRATED|unclassified 46.5907339353
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238
+PWY-6305: putrescine biosynthesis IV 12.3568523173
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695
+PWY-6700: queuosine biosynthesis 5.5726434156
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+VALSYN-PWY: L-valine biosynthesis 3.1668118122
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961
+PWY-5097: L-lysine biosynthesis VI 2.6664127226
+PWY-2942: L-lysine biosynthesis III 2.5305778153
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 test-data/demo_pathcoverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathcoverage.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Coverage
+UNMAPPED 1.0000000000
+UNINTEGRATED 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 1.0000000000
+UNINTEGRATED|unclassified 1.0000000000
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.9874950891
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.8805252394
+PWY-6305: putrescine biosynthesis IV 0.9518375285
+PWY-6305: putrescine biosynthesis IV|unclassified 0.7234100116
+PWY490-3: nitrate reduction VI (assimilatory) 0.9568614047
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.7290147297
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.9392767343
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.5431316496
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.7609346153
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.8557878872
+PWY-6700: queuosine biosynthesis 0.6918160989
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.7183078347
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.3826190602
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.3826032161
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+VALSYN-PWY: L-valine biosynthesis 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.3368670323
+PWY-5097: L-lysine biosynthesis VI 0.2856160532
+PWY-2942: L-lysine biosynthesis III 0.2651956314
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.4389950257
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.2181895299
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.3040136356
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r 1ab06263e083 test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r 1ab06263e083 test-data/input_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequences.fasta Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,155 @@\n+>r2|637000026.fna|5753889|5754040|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln383_#0/1\n+CTCCGTCCGCTGGTAGAACGTCTGGTAGAAGCTCAAAAAGCATTGGCAACCAAATACCTTTCCGAAGCCAAACGACTGATTGCCTCCAACGACAAGAAGGAAGTGGAAGAAGGATTCCTTGCCCTTTATCGTAGCCACAAGTGTCTTCCGA\n+>r3|637000026.fna|1749333|1749484|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln165_#0/1\n+AAGTAAAAGTCTACACACAGGAAGGAACGATAATGGAAAGTACGATTGAAGTAAATCGTCCGATGGAGATAGCAGGATGGAAAATTTATCAGCTTAGTTATGATGAGTCGAAAGGTCGCTGGAGCGATATCAGTGTCTTTGAGCTGGTTCG\n+>r8|637000026.fna|2991814|2991965|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+AGAAAGTTTTGATCAAAGCATGGGATGCTGAAAACGGACCGGTAATTATCGACTCAGGAGAAAGTACCTACAACACAACCGCCAAAAAGTTCAGCCTCAAATATACGATCGGTAACACGCTATATGAAGAGCAACTGACCAAAGAAAAAGA\n+>r24|641736196.fna|214158|214309|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln388_#0/1\n+CAGATACGAGTACCAATTGGCCGTCAACGACTATTGGAAGGAAGTCGGCGGATTGCAGATGCTGCCCGGAACCAACCGCTCCAGCGACCGCTTTGTACGCGCGTCATTCTACATTCATGCCATTCCGCAGACAGCAGACGCGGCGATTGCA\n+>r32|637000026.fna|4140951|4141102|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln422_#0/1\n+TATTCTCAGGGGACTAATTGGTGATATAGTGCTCGATTATGATAGAGGTGAAGGAATTATGTCCTTTAAGACTCAAAAGGTAGGTTATATATCAGGATATTATTTGGGATGTACTACATATTGTTATGCCATGGGATTTTATCCTACATAT\n+>r55|637000026.fna|6212867|6213018|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln342_#0/1\n+CAGGCGTCGGGTGAGCTGGACTTTGAAGAGTTGTGCGAAGCGATCACCAGTCGTTCTACTTGTACGGAAACGGATGTACGTGCGGCTATTTCGGGTATTCTTTACGAAGTGAAGCGTGCGTTGAAGGCAGGAAGAATTGCGAGACTGGGTG\n+>r94|641736196.fna|124792|124943|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln396_#0/1\n+AATCAGTTTCATACGCTATCAATATCCGGGGTCAATAGTTTCCATTTGTTGCTCGCTTGACACGCTTTCAAAGCTAACATCTTCCGCATCAATCAACTGTTTGTTTGCGTAATTGTCGGTCAATATTTCACGCTGTGAAGTTTCGTCACCC\n+>r115|641736196.fna|175677|175828|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln360_#0/1\n+CTGTCAGATACGTGAAAGATTACATTCGATACGTGAGTATGTAATGAAATATCCTGAGTTTGACGATAGAAACGATATTGACCCGTCAATGCGAATGTTCTATATCCAGTCTGTTGAAGCACAAGCAAAAAATCGAAGTTATTCAGATGCA\n+>r121|641736196.fna|7934|8085|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln383_#0/1\n+TGAGCGGTTCGGTCTATACCCTTATGAATAAAATCTTCAGGGATTCAAAAGAACCGCTGTATGGCCGTGCTGACAATATCATGAAGTTAGCTCCTTTTACTACTTCCGTATTGAAAGAAATCATATCGGATCATAAGGCCGACTATACTAA\n+>r145|637000026.fna|1378791|1378942|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+TCCGTCTGATTCGTAGTGTACCAGCATATCCGCCATGACATCGCTCACTGTAGCGCTGAAATCTTTCAGAAGTTTATGGCTGCCCACCCAGTAAGTCTTGCCTTGATAAGACACCTTTATACCTTTGCCTGTGATGCTCTCAAAACTGCTC\n+>r158|637000026.fna|3242194|3242345|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln388_#0/1\n+GGACAGCAGTATCAAACGGTTAAAATACGTGAGATACGCAGATGACTTTCTGATTGGAATTACCGGTAATCTTGAAGACTGCAAAACAGTAAAAGAGGATATTAAGAATTATTTGAATGAAGCTCTTAAACTGGAACTGTCAGACGAAAAG\n+>r160|637000026.fna|3948359|3948510|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln347_#0/1\n+CGGAAGTGACTCATACATATCAGGTGGTTCTTCCAGTTATAAGAACCTGTCTGCAAGTGAAATGAAGAACACATCTTCATTCACAAACTGGAATTTCAGTACGGTTTGGGAAATGGGAAGTGAATACCCGACATTACAGGGCTTATTAAAA\n+>r187|637000026.fna|5220534|5220685|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+GTTACCTTGATCAATGGATTTGCATATTCCAGCAACCACGAGATAGAGGCGTGTCCTGCTTCATGCAAGGCAATCGAGCGTCTTTCCGCTTCTGTGGTAATCTTGGTCTTCTTTTCCAGACCACCGATGATACGGTCTACCGCATCCAGAA\n+>r192|637000026.fna|2858128|2858279|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACGTGGACGGTTACGCACTGCCGGAAAAAATCAGGAACGCATTCCTGGGATTGGAGGAAAAGGAGAAAACACTCATCAGTTACTTCACCCAGCACAATGAACAGTATGCAAAGAAGGTCGGCAAGACTGCCACGCAAAAGACCTATTCCCG\n+>r218|641736196.fna|272465|272616|_from_ 
|NZ_ABFZ02000022_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+ATTTCTTCGTCGGTTTCCGTATGGAAGAAGTACACCACGGCCAACTAGGTGAAGTGACGGATGTGGACACCTCTACCATCAACACTCTGTTCGTGGTAGATTATAAAGGAGAAGAATTGCTGATTCCGGCACAGGAAGATTTCATTATGGA\n+>r222|637000026.fna|2630231|2630382|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c'..b'TTCTCTTGAAAAGAAAGCAATGGAATGGGTAGCTTCACTCAATCTGGATGATGAAAAGAAATCAGGTTTTGCGGTGACGACTATTTATAATCATCTTCGCCAGGTTCGAGACTGG\n+>r356|637000026.fna|394123|394274|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+AGTTCGCCACCGATATAGATTTCTGTCTCATTGTATTTATCCTGCTTTAGCGGGTCTAGGCTCATTAATGAATATTTGCTCAGCTTATGTGAGGCAAAGGCTGTCAACCCGGCTTTTGCATATTTGTTGAAGCCTTCGAGCAAAGCAATAC\n+>r358|637000026.fna|4503085|4503236|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln341_#0/1\n+GACCATTATCGCCTCGCTTTCGACCGCAATAATACCTGGAGTCAAAAGTATAATATTATCTGGGATAAGATGTGGAATTTGAATATATTCCCTAATAATGTAATCAGTAAAGAAATCAACTACTATCTGACAAAGCAGAATCTATATGGAT\n+>r364|641736196.fna|256980|257131|_from_ |NZ_ABFZ02000019_Bacteroides_stercoris_ATCC_43183,|_ln319_#0/1\n+CCACCGTAGATGCCGGCGACGTGGTAGAGGTAGTGGGAACCAGGCAGGAAGTGGAAACAGCCGCCAGACGTTTGGGCTATATAGACCGTCCTACCAATCAGACTGATATGATTTTCGTAGGGCTCGGCATCCTTGTAGGCGGGTTGTTCGG\n+>r388|637000026.fna|1186959|1187110|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln349_#0/1\n+ACTGCTAAGTGCAGGATGTATCCTCTTTCTTTTTTGGAGTATCACTCGCCTGGTCAGATCTCTTCTGAAAGAAGAAAAAGAGAATCTGTCAGTAACCGACGTGATTATCATTCTCGGTTCCGGGTTGGTAGGAGCTTTGGCATATACTTTT\n+>r390|637000026.fna|2094824|2094975|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+GTTGGAAGCAGCGTCGAACAGACGGAAAGTATTGCTTCCGATTTCTTTCACCAGATTACCGAACATCAGCATACCGATCAAAGGTACTGCACTTGGGACGAAGAGAGCTACAATCGTAGTCACTACAATCGGGAAAATAATCTTCAGTACA\n+>r392|641736196.fna|89576|89727|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln410_#0/1\n+AACAGGCAGGCGCAATATCCCGAAAATGCACCGCCGCAGATACGCAGTCCGGCGGATAGTATCGTTGCCAACCATGTCATAATCGGGCGTCCGGGACTGTATGCCAAGGAGGAAACCCGGTCCAAAGGATTGAAAATATAACCTAAGGAAT\n+>r396|641736196.fna|49107|49258|_from_ |NZ_ABFZ02000013_Bacteroides_stercoris_ATCC_43183,|_ln380_#0/1\n+ATCCGGATGCGCAGTTTGCCGCCGTATCGCGTCTGCCCTACTACTCCACCATCATCAAAGAACGGCTGCCCAAACTACGCACCGTACAGTACGAATACAAACATGAAATATTCCGTGAACTGAATCCGGATGAAATATTGGACAAATACTT\n+>r401|637000026.fna|696947|697098|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+TCGTACGCAACCGGATAATTTCATCATCATCCTGCATCTGCTTCTCCAACGAACGGACAGACTGATCCTGCTGCGTCATTTCCAGTCTCGTATTAAAAAGAAACACATCCCGATTGCTGTCCAGTTGCTGCCGTTTGTTTTCAATCACCCG\n+>r439|637000026.fna|1571176|1571327|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+GCAGCTCATCAGTGATATCCTTGATTTATCAAAAATAGAAGCAGGAACATTGGACCTGCACTATTCCAATGTTGAAATCAATGATTTAATGAAAGACCTGGAAAACATGTGTCAACTGAAACTTAAGTCGGATGCTGTCAAACTGGAATTC\n+>r442|641736196.fna|41704|41855|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln332_#0/1\n+CACATCGCAGGAGAAATACATCATGGTACTGTCTTTCAATGAAGCGATAGCCATTTGCTTGATATCCTCTACGGGCAGGTTTACGTACGTCCAGTTCTTGCCGTCGTAGCGGTGGCGGTCGTAGTCAATCTCATAGCATTTGTAATACTCG\n+>r452|637000026.fna|5083361|5083512|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln393_#0/1\n+TAGTCCAGTCAATGCCCAGCAGTTGCAGGCGATGGAAGAAGATACTTCGCTCCAGATCGTTCGGCTTGCGGAGGTCTAATATCAGTTCTTTGATCTCGGCGGTGAACGGAACACGCAGACGTTTTTGTATTTTCTCCACGTCTACCAGCAG\n+>r453|637000026.fna|2491055|2491206|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACTCCCGCCATGCTCTCAGATATAGTGAATGAACTTAAAGAGGTGCTGGAACACCAGCCTGTAACAAAGGACAAGGAACAAAAGAAGGTCATCCGGGAAAAGAAGAAACAGGTCCGGGAACTTGAGGAGTACCGTGACAAGTTGATAGAAT\n+>r479|641736196.fna|531701|531852|_from_ 
|NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln380_#0/1\n+CCGTGAGCTATCGTCATTGGAACGCCAGTTAGGAAAATTGCAGCGCGACTTGAAGGACAAGAAGAAGAAATACGAGTCGTCTGTCCAGTACCTTTATAAGAACCGTTCCATTGAGGAGAAGCTGATGTTTATTTTCTCTGCACGGAGTCTG\n+>r495|637000026.fna|898444|898595|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln166_#0/1\n+GTAACTGGAGTATCTTACAAAAGACTGAAAAGACAATTTCTCTTACAATATTCCTGTTACTCTTTATCCTTGGAGTATCCATTGGCTCTAATAGCCTGATTGTGAATAATCTCGGTAAGTTTGGATGGCAGGCGATCATTCTTGCCGTATC\n+>r497|641736196.fna|48795|48946|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln354_#0/1\n+TTGCCATCCCGCTCCCGTGCAAGGACTCCACACCTTTAAGATTCTGTCGAGCCACGGCAAGTTCCGATTGCAAAGAACTCAATTCGACCGAACGTATATCTACCATCCTTTTACCTTTTTCTGCATAATCGCCAAGTGAAAAGTAGCTTTT\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/joined_pathway_coverage_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/joined_pathway_coverage_abundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance demo_Coverage
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516 0.7609346153
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695 0.8557878872
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863 0.9392767343
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174 0.5431316496
+PWY-2942: L-lysine biosynthesis III 2.5305778153 0.2651956314
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857 0.4389950257
+PWY-5097: L-lysine biosynthesis VI 2.6664127226 0.2856160532
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238 0.9874950891
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238 0.8805252394
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961 0.3368670323
+PWY-6305: putrescine biosynthesis IV 12.3568523173 0.9518375285
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173 0.7234100116
+PWY-6700: queuosine biosynthesis 5.5726434156 0.6918160989
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853 0.7183078347
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417 0.3826190602
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122 0.3826032161
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339 0.5877696572
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774 0.2181895299
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751 0.3040136356
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261 0.9568614047
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261 0.7290147297
+UNINTEGRATED 6378.3363291102 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399 1.0000000000
+UNINTEGRATED|unclassified 46.5907339353 1.0000000000
+UNMAPPED 1548.1865983490 1.0000000000
+VALSYN-PWY: L-valine biosynthesis 3.1668118122 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339 0.5877696572
b
diff -r 000000000000 -r 1ab06263e083 test-data/marker_metadata.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_metadata.json Mon Mar 13 12:39:25 2017 -0400
[
b'@@ -0,0 +1,1 @@\n+{"taxonomy": {"k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Micromonosporaceae|g__Salinispora|s__Salinispora_pacifica|t__GCF_000374725": 5438612, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Mononegavirales|f__Rhabdoviridae|g__Nucleorhabdovirus|s__Maize_mosaic_virus|t__PRJNA14920": 12133, "k__Archaea|p__Euryarchaeota|c__Halobacteria|o__Halobacteriales|f__Halobacteriaceae|g__Halorubrum|s__Halorubrum_sp_T3|t__GCF_000296615": 3168011, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae|g__Propionibacterium|s__Propionibacterium_acnes|t__GCF_000145075": 2614131, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000485655": 5022965, "k__Bacteria|p__Chlamydiae|c__Chlamydiia|o__Chlamydiales|f__Chlamydiaceae|g__Chlamydia|s__Chlamydia_trachomatis|t__GCF_000304515": 1042736, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Leuconostocaceae|g__Weissella|s__Weissella_koreensis|t__GCF_000277645": 1728940, "k__Bacteria|p__Spirochaetes|c__Spirochaetia|o__Spirochaetales|f__Leptospiraceae|g__Leptospira|s__Leptospira_interrogans|t__GCF_000244635": 4459519, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Buchnera|s__Buchnera_aphidicola|t__GCF_000007365": 641454, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_sp|t__GCF_000209835": 3545606, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_baumannii|t__GCF_000302035": 3973165, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_mitis_oralis_pneumoniae|t__GCF_000495335": 2144825, 
"k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rickettsiales|f__Rickettsiaceae|g__Rickettsia|s__Rickettsia_parkeri|t__GCF_000284195": 1300386, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000316745": 5198097, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000355055": 5070534, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Clostridiaceae|g__Candidatus_Arthromitus|s__Candidatus_Arthromitus_sp_SFB_2|t__GCF_000252685": 1135256, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Lactobacillus_phage_J_1|t__PRJNA227005": 40931, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Vibrionales|f__Vibrionaceae|g__Vibrio|s__Vibrio_cholerae|t__GCF_000305055": 3943387, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Bacillus_phage_Fah|t__PRJNA16382": 37974, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_johnsonii|t__GCF_000162055": 3690010, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Klebsiella|s__Klebsiella_pneumoniae|t__GCF_000409125": 5671251, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Viruses_noname|f__Viruses_noname|g__Viruses_noname|s__Geobacillus_virus_E2|t__PRJNA19797": 40863, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinobacteria_noname|f__Actinobacteria_noname|g__Actinobacteria_noname|s__actinobacterium_SCGC_AAA278_O22|t__GCF_000372185": 1138490, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Alteromonadales|f__Pseudoalteromonadaceae|g__Pseudoalteromonas|s__Pseudoalteromonas_sp_BSi20429|t__GCF_000238895": 4495777, 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_agalactiae|t__GCF_000427035": 2138694, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__P'..b'teria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974163|ref|NZ_KE340313.1|:332123-333478": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1356, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096987|ref|NZ_JH636042.1|:382588-383040": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 453, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974745|ref|NZ_KE340318.1|:c38550-38392": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 159, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c349752-348871": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 882, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974549|ref|NZ_KE340315.1|:c186066-185032": {"ext": ["GCF_000374365"], "score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 1035, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096980|ref|NZ_JH636035.1|:50682-52622": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1941, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, 
"gi|223955876|ref|NZ_DS499677.1|:c358511-356622": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1890, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|223955871|ref|NZ_DS499672.1|:29963-30130": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 168, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|514973000|ref|NZ_KE340309.1|:c227121-226093": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1029, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c266283-265069": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1215, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|298384780|ref|NZ_GG774704.1|:496496-497020": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 525, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955871|ref|NZ_DS499672.1|:193538-194158": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 621, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|298385318|ref|NZ_GG774705.1|:324177-324773": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 597, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955873|ref|NZ_DS499674.1|:c270435-267715": {"ext": ["GCF_000243215"], 
"score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 2721, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/marker_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_sequences.fasta Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,7704 @@\n+>gi|223955874|ref|NZ_DS499675.1|:82124-83086\n+ATGAAAAAATCGATACTGTTAATGGGCCTTGCAGCTCTCATAGCAAGCTGTTCGGACGAT\n+GACCGGAACAACGATACTGATAATCAGCATCTTGTCAGTCCCTCAATGGCTGTGAGCATC\n+AGCAACACCGACGGCCGCCAAAGCCCGTTTACCGGCATTCTTACAATTATGCCATGCGAT\n+GCAAACAGTTCCATATACTATGGAAATTATGTAAAAGGGAAACTGTCTCCTTTCTACGGC\n+TATTACCGGGTAAAGGACGGAAGTTTCCATGACAATTCCATCAATAGGGAGATTTCTCTG\n+CCGATAGGCACTTACAATATGATTTATTGGGGAACACCCCAATACCAAACTCCCATTTAT\n+GCCCACCCTGCCATAAAAGAGGCGGCTCACAGCATAGGTGCCGATATGTCCAAACAGTCT\n+CTCGGCTTGTTCAGAATATCAGCCGATACCATTTACTATCCCGTATTCGACCTGGTGTAC\n+GCCACACAGCCGGTAAACATCGGAAGCGAAAGCCTCAGTGGCTCTTTAAAACGAGTGGTA\n+GCCGGAATAAAGGTTGTCCTGAAGGACAAAGACAATGCCGTACTAAGTTCCAGTATAGAC\n+AGTGTGTCCGTACGCATCACCAACATTTACAGCGAACTCAACTATTATACGGGAAAACCG\n+CAAGGTACACCACGTACAATCGCGTTCCCTCTGATTCGTTCGAACGATGGTACTCAGATG\n+AGCAACAGCACCGTTATGCTGTTCCCGTCCGCCGGAAAACCGGAATTCCAACTAGCAATC\n+ATTCTTAAAAACGGTAATGTGAAAAGCTTCAGACAAGCATTGAGCAGCCCGCTCGATGCC\n+AATGCCAAACTGACATTAACGTTAAGTCTGGGTGATATTTTCTCTGAAGAATCCTCCGGT\n+GACTTTACCATCGATGACTGGAACGAGAAAAACGAGAATATTGATATACCGATAATAGAG\n+TAG\n+>gi|29345410|ref|NC_004663.1|:2392014-2392814\n+ATGAAAATTACAATTATAGGAGCCGGAAACATAGGGAGCGCAGTTGCTGCCTGTCTGGCA\n+AAAGGACATCTCTACAACGAAAAAGATATTATCATTTCTACCCCTCATACAGACAAGCTA\n+GAAAATCTGCACAAACAATTTCCTGCGATACGCATAATGACAGAGAATCAGTATGCCATA\n+TCGGAAGCCGACATTATTATTCTGGCTGTCAAACCATGTATAGTAGACGAAGTATTATCT\n+CCGTTACGATTCTCACGCACCCAAATCCTGGTTTCACTGGTAACCGGAATATCCATTTCT\n+CACCTGGCACATTTATCGGAAACCGAAATGCCTATATTCCGGGTTGTTCCAAACATCGCC\n+ATTACGGAACATTCGAGCCTGACCCTGATAACTTCACGCAAAGCCGGCAAAGAACACCAG\n+CAACTCATAAAACAGACATTTGAAGAAGGAGGAAAGTGTTTGTTCGCAGAAGAGAAACAG\n+CTGGATATCATTTCTGCATTAACTTCCAGTGGAATCGCTTTCGCATTAAAATATATTCAT\n+GCAGCCATGCAAGCCGGCATAGAATTGGGTATCTCCGCTGAAGATGCAATGCGAATGACG\n+GCATATAGCATGGAAGGAGCTACAGAACTGATTTTAAATCACGACACCCACCCGCTGTTA\n+GAAATAGAAAAAGCAGCTACTCCCGGTGGAGCCACCATTAAAGGATTGAATGAACTGGAG\n+CACAGGGGATTTACTTCCGCCGTTATTCATGCCATAAAAAGTAGCGCCACAGTATCGACC\n+GATAAAGAGACTGAAGAATAG\n+>gi|298385318|ref|NZ_GG77
4705.1|:275923-276495\n+ATGATAAATTCAATTAATATACAAATCAGAGAGACCAATACAGATGATTTCGACAGCATA\n+ATGACTGTTGAAAAACAAGCGTTTGGCTATGACAAAGAAGCACAGTTGGTAGCCGATCTG\n+CTGGCTGACAAAACTGCAAAACCAATGGTTTCGTTGCTCGCTTTCTACAAAGGTGAGGCT\n+GTCGGACACATTCTCTTCACCAGAGCCTATTTTGATGGTCAGGGAGCACAACAGATGATG\n+CACATTCTTGCACCTTTAGCTGTCAAGCCGGAATATCAACGGCAGGGTATAGGCGGAATG\n+CTGATACGGGCAGGTATAGAGAGGTTGCAGGAAAAAGGTTCGTGCCTTGTGTTTGTATTG\n+GGGCATAAAGAATATTATCCAAAATATGGTTTTATACCGGATGCAGCCAGGCTGGGTTAT\n+CCTGCTCCTTACCCGATACTGGAACAGTTCTCGGATTATTGGATGGTTCAGGCAATCAGT\n+CCGAAGGGATTTGATGTGGATAAAGGAAAGATCAGGTGTTCGGATGAGTTGAACAAACCT\n+GAACATTGGAGAGATGATGAATCGGACAGATAG\n+>gi|514973850|ref|NZ_KE340312.1|:c10221-7987\n+ATGAAACATTTGTCTGTTATTATATCTGTATTATTTTCAATACTTTGTATCGGATGCTCT\n+AAGGAGGATTTTAACGATGGAAAAACCAATGATGTTCAGAACAAGCATTCTCTCCGGCTT\n+TTGGTCTACACACCGACTTCAGAAACTGTTCTTAGTACGGACCTGCCGGGAAACATAGAG\n+GCTTATCTGTTCAAAGAGGGAGTTCTGTCCGACGTTTACAAAAATCTGACAGTGGATAAG\n+AACGGATACACTACAATCAGCTCGTTAGCCGAAGGAGAGCAAATCTATTTCTTTGTAAAC\n+ACCGGTAATCTGCTGGATGGGATTACACAGGAAATAGGCCGATTGAAAGAGAACGAACTG\n+CTTGCCACTACAATACTCTCTGCCTCTCCTCAAGCAGATGGAGAGAAACCGGTGATGACC\n+GGAAAGGCAGACTTAACCGGAAGCCAGGAGAGTACCACCCAAGTATTGCTTACACGGGCC\n+ATTGCCAGAGTAGACTTAAACATAGCTGACGATGCCGACATACAAATCAATAGGATAAGT\n+ATGGATAACATACATTGTGAAGCATTCCTGCTTCCGCAAAATCCGGTCAGTTCGCCGTCC\n+GGTGCAGCATTGGCAAAAATAGACACTACATTCAATACCCCGTTGAAGCCGGGTGAATAT\n+GCCGGATTGGTCCACCTGTATGAACAAGTGGGGGACGGTATTCCTGTGGAGCTGCATGGC\n+ACCCTTGAAGGCGATCCTGTCACACTTTCACTGGCACTGCCTAACACAATCCATAGAAAC\n+CACATTTATAAAATAAAGTTATTTAGCGGAGACAGTTCTAATTTGCAGGCAAGCATATCC\n+GTGGAAAATGAAAGTTGGGAAGTCGAAGAAACTATCACGGCAAAACCTTCTACCAATATT\n+CTGGTTAATAGTGAACTTTCTACTTTGGCTGAAGGGGCGTATATAAGTGCAACAAAAGAT\n+ACGGTTTACCTCCCAAGCAAGGAGTCAACGTCAATACTTGTATTGGATAAGGTGCCGGAA\n+GATGCGGAATTCACAATTGATGGGACAACAGCCTCCATTACTCCGTATACGGAAACTCGT\n+GCCGACCTTCAAGGGAAAAAGTTCTTGGTACGCAATTCTTGGAAAAAGCCGGGTACTAAG\n+ACGGAATATATGTACCTGAATATGCACAGCAAGAGGCACCCGGACTACTACAGTGGCCGG\n+TTGGTAATTGTTCAGTCCAATGCAACGACCTTCAAGGGAGAATTGTATAATCATCTTACA\n+AAC
ACTCCTCCCTATAACATTCATTTCAATAAATATGTCGACAGTGCTTTAGGCCAGATA\n+GAGGTACCT'..b'CGCTTTTCCTATAGGGGGAATTGGTGCAGGTATGTATTGT\n+CTGGAAGGAACAGGCTATATATCTCATATGTCAGTATGGCATCGACCGGAAGTTTTTCAT\n+GAACCGGGAATGTTTGCTGCTCTGTATGTGAAGGGGGTATGTAATGGGGCTAAGGTACTT\n+GAAGGACCTGTATCTGATTGGAGAAAATTTGGAATGCCCAATTACGGTACAGGAGGCAGT\n+ATGGGATCAATATTAGGACTTCCCCGTTTTGATACGGTTGAATTTGAAGCACGTTTTCCG\n+TTTGCCAAAGTTTCATTAACAGATAAAGATATCCCCGTTAAGGTAACCATTTTGGGGTGG\n+AGTCCTTTTATTCCGGGTGATCCGGATAATTCCAGTTTACCGGTAGGAGGATTGGAATAT\n+AGTTTAGAGAATACTAGCAAAGAGGTTCAGGAAACTATTTTCTCCTACCATGCTCGTAAC\n+TTTCTGAGTTCGGGTAAAGGATTGGATGCTATAAAAACGATGCCTCATGGGTTTATCCTT\n+TCACAGTCAGGTACAGAAACGGAGCCTCATTTGCAGGGTGATTTTGCTATCTTTACGGAT\n+CAGGATTCTTTAAAGATCAATTATTGTTGGTTTCGTGGAGGATGGTTTGATAGTCTTACG\n+ATGGTTTGGAACGCTATTGAGACAGGTTTAATGCCACAATCTCCAGCAATAGAGAAAGGT\n+GCACCAGGAGCTTCTATGTTTGTCCCGGTAACATTGATGCCAGGGGAAAAGAAAACAATC\n+AGAATTTATACGGCGTGGTATGTTCCTAACTCGACTTTAAGGCTTGGAAAGGAACCGGAA\n+GACTGGAATGACAATAATGTCGACTCCGCAAGACTAGCTGTAGAAAAGGCAGATAAGGGT\n+AATTATAAACCTTGGTATAGTAGCCGCTTTACAGGAGTGAATGAGGTTATTGATTATTTT\n+CTGTCTCATTATAAGATTTTGCGCAATCAGACGGAAAGGTTCACAGACTCTTTTTATCGT\n+TCTACCTTACCGCCTGAAGTGATTGAAGCTGTTTCTGCTAACTTAAGTATTTTAAAGTCT\n+CCAACGGTGATGCGGCAATATGATGGACGCTTATGGACTTGGGAAGGATGTGCCGATAAT\n+TGGGGCTCGTGTCATGGCTCATGTACTCATGTCTGGAATTATGCACAAGCTATTCCACAC\n+TTATTTCCTTCCTTGGAACGTTCGTTAAGGCATACTGAATTTGAAGAAGGGCAAGATTTG\n+AAAGGCCATCAAGTGTTTCGTGCCAATTTACCGATTCGTCCTACTCGGCACGACTTTCAC\n+TCAGCTGCTGACGGGCAATTGGGAGGTATAATGAAAGTATATCGTGAGTGGCGAATTTCA\n+GGCGATAATGAGTTCCTTATCTCTATGTATCCAAAAGTAAAAAAGAGTTTGGACTACTGT\n+ATCTCAACTTGGGATCCTCGTAGGGTAGGAAGTATTGAAGAACCACACCATAATACTTAT\n+GATATTGAGTTCTGGGGGCCGGATGGTATGCATAATAGTTTCTATTATGGAGCTTTATCA\n+GCTTTCATTCGTATGAGTGAGTTTCTTGATAAAGACGTTACTGAATATAAGAAACTATTG\n+AAAAAAGGACGAAAATTTACTGAAACAGGCTTGTTTAATGGTGAGTATTTTATCCAAAAG\n+ATAGAGTGGAGGGGATTGAATGCAAAAGATCCGACTGTTGCACAAAGTTTCCATAGTTCT\n+TATTCTCCCGAAGCGAAAGAAATACTGGAGAAGGAAGGTCCTAAATATCAGTATGGGAAC\n+GGTTGTCTGTCTGATGGAGTTTTGGGGTCGTGGCTCTCCCGAATGTGTGGGATGGA
AGAA\n+ACTCTGAATACAGAAAAAGTGAAAAGCCATTTATTATCAGTACATCGATATAATTTTAAA\n+AAAGATCTGACTGATCACGCCAATCCTCAACGTTCCCCTTACGCTTTAGGCAAAGAAGGA\n+GGCTTATTATTAGGAAGTTGGCCTAAAGGGAGCAAGTTGTCATTGCCTTTTGTCTATAGT\n+AATGAAGTCTGGACAGGAATAGAATATCAAGTAGCTTCACATTTGATGCTACAGGGAGAG\n+GTAGAAAAAGGCCTTGAGATCGTGCGTGCCTGTAGACAACGTTATGATGGAAGTGTCCGT\n+AACCCTTTTAATGAGTATGAGTGTGGACATTGGTACGGACGAGCGTTATCTAGTTATGGC\n+TTACTTCAAGGATTGACAGGAGTCCGATATGATGCTGTAGATAAAACACTGTATATTAAT\n+TCAAAAATAGGAGACTTTATTAGCTTTATCAGTACGGAGTCTGGATTTGGTAATGTAGAA\n+CTTCGTTCCGGAAAACCTTTTGTAAAAGTAGTATCCGGTCATATAGAGGTCGACAGATTT\n+ATCGTATCAGGGAAAGTAGTTGAATAA\n+>gi|298386634|ref|NZ_GG774708.1|:c143839-142661\n+ATGGAGCAAAAGAAAATCGTGCTATTCATCCTGATCATTCATCTGGCAGCATTTCTAGCA\n+GGATGCAGCGGAAACAAAAATAGCGGAAATAATGACTCCTCTGATCTATGGAATAAGTTA\n+TCGAGCTACTTCCGCACGCCCGCAGAATACGAAAATGTATATGGGAATTTTCGCTCTCCT\n+TTATTATATTATAATGGGGATACAGTCAGAACCGTTGAAGACTGGCAAAGACGACGAACT\n+GAAATCAAGGACAGATGGATGAGCCTGTTAGGACAATGGCCGCCTGTCATTACCGGACAA\n+ACATTTGAAATTCTGGATACTCTCCACCGTGAAAACTTCATGCAATACCGTGTCCGCTTC\n+TACTGGACTCCCAACGAACAGACTGAAGGTTATTTGCTGGTTCCGGACAAAGAAGGCAAA\n+AAGCCTGCCGTTATCACTACCTTTTATGAACCCGAAACGGCTATCGGATTGGGTGGAAAA\n+CCTTATAGAGACTTTGCATATCAATTGACGAAAAGAGGATTTGTCACATTATCAATAGGA\n+ACAACGAAGACCACAGAGAATCAGACATATTCCATTTATTACCCCAGCATTGAAAATGCA\n+ACTCTCCAACCACTTTCAGCATTAGCTTATGCAGCCGCAAATGCATGGGAAGTATTAGCC\n+AAAGTACAGGACGTCGATTCTACAAGAATAGGCATCACAGGGCATTCTTATGGTGGGAAG\n+TGGGCAATGTTTGCCTCATGCCTATACGAAAAGTTCGCTTGTGCGGCATGGGGAGACCCC\n+GGAATTGTATTCGACGAAACAAAAGAGGGATATATCAATTACTGGGAACCCTGGTATTTG\n+GGATATTATCCGCCACCATGGGAAAATACATGGAGTAAAAATGGGCATGATTATGCTAAA\n+GGCATTTATCCGAAGCTCCGCAAAGAAGGATATGATTTGCATGAATTGCATGCGCTGATG\n+GCACCTCGCCCATTTCTTGTTTCCGGAGGATACTCTGACGGAACAGACCGGTGGATAGCG\n+CTAAACCATACAATAGCGGTCAACCGGCTCTTAGGATACCGCAATAATGTCGCGATGAGC\n+AACAGAGTCAATCATGACCCAACCCCTGAATCAAACGAAATTATATATGATTTTTTTAAA\n+TGGTACTTGCATTCAGCAAATAAATCTACCAAAGAGTAG\n+>gi|223955875|ref|NZ_DS499676.1|:c386522-386337\n+TTGGCTTTTAGTTCTGTCCAGTTTTCTTATATTTTTGTAGAAAAAGTATGGGACAGG
TAT\n+ATAAAAACAGCCTGGGTTTACGTATTCAACGTAATCCCAGGCTGTTTTTACGGGTATTTG\n+CGGGGGCTTGTTTACCGATTGTTTTTTATTCCGCCAAAGTCGCCCTTCTACTTCAGTCCG\n+AAATAA\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/max_reduced_gene_family_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/max_reduced_gene_family_abundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,8420 @@\n+# Gene Family\tmax\n+UNMAPPED\t4531.0\n+UniRef90_A0A015N9J5\t4.4150110375\n+UniRef90_A0A015N9J5|g__Bacteroides.s__Bacteroides_dorei\t4.4150110375\n+UniRef90_A0A015P8C6\t5.7471264368\n+UniRef90_A0A015P8C6|g__Bacteroides.s__Bacteroides_dorei\t5.7471264368\n+UniRef90_A0A015P8Y2\t2.6560424967\n+UniRef90_A0A015P8Y2|g__Bacteroides.s__Bacteroides_dorei\t2.6560424967\n+UniRef90_A0A015PWK0\t3.7986704653\n+UniRef90_A0A015PWK0|g__Bacteroides.s__Bacteroides_dorei\t3.7986704653\n+UniRef90_A0A015QXQ5\t2.1505376344\n+UniRef90_A0A015QXQ5|g__Bacteroides.s__Bacteroides_dorei\t2.1505376344\n+UniRef90_A0A015SXD6\t3.7453183521\n+UniRef90_A0A015SXD6|g__Bacteroides.s__Bacteroides_dorei\t3.7453183521\n+UniRef90_A0A015TX99\t5.4330964066\n+UniRef90_A0A015TX99|g__Bacteroides.s__Bacteroides_dorei\t2.1436227224\n+UniRef90_A0A015TX99|g__Bacteroides.s__Bacteroides_vulgatus\t3.2894736842\n+UniRef90_A0A015TXS0\t1.5503875969\n+UniRef90_A0A015TXS0|g__Bacteroides.s__Bacteroides_dorei\t1.5503875969\n+UniRef90_A0A015U3E0\t1.872659176\n+UniRef90_A0A015U3E0|g__Bacteroides.s__Bacteroides_dorei\t1.872659176\n+UniRef90_A0A015U517\t4.016064257\n+UniRef90_A0A015U517|g__Bacteroides.s__Bacteroides_dorei\t4.016064257\n+UniRef90_A0A015UAM4\t2.8735632184\n+UniRef90_A0A015UAM4|g__Bacteroides.s__Bacteroides_dorei\t2.8735632184\n+UniRef90_A0A015UAR2\t4.2553191489\n+UniRef90_A0A015UAR2|g__Bacteroides.s__Bacteroides_dorei\t4.2553191489\n+UniRef90_A0A015UU70\t7.7519379845\n+UniRef90_A0A015UU70|g__Bacteroides.s__Bacteroides_dorei\t7.7519379845\n+UniRef90_A0A015V1U6\t1.239157373\n+UniRef90_A0A015V1U6|g__Bacteroides.s__Bacteroides_dorei\t1.239157373\n+UniRef90_A0A015V946\t11.4942528736\n+UniRef90_A0A015V946|g__Bacteroides.s__Bacteroides_dorei\t11.4942528736\n+UniRef90_A0A015V960\t2.1030494217\n+UniRef90_A0A015V960|g__Bacteroides.s__Bacteroides_dorei\t2.1030494217\n+UniRef90_A0A015VSY3\t8.4388185654\n+UniRef90_A0A015VSY3|g__Bacteroides.s__Bacteroides_dorei\t5.6258790436\n+UniRef90_A0A015VSY3|g__Bactero
ides.s__Bacteroides_vulgatus\t2.8129395218\n+UniRef90_A0A015VSZ2\t3.2588454376\n+UniRef90_A0A015VSZ2|g__Bacteroides.s__Bacteroides_dorei\t1.3966480447\n+UniRef90_A0A015VSZ2|g__Bacteroides.s__Bacteroides_vulgatus\t1.8621973929\n+UniRef90_A0A015WJ01\t5.7471264368\n+UniRef90_A0A015WJ01|g__Bacteroides.s__Bacteroides_vulgatus\t5.7471264368\n+UniRef90_A0A015X9A0\t2.624671916\n+UniRef90_A0A015X9A0|g__Bacteroides.s__Bacteroides_dorei\t2.624671916\n+UniRef90_A0A015XGT9\t1.7182130584\n+UniRef90_A0A015XGT9|g__Bacteroides.s__Bacteroides_dorei\t1.7182130584\n+UniRef90_A0A015XPQ3\t3.8535645472\n+UniRef90_A0A015XPQ3|g__Bacteroides.s__Bacteroides_vulgatus\t3.8535645472\n+UniRef90_A0A015XQT0\t3.4722222222\n+UniRef90_A0A015XQT0|g__Bacteroides.s__Bacteroides_vulgatus\t3.4722222222\n+UniRef90_A0A015Y5P7\t4.8309178744\n+UniRef90_A0A015Y5P7|g__Bacteroides.s__Bacteroides_dorei\t4.8309178744\n+UniRef90_A0A015YC48\t9.1324200913\n+UniRef90_A0A015YC48|g__Bacteroides.s__Bacteroides_dorei\t9.1324200913\n+UniRef90_A0A015YDI9\t5.291005291\n+UniRef90_A0A015YDI9|g__Bacteroides.s__Bacteroides_dorei\t5.291005291\n+UniRef90_A0A015YDJ3\t1.4814814815\n+UniRef90_A0A015YDJ3|g__Bacteroides.s__Bacteroides_dorei\t1.4814814815\n+UniRef90_A0A016AWW1\t1.3020833333\n+UniRef90_A0A016AWW1|g__Bacteroides.s__Bacteroides_dorei\t1.3020833333\n+UniRef90_A0A016B3G9\t2.5157232704\n+UniRef90_A0A016B3G9|g__Bacteroides.s__Bacteroides_dorei\t2.5157232704\n+UniRef90_A0A016C4A5\t1.996007984\n+UniRef90_A0A016C4A5|g__Bacteroides.s__Bacteroides_dorei\t1.996007984\n+UniRef90_A0A016CX89\t1.4306151645\n+UniRef90_A0A016CX89|g__Bacteroides.s__Bacteroides_dorei\t1.4306151645\n+UniRef90_A0A016GIZ2\t3.0581039755\n+UniRef90_A0A016GIZ2|g__Bacteroides.s__Bacteroides_dorei\t3.0581039755\n+UniRef90_A0A016LN29\t1.7683465959\n+UniRef90_A0A016LN29|g__Bacteroides.s__Bacteroides_dorei\t1.7683465959\n+UniRef90_A0A016NLU2\t2.743484225\n+UniRef90_A0A016NLU2|g__Bacteroides.s__Bacteroides_dorei\t2.743484225\n+UniRef90_A0A017P703\t1.0752688172\n+UniRef9
0_A0A017P703|g__Bacteroides.s__Bacteroides_dorei\t1.0752688172\n+UniRef90_A6KWB7\t3.7243947858\n+UniRef90_A6KWB7|g__Bacteroides.s__Bacteroides_dorei\t3.724'..b'eroides_dorei\t8.7719298246\n+UniRef90_R9ID01\t2.3980815348\n+UniRef90_R9ID01|g__Bacteroides.s__Bacteroides_dorei\t2.3980815348\n+UniRef90_R9ILG9\t0.5420054201\n+UniRef90_R9ILG9|g__Bacteroides.s__Bacteroides_vulgatus\t0.5420054201\n+UniRef90_S0FBR1\t1.2626262626\n+UniRef90_S0FBR1|g__Bacteroides.s__Bacteroides_dorei\t1.2626262626\n+UniRef90_S3ZHA6\t8.3597446144\n+UniRef90_S3ZHA6|g__Bacteroides.s__Bacteroides_dorei\t2.7100271003\n+UniRef90_S3ZHA6|g__Bacteroides.s__Bacteroides_vulgatus\t5.6497175141\n+UniRef90_T4HXA1\t1.2820512821\n+UniRef90_T4HXA1|g__Bacteroides.s__Bacteroides_dorei\t1.2820512821\n+UniRef90_U2CVX4\t7.1377587438\n+UniRef90_U2CVX4|g__Bacteroides.s__Bacteroides_dorei\t7.1377587438\n+UniRef90_U3KI10\t11.558989771\n+UniRef90_U3KI10|unclassified\t11.558989771\n+UniRef90_U3KP22\t11.6979338377\n+UniRef90_U3KP22|unclassified\t11.6979338377\n+UniRef90_U5C7R6\t19.0476190476\n+UniRef90_U5C7R6|g__Bacteroides.s__Bacteroides_vulgatus\t19.0476190476\n+UniRef90_U5FT06\t16.7693829744\n+UniRef90_U5FT06|unclassified\t16.7693829744\n+UniRef90_U6R8C4\t5.7471264368\n+UniRef90_U6R8C4|g__Bacteroides.s__Bacteroides_dorei\t2.8735632184\n+UniRef90_U6R8C4|g__Bacteroides.s__Bacteroides_vulgatus\t2.8735632184\n+UniRef90_U6R9S1\t2.1299254526\n+UniRef90_U6R9S1|g__Bacteroides.s__Bacteroides_vulgatus\t2.1299254526\n+UniRef90_U6RDV1\t5.0890585242\n+UniRef90_U6RDV1|g__Bacteroides.s__Bacteroides_dorei\t5.0890585242\n+UniRef90_U6RFA3\t4.4150110375\n+UniRef90_U6RFA3|g__Bacteroides.s__Bacteroides_vulgatus\t4.4150110375\n+UniRef90_U6RFL8\t5.4644808743\n+UniRef90_U6RFL8|g__Bacteroides.s__Bacteroides_vulgatus\t5.4644808743\n+UniRef90_U6RFX0\t3.3333333333\n+UniRef90_U6RFX0|g__Bacteroides.s__Bacteroides_dorei\t2.6666666667\n+UniRef90_U6RFX0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6666666667\n+UniRef90_U6RGV5\t3.8461538462\n+UniRef90_U6RG
V5|g__Bacteroides.s__Bacteroides_vulgatus\t3.8461538462\n+UniRef90_U6RSC0\t2.4154589372\n+UniRef90_U6RSC0|g__Bacteroides.s__Bacteroides_dorei\t2.4154589372\n+UniRef90_UPI0004695389\t3.294892916\n+UniRef90_UPI0004695389|g__Bacteroides.s__Bacteroides_dorei\t3.294892916\n+UniRef90_UPI0004697FB6\t3.5938903863\n+UniRef90_UPI0004697FB6|g__Bacteroides.s__Bacteroides_vulgatus\t3.5938903863\n+UniRef90_UPI00046A51E2\t2.1459227468\n+UniRef90_UPI00046A51E2|g__Bacteroides.s__Bacteroides_vulgatus\t2.1459227468\n+UniRef90_UPI00046E868E\t5.6497175141\n+UniRef90_UPI00046E868E|g__Bacteroides.s__Bacteroides_dorei\t5.6497175141\n+UniRef90_V3KIQ1\t9.5054275569\n+UniRef90_V3KIQ1|unclassified\t9.5054275569\n+UniRef90_V5V2L3\t8.8892176814\n+UniRef90_V5V2L3|unclassified\t8.8892176814\n+UniRef90_W0ETG2\t2.0449897751\n+UniRef90_W0ETG2|g__Bacteroides.s__Bacteroides_vulgatus\t2.0449897751\n+UniRef90_W0EV06\t2.0325203252\n+UniRef90_W0EV06|g__Bacteroides.s__Bacteroides_dorei\t2.0325203252\n+UniRef90_W0EVY8\t5.1380860629\n+UniRef90_W0EVY8|g__Bacteroides.s__Bacteroides_dorei\t5.1380860629\n+UniRef90_W1IYL7\t12.1580837819\n+UniRef90_W1IYL7|unclassified\t12.1580837819\n+UniRef90_W7DVV5\t11.5904796756\n+UniRef90_W7DVV5|unclassified\t11.5904796756\n+UniRef90_W8YTG4\t12.6386317258\n+UniRef90_W8YTG4|unclassified\t12.6386317258\n+UniRef90_X5M7Z0\t9.0719420554\n+UniRef90_X5M7Z0|unclassified\t9.0719420554\n+UniRef90_X6L320\t8.6463137003\n+UniRef90_X6L320|unclassified\t8.6463137003\n+UniRef90_X7F2P9\t7.3126169751\n+UniRef90_X7F2P9|unclassified\t7.3126169751\n+UniRef90_Y0KIL9\t8.744795425\n+UniRef90_Y0KIL9|unclassified\t8.744795425\n+UniRef90_Z5XKU5\t11.3736016532\n+UniRef90_Z5XKU5|unclassified\t11.3736016532\n+UniRef90_Z5XTI9\t7.4649158985\n+UniRef90_Z5XTI9|unclassified\t7.4649158985\n+UniRef90_Z5XVM9\t10.665534346\n+UniRef90_Z5XVM9|unclassified\t10.665534346\n+UniRef90_Z9JLB1\t8.4767761266\n+UniRef90_Z9JLB1|unclassified\t8.4767761266\n+UniRef90_Z9JRB3\t9.1884719908\n+UniRef90_Z9JRB3|unclassified\t9.18847199
08\n+UniRef90_Z9JXD8\t9.3092450619\n+UniRef90_Z9JXD8|unclassified\t9.3092450619\n+UniRef90_Z9K4C5\t12.7438684783\n+UniRef90_Z9K4C5|unclassified\t12.7438684783\n+UniRef90_unknown\t1635.44829756\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_dorei\t883.211795734\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus\t752.236501829\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/reduced_uniref50.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reduced_uniref50.fasta Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,1478 @@\n+>UniRef50_B0NQY6 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NQY6_BACSE\n+MYFRAQEVKNTSKSKRGKIRFTLILIMQETIYELYVAKVLIFLINGIERKEFSFGLRIFA\n+QS\n+>UniRef50_F3PHD0 Uncharacterized protein n=2 Tax=Bacteroides RepID=F3PHD0_9BACE\n+MIQTEFQHLLKPVPTAGIGDTKHWYEEYQPLAQTIPSLGTSNTGRWYERYQALEREFFPG\n+IYSKG\n+>UniRef50_B0NP96 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NP96_BACSE\n+MDIRQENAVINKDIAFLYHLCTRGTIIFNILRINSCRMYPFRKRLGTDISALKKRRFFFA\n+YFI\n+>UniRef50_B0NTS9 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NTS9_BACSE\n+MYFKDRMVSDRSPDTSSFFFLFQENRSCRIALKRFSLFNYLIINKMQINNLCYVNR\n+>UniRef50_Q8A9G3 Uncharacterized protein n=1 Tax=Bacteroides thetaiotaomicron (strain ATCC 29148 / DSM 2079 / NCTC 10582 / E50 / VPI-5482) RepID=Q8A9G3_BACTN\n+MTHILYHFAVCYVWQDSSGVIHVSGLDIVAGGYILRFSIMIDSNFYIGCSILFYMSSCLE\n+NDRFKCCHTVLMFFKCLKLH\n+>UniRef50_B0NV53 Uncharacterized protein n=3 Tax=Bacteroides stercoris RepID=B0NV53_BACSE\n+MLQQHVVFLVLQLRAVEPAVANPAVRSWQPQGPQLENGVFIVVRLHLSCCFSTKKNKESA\n+VFVWEKRVRLRQKRMEEAGIFYVELLL\n+>UniRef50_I0Q078 Uncharacterized protein n=2 Tax=Bacteroides RepID=I0Q078_9BACE\n+MFNQEWQNLRLLFKYSIKRATKTINQPLLLKTDLSPSLFIYDIVNHRKVPIITSIRCIKS\n+NREVFIY\n+>UniRef50_W0ESG7 DNA-binding protein n=14 RepID=W0ESG7_9PORP\n+MSNEIREKDHEWVKAFHSNFDRLLALLEKLLEKRQPSAYGDELLTDKEVAFLLKVSRRTL\n+QDYRNNGILPYTQVGGKILYRASDIEKTLMKGYKEAYKYKRN\n+>UniRef50_Q5LIQ7 Shikimate kinase n=97 RepID=AROK_BACFN\n+MIRIFLTGYMGAGKTTLGKALARELHIPFIDLDWYIEERFHKTVGELFSERGEASFRELE\n+KNMLHEVGEFEDVVISTGGGAPCFFDNMEYMNRVGTTVFLDVDPKVLFSRLRVAKQQRPI\n+LQGKKDDELLDFIVQALEKRAPFYRQANYIYCADKLEDRSQIETSVQQLRKLLNLHIAS\n+>UniRef50_B0NW87 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NW87_BACSE\n+MSASIEETVSLELYAPQKGARLISGIGDYGGFTHFDLNRPDSLGSHANPHFGNTNGVTGA\n+WLKQDLIVRVGTLFGHQPDAKTISYSEDGGRRWTMCATVPTEKSRNGHITVAADGSSWI\n+>UniRef50_Q8AB79 Excisionase n=8 Tax=Bacteroidales 
RepID=Q8AB79_BACTN\n+MNKELTFNDLPMVVAQLRDEVVGMKQMIVSLQSQNKPHKANTHIPMSVEEASAYLKMPMA\n+TLYMKLGNGSIPATKPGKRYCLYQDELDKWLETNRKNPVPLTAEEENAAILAGNKRKPKP\n+LNW\n+>UniRef50_Q8A980 Uncharacterized protein n=3 Tax=Bacteroides RepID=Q8A980_BACTN\n+MINKDEEVANRFSDPSKKLVEYVTDFSDMRDEDIELTKPIYENYGNFQLLNETGIIRTDE\n+EIKELYNCKGEFGSTDEVTDMSF\n+>UniRef50_Q8A9S1 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine pyrophosphokinase n=25 Tax=Bacteroides RepID=Q8A9S1_BACTN\n+MTRLWDTARRKRLNCPKVLTLVPVRKHRTEKSSKKRDWTICLIKPLIVHKCIICIGSNYN\n+RKENLLLARRRLVDLFPTIRFTSEQETRPLFFRSPALFSNQVAMFFSEAEEERVRKELKA\n+IEQSAGRRPEDKKEEKVSLDIDLLSFDDRVLKPEDLKREYVVKGLEELKYNQI\n+>UniRef50_Q8A268 Uncharacterized protein n=2 Tax=Bacteroides RepID=Q8A268_BACTN\n+MNTIKTLLLLSFSILLFNCSDEEDKKTEKEFIFSASELKQTEWEGEFLYLTNGEIDSKGS\n+IKIVFYTEKKGVCEYKFDYHIDPETISFEYEISDKFMYIDGPLPIHGNWIQQKYNGNSLE\n+IADSKAAFTNSRMIKLTRVN\n+>UniRef50_B0NTK8 Uncharacterized protein n=3 RepID=B0NTK8_BACSE\n+MGEWIYNSSFDTVMDFAVTVDNVFKRYGQVEALQGVSLSVRPGELFGVIGPDGAGKTSLF\n+RILATLLLADEGKATVCGLDMVKDYKAIRQRVGYMPGRFSLYQDLTVEENLSFFATVFHT\n+TIEENYDLVRDIYRQIEPFRKRRAGALSGGMK\n+>UniRef50_B3ESB4 Transcription elongation factor GreA n=112 RepID=GREA_AMOA5\n+MEKVSYYTEEGLQRLKGELTQLKSEGRAKVAEQLSEARDKGDLSENAEYDAAKEAQEILE\n+RRIAKLEELMINARVINKDNINTSAVSILSKVKIKNKKLGKVSTYTMVSEEEADLKEGKI\n+SIESPIGKGLLGKKAGEVAIVEAPAGKIEFEILDISF\n+>UniRef50_E6SQ70 Uncharacterized protein n=31 Tax=Bacteroidales RepID=E6SQ70_BACT6\n+MIRYKKYQVTGEKSSLRGLWYARPLIEDTFDTEKLAKHMANHNTPYSAGLIKGVLTDMIS\n+CIKELILDGKNVKLDDLAIFSVGIVSKKGAASAEEFKVSDNVKSLKLRARATGELSNAQI\n+NLEGQLKEAALYTVTDSTTEGTPGGGSGPNNGSGDENENPLG\n+>UniRef50_E6SQW3 Uncharacterized protein n=125 RepID=E6SQW3_BACT6\n+MKKIVLLVCLLVATVAAQAQFEKGKWIVNPSVTGLELSHDTGTKKTTFGLEAKGGAFLVD\n+NVALLVNAGARWNDYGGDVDVYSLGVGGRYYIDAVGVYLGANVNVDRWDWGKDNDDTKFS\n+FGLEAGYAFFLSRTVTIEPAAYWNVNSDRSKFGLKVGFGFYF\n+>UniRef50_P0A864 Thiol peroxidase n=1538 
RepID=TPX_ECO57\n+MSQTVHFQGNPVTVANSIPQAGSKAQTFTLVAKDLSDVTLGQFAGKRKVLNIFPSIDTGV\n+CAASVRKFNQLATEIDNTVVLCISADLPFAQSRFCGAEGLNNVITLSTFRNAEFLQAYGV\n+AIADGPLKGLAARAVVVIDENDNVIFSQLVDEITTEPDYEAALAVLKA\n+>UniRef50_Q89YJ6 Histone-like bacteri'..b'ES\n+IQAVTDEENKALANQDNDFLRMLFGDVSQMAFSDLSELLKQARQLRSYLSGKDNKEGITF\n+ISPEQLKAIEESPEELDKLKKALDKLLGAGKKQNKWSNIFETFKTGFADLKSAQGFKEIS\n+GAIGMISGAAGQAAGEIATMFEAMGKDSAANVIGSLGEVLSSISNIGQAFATGGPVAGAF\n+AAVGEIFSLIGKGAQETAKHRQVLEDVMNDTIAQQREYNLLLLEQNLLYEKASTIFGVDS\n+YAKAENAVRVLKDAISDLNKELAGTTEQQKKFAYRKTGSVALDKVFNRNYSQSKDRYSGL\n+ADIEIKTGSYTTGAWFWKKQHDVYTSVLDVYPELIDANGEFNKELAESIINTREMSDEDK\n+AALQGMIDLAEQAEAAFDSLNDYMTDIFGELGGSMSHALVDAFKNGTDAAESFTQSVSEM\n+LETLAEQMIYSVTLGPLLEEAQKEMMTVMKNQNLTDEQRFSQWTNILKGLTNDAVAKQDE\n+AKQLYEAFRQSAGDMGFDVFSPDSTREASQKTGITASQDSVDKIDGMATTMMGHTYSINE\n+NVNRMANGIDSLLNYASSGLSLTTDIERTAKAIESQSRDALNHLANIDNYTSNLVEMREY\n+MYAVKNGIDTLNTKGLTLKR\n+>UniRef50_W4P379 Two-component system sensor histidine kinase/response n=2 Tax=Bacteroides RepID=W4P379_9BACE\n+MFSLKDIIFYLLFLCVGANFTFAASDQITFSHISINEGLSQSTVFSIDQDKRGNMWFATY\n+DGVNKYDGYAFTVYRHDESNPNSIPNDISRIVKADSRGRIWIGTRDGLSCYDEEKDQFKN\n+FFYQKKRGHISVNAIAEITPDQLLINTSEGLTLFDVKSSVFTAAPLNRQMRELPVSTLYR\n+SGNHIYIGTFNKGLFCYSVSERTLQKLTPALDDKQIQAVLQQSPTQVWVATEGHGLFLIN\n+PKTKKAKNYLHSASDSKSISSNYIRSLALDGQNRLWIGTLNDLNIFQEGTDSFISYGSSP\n+TENGSLSQRSVRSIFMDSQGGMWLGTYFGGLNYYHPIRNRFKNIRRIPYKNSLSDNVVSC\n+IVEDKDKNLWIGTNDGGLNLYNTANGQFTHYTLQESERERGMGSNNIKAVYLDEPGGLVY\n+IGTHAGGLNILHRSTGKVEHFDQKNSELINENVYAILPDEEGGLWLGTLGALVRFEPRKQ\n+SFTTVEKEKNGTLFTAKRITTLFRDSEKRLWIGGEEGISVYTQHKGELQRESVFPKSSIT\n+EAFVNCIYESSNGIFWIGTREGVYCFNEKKKKINRYTTAKGLPNNVVYGILEDSSGQIWL\n+STNRGIACFHPETEKFRNFTESDGLQSNQFTSSYCRTSTGQMYFGGIEGITTFRPELLLD\n+NPYAPPVVITRLQLFNKTVRPDDETGILTKNISETKRITLKSSQTAFSLEFVVSNYISGQ\n+HNTFAYQLEGYDKEWYYLTDKRTVSYSNLRQGTYHFHVKAANSDGKWNTTPTTLEIIVLP\n+VWYKTWWALLLFLIAFIAFLTFVFRFFWMRKSMAAELEMERRDKEQQEEINQMKMRFFIN\n+ISHELRTPLTLILAPLQEIINRISDRWTRNQLEYIGRNANRLLHLVNQLMDYRRAELGVF\n+ELKVKKGNAHRLVSENFRYYDKLARHKNIAYSLHSELEEKEMLFDPNY
LELILNNLLSNA\n+FKYTGNGQSITVTLKEDNGWLLLQVSDTGIGIPINKQGRYSNVFIRWKANMLAAASVCRW\n+YNVW\n+>UniRef50_E1WVX1 Uncharacterized protein n=7 RepID=E1WVX1_BACF6\n+MNMNYFSFYNKAKELLIDSLASLWFKGQAREQEYIKRVLTEDEPLFAEPVFQSIFPWEES\n+VYSFEAHSSKLGLLTSSFVNALSNEGIDKDLRFPLDRHPYKHQTESWRTMLSPRPQTIVV\n+TSGTGSGKTECFMIPVLQDLAKTNMKDCVQAIFLYPLNALMKSQQKRIHAWCKALPEKVT\n+YAIYNGETDKENRSDRYTAPHYPQLVTRPQIRRTPPQILFTNPTMLNYMLVRAEDHEILE\n+KSKGKLKWILLDEAHTYIGSSAAELSLQIRRVLDAFGVTIDQVNFAVTSATIGDESDPKT\n+MIKLKTFVSQLTGKPFEDIKIIGGKRIIPELNKGIAEEQLSKINKRFGIRLTYSDIERLR\n+KKLNSSPVLKVKEIGSILDKKIGINVDASLEIIDALGEKVKNLNEGSGFGALLPTRAHFF\n+VRSISGVYVCTNPDCQRHKGYRLPIGSLTTYQNINCPVCKSKMLELATCSSCGSPIVVGE\n+TSTTKGFRMHTNIIDLDNTLFYEQKEDLIDSEDMENIENVEQNEADGFSRFFFAIPEKLC\n+LRKNATCTSHIFNHRNGKIELAPENNESSKGITPLERGESTPVRYQSLRHSGDNHVLCPH\n+CGNNLSELKKLDYLRISATQIGRTLATLLLDNAEAIGSNDAGVVYEGRKYIAFTDSRQGS\n+ARSAMGLNQDVERSWIRASIFHKLADMRLNDVKPGGLTPDEEAEYNAYLSIRGCLPALLL\n+EKFKQLEEKKNGVPVIPSPEEVSWSQISQPLENDSNFRKLYEHVDKARGRKNFRNATDYL\n+KALLVDQFGWIPKRANSLETMGFVRLVYPTLKNAKCPTLLIQKRCTDTDWQNFLKICMDY\n+VIRGGRHYMLSGAYKDYLTQNKYCSPIYPSNSELRKNGNPVSKWFKVNVSQKGVDENQNR\n+LVLLLCAVLGYDDISQISQTKIADINSMLDAAWDFLKQNVLEATDAENQGYMLDLTGDKV\n+KLQLIEKGYLCPVDNVIIDTPFCGYSPRMNGYIGRENFDRFKIQTEFVNPFFPFKFAEQI\n+EENVTEWIEKNLFDQKAAGVFGVMNYRVLASKPIFISAEHSAQQSSEDLDRYEKEFNEGK\n+INILSCSTTMEMGVDISGITEVVMNNVPPKSSNYLQRAGRAGRRSETKALALTVCAPNPI\n+GTHTWNNPDYPITHVTETPLLKLESRQLIQRHVNAMVFASFVADQGGIRVTATLRDFFVK\n+AEGMSFYDKFLNYIDSVISGNVERLQRAYSKLIKGTSLAQITLPDAAQVVKKDIAAVHNV\n+FDAHNGALEKALESLRNESETANAIKAIEKQEENLLKTSMLSYLAENSFLPSAGIPIGLV\n+ECLLGGKEKVDGSSPTLHISQAISSYAPGNPVVKNEWVYEPSGIRLKTKYDDSTSRYIIQ\n+NCTHCGYTTITYGSAKTDCPKCGRHGTMHGIKDISLSIEQRFTEVVEPAAFSVAWDSTPI\n+RKMGTLGGMNFIQPILLEMDAWQPKTNSAKMSIRCSTPRSEILFYNKGASGYGYAFCPYC\n+GRMKSEKSPDSTERMMPHHKHLLASTPCLGGENDGAAVRRHVLLVGRYQTDFVEIKFHDK\n+NNNLIEDSETLYSLGVILSRKLTELLGVNDGEIEFGYDVINHSIFIYDTALGGAGYSLLF\n+REYKDEVLKMALEALERCDCERSCTKCLIDRRSQWYLNYLNRTKALEWLRQEIKARIAPK\n+EILRLIPDSHTVTSDITTEFYQLTRNKDISCIKIFVNDNISQWDAEAFPFKKILTELSLE\n+GVDVAFILPSVPDVKSLSSADS
ATLIAEVFKNDFKGLENTLPAELLPLMVVIMNDGTVKT\n+YFGKNIDTSYSKNWGSGDVFITTQLNSLSYADINRMQLLNTFSSGDTSFMFDYRITEHSS\n+LGHFFDSLKNPEVENWNRIVSNLQGKTVSVEYSDRYLKTPLGCMLLAKMISGLKNEADLN\n+VVSIKVIVTNIVSMDDSDVAVNAIKDFTNGEKRNLFLKNAISELTGIEPEIQDTGYVEHE\n+RCLTVKADNAELCIRPDAGIARGWVPFGRDNAECADCDFREDWNMDLELFNKQQRGAGIL\n+YTISYKQP\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/regrouped_gene_families_to_infogo1000.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_infogo1000.tsv Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,1393 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t15490.72\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t8949.382\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t6476.05\n+UNGROUPED|unclassified\t65.288\n+GO:0000015\t4.177\n+GO:0000015|g__Bacteroides.s__Bacteroides_dorei\t2.506\n+GO:0000015|g__Bacteroides.s__Bacteroides_vulgatus\t1.671\n+GO:0000027\t7.937\n+GO:0000027|g__Bacteroides.s__Bacteroides_dorei\t7.937\n+GO:0000049\t29.232\n+GO:0000049|g__Bacteroides.s__Bacteroides_dorei\t7.205\n+GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus\t22.027\n+GO:0000105\t22.302\n+GO:0000105|g__Bacteroides.s__Bacteroides_dorei\t9.093\n+GO:0000105|g__Bacteroides.s__Bacteroides_vulgatus\t13.209\n+GO:0000107\t5.077\n+GO:0000107|g__Bacteroides.s__Bacteroides_vulgatus\t5.077\n+GO:0000150\t29.481\n+GO:0000150|g__Bacteroides.s__Bacteroides_dorei\t14.86\n+GO:0000150|g__Bacteroides.s__Bacteroides_vulgatus\t14.62\n+GO:0000155\t98.896\n+GO:0000155|g__Bacteroides.s__Bacteroides_dorei\t45.55\n+GO:0000155|g__Bacteroides.s__Bacteroides_vulgatus\t53.346\n+GO:0000160\t71.811\n+GO:0000160|g__Bacteroides.s__Bacteroides_dorei\t43.683\n+GO:0000160|g__Bacteroides.s__Bacteroides_vulgatus\t28.128\n+GO:0000162\t6.418\n+GO:0000162|g__Bacteroides.s__Bacteroides_dorei\t1.115\n+GO:0000162|g__Bacteroides.s__Bacteroides_vulgatus\t5.303\n+GO:0000179\t5.376\n+GO:0000179|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0000179|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0000287\t144.517\n+GO:0000287|g__Bacteroides.s__Bacteroides_dorei\t69.932\n+GO:0000287|g__Bacteroides.s__Bacteroides_vulgatus\t60.803\n+GO:0000287|unclassified\t13.782\n+GO:0000453\t4.823\n+GO:0000453|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0000453|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0000723\t4.363\n+GO:0000723|g__Bacteroides.s__Bacteroides_dorei\t1.992\n+GO:0000723|g__Bacteroides.s__Bacteroides_vulgatus\t2.371\n+GO:0000917\t7.611\n+GO:0000917|g__Bacteroides.s__Bacteroides
_dorei\t5.952\n+GO:0000917|g__Bacteroides.s__Bacteroides_vulgatus\t1.658\n+GO:0000967\t3.115\n+GO:0000967|g__Bacteroides.s__Bacteroides_vulgatus\t3.115\n+GO:0002094\t2.525\n+GO:0002094|g__Bacteroides.s__Bacteroides_vulgatus\t2.525\n+GO:0002100\t5.952\n+GO:0002100|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0002161\t0.731\n+GO:0002161|g__Bacteroides.s__Bacteroides_vulgatus\t0.731\n+GO:0002935\t7.361\n+GO:0002935|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0002935|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0002949\t4.47\n+GO:0002949|g__Bacteroides.s__Bacteroides_dorei\t2.778\n+GO:0002949|g__Bacteroides.s__Bacteroides_vulgatus\t1.692\n+GO:0003684\t9.246\n+GO:0003684|g__Bacteroides.s__Bacteroides_dorei\t5.554\n+GO:0003684|g__Bacteroides.s__Bacteroides_vulgatus\t3.692\n+GO:0003697\t24.977\n+GO:0003697|g__Bacteroides.s__Bacteroides_dorei\t16.618\n+GO:0003697|g__Bacteroides.s__Bacteroides_vulgatus\t8.358\n+GO:0003725\t7.91\n+GO:0003725|g__Bacteroides.s__Bacteroides_dorei\t5.972\n+GO:0003725|g__Bacteroides.s__Bacteroides_vulgatus\t1.938\n+GO:0003727\t12.121\n+GO:0003727|g__Bacteroides.s__Bacteroides_dorei\t6.061\n+GO:0003727|g__Bacteroides.s__Bacteroides_vulgatus\t6.061\n+GO:0003729\t4.739\n+GO:0003729|g__Bacteroides.s__Bacteroides_vulgatus\t4.739\n+GO:0003735\t156.347\n+GO:0003735|g__Bacteroides.s__Bacteroides_dorei\t76.023\n+GO:0003735|g__Bacteroides.s__Bacteroides_vulgatus\t80.324\n+GO:0003743\t6.762\n+GO:0003743|g__Bacteroides.s__Bacteroides_dorei\t4.016\n+GO:0003743|g__Bacteroides.s__Bacteroides_vulgatus\t2.746\n+GO:0003746\t16.907\n+GO:0003746|g__Bacteroides.s__Bacteroides_dorei\t13.849\n+GO:0003746|g__Bacteroides.s__Bacteroides_vulgatus\t3.058\n+GO:0003755\t33.832\n+GO:0003755|g__Bacteroides.s__Bacteroides_dorei\t14.287\n+GO:0003755|g__Bacteroides.s__Bacteroides_vulgatus\t19.544\n+GO:0003796\t3.906\n+GO:0003796|g__Bacteroides.s__Bacteroides_dorei\t3.906\n+GO:0003848\t9.37\n+GO:0003848|g__Bacteroides.s__Bacteroides_vulgatus\t9.37\n+GO:0003852\t3.462
\n+GO:0003852|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+GO:0003852|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+GO:0003856\t4.154\n+GO:0003856|g__Bacteroides.s__Bacteroides_dorei\t4.154\n+G'..b's__Bacteroides_vulgatus\t13.172\n+GO:0048472\t4.796\n+GO:0048472|g__Bacteroides.s__Bacteroides_dorei\t2.398\n+GO:0048472|g__Bacteroides.s__Bacteroides_vulgatus\t2.398\n+GO:0048500\t7.353\n+GO:0048500|g__Bacteroides.s__Bacteroides_dorei\t3.268\n+GO:0048500|g__Bacteroides.s__Bacteroides_vulgatus\t4.085\n+GO:0050380\t1.425\n+GO:0050380|g__Bacteroides.s__Bacteroides_dorei\t1.425\n+GO:0050480\t3.49\n+GO:0050480|g__Bacteroides.s__Bacteroides_vulgatus\t3.49\n+GO:0050511\t8.721\n+GO:0050511|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0050511|g__Bacteroides.s__Bacteroides_vulgatus\t2.907\n+GO:0050570\t5.988\n+GO:0050570|g__Bacteroides.s__Bacteroides_dorei\t5.988\n+GO:0050577\t2.045\n+GO:0050577|g__Bacteroides.s__Bacteroides_vulgatus\t2.045\n+GO:0051073\t6.144\n+GO:0051073|g__Bacteroides.s__Bacteroides_dorei\t3.072\n+GO:0051073|g__Bacteroides.s__Bacteroides_vulgatus\t3.072\n+GO:0051205\t4.522\n+GO:0051205|g__Bacteroides.s__Bacteroides_dorei\t3.39\n+GO:0051205|g__Bacteroides.s__Bacteroides_vulgatus\t1.132\n+GO:0051537\t26.424\n+GO:0051537|g__Bacteroides.s__Bacteroides_dorei\t15.914\n+GO:0051537|g__Bacteroides.s__Bacteroides_vulgatus\t10.51\n+GO:0051539\t133.729\n+GO:0051539|g__Bacteroides.s__Bacteroides_dorei\t41.322\n+GO:0051539|g__Bacteroides.s__Bacteroides_vulgatus\t54.67\n+GO:0051539|unclassified\t37.738\n+GO:0051607\t2.165\n+GO:0051607|g__Bacteroides.s__Bacteroides_vulgatus\t2.165\n+GO:0051775\t3.623\n+GO:0051775|g__Bacteroides.s__Bacteroides_dorei\t3.623\n+GO:0051920\t25.421\n+GO:0051920|g__Bacteroides.s__Bacteroides_dorei\t12.187\n+GO:0051920|g__Bacteroides.s__Bacteroides_vulgatus\t13.234\n+GO:0051989\t7.465\n+GO:0051989|unclassified\t7.465\n+GO:0051991\t8.721\n+GO:0051991|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0051991|g__Bacteroides.s__Bacteroides_vulgatus\t2.9
07\n+GO:0052381\t4.848\n+GO:0052381|g__Bacteroides.s__Bacteroides_dorei\t4.848\n+GO:0052692\t3.49\n+GO:0052692|g__Bacteroides.s__Bacteroides_dorei\t2.327\n+GO:0052692|g__Bacteroides.s__Bacteroides_vulgatus\t1.163\n+GO:0052717\t5.952\n+GO:0052717|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0052865\t5.301\n+GO:0052865|g__Bacteroides.s__Bacteroides_dorei\t4.699\n+GO:0052865|g__Bacteroides.s__Bacteroides_vulgatus\t0.602\n+GO:0052908\t5.376\n+GO:0052908|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0052908|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0061711\t1.086\n+GO:0061711|g__Bacteroides.s__Bacteroides_dorei\t1.086\n+GO:0070006\t5.507\n+GO:0070006|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070040\t7.361\n+GO:0070040|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070040|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070084\t5.507\n+GO:0070084|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070181\t3.831\n+GO:0070181|g__Bacteroides.s__Bacteroides_dorei\t2.554\n+GO:0070181|g__Bacteroides.s__Bacteroides_vulgatus\t1.277\n+GO:0070204\t2.54\n+GO:0070204|g__Bacteroides.s__Bacteroides_vulgatus\t2.54\n+GO:0070401\t11.207\n+GO:0070401|g__Bacteroides.s__Bacteroides_dorei\t2.039\n+GO:0070401|g__Bacteroides.s__Bacteroides_vulgatus\t9.168\n+GO:0070402\t3.81\n+GO:0070402|g__Bacteroides.s__Bacteroides_dorei\t1.905\n+GO:0070402|g__Bacteroides.s__Bacteroides_vulgatus\t1.905\n+GO:0070403\t5.0\n+GO:0070403|g__Bacteroides.s__Bacteroides_vulgatus\t5.0\n+GO:0070475\t7.361\n+GO:0070475|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070475|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070626\t2.404\n+GO:0070626|g__Bacteroides.s__Bacteroides_dorei\t0.801\n+GO:0070626|g__Bacteroides.s__Bacteroides_vulgatus\t1.603\n+GO:0070677\t4.823\n+GO:0070677|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0070677|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0070814\t15.615\n+GO:0070814|g__Bacteroides.s__Bacteroides_dorei\t9.504\n+GO:0070814|g__Bacteroides.s_
_Bacteroides_vulgatus\t6.111\n+GO:0071436\t2.407\n+GO:0071436|g__Bacteroides.s__Bacteroides_vulgatus\t2.407\n+GO:0090071\t3.831\n+GO:0090071|g__Bacteroides.s__Bacteroides_dorei\t3.831\n+GO:0097264\t3.745\n+GO:0097264|g__Bacteroides.s__Bacteroides_dorei\t3.745\n+GO:1990077\t8.016\n+GO:1990077|g__Bacteroides.s__Bacteroides_dorei\t1.517\n+GO:1990077|g__Bacteroides.s__Bacteroides_vulgatus\t6.499\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/regrouped_gene_families_to_ko.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_ko.tsv Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,1203 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t5.228\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t4.38\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t3.959\n+UNGROUPED|unclassified\t10.529\n+K00012\t3.292\n+K00012|g__Bacteroides.s__Bacteroides_dorei\t3.292\n+K00013\t2.538\n+K00013|g__Bacteroides.s__Bacteroides_vulgatus\t2.538\n+K00014\t3.115\n+K00014|g__Bacteroides.s__Bacteroides_dorei\t3.115\n+K00018\t4.662\n+K00018|g__Bacteroides.s__Bacteroides_dorei\t3.497\n+K00018|g__Bacteroides.s__Bacteroides_vulgatus\t1.166\n+K00024\t2.808\n+K00024|g__Bacteroides.s__Bacteroides_vulgatus\t2.808\n+K00031\t3.82\n+K00031|g__Bacteroides.s__Bacteroides_dorei\t2.865\n+K00031|g__Bacteroides.s__Bacteroides_vulgatus\t0.955\n+K00033\t5.232\n+K00033|g__Bacteroides.s__Bacteroides_dorei\t0.747\n+K00033|g__Bacteroides.s__Bacteroides_vulgatus\t4.484\n+K00036\t1.418\n+K00036|g__Bacteroides.s__Bacteroides_vulgatus\t1.418\n+K00041\t4.474\n+K00041|g__Bacteroides.s__Bacteroides_dorei\t2.983\n+K00041|g__Bacteroides.s__Bacteroides_vulgatus\t1.491\n+K00046\t4.255\n+K00046|g__Bacteroides.s__Bacteroides_vulgatus\t4.255\n+K00052\t5.192\n+K00052|g__Bacteroides.s__Bacteroides_dorei\t2.077\n+K00052|g__Bacteroides.s__Bacteroides_vulgatus\t3.115\n+K00053\t3.175\n+K00053|g__Bacteroides.s__Bacteroides_dorei\t1.058\n+K00053|g__Bacteroides.s__Bacteroides_vulgatus\t2.116\n+K00059\t6.173\n+K00059|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+K00059|g__Bacteroides.s__Bacteroides_vulgatus\t3.086\n+K00075\t7.65\n+K00075|g__Bacteroides.s__Bacteroides_dorei\t5.464\n+K00075|g__Bacteroides.s__Bacteroides_vulgatus\t2.186\n+K00097\t5.988\n+K00097|g__Bacteroides.s__Bacteroides_dorei\t5.988\n+K00099\t3.81\n+K00099|g__Bacteroides.s__Bacteroides_dorei\t1.905\n+K00099|g__Bacteroides.s__Bacteroides_vulgatus\t1.905\n+K00100\t0.929\n+K00100|g__Bacteroides.s__Bacteroides_dorei\t0.929\n+K00133\t3.289\n+K00133|g__Bacteroides.s__Bacteroides_dorei\t1.096\n+K00133|g__Bacteroides.s__Bact
eroides_vulgatus\t2.193\n+K00174\t3.017\n+K00174|g__Bacteroides.s__Bacteroides_dorei\t2.033\n+K00174|g__Bacteroides.s__Bacteroides_vulgatus\t4.002\n+K00175\t2.208\n+K00175|g__Bacteroides.s__Bacteroides_dorei\t1.104\n+K00175|g__Bacteroides.s__Bacteroides_vulgatus\t1.104\n+K00179\t3.953\n+K00179|g__Bacteroides.s__Bacteroides_dorei\t1.318\n+K00179|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+K00180\t6.211\n+K00180|g__Bacteroides.s__Bacteroides_dorei\t4.141\n+K00180|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+K00215\t4.566\n+K00215|g__Bacteroides.s__Bacteroides_dorei\t4.566\n+K00240\t16.743\n+K00240|g__Bacteroides.s__Bacteroides_dorei\t10.654\n+K00240|g__Bacteroides.s__Bacteroides_vulgatus\t6.088\n+K00257\t1.244\n+K00257|g__Bacteroides.s__Bacteroides_dorei\t0.622\n+K00257|g__Bacteroides.s__Bacteroides_vulgatus\t0.622\n+K00262\t4.043\n+K00262|g__Bacteroides.s__Bacteroides_dorei\t2.427\n+K00262|g__Bacteroides.s__Bacteroides_vulgatus\t2.83\n+K00297\t3.509\n+K00297|g__Bacteroides.s__Bacteroides_dorei\t1.754\n+K00297|g__Bacteroides.s__Bacteroides_vulgatus\t3.509\n+K00331\t4.016\n+K00331|g__Bacteroides.s__Bacteroides_dorei\t2.008\n+K00331|g__Bacteroides.s__Bacteroides_vulgatus\t2.008\n+K00337\t8.18\n+K00337|g__Bacteroides.s__Bacteroides_dorei\t4.09\n+K00337|g__Bacteroides.s__Bacteroides_vulgatus\t4.09\n+K00338\t5.65\n+K00338|g__Bacteroides.s__Bacteroides_dorei\t2.825\n+K00338|g__Bacteroides.s__Bacteroides_vulgatus\t2.825\n+K00339\t4.938\n+K00339|g__Bacteroides.s__Bacteroides_dorei\t4.938\n+K00342\t5.061\n+K00342|g__Bacteroides.s__Bacteroides_dorei\t1.446\n+K00342|g__Bacteroides.s__Bacteroides_vulgatus\t3.615\n+K00343\t3.023\n+K00343|g__Bacteroides.s__Bacteroides_dorei\t1.512\n+K00343|g__Bacteroides.s__Bacteroides_vulgatus\t1.512\n+K00346\t0.799\n+K00346|g__Bacteroides.s__Bacteroides_dorei\t0.799\n+K00347\t3.604\n+K00347|g__Bacteroides.s__Bacteroides_dorei\t1.802\n+K00347|g__Bacteroides.s__Bacteroides_vulgatus\t1.802\n+K00348\t3.472\n+K00348|g__Bacteroides.s__Bacteroides
_vulgatus\t3.472\n+K00349\t1.852\n+K00349|g__Bacteroides.s__Bacteroides_dorei\t1.852\n+K00351\t1.701\n+K00351|g__Bacteroides.s__Bacteroides_vulgatus\t1.701\n+K00367\t8.889\n+K00367|unclassified\t8.889\n+K00382\t3.221\n+K'..b's__Bacteroides_dorei\t1.705\n+K09691|g__Bacteroides.s__Bacteroides_vulgatus\t0.853\n+K09710\t3.831\n+K09710|g__Bacteroides.s__Bacteroides_dorei\t3.831\n+K09760\t0.88\n+K09760|g__Bacteroides.s__Bacteroides_vulgatus\t0.88\n+K09761\t1.658\n+K09761|g__Bacteroides.s__Bacteroides_dorei\t1.658\n+K09797\t6.633\n+K09797|g__Bacteroides.s__Bacteroides_dorei\t3.317\n+K09797|g__Bacteroides.s__Bacteroides_vulgatus\t3.317\n+K09808\t2.632\n+K09808|g__Bacteroides.s__Bacteroides_dorei\t1.754\n+K09808|g__Bacteroides.s__Bacteroides_vulgatus\t0.877\n+K09810\t5.348\n+K09810|g__Bacteroides.s__Bacteroides_dorei\t3.565\n+K09810|g__Bacteroides.s__Bacteroides_vulgatus\t1.783\n+K09811\t2.594\n+K09811|g__Bacteroides.s__Bacteroides_dorei\t1.297\n+K09811|g__Bacteroides.s__Bacteroides_vulgatus\t1.297\n+K09816\t6.64\n+K09816|g__Bacteroides.s__Bacteroides_dorei\t5.312\n+K09816|g__Bacteroides.s__Bacteroides_vulgatus\t1.328\n+K09903\t3.268\n+K09903|g__Bacteroides.s__Bacteroides_dorei\t3.268\n+K09922\t3.663\n+K09922|g__Bacteroides.s__Bacteroides_vulgatus\t3.663\n+K10206\t3.537\n+K10206|g__Bacteroides.s__Bacteroides_dorei\t1.768\n+K10206|g__Bacteroides.s__Bacteroides_vulgatus\t1.768\n+K10716\t7.905\n+K10716|g__Bacteroides.s__Bacteroides_dorei\t5.27\n+K10716|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+K11071\t5.722\n+K11071|g__Bacteroides.s__Bacteroides_dorei\t1.431\n+K11071|g__Bacteroides.s__Bacteroides_vulgatus\t4.292\n+K11085\t1.149\n+K11085|g__Bacteroides.s__Bacteroides_dorei\t0.575\n+K11085|g__Bacteroides.s__Bacteroides_vulgatus\t0.575\n+K11175\t8.333\n+K11175|g__Bacteroides.s__Bacteroides_dorei\t4.167\n+K11175|g__Bacteroides.s__Bacteroides_vulgatus\t4.167\n+K11527\t1.921\n+K11527|g__Bacteroides.s__Bacteroides_vulgatus\t1.921\n+K11537\t5.937\n+K11537|g__Bacteroides.s__Bacter
oides_dorei\t5.089\n+K11537|g__Bacteroides.s__Bacteroides_vulgatus\t0.848\n+K11720\t8.38\n+K11720|g__Bacteroides.s__Bacteroides_dorei\t2.793\n+K11720|g__Bacteroides.s__Bacteroides_vulgatus\t5.587\n+K11749\t5.622\n+K11749|g__Bacteroides.s__Bacteroides_dorei\t4.819\n+K11749|g__Bacteroides.s__Bacteroides_vulgatus\t0.803\n+K11934\t5.089\n+K11934|g__Bacteroides.s__Bacteroides_dorei\t5.089\n+K11991\t5.952\n+K11991|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+K11996\t1.58\n+K11996|g__Bacteroides.s__Bacteroides_dorei\t1.58\n+K12340\t7.229\n+K12340|g__Bacteroides.s__Bacteroides_dorei\t1.606\n+K12340|g__Bacteroides.s__Bacteroides_vulgatus\t5.622\n+K12343\t1.502\n+K12343|g__Bacteroides.s__Bacteroides_vulgatus\t1.502\n+K12373\t1.242\n+K12373|g__Bacteroides.s__Bacteroides_dorei\t1.413\n+K12373|g__Bacteroides.s__Bacteroides_vulgatus\t0.9\n+K12410\t5.0\n+K12410|g__Bacteroides.s__Bacteroides_vulgatus\t5.0\n+K12467\t13.782\n+K12467|unclassified\t13.782\n+K13038\t1.821\n+K13038|g__Bacteroides.s__Bacteroides_dorei\t1.821\n+K13378\t4.107\n+K13378|g__Bacteroides.s__Bacteroides_dorei\t1.369\n+K13378|g__Bacteroides.s__Bacteroides_vulgatus\t2.738\n+K13747\t6.573\n+K13747|g__Bacteroides.s__Bacteroides_dorei\t1.878\n+K13747|g__Bacteroides.s__Bacteroides_vulgatus\t4.695\n+K14441\t3.333\n+K14441|g__Bacteroides.s__Bacteroides_dorei\t3.333\n+K14652\t0.896\n+K14652|g__Bacteroides.s__Bacteroides_dorei\t0.896\n+K15342\t2.165\n+K15342|g__Bacteroides.s__Bacteroides_vulgatus\t2.165\n+K15460\t1.65\n+K15460|g__Bacteroides.s__Bacteroides_vulgatus\t1.65\n+K15532\t2.732\n+K15532|g__Bacteroides.s__Bacteroides_dorei\t2.732\n+K15633\t4.944\n+K15633|g__Bacteroides.s__Bacteroides_dorei\t2.825\n+K15633|g__Bacteroides.s__Bacteroides_vulgatus\t2.119\n+K15923\t2.965\n+K15923|g__Bacteroides.s__Bacteroides_vulgatus\t2.965\n+K16089\t3.058\n+K16089|g__Bacteroides.s__Bacteroides_dorei\t0.51\n+K16089|g__Bacteroides.s__Bacteroides_vulgatus\t2.548\n+K17103\t4.926\n+K17103|g__Bacteroides.s__Bacteroides_dorei\t4.926\n+K17
828\t2.46\n+K17828|g__Bacteroides.s__Bacteroides_dorei\t1.23\n+K17828|g__Bacteroides.s__Bacteroides_vulgatus\t1.23\n+K18220\t2.189\n+K18220|g__Bacteroides.s__Bacteroides_dorei\t2.189\n+K18682\t2.784\n+K18682|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+K18682|g__Bacteroides.s__Bacteroides_vulgatus\t1.392\n+K18785\t1.107\n+K18785|g__Bacteroides.s__Bacteroides_vulgatus\t1.107\n+K19271\t3.724\n+K19271|g__Bacteroides.s__Bacteroides_dorei\t3.724\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/regrouped_gene_families_to_rxn.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_rxn.tsv Mon Mar 13 12:39:25 2017 -0400
b
b'@@ -0,0 +1,1051 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t17667.167\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t10095.832\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t7522.359\n+UNGROUPED|unclassified\t48.976\n+1.1.1.271-RXN\t2.045\n+1.1.1.271-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.045\n+1.2.7.8-RXN\t3.953\n+1.2.7.8-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.318\n+1.2.7.8-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+1.3.1.9-RXN\t5.27\n+1.3.1.9-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.635\n+1.3.1.9-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+1.5.1.20-RXN\t7.018\n+1.5.1.20-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.509\n+1.5.1.20-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t3.509\n+1.6.99.5-RXN\t33.721\n+1.6.99.5-RXN|g__Bacteroides.s__Bacteroides_dorei\t11.804\n+1.6.99.5-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t13.172\n+1.6.99.5-RXN|unclassified\t8.745\n+1.7.7.2-RXN\t8.889\n+1.7.7.2-RXN|unclassified\t8.889\n+1.8.1.4-RXN\t3.221\n+1.8.1.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.61\n+1.8.1.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.61\n+2-DEHYDROPANTOATE-REDUCT-RXN\t1.23\n+2-DEHYDROPANTOATE-REDUCT-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.23\n+2-ISOPROPYLMALATESYN-RXN\t3.462\n+2-ISOPROPYLMALATESYN-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+2-ISOPROPYLMALATESYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+2.3.1.179-RXN\t7.712\n+2.3.1.179-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.428\n+2.3.1.179-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.284\n+2.3.1.180-RXN\t2.2\n+2.3.1.180-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.1\n+2.3.1.180-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.1\n+2.5.1.64-RXN\t2.54\n+2.5.1.64-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.54\n+2.6.1.37-RXN\t6.061\n+2.6.1.37-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t6.061\n+2.7.7.33-RXN\t4.405\n+2.7.7.33-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.405\n+2.8.1.6-RXN\t2.323\n+2.8.1.6-RXN|g__Bacteroides.s__Bacteroides
_dorei\t2.323\n+2PGADEHYDRAT-RXN\t4.177\n+2PGADEHYDRAT-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.506\n+2PGADEHYDRAT-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.671\n+3-DEHYDROQUINATE-SYNTHASE-RXN\t4.154\n+3-DEHYDROQUINATE-SYNTHASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.154\n+3.1.11.6-RXN\t5.79\n+3.1.11.6-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.136\n+3.1.11.6-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.654\n+3.1.21.2-RXN\t6.775\n+3.1.21.2-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t6.775\n+3.1.22.4-RXN\t4.246\n+3.1.22.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.123\n+3.1.22.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.123\n+3.1.26.11-RXN\t2.575\n+3.1.26.11-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.203\n+3.1.26.11-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.372\n+3.1.26.3-RXN\t1.208\n+3.1.26.3-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.208\n+3.1.26.4-RXN\t1.972\n+3.1.26.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.972\n+3.2.1.23-RXN\t6.884\n+3.2.1.23-RXN|g__Bacteroides.s__Bacteroides_dorei\t5.582\n+3.2.1.23-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.302\n+3.2.1.89-RXN\t5.594\n+3.2.1.89-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.958\n+3.2.1.89-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.636\n+3.4.11.18-RXN\t5.507\n+3.4.11.18-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+3.4.11.4-RXN\t3.565\n+3.4.11.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.783\n+3.4.11.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.783\n+3.4.21.89-RXN\t10.09\n+3.4.21.89-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.463\n+3.4.21.89-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.627\n+3.4.23.36-RXN\t3.704\n+3.4.23.36-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.852\n+3.4.23.36-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.852\n+3.5.1.27-RXN\t8.772\n+3.5.1.27-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.386\n+3.5.1.27-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.386\n+3.5.1.88-RXN\t8.772\n+3.5.1.88-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.386\n+3.5.1.88-RXN|g__Bacteroides.s
__Bacteroides_vulgatus\t4.386\n+3.6.3.3-RXN\t2.479\n+3.6.3.3-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.862\n+3.6.3.3-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.617\n+3PGAREARR-RXN\t'..b'cteroides_dorei\t3.428\n+RXN1G-94|g__Bacteroides.s__Bacteroides_vulgatus\t4.284\n+RXN1G-962\t5.27\n+RXN1G-962|g__Bacteroides.s__Bacteroides_dorei\t2.635\n+RXN1G-962|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+RXN66-469\t8.477\n+RXN66-469|unclassified\t8.477\n+RXN66-477\t8.477\n+RXN66-477|unclassified\t8.477\n+RXN66-480\t8.477\n+RXN66-480|unclassified\t8.477\n+RXN66-483\t8.477\n+RXN66-483|unclassified\t8.477\n+RXN66-484\t8.477\n+RXN66-484|unclassified\t8.477\n+RXNQT-4191\t1.475\n+RXNQT-4191|g__Bacteroides.s__Bacteroides_vulgatus\t1.475\n+S-ADENMETSYN-RXN\t2.513\n+S-ADENMETSYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.513\n+SHIKIMATE-KINASE-RXN\t9.324\n+SHIKIMATE-KINASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.662\n+SHIKIMATE-KINASE-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.662\n+SUCCINATE-DEHYDROGENASE-UBIQUINONE-RXN\t9.188\n+SUCCINATE-DEHYDROGENASE-UBIQUINONE-RXN|unclassified\t9.188\n+SULFATE-ADENYLYLTRANS-RXN\t7.863\n+SULFATE-ADENYLYLTRANS-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.69\n+SULFATE-ADENYLYLTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.173\n+SULFOCYS-RXN\t2.347\n+SULFOCYS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.347\n+TDCEACT-RXN\t7.937\n+TDCEACT-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.937\n+TETRAACYLDISACC4KIN-RXN\t3.992\n+TETRAACYLDISACC4KIN-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.998\n+TETRAACYLDISACC4KIN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.994\n+THI-P-SYN-RXN\t1.894\n+THI-P-SYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.894\n+THIAZOLSYN2-RXN\t4.444\n+THIAZOLSYN2-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.963\n+THIAZOLSYN2-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.481\n+THIOREDOXIN-REDUCT-NADPH-RXN\t9.423\n+THIOREDOXIN-REDUCT-NADPH-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t9.423\n+TRANS-RXN-214\t13.436\n+TRANS-RXN-
214|g__Bacteroides.s__Bacteroides_dorei\t6.461\n+TRANS-RXN-214|g__Bacteroides.s__Bacteroides_vulgatus\t6.975\n+TRANSALDOL-RXN\t3.584\n+TRANSALDOL-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.792\n+TRANSALDOL-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.792\n+TRIOSEPISOMERIZATION-RXN\t9.132\n+TRIOSEPISOMERIZATION-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.522\n+TRIOSEPISOMERIZATION-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t7.61\n+TRNA-GUANINE-N7--METHYLTRANSFERASE-RXN\t1.515\n+TRNA-GUANINE-N7--METHYLTRANSFERASE-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.515\n+TRYPSYN-RXN\t18.92\n+TRYPSYN-RXN|g__Bacteroides.s__Bacteroides_dorei\t10.876\n+TRYPSYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t8.044\n+UDP-NACMUR-ALA-LIG-RXN\t3.13\n+UDP-NACMUR-ALA-LIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.347\n+UDP-NACMUR-ALA-LIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.782\n+UDP-NACMURALGLDAPAALIG-RXN\t6.667\n+UDP-NACMURALGLDAPAALIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.167\n+UDP-NACMURALGLDAPAALIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.5\n+UDP-NACMURALGLDAPLIG-RXN\t3.687\n+UDP-NACMURALGLDAPLIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.737\n+UDP-NACMURALGLDAPLIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.95\n+UDPGLUCEPIM-RXN\t1.068\n+UDPGLUCEPIM-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.068\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN\t13.453\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.474\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t5.979\n+UDPNACETYLGLUCOSAMENOLPYRTRANS-RXN\t0.829\n+UDPNACETYLGLUCOSAMENOLPYRTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.829\n+UDPNACETYLMURAMATEDEHYDROG-RXN\t7.65\n+UDPNACETYLMURAMATEDEHYDROG-RXN|g__Bacteroides.s__Bacteroides_dorei\t5.464\n+UDPNACETYLMURAMATEDEHYDROG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.186\n+UDPREDUCT-RXN\t3.155\n+UDPREDUCT-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+UDPREDUCT-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.052\n+UNDECAPRENYL-DI
PHOSPHATASE-RXN\t1.425\n+UNDECAPRENYL-DIPHOSPHATASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.425\n+XANTHOSINEPHOSPHORY-RXN\t2.813\n+XANTHOSINEPHOSPHORY-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.813\n+XMPXAN-RXN\t2.963\n+XMPXAN-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.963\n+XYLISOM-RXN\t0.821\n+XYLISOM-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.821\n'
b
diff -r 000000000000 -r 1ab06263e083 test-data/relab_levelwise_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/relab_levelwise_renormalized_pathway_abundance.tsv Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+UNMAPPED 0.193449
+UNINTEGRATED 0.796985
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 0.584618
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 0.393252
+UNINTEGRATED|unclassified 0.00936201
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.00172203
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.00276929
+PWY-6305: putrescine biosynthesis IV 0.00154401
+PWY-6305: putrescine biosynthesis IV|unclassified 0.002483
+PWY490-3: nitrate reduction VI (assimilatory) 0.001374
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.0022096
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.00133038
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.00180326
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.000795872
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.000963748
+PWY-6700: queuosine biosynthesis 0.000696312
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.00068396
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.000291624
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.000395699
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.000476857
+VALSYN-PWY: L-valine biosynthesis 0.000395699
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.000476857
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.000358864
+PWY-5097: L-lysine biosynthesis VI 0.000333173
+PWY-2942: L-lysine biosynthesis III 0.000316201
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.000358824
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.000303519
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.000250663
b
diff -r 000000000000 -r 1ab06263e083 test-data/taxonomic_profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomic_profile.tabular Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,11 @@
+#SampleID Metaphlan2_Analysis
+k__Bacteria 100.0
+k__Bacteria|p__Bacteroidetes 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron 31.55632
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris|t__Bacteroides_stercoris_unclassified 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified 31.55632
b
diff -r 000000000000 -r 1ab06263e083 tool-data/humann2_nucleotide_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann2_nucleotide_database.loc.sample Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.
+#The file has this format (whitespace characters are TAB characters):
+#02_16_2014 ChocoPhlAn chocophlan /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 tool-data/humann2_protein_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann2_protein_database.loc.sample Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.
+#The file has this format (whitespace characters are TAB characters):
+#02_16_2014 Full UniRef50 uniref50_diamond /path/to/data
+#02_16_2014 EC-filtered UniRef50 uniref50_ec_filtered_diamond /path/to/data
+#02_16_2014 GO filtered UniRef50 for rapsearch2 uniref50_GO_filtered_rapsearch2 /path/to/data
+#02_16_2014 Full UniRef90 uniref90_diamond /path/to/data
+#02_16_2014 EC-filtered UniRef90 uniref90_ec_filtered_diamond /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 tool-data/metaphlan2_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_database.loc.sample Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.
+#The file has this format (whitespace characters are TAB characters):
+#02_16_2014  MetaPhlAn2 clade-specific marker genes db_v20 /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r 1ab06263e083 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Mar 13 12:39:25 2017 -0400
b
@@ -0,0 +1,14 @@
+<tables>
+    <!-- Each table below is read from a .loc file under tool-data/; lines
+         starting with '#' in those files are comments. All three tables
+         share the same four columns: value, name, dbkey, path. -->
+    <!-- MetaPhlAn2 database table -->
+    <table name="metaphlan2_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/metaphlan2_database.loc" />
+    </table>
+    <!-- HUMAnN2 nucleotide database table -->
+    <table name="humann2_nucleotide_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann2_nucleotide_database.loc" />
+    </table>
+    <!-- HUMAnN2 protein database table -->
+    <table name="humann2_protein_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann2_protein_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 1ab06263e083 transform_json_to_pkl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py Mon Mar 13 12:39:25 2017 -0400
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import bz2
+import cPickle as pickle
+import json
+
+
+def transform_json_to_pkl(args):
+    with open(args.json_input, 'r') as json_file:
+        json_str = json_file.read()
+        metadata = json.loads(json_str)
+
+        for marker in metadata["markers"]:
+            a_set = set(metadata["markers"][marker]["ext"])
+            metadata["markers"][marker]["ext"] = a_set
+
+    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
+    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+    pkl_output.close()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--json_input', required=True)
+    parser.add_argument('--pkl_output', required=True)
+    args = parser.parse_args()
+
+    transform_json_to_pkl(args)