Repository 'humann2'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/humann2

Changeset 0:3d6f37e7e3a8 (2016-05-26)
Next changeset 1:bcd414bb721b (2016-06-02)
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit 7aadbbdef6da644837f62ea428cb859eeff34f04-dirty
added:
humann2.xml
humann2_download_databases.xml
humann2_join_tables.xml
humann2_macros.xml
humann2_merge_abundance_tables.xml
humann2_reduce_table.xml
humann2_regroup_table.xml
humann2_rename_table.xml
humann2_renorm_table.xml
humann2_split_table.xml
test-data/cpm_renormalized_pathway_abundance.tsv
test-data/expected_gene_family_abundance.tsv
test-data/expected_pathway_abundance.tsv
test-data/expected_pathway_coverage.tsv
test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
test-data/gene_families_humann2_output.tabular
test-data/gene_select_humann2_output.tabular
test-data/input_sequences.fasta
test-data/joined_pathway_coverage_abundance.tsv
test-data/max_reduced_gene_family_abundance.tsv
test-data/merged_gene_families_pathways_abundances.tsv
test-data/pathway_rename_mapping.tsv
test-data/reduced_uniref50.fasta
test-data/regrouped_gene_families_to_ec.tsv
test-data/regrouped_gene_families_to_ko.tsv
test-data/relab_renormalized_pathway_abundance.tsv
test-data/renamed_metacyc_pathways.tsv
test-data/split_joined_table_abundances.tsv
test-data/split_joined_table_coverage.tsv
test-data/taxonomic_profile.tabular
tool-data/metaphlan2_bowtie_db.loc.sample
tool-data/metaphlan2_metadata.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2.xml Thu May 26 10:20:59 2016 -0400
[
b'@@ -0,0 +1,273 @@\n+<tool id="humann2" name="HUMAnN2" version="0.6.1">\n+    <description>to profile presence/absence and abundance of microbial pathways and gene families</description>\n+\n+    <macros>\n+        <import>humann2_macros.xml</import>\n+    </macros>\n+\n+    <expand macro="requirements"/>\n+    <expand macro="stdio"/>\n+\n+    <version_command>\n+<![CDATA[\n+    humann2 --version\n+]]>\n+    </version_command>\n+\n+    <command><![CDATA[\n+        `mkdir nucleotide_db`\n+\n+        &&\n+\n+        #if $nucleotide_db.nucleotide_db_selector == "history"\n+            #for $file in $nucleotide_db.history_nucleotide_db:\n+                `cp $file nucleotide_db/$file.name`\n+            #end for\n+\n+            &&\n+        #end if        \n+\n+        `mkdir protein_db`\n+\n+        &&\n+\n+        #if $protein_db.protein_db_selector == "history"\n+            diamond makedb \n+                --in $protein_db.history_protein_db \n+                --db protein_db/protein_db\n+        #end if\n+\n+        &&\n+\n+        humann2 \n+            -i "$input_file"\n+            \n+            #set $metaphlan_option = "-t rel_ab"\n+            #if $taxonomic_profile.taxonomic_profile_test == "true":\n+                --taxonomic-profile $taxonomic_profile.taxonomic_profile_file\n+            #else\n+\n+                #if $taxonomic_profile.mpa_pkl.mpa_pkl_selector == "cached"\n+                    #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data])\n+                    #set $mpa_pkl_db = $taxonomic_profile.mpa_pkl.cached_mpa_pkl.value\n+                    #set $metaphlan_option += " --mpa_pkl " + $mpa_pkl_table[$mpa_pkl_db]\n+                #else\n+                    #set $metaphlan_option += " --mpa_pkl " + $taxonomic_profile.mpa_pkl.history_mpa_pkl\n+                #end if\n+\n+                #if $taxonomic_profile.bowtie2db.bowtie2db_selector == "cached"\n+              
      #set $bowtie2_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.bowtie2db.cached_bowtie2db.input.options.tool_data_table.data])\n+                    #set $bowtie2db_choice = $taxonomic_profile.bowtie2db.cached_bowtie2db.value\n+                    #set $metaphlan_option += " --bowtie2db " + $bowtie2_table[$bowtie2db_choice]\n+                #else\n+                    #set $metaphlan_option += " --bowtie2db " + $taxonomic_profile.bowtie2db.history_bowtie2db\n+                #end if\n+\n+            #end if\n+\n+            --metaphlan-options="$metaphlan_option"\n+\n+            --evalue $e_value\n+            --identity-threshold $identity\n+            --prescreen-threshold $prescreen\n+\n+            --pathways $pathways\n+\n+            #if $nucleotide_db.nucleotide_db_selector == "cached"\n+                --nucleotide-database \\${HUMANN2_DIR}/databases/chocophlan\n+            #else\n+                --nucleotide-database nucleotide_db\n+            #end if\n+\n+            #if $protein_db.protein_db_selector == "cached"\n+                --protein-database \\${HUMANN2_DIR}/databases/uniref\n+            #else\n+                --protein-database protein_db\n+            #end if\n+\n+            --threads \\${GALAXY_SLOTS:-4}\n+\n+            --xipe $xipe\n+            --minpath $minpath\n+            --pick-frames $pick_frames\n+\n+            -o "output"\n+            --output-format $output_format\n+            --output-max-decimals $output_max_dec\n+            --output-basename "humann2"\n+            $remove_statified_output            \n+    ]]></command>\n+\n+    <inputs>\n+        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom" \n+            label="Input sequence file" help=""/>\n+\n+        <conditional name="taxonomic_profile">\n+            <param name=\'taxonomic_profile_test\' type=\'select\' label="Use a custom taxonomic profile?" 
help="The file must have been created by MetaPhlan2">\n+                <option value="true">Yes</option>\n+                <option value="false" selected="true">No</option>\n+            </para'..b'ut/humann2_pathcoverage.tsv" label="${tool.name} on ${on_string}: Pathways and their coverage" >\n+            <filter>output_format == "tsv"</filter>\n+        </data>\n+        <data format="biom" name="pathcoverage_biom" from_work_dir="output/humann2_pathcoverage.biom" label="${tool.name} on ${on_string}: Pathways and their coverage" >\n+            <filter>output_format == "biom"</filter>\n+        </data>\n+\n+        <data format="tsv" name="pathabundance_tsv" from_work_dir="output/humann2_pathabundance.tsv" label="${tool.name} on ${on_string}: Pathways and their abundance" >\n+            <filter>output_format == "tsv"</filter>\n+        </data>\n+        <data format="biom" name="pathabundance_biom" from_work_dir="output/humann2_pathabundance.biom" label="${tool.name} on ${on_string}: Pathways and their abundance" >\n+            <filter>output_format == "biom"</filter>\n+        </data>\n+    </outputs>\n+\n+    <tests>\n+        <test>\n+            <param name="input_file" value="input_sequences.fasta"/>\n+            <param name=\'taxonomic_profile_test\' value="true"/>\n+            <param name="taxonomic_profile_file" value="taxonomic_profile.tabular"/>\n+            <param name="nucleotide_db_selector" value="history"/>\n+            <param name="history_nucleotide_db">\n+                <collection type="list">\n+                    <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz" />\n+                    <element name="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz" />\n+                </collection>\n+            </param>\n+            
<param name="protein_db_selector" value="history"/>\n+            <param name="history_protein_db" value="reduced_uniref50.fasta"/>\n+            <param name="e_value" value="1"/>\n+            <param name="identity" value="40"/>\n+            <param name="prescreen" value="0.01"/>\n+            <param name=\'pathways\' value="metacyc"/>\n+            <param name=\'xipe\' value="false"/>\n+            <param name=\'minpath\' value="true"/>\n+            <param name=\'pick_frames\' value="true"/>\n+            <param name=\'output_format\' value="tsv"/>\n+            <param name="output_max_dec" value="10"/>\n+            <output name="gene_families_tsv" file="expected_gene_family_abundance.tsv"/>\n+            <output name="pathcoverage_tsv" file="expected_pathway_coverage.tsv"/>\n+            <output name="pathabundance_tsv" file="expected_pathway_abundance.tsv"/>\n+        </test>\n+    </tests>\n+\n+    <help><![CDATA[\n+**What it does**\n+\n+HUMAnN is a pipeline for efficiently and accuretly profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.\n+\n+This tool corresponds to the main tool in HUMAnN2 pipeline.\n+\n+**Inputs**\n+\n+The input is a single file corresponding either to filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), alignment file (sam, bam or blastm8 format) or gene table file (tsv or biom format).\n+\n+A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. \n+\n+HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). Custom databases can also be used after upload. 
Nucleotide database have to be provided as a dataset.\n+\n+**Outputs**\n+\n+HUMAnN creates three output files:\n+\n+ - A file with gene families and their abundance\n+ - A file with pathways and their abundance\n+ - A file with pathways and their coverage\n+\n+    ]]></help>\n+\n+    <expand macro="citations"/>\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_download_databases.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_download_databases.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,47 @@
+<tool id="humann2_download_databases" name="Download" version="0.6.1">
+    <description>HUMAnN2 databases</description>
+    <!-- Wraps humann2_databases: installs the selected database under ${HUMANN2_DIR}/databases and keeps the command output as a log dataset -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_databases --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_databases
+            ## $database_selector stays unquoted on purpose: each option value holds two words that the shell must split into separate arguments
+            --download $database_selector "\${HUMANN2_DIR}/databases"
+            > '$log'
+    ]]></command>
+
+    <inputs>
+        <param name="database_selector" type="select" label="Database to download" help="">
+            <option value="chocophlan full">Nucleotide (ChocoPhlAn)</option>
+            <option value="uniref diamond">Protein (UniRef50)</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="log" label="${tool.name}: Log" />
+    </outputs>
+
+    <tests>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Download HUMAnN2 databases is a tool to download needed nucleotide or protein databases. This tool must be run before using HUMAnN2.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_join_tables.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_join_tables.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,58 @@
+<tool id="humann2_join_tables" name="Join" version="0.6.1">
+    <description>HUMAnN2 generated tables</description>
+    <!-- Wraps humann2_join_tables: merges the selected gene/pathway tables into one joined table -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_join_tables --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        ## humann2_join_tables is given a directory (-i), so every selected table is first copied into tmp_dir
+        mkdir tmp_dir &&
+        #for $table in $input_tables
+            ## input_table is multiple="true" and may hold several datasets: copy them one at a time
+            #for $dataset in $table.input_table
+                cp '$dataset' tmp_dir &&
+            #end for
+        #end for
+        humann2_join_tables
+            -i "tmp_dir"
+            -o '$joined_table'
+    ]]></command>
+
+    <inputs>
+        <repeat name="input_tables" title="Gene/pathway tables to join">
+            <param name="input_table" type="data" format="tsv,tabular" multiple="true" label="Gene/pathway table" help=""/>
+        </repeat>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="joined_table" label="${tool.name} on ${on_string}: Joined table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_tables_0|input_table" value="expected_pathway_coverage.tsv"/>
+            <param name="input_tables_1|input_table" value="expected_pathway_abundance.tsv"/>
+            <output name="joined_table" file="joined_pathway_coverage_abundance.tsv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Join HUMAnN2 tables is a tool to join gene or pathway tables of multiple samples into a single table.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_macros.xml Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<macros> <!-- Fragments shared by the humann2_*.xml wrappers through expand: "requirements" (tool dependencies), "stdio" (exit codes 1 and above), "citations" (DOI reference) -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.2.5">bowtie2</requirement>
+            <requirement type="package" version="2.2.0">metaphlan2</requirement>
+            <requirement type="package" version="0.7.10">diamond</requirement>
+            <requirement type="package" version="0.6.1">humann2</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+        </stdio>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1002358</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_merge_abundance_tables.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_merge_abundance_tables.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,86 @@
+<tool id="humann2_merge_abundance_tables" name="Merge" version="0.6.1">
+    <description>HUMAnN2 generated gene and pathway abundance tables</description>
+    <!-- Wraps humann2_merge_abundance_tables: combines a gene abundance table and a pathway abundance table, optionally with custom mapping files -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_merge_abundance_tables --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_merge_abundance_tables
+
+            --input-genes '$input_gene'
+            --input-pathways '$input_pathway'
+
+            ## The mapping datasets are declared inside their conditionals, so they must be addressed through them
+            #if str($gene_mapping_selection.gene_mapping_selection_test) == "true":
+                --gene-mapping '$gene_mapping_selection.gene_mapping'
+            #end if
+            #if str($pathway_mapping_selection.pathway_mapping_selection_test) == "true":
+                --pathway-mapping '$pathway_mapping_selection.pathway_mapping'
+            #end if
+
+            ## Renders as the truevalue of the boolean (the remove-taxonomy flag) or as an empty string
+            $remove_taxonomy
+            -o '$output_table'
+    ]]></command>
+
+    <inputs>
+        <param name="input_gene" type="data" format="tsv" label="Gene family or EC abundance file" help="(--input-genes)"/>
+
+        <param name="input_pathway" type="data" format="tsv" label="Pathway abundance file" help="(--input-pathways)"/>
+
+        <conditional name="gene_mapping_selection">
+            <param name='gene_mapping_selection_test' type='boolean' checked="false" truevalue='true' falsevalue='false' label="Use a gene mapping file?" help=""/>
+            <when value="true">
+                <param name="gene_mapping" type="data" format="tsv" label="Gene family to reaction mapping file" help="(--gene-mapping)"/>
+            </when>
+            <when value="false">
+            </when>
+        </conditional>
+        <conditional name="pathway_mapping_selection">
+            <param name='pathway_mapping_selection_test' type='boolean' checked="false" truevalue='true' falsevalue='false' label="Use a pathway mapping file?" help=""/>
+            <when value="true">
+                <param name="pathway_mapping" type="data" format="tsv" label="Reaction to pathway mapping file" help="(--pathway-mapping)"/>
+            </when>
+            <when value="false">
+            </when>
+        </conditional>
+
+        <param name='remove_taxonomy' type='boolean' checked="false" truevalue='--remove-taxonomy' falsevalue='' label="Remove the taxonomy from the output file?" help="(--remove-taxonomy)"/>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Merged table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_gene" value="expected_gene_family_abundance.tsv"/>
+            <param name="input_pathway" value="expected_pathway_abundance.tsv"/>
+            <param name="gene_mapping_selection_test" value="false"/>
+            <param name="pathway_mapping_selection_test" value="false"/>
+            <param name="remove_taxonomy" value="false"/>
+            <output name="output_table" file="merged_gene_families_pathways_abundances.tsv" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+The current tool merges a table containing gene family abundances with a table containing pathway abundances, given a gene to pathway mapping (default or custom one).
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_reduce_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_reduce_table.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,67 @@
+<tool id="humann2_reduce_table" name="Reduce" version="0.6.1">
+    <description>a HUMAnN2 generated table</description>
+    <!-- Wraps humann2_reduce_table: applies the chosen function to the input table and sorts the result -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_reduce_table --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_reduce_table
+            -i '$input_table'
+            -o '$output_table'
+            --function $function
+            --sort-by $sort
+    ]]></command>
+
+    <inputs>
+        <param name="input_table" type="data" format="tsv" label="Gene/pathway table" help="(-i)"/>
+
+        <param name="function" type="select" label="Function to apply" help="(--function)">
+            <option value="max" selected="true">Max</option>
+            <option value="sum">Sum</option>
+            <option value="mean">Mean</option>
+            <option value="min">Min</option>
+        </param>
+
+        <param name="sort" type="select" label="How to sort the output" help="(--sort-by)">
+            <option value="name" selected="true">Name</option>
+            <option value="value">value</option>
+            <option value="level">level</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Reduced table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_table" value="expected_gene_family_abundance.tsv"/>
+            <param name="function" value="max"/>
+            <param name="sort" value="name"/>
+            <output name="output_table" file="max_reduced_gene_family_abundance.tsv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Reduce HUMAnN2 table is a tool to reduce the table given a function (max, sum, mean or min).
+
+The reduced table can be sorted by name, value or level.
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_regroup_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_regroup_table.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,91 @@
+<tool id="humann2_regroup_table" name="Regroup" version="0.6.1">
+    <description>a HUMAnN2 generated table by features</description>
+    <!-- Wraps humann2_regroup_table: regroups table features with a built-in grouping or a custom groups file -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_regroup_table --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_regroup_table
+            -i '$input_table'
+            -o '$output_table'
+            --function $function
+            ## Either one of the built-in groupings, or a custom groups file with its optional reversed flag
+            #if $built_in.built_in_test == "true":
+                --groups $built_in.groups
+            #else:
+                --custom '$built_in.file'
+                $built_in.reversed
+            #end if
+    ]]></command>
+
+    <inputs>
+        <param name="input_table" type="data" format="tsv" label="Gene/pathway table" help="(-i)"/>
+
+        <param name="function" type="select" label="How to combine grouped features?" help="(--function)">
+            <option value="sum" selected="true">Sum</option>
+            <option value="mean">Mean</option>
+        </param>
+
+        <conditional name="built_in">
+            <param name='built_in_test' type='select' label="Use built-in grouping options?" help="">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+
+            <when value="true">
+                <param name="groups" type="select" label="Grouping options" help="(--groups)">
+                    <option value="uniref50_ec" selected="true">Collapses UniRef50 gene families into top level enzyme commission (EC) categories (associations inferred from UniProt)</option>
+                    <option value="uniref50_go">Collapses UniRef50 gene families into a subset of non-redundant, high-level Gene Ontology (GO) categories (associations inferred from UniProt and a HUMAnN2-specific parsing of the GO hierarchy)</option>
+                    <option value="uniref50_ko">Collapses UniRef50 gene families into KEGG Orthogroups (KOs) (associations inferred from UniProt)</option>
+                    <option value="uniref50_rxn">Collapses UniRef50 gene families into metacyc reactions (use of reaction abundances to compute the abundance and coverage of broader metabolic pathways)</option>
+                </param>
+            </when>
+
+            <when value="false">
+                <param name="file" type="data" format="tsv" label="Custom groups file" help="(--custom)"/>
+                <param name='reversed' type='boolean' checked="false" truevalue='--reversed' falsevalue='' label="Is the groups file reversed?" help="Mapping from features to groups. (--reversed)"/>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Regrouped table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_table" value="expected_gene_family_abundance.tsv"/>
+            <param name="function" value="sum"/>
+            <param name="built_in_test" value="true"/>
+            <param name="groups" value="uniref50_ec"/>
+            <output name="output_table" file="regrouped_gene_families_to_ec.tsv"/>
+        </test>
+        <test>
+            <param name="input_table" value="expected_gene_family_abundance.tsv"/>
+            <param name="function" value="mean"/>
+            <param name="built_in_test" value="true"/>
+            <param name="groups" value="uniref50_ko"/>
+            <output name="output_table" file="regrouped_gene_families_to_ko.tsv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Regroup HUMAnN2 table features is a tool for regrouping table features (abundances or coverage) given a table of feature values and a mapping of groups to component features. It produces a new table with group values in place of feature values.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_rename_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_rename_table.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,68 @@
+<tool id="humann2_rename_table" name="Rename" version="0.6.1">
+    <description>features of a HUMAnN2 generated table</description>
+    <!-- Wraps humann2_rename_table: renames the features of a table using the supplied two-column mapping file -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_rename_table --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_rename_table
+            -i '$input_table'
+            -o '$output_table'
+            ## NOTE(review): the names option and the custom mapping (-c) are always passed together; confirm which one humann2_rename_table honours
+            --names $names
+            $simplify
+
+            -c '$mapping_file'
+    ]]></command>
+
+    <inputs>
+        <param name="input_table" type="data" format="tsv" label="Gene/pathway table" help="(-i)"/>
+
+        <param name="names" type="select" label="Table features that can be renamed" help="(--names)">
+            <option value="metacyc-rxn" selected="true">MetaCyc reactions</option>
+            <option value="metacyc-pwy">MetaCyc pathways</option>
+            <option value="ko">KEGG Orthogroups (KO)</option>
+            <option value="ec">Enzyme commission (EC)</option>
+            <option value="uniref50">UniRef50 gene families</option>
+        </param>
+
+        <param name='simplify' type='boolean' checked="false" truevalue='-s' falsevalue='' label="Remove non-alphanumeric characters from names?" help="(--simplify)"/>
+
+        <param name="mapping_file" type="data" format="tsv" label="Mapping file" help="The mapping file must be a tabular file with two tab-separated columns. The first column must contain the value you want to modify and the second the new value (--custom)"/>
+
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Renamed table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_table" value="expected_pathway_abundance.tsv"/>
+            <param name="names" value="metacyc-pwy" />
+            <param name='simplify' value="false"/>
+            <param name="mapping_file" value="pathway_rename_mapping.tsv"/>
+            <output name="output_table" file="renamed_metacyc_pathways.tsv" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Rename HUMAnN2 table features is a tool for renaming table features given a custom mapping. The mapping file must be a tabular file with two tab-separated columns. The first column must contain the value you want to modify and the second the new value.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_renorm_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_renorm_table.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,60 @@
+<tool id="humann2_renorm_table" name="Renormalize" version="0.6.1">
+    <description>a HUMAnN2 generated table</description>
+    <!-- Wraps humann2_renorm_table: renormalizes the input table with the selected scheme (cpm or relab) -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_renorm_table --version
+]]>
+    </version_command>
+
+    <command><![CDATA[
+        humann2_renorm_table
+            -i '$input_table'
+            -o '$output_table'
+            --norm $norm
+    ]]></command>
+
+    <inputs>
+        <param name="input_table" type="data" format="tsv" label="Gene/pathway table" help="(-i)"/>
+
+        <param name="norm" type="select" label="Normalization scheme" help="(--norm)">
+            <option value="cpm" selected="true">Copies per million</option>
+            <option value="relab">Relative abundance</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Normalized table" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_table" value="expected_pathway_abundance.tsv"/>
+            <param name="norm" value="cpm"/>
+            <output name="output_table" file="cpm_renormalized_pathway_abundance.tsv"/>
+        </test>
+        <test>
+            <param name="input_table" value="expected_pathway_abundance.tsv"/>
+            <param name="norm" value="relab"/>
+            <output name="output_table" file="relab_renormalized_pathway_abundance.tsv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Renorm HUMAnN2 table is a tool to renormalize a table, either in copies per million or in relative abundance. Each level of a stratified table will be normalized using the desired scheme.
+
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 humann2_split_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_split_table.xml Thu May 26 10:20:59 2016 -0400
[
@@ -0,0 +1,52 @@
+<tool id="humann2_split_table" name="Split" version="0.6.1">
+    <!-- Galaxy wrapper around the humann2_split_table command: the selected
+         table is passed with -i and the results are written to the "output"
+         directory, from which they are collected into a list collection. -->
+    <description> a HUMAnN2 generated table</description>
+
+    <!-- The "requirements", "stdio" and "citations" macros expanded in this
+         tool are imported from humann2_macros.xml. -->
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <version_command>
+<![CDATA[
+    humann2_split_table --version
+]]>
+    </version_command>
+
+    <!-- The dataset path is single-quoted so that the shell always receives
+         it as one argument, whatever characters the path contains. -->
+    <command><![CDATA[
+        humann2_split_table
+            -i '$input_file'
+            -o "output"
+    ]]></command>
+
+    <inputs>
+        <param name="input_file" type="data" format="tsv,biom" label="Table to split into multiple tables" help="(-i)"/>
+    </inputs>
+
+    <outputs>
+        <!-- Every file found in the "output" directory becomes one element of
+             the list; element name and datatype are derived from the file
+             name by the __designation_and_ext__ pattern. -->
+        <collection type="list" name="split_tables" label="${tool.name} on ${on_string}: Split tables">
+            <discover_datasets pattern="__designation_and_ext__" directory="output"/>
+        </collection>
+    </outputs>
+
+    <tests>
+        <!-- Splitting the joined coverage/abundance table is expected to
+             yield one table per column (abundance and coverage). -->
+        <test>
+            <param name="input_file" value="joined_pathway_coverage_abundance.tsv"/>
+            <output_collection name="split_tables" type="list" >
+                <element name="humann2_Abundance" file="split_joined_table_abundances.tsv" />
+                <element name="humann2_Coverage" file="split_joined_table_coverage.tsv" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data. `Read more about the tool <http://huttenhower.sph.harvard.edu/humann2/manual>`_.
+
+Split HUMAnN2 table is a tool to split a table with multiple columns into multiple tables. The input table may have been generated with the join table tool.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/cpm_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cpm_renormalized_pathway_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+UNMAPPED 5956.49
+UNINTEGRATED 983482
+PWY-3841: folate transformations II 3222.17
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 2969.34
+HSERMETANA-PWY: L-methionine biosynthesis III 2555.95
+VALSYN-PWY: L-valine biosynthesis 1813.8
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/expected_gene_family_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_gene_family_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,1891 @@\n+# Gene Family\thumann2_Abundance\n+UNMAPPED\t8.0000000000\n+UniRef50_R5IH84\t37.0370370370\n+UniRef50_R5IH84|g__Bacteroides.s__Bacteroides_stercoris\t37.0370370370\n+UniRef50_B0NQY6\t25.6410256410\n+UniRef50_B0NQY6|g__Bacteroides.s__Bacteroides_stercoris\t25.6410256410\n+UniRef50_F3PHD0\t25.6410256410\n+UniRef50_F3PHD0|g__Bacteroides.s__Bacteroides_stercoris\t25.6410256410\n+UniRef50_B0NP96\t23.8095238095\n+UniRef50_B0NP96|g__Bacteroides.s__Bacteroides_stercoris\t23.8095238095\n+UniRef50_B0NTS9\t21.2765957447\n+UniRef50_B0NTS9|g__Bacteroides.s__Bacteroides_stercoris\t21.2765957447\n+UniRef50_unknown\t19.7255928786\n+UniRef50_unknown|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t11.3533534073\n+UniRef50_unknown|g__Bacteroides.s__Bacteroides_stercoris\t8.3722394713\n+UniRef50_R6XJU3: Sec-independent protein translocase protein TatA\t12.1951219512\n+UniRef50_R6XJU3: Sec-independent protein translocase protein TatA|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t12.1951219512\n+UniRef50_R5JQA6\t11.9047619048\n+UniRef50_R5JQA6|g__Bacteroides.s__Bacteroides_stercoris\t11.9047619048\n+UniRef50_Q8A9G3\t10.7526881720\n+UniRef50_Q8A9G3|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t10.7526881720\n+UniRef50_A6L108\t10.4166666667\n+UniRef50_A6L108|g__Bacteroides.s__Bacteroides_stercoris\t10.4166666667\n+UniRef50_R7JA39: PF13711 domain protein\t9.8039215686\n+UniRef50_R7JA39: PF13711 domain 
protein|g__Bacteroides.s__Bacteroides_stercoris\t9.8039215686\n+UniRef50_E1WUR5\t9.5238095238\n+UniRef50_E1WUR5|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t9.5238095238\n+UniRef50_B0NV53\t8.1967213115\n+UniRef50_B0NV53|g__Bacteroides.s__Bacteroides_stercoris\t8.1967213115\n+UniRef50_R5F6Z7\t7.7519379845\n+UniRef50_R5F6Z7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t7.7519379845\n+UniRef50_R5CEU4\t7.0921985816\n+UniRef50_R5CEU4|g__Bacteroides.s__Bacteroides_stercoris\t7.0921985816\n+UniRef50_R5I0U3\t7.0921985816\n+UniRef50_R5I0U3|g__Bacteroides.s__Bacteroides_stercoris\t7.0921985816\n+UniRef50_Q8A3G7\t6.8027210884\n+UniRef50_Q8A3G7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t6.8027210884\n+UniRef50_R5V945\t6.6666666667\n+UniRef50_R5V945|g__Bacteroides.s__Bacteroides_stercoris\t6.6666666667\n+UniRef50_R6L4G2\t6.2893081761\n+UniRef50_R6L4G2|g__Bacteroides.s__Bacteroides_stercoris\t6.2893081761\n+UniRef50_W0ESG7: DNA-binding protein\t6.2893081761\n+UniRef50_W0ESG7: DNA-binding protein|g__Bacteroides.s__Bacteroides_stercoris\t6.2893081761\n+UniRef50_A0A016LIR2\t6.2500000000\n+UniRef50_A0A016LIR2|g__Bacteroides.s__Bacteroides_stercoris\t6.2500000000\n+UniRef50_R5JE58\t5.6497175141\n+UniRef50_R5JE58|g__Bacteroides.s__Bacteroides_stercoris\t5.6497175141\n+UniRef50_F9D2T3: Ribosome-binding factor A\t5.4644808743\n+UniRef50_F9D2T3: Ribosome-binding factor A|g__Bacteroides.s__Bacteroides_stercoris\t5.4644808743\n+UniRef50_Q8A7R3\t5.3763440860\n+UniRef50_Q8A7R3|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t5.3763440860\n+UniRef50_Q5LIQ7: Shikimate kinase\t5.2910052910\n+UniRef50_Q5LIQ7: Shikimate kinase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t5.2910052910\n+UniRef50_R5C4D7\t5.2083333333\n+UniRef50_R5C4D7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t5.2083333333\n+UniRef50_R5JPY8: RNA polymerase sigma factor\t5.1282051282\n+UniRef50_R5JPY8: RNA polymerase sigma 
factor|g__Bacteroides.s__Bacteroides_stercoris\t5.1282051282\n+UniRef50_E1WVZ2\t4.9019607843\n+UniRef50_E1WVZ2|g__Bacteroides.s__Bacteroides_stercoris\t4.9019607843\n+UniRef50_B0NW87\t4.7619047619\n+UniRef50_B0NW87|g__Bacteroides.s__Bacteroides_stercoris\t4.7619047619\n+UniRef50_R5M4C8\t4.4444444444\n+UniRef50_R5M4C8|g__Bacteroides.s__Bacteroides_stercoris\t4.4444444444\n+UniRef50_R5RJG0\t4.4444444444\n+UniRef50_R5RJG0|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t4.4444444444\n+UniRef50_D6D2K4: Putative fluoride ion transporter CrcB\t4.3859649123\n+UniRef50_D6D2K4: Putative fluoride ion transporter CrcB|g__Bacteroides.s__Bacteroides_stercoris\t4.3859649123\n+UniRef50_Q5LG50: Dephospho-CoA kinase\t4.3010752688\n+UniRef50_Q5LG50: Dephospho-CoA kinase|g__Bacteroides.s__Bacteroides_stercoris\t2.15'..b'P5X8: SusC/RagA family TonB-linked outer membrane protein\t0.2911208151\n+UniRef50_R5P5X8: SusC/RagA family TonB-linked outer membrane protein|g__Bacteroides.s__Bacteroides_stercoris\t0.2911208151\n+UniRef50_Q89YI7\t0.2841716397\n+UniRef50_Q89YI7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2841716397\n+UniRef50_R5VD83\t0.2812939522\n+UniRef50_R5VD83|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2812939522\n+UniRef50_R7DJ50: DNA polymerase III alpha subunit\t0.2736726875\n+UniRef50_R7DJ50: DNA polymerase III alpha subunit|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2736726875\n+UniRef50_I8V7H4\t0.2718868951\n+UniRef50_I8V7H4|g__Bacteroides.s__Bacteroides_stercoris\t0.2718868951\n+UniRef50_Q8A8I4: Two-component system sensor histidine kinase/response regulator, hybrid (One component system)\t0.2590002590\n+UniRef50_Q8A8I4: Two-component system sensor histidine kinase/response regulator, hybrid (One component system)|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2590002590\n+UniRef50_Q8A2V3: Cell well associated RhsD protein\t0.2587991718\n+UniRef50_Q8A2V3: Cell well associated RhsD 
protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2587991718\n+UniRef50_R5P153: Response regulator receiver domain protein\t0.2587991718\n+UniRef50_R5P153: Response regulator receiver domain protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2587991718\n+UniRef50_B0NL59: Putative serine--tRNA ligase domain protein\t0.2581977795\n+UniRef50_B0NL59: Putative serine--tRNA ligase domain protein|g__Bacteroides.s__Bacteroides_stercoris\t0.2581977795\n+UniRef50_R7KUE4\t0.2575991757\n+UniRef50_R7KUE4|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2575991757\n+UniRef50_Q8A241: Two-component system sensor histidine kinase/response regulator, hybrid (One-component system)\t0.2572016461\n+UniRef50_Q8A241: Two-component system sensor histidine kinase/response regulator, hybrid (One-component system)|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2572016461\n+UniRef50_R6ACZ9: ATPase/histidine kinase/DNA gyrase B/HSP90 domain protein\t0.2556237219\n+UniRef50_R6ACZ9: ATPase/histidine kinase/DNA gyrase B/HSP90 domain protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2556237219\n+UniRef50_W4P379: Two-component system sensor histidine kinase/response\t0.2554278416\n+UniRef50_W4P379: Two-component system sensor histidine kinase/response|g__Bacteroides.s__Bacteroides_stercoris\t0.2554278416\n+UniRef50_R6W605: Two-component system sensor histidine kinase/response regulator hybrid (One-component system)\t0.2495009980\n+UniRef50_R6W605: Two-component system sensor histidine kinase/response regulator hybrid (One-component system)|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2495009980\n+UniRef50_D6D1E3: Signal transduction histidine kinase\t0.2411963338\n+UniRef50_D6D1E3: Signal transduction histidine kinase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2411963338\n+UniRef50_R6JLD1: Two-component system sensor histidine kinase/response regulator hybrid (One component system)\t0.2342468962\n+UniRef50_R6JLD1: Two-component system sensor histidine 
kinase/response regulator hybrid (One component system)|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2342468962\n+UniRef50_R6L7N8\t0.2302025783\n+UniRef50_R6L7N8|g__Bacteroides.s__Bacteroides_stercoris\t0.2302025783\n+UniRef50_R6D9I7\t0.2142245073\n+UniRef50_R6D9I7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.2142245073\n+UniRef50_A0A016I611: Prophage LambdaSa1, N-acetylmuramoyl-L-alanine amidase, family 4\t0.1902587519\n+UniRef50_A0A016I611: Prophage LambdaSa1, N-acetylmuramoyl-L-alanine amidase, family 4|g__Bacteroides.s__Bacteroides_stercoris\t0.1902587519\n+UniRef50_R7KMV8: Outer membrane protein\t0.1818512457\n+UniRef50_R7KMV8: Outer membrane protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.1818512457\n+UniRef50_E1WVX1\t0.1668335002\n+UniRef50_E1WVX1|g__Bacteroides.s__Bacteroides_stercoris\t0.1668335002\n+UniRef50_Q89ZD7\t0.1562011871\n+UniRef50_Q89ZD7|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.1562011871\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/expected_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_pathway_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+UNMAPPED 2.6377384941
+UNINTEGRATED 435.5199558332
+PWY-3841: folate transformations II 1.4268868747
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 1.3149243918
+HSERMETANA-PWY: L-methionine biosynthesis III 1.1318619128
+VALSYN-PWY: L-valine biosynthesis 0.8032128514
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/expected_pathway_coverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_pathway_coverage.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Coverage
+UNMAPPED 1.0000000000
+UNINTEGRATED 1.0000000000
+PWY-3841: folate transformations II 0.6798402001
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 0.6570127063
+HSERMETANA-PWY: L-methionine biosynthesis III 0.6143434179
+VALSYN-PWY: L-valine biosynthesis 0.5180160607
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/gene_families_humann2_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_families_humann2_output.tabular Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,204097 @@\n+# Gene Family\thumann2_Abundance\n+UniRef50_P19529: Replication initiation protein\t8491.6203916670\n+UniRef50_P19529: Replication initiation protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t4033.9801448086\n+UniRef50_P19529: Replication initiation protein|g__Staphylococcus.s__Staphylococcus_aureus\t2589.8276215214\n+UniRef50_P19529: Replication initiation protein|g__Propionibacterium.s__Propionibacterium_acnes\t1867.8126253369\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3\t6541.5278380159\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Staphylococcus.s__Staphylococcus_aureus\t3354.3540695429\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Staphylococcus.s__Staphylococcus_epidermidis\t2955.9410606524\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Escherichia.s__Escherichia_coli\t231.2327078206\n+UniRef50_P02983: Tetracycline resistance protein\t5063.7609262711\n+UniRef50_P02983: Tetracycline resistance protein|g__Staphylococcus.s__Staphylococcus_aureus\t2535.1510022555\n+UniRef50_P02983: Tetracycline resistance protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t2528.6099240155\n+UniRef50_Q93GF3: Rep\t3620.4854732918\n+UniRef50_Q93GF3: Rep|g__Staphylococcus.s__Staphylococcus_epidermidis\t3193.0835831399\n+UniRef50_Q93GF3: Rep|g__Staphylococcus.s__Staphylococcus_aureus\t427.4018901518\n+UniRef50_V6QG63: Integrase\t3006.6923019119\n+UniRef50_V6QG63: Integrase|g__Staphylococcus.s__Staphylococcus_epidermidis\t3004.5443880088\n+UniRef50_V6QG63: Integrase|unclassified\t2.1479139031\n+UniRef50_W1W6K4\t2456.8919290157\n+UniRef50_W1W6K4|g__Staphylococcus.s__Staphylococcus_epidermidis\t2454.8821878197\n+UniRef50_W1W6K4|unclassified\t2.0097411960\n+UniRef50_D4FM51: Plasmid recombination enzyme\t2120.1874806725\n+UniRef50_D4FM51: Plasmid recombination 
enzyme|g__Staphylococcus.s__Staphylococcus_epidermidis\t2120.1874806725\n+UniRef50_Z6ILY0\t2001.4420730481\n+UniRef50_Z6ILY0|g__Staphylococcus.s__Staphylococcus_epidermidis\t1949.0714679824\n+UniRef50_Z6ILY0|unclassified\t52.3706050657\n+UniRef50_F0P516: Replication initiation protein, truncated\t1937.6411893465\n+UniRef50_F0P516: Replication initiation protein, truncated|g__Staphylococcus.s__Staphylococcus_epidermidis\t1184.9438800291\n+UniRef50_F0P516: Replication initiation protein, truncated|g__Staphylococcus.s__Staphylococcus_aureus\t752.6973093174\n+UniRef50_Q8CU99\t1714.1756039229\n+UniRef50_Q8CU99|g__Staphylococcus.s__Staphylococcus_epidermidis\t1714.1756039229\n+UniRef50_P14491: Protein rlx\t1713.0351893643\n+UniRef50_P14491: Protein rlx|g__Staphylococcus.s__Staphylococcus_epidermidis\t1704.6104696986\n+UniRef50_P14491: Protein rlx|unclassified\t7.3698673450\n+UniRef50_P14491: Protein rlx|g__Staphylococcus.s__Staphylococcus_aureus\t1.0548523207\n+UniRef50_P18358: Transposon Tn552 resolvase\t1615.8824453625\n+UniRef50_P18358: Transposon Tn552 resolvase|g__Staphylococcus.s__Staphylococcus_aureus\t1309.3342380259\n+UniRef50_P18358: Transposon Tn552 resolvase|g__Staphylococcus.s__Staphylococcus_epidermidis\t306.5482073366\n+UniRef50_Q5HRN3: ISSep1-like transposase\t1436.3479542640\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Staphylococcus.s__Staphylococcus_epidermidis\t1243.7685169618\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Staphylococcus.s__Staphylococcus_aureus\t181.4072761300\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Propionibacterium.s__Propionibacterium_acnes\t11.1721611722\n+UniRef50_Q4L351: Staphylococcus haemolyticus JCSC1435 DNA, complete genome\t1295.3309025030\n+UniRef50_Q4L351: Staphylococcus haemolyticus JCSC1435 DNA, complete genome|g__Staphylococcus.s__Staphylococcus_epidermidis\t1295.3309025030\n+UniRef50_Q3J5T4: 50S ribosomal protein L11\t1229.1802787878\n+UniRef50_Q3J5T4: 50S ribosomal protein 
L11|g__Rhodobacter.s__Rhodobacter_sphaeroides\t1229.1802787878\n+UniRef50_N6A8S2\t1135.8005834263\n+UniRef50_N6A8S2|g__Staphylococcus.s__Staphylococcus_epidermidis\t1121.6796828769\n+UniRef50_N6A8S2|unclassified\t14.1209005495\n+UniRef50_L7WXY9\t1065.4899191617\n+UniRef50_L7WXY9|g__Staphyloc'..b'444DC: hypothetical protein\t0.0017973217\n+UniRef50_UPI00037444DC: hypothetical protein|unclassified\t0.0017973217\n+UniRef50_UPI000378A614: hypothetical protein\t0.0017691421\n+UniRef50_UPI000378A614: hypothetical protein|unclassified\t0.0017691421\n+UniRef50_R4LEH4: Yd repeat-containing protein\t0.0017472130\n+UniRef50_R4LEH4: Yd repeat-containing protein|unclassified\t0.0017472130\n+UniRef50_S4YMU8: Filamentous hemagglutinin\t0.0017432148\n+UniRef50_S4YMU8: Filamentous hemagglutinin|unclassified\t0.0017432148\n+UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40\t0.0016819660\n+UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40|unclassified\t0.0016819660\n+UniRef50_UPI00036DCFC8: hypothetical protein\t0.0016327044\n+UniRef50_UPI00036DCFC8: hypothetical protein|unclassified\t0.0016327044\n+UniRef50_UPI0001BF6B99: 90S preribosome component RRP12\t0.0016313392\n+UniRef50_UPI0001BF6B99: 90S preribosome component RRP12|unclassified\t0.0016313392\n+UniRef50_UPI000365699C: hypothetical protein\t0.0016224188\n+UniRef50_UPI000365699C: hypothetical protein|unclassified\t0.0016224188\n+UniRef50_UPI000344F009: hypothetical protein\t0.0015763429\n+UniRef50_UPI000344F009: hypothetical protein|unclassified\t0.0015763429\n+UniRef50_U6M5E8\t0.0015640805\n+UniRef50_U6M5E8|unclassified\t0.0015640805\n+UniRef50_UPI000349BE1A: hypothetical protein\t0.0015464606\n+UniRef50_UPI000349BE1A: hypothetical protein|unclassified\t0.0015464606\n+UniRef50_UPI0003773ED0: hypothetical protein\t0.0014975465\n+UniRef50_UPI0003773ED0: hypothetical 
protein|unclassified\t0.0014975465\n+UniRef50_R0ISA3\t0.0014873041\n+UniRef50_R0ISA3|unclassified\t0.0014873041\n+UniRef50_F4GI46\t0.0014628012\n+UniRef50_F4GI46|unclassified\t0.0014628012\n+UniRef50_UPI0002D336FD: hypothetical protein\t0.0014421264\n+UniRef50_UPI0002D336FD: hypothetical protein|unclassified\t0.0014421264\n+UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein\t0.0014134994\n+UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein|unclassified\t0.0014134994\n+UniRef50_W7A2A5\t0.0013833667\n+UniRef50_W7A2A5|unclassified\t0.0013833667\n+UniRef50_D2QX58: Peptidase domain protein\t0.0013763922\n+UniRef50_D2QX58: Peptidase domain protein|unclassified\t0.0013763922\n+UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C\t0.0013693947\n+UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C|unclassified\t0.0013693947\n+UniRef50_UPI0003644B17: hypothetical protein, partial\t0.0013488169\n+UniRef50_UPI0003644B17: hypothetical protein, partial|unclassified\t0.0013488169\n+UniRef50_A0A011N6I9\t0.0013414891\n+UniRef50_A0A011N6I9|unclassified\t0.0013414891\n+UniRef50_A0A058ZAA0\t0.0012939818\n+UniRef50_A0A058ZAA0|unclassified\t0.0012939818\n+UniRef50_UPI000468C770: hypothetical protein\t0.0012465542\n+UniRef50_UPI000468C770: hypothetical protein|unclassified\t0.0012465542\n+UniRef50_UPI00036FCEE3: hypothetical protein\t0.0012113651\n+UniRef50_UPI00036FCEE3: hypothetical protein|unclassified\t0.0012113651\n+UniRef50_UPI00035C33AC: hypothetical protein\t0.0010786375\n+UniRef50_UPI00035C33AC: hypothetical protein|unclassified\t0.0010786375\n+UniRef50_N1Q3A9\t0.0009733696\n+UniRef50_N1Q3A9|unclassified\t0.0009733696\n+UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous hemagglutinin/Adhesin\t0.0008849280\n+UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous 
hemagglutinin/Adhesin|unclassified\t0.0008849280\n+UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16\t0.0008849116\n+UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16|unclassified\t0.0008849116\n+UniRef50_A8LV91\t0.0008555354\n+UniRef50_A8LV91|unclassified\t0.0008555354\n+UniRef50_D3E2A1: Adhesin-like protein\t0.0007987026\n+UniRef50_D3E2A1: Adhesin-like protein|unclassified\t0.0007987026\n+UniRef50_U6MJL1\t0.0007379580\n+UniRef50_U6MJL1|unclassified\t0.0007379580\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/gene_select_humann2_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_select_humann2_output.tabular Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,98570 @@\n+# Gene Family\thumann2_Abundance\n+UniRef50_P19529: Replication initiation protein\t8491.6203916670\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3\t6541.5278380159\n+UniRef50_P02983: Tetracycline resistance protein\t5063.7609262711\n+UniRef50_Q93GF3: Rep\t3620.4854732918\n+UniRef50_V6QG63: Integrase\t3006.6923019119\n+UniRef50_W1W6K4\t2456.8919290157\n+UniRef50_D4FM51: Plasmid recombination enzyme\t2120.1874806725\n+UniRef50_Z6ILY0\t2001.4420730481\n+UniRef50_F0P516: Replication initiation protein, truncated\t1937.6411893465\n+UniRef50_Q8CU99\t1714.1756039229\n+UniRef50_P14491: Protein rlx\t1713.0351893643\n+UniRef50_P18358: Transposon Tn552 resolvase\t1615.8824453625\n+UniRef50_Q5HRN3: ISSep1-like transposase\t1436.3479542640\n+UniRef50_Q4L351: Staphylococcus haemolyticus JCSC1435 DNA, complete genome\t1295.3309025030\n+UniRef50_Q3J5T4: 50S ribosomal protein L11\t1229.1802787878\n+UniRef50_N6A8S2\t1135.8005834263\n+UniRef50_L7WXY9\t1065.4899191617\n+UniRef50_P18357: Regulatory protein BlaR1\t1065.0580195099\n+UniRef50_K0LDS3\t1037.3597919116\n+UniRef50_I3U5U5: Mobilization protein C\t1000.6288671941\n+UniRef50_Q8CUF7\t973.3166630783\n+UniRef50_F3SP64: Techoic acid ABC transporter, permease family protein\t947.1038892942\n+UniRef50_P20384: Putative transposon Tn552 DNA-invertase bin3\t924.6260429245\n+UniRef50_P18416: Transposase for transposon Tn552\t916.8592876685\n+UniRef50_P0A042: Penicillinase repressor\t907.2620340107\n+UniRef50_P00807: Beta-lactamase\t905.1256628462\n+UniRef50_Q3Y3U4: Mobilization protein MobC\t886.5410288112\n+UniRef50_Q8CUE7: Mobilization protein\t872.1202834681\n+UniRef50_P18179: Potential ATP-binding protein\t850.6971824970\n+UniRef50_Q4L460: Teichoic acid translocation permease protein\t811.2148180466\n+UniRef50_J1A645\t777.6353655805\n+UniRef50_C5QCS1: Mobilization protein\t772.9364283639\n+UniRef50_Q51952: 
OrfD\t709.6261688748\n+UniRef50_Q6GJM8\t666.6980603857\n+UniRef50_V6QFW8\t652.7350327943\n+UniRef50_P80705: Aldehyde dehydrogenase gamma chain (Fragment)\t647.0009783631\n+UniRef50_C5N6U7\t631.7953914966\n+UniRef50_P0A0H1: rRNA adenine N-6-methyltransferase\t627.5689728741\n+UniRef50_B9EB85: Signal peptidase I\t618.9812468096\n+UniRef50_C5QVZ6\t596.5815037424\n+UniRef50_G7ZS60\t573.1559210039\n+UniRef50_Q98B43: Two-component response regulator\t569.7012707747\n+UniRef50_E8SI43: ABC transporter ATP-binding protein\t565.0876299174\n+UniRef50_P0A0C8: Replication and maintenance protein\t550.8915944410\n+UniRef50_Q6GFH6: Low molecular weight protein-tyrosine-phosphatase PtpA\t532.2918562060\n+UniRef50_B4RP36: CadB\t531.1509559529\n+UniRef50_D2JDC8\t503.0632772263\n+UniRef50_Q6G9C7\t502.6560671708\n+UniRef50_O31705: Molybdopterin synthase catalytic subunit\t501.3452309651\n+UniRef50_Q3HKI3: Possible virC1 gene, ATPase\t500.0872493616\n+UniRef50_E8SJI8\t488.4524569499\n+UniRef50_A5F3J8: 30S ribosomal protein S7\t487.0760696217\n+UniRef50_Q5HIS8: Single-stranded DNA-binding protein 1\t486.1104531708\n+UniRef50_Q3IUW8: TraE protein\t480.0571094793\n+UniRef50_Q6GJN1\t479.6158335459\n+UniRef50_J0MRE5\t466.4114881174\n+UniRef50_Q3HKI5\t466.0863415041\n+UniRef50_Q8CUB3\t462.0595725869\n+UniRef50_Q6GGX9: UPF0403 protein SAR1441\t461.7441881574\n+UniRef50_B9E8I0: S-ribosylhomocysteine lyase\t457.8085639697\n+UniRef50_J0XSF7\t453.0408406949\n+UniRef50_Q8CUE8\t452.5991820392\n+UniRef50_Q5KX02: Deoxyribose-phosphate aldolase\t449.0482597143\n+UniRef50_D2J7Y2\t442.8075182168\n+UniRef50_P26423: Galactose-6-phosphate isomerase subunit LacA\t439.9610954047\n+UniRef50_U5UNN1: GNAT family acetyltransferase\t437.8988812128\n+UniRef50_A5IWE0\t432.4786090242\n+UniRef50_Q3IUV2: TraG\t429.8639819879\n+UniRef50_D2JFR8\t423.3722239425\n+UniRef50_Q7A7G4\t412.6481590460\n+UniRef50_V9WFJ2\t409.0709236519\n+UniRef50_Q3IVA1: Translation initiation factor 2, gamma subunit, 
GTPase\t405.7109435708\n+UniRef50_Q28UY2: 50S ribosomal protein L11\t397.9108773463\n+UniRef50_D9REY1\t397.3684324359\n+UniRef50_Q4L5M1: Antibacterial protein 3 homolog\t395.2010741317\n+UniRef50_A9H9A8: ATP synthase subunit beta\t392.5914097524\n+UniRef50_Q2FFK2: Ferritin\t386.22181'..b'utative hemagglutinin/hemolysin-related protein\t0.0026265241\n+UniRef50_UPI0003691FBA: hypothetical protein\t0.0026176788\n+UniRef50_J7PC82: Leucine-rich repeat domain protein (LPXTG motif)\t0.0025810288\n+UniRef50_I3AIQ0: Filamentous hemagglutinin (Fragment)\t0.0025656163\n+UniRef50_X5M9G0: TPR repeat, SEL1 subfamily\t0.0025505511\n+UniRef50_UPI0003F50C11: hypothetical protein\t0.0025389324\n+UniRef50_C0VWL5: Thrombospondin type 3 repeat protein\t0.0024885873\n+UniRef50_N8V7J5\t0.0024813901\n+UniRef50_A0A011NPI5: Cyclolysin\t0.0024770022\n+UniRef50_UPI0004431B20: PREDICTED: rho guanine nucleotide exchange factor 3\t0.0024701297\n+UniRef50_UPI000465F2BE: peptide synthetase\t0.0024478087\n+UniRef50_UPI00036BF596: hypothetical protein\t0.0024151249\n+UniRef50_UPI0002B4ABD9: PREDICTED: gamma-glutamyltranspeptidase 1-like\t0.0023879334\n+UniRef50_UPI00037E9746: hypothetical protein, partial\t0.0023806858\n+UniRef50_UPI00047580F1: hypothetical protein, partial\t0.0023733753\n+UniRef50_Q7ML81: Putative RTX protein\t0.0023586381\n+UniRef50_D7FZX1\t0.0023583855\n+UniRef50_UPI0004622947: hypothetical protein TRAVEDRAFT_68878\t0.0022944099\n+UniRef50_U6GVJ1\t0.0022384031\n+UniRef50_U6JU30\t0.0021715124\n+UniRef50_UPI0003645BA6: hypothetical protein\t0.0021393252\n+UniRef50_UPI0004672358: hypothetical protein\t0.0021389942\n+UniRef50_UPI00035E25C2: hypothetical protein\t0.0021371831\n+UniRef50_D9CIX9: SAD1f\t0.0021285314\n+UniRef50_Q3JP46\t0.0021188512\n+UniRef50_J8AXF8\t0.0021160245\n+UniRef50_W4FFI6\t0.0020701533\n+UniRef50_UPI000378EC20: hypothetical protein\t0.0020652271\n+UniRef50_J3DU69\t0.0020425474\n+UniRef50_UPI0003637D6D: hypothetical 
protein\t0.0020088197\n+UniRef50_R0KT14\t0.0019394397\n+UniRef50_UPI00037EF227: hypothetical protein, partial\t0.0019328841\n+UniRef50_B5SDA9: Hemagglutinin-related protein\t0.0019268390\n+UniRef50_UPI0003EAD270: PREDICTED: protein bassoon-like\t0.0018866196\n+UniRef50_UPI0003602AC0: hypothetical protein\t0.0018857298\n+UniRef50_UPI0003B5F3E5: hypothetical protein\t0.0018613244\n+UniRef50_M7YSB0\t0.0018482051\n+UniRef50_B2I527: Filamentous haemagglutinin family outer membrane protein\t0.0018395479\n+UniRef50_X2D8N8: CCR4-NOT transcription complex subunit 1-like protein\t0.0018313691\n+UniRef50_A4P1C2\t0.0018132989\n+UniRef50_UPI00037444DC: hypothetical protein\t0.0017973217\n+UniRef50_UPI000378A614: hypothetical protein\t0.0017691421\n+UniRef50_R4LEH4: Yd repeat-containing protein\t0.0017472130\n+UniRef50_S4YMU8: Filamentous hemagglutinin\t0.0017432148\n+UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40\t0.0016819660\n+UniRef50_UPI00036DCFC8: hypothetical protein\t0.0016327044\n+UniRef50_UPI0001BF6B99: 90S preribosome component RRP12\t0.0016313392\n+UniRef50_UPI000365699C: hypothetical protein\t0.0016224188\n+UniRef50_UPI000344F009: hypothetical protein\t0.0015763429\n+UniRef50_U6M5E8\t0.0015640805\n+UniRef50_UPI000349BE1A: hypothetical protein\t0.0015464606\n+UniRef50_UPI0003773ED0: hypothetical protein\t0.0014975465\n+UniRef50_R0ISA3\t0.0014873041\n+UniRef50_F4GI46\t0.0014628012\n+UniRef50_UPI0002D336FD: hypothetical protein\t0.0014421264\n+UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein\t0.0014134994\n+UniRef50_W7A2A5\t0.0013833667\n+UniRef50_D2QX58: Peptidase domain protein\t0.0013763922\n+UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C\t0.0013693947\n+UniRef50_UPI0003644B17: hypothetical protein, partial\t0.0013488169\n+UniRef50_A0A011N6I9\t0.0013414891\n+UniRef50_A0A058ZAA0\t0.0012939818\n+UniRef50_UPI000468C770: hypothetical 
protein\t0.0012465542\n+UniRef50_UPI00036FCEE3: hypothetical protein\t0.0012113651\n+UniRef50_UPI00035C33AC: hypothetical protein\t0.0010786375\n+UniRef50_N1Q3A9\t0.0009733696\n+UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous hemagglutinin/Adhesin\t0.0008849280\n+UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16\t0.0008849116\n+UniRef50_A8LV91\t0.0008555354\n+UniRef50_D3E2A1: Adhesin-like protein\t0.0007987026\n+UniRef50_U6MJL1\t0.0007379580\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/input_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequences.fasta Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,2260 @@\n+>r2|637000026.fna|5753889|5754040|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln383_#0/1\n+CTCCGTCCGCTGGTAGAACGTCTGGTAGAAGCTCAAAAAGCATTGGCAACCAAATACCTTTCCGAAGCCAAACGACTGATTGCCTCCAACGACAAGAAGGAAGTGGAAGAAGGATTCCTTGCCCTTTATCGTAGCCACAAGTGTCTTCCGA\n+>r3|637000026.fna|1749333|1749484|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln165_#0/1\n+AAGTAAAAGTCTACACACAGGAAGGAACGATAATGGAAAGTACGATTGAAGTAAATCGTCCGATGGAGATAGCAGGATGGAAAATTTATCAGCTTAGTTATGATGAGTCGAAAGGTCGCTGGAGCGATATCAGTGTCTTTGAGCTGGTTCG\n+>r8|637000026.fna|2991814|2991965|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+AGAAAGTTTTGATCAAAGCATGGGATGCTGAAAACGGACCGGTAATTATCGACTCAGGAGAAAGTACCTACAACACAACCGCCAAAAAGTTCAGCCTCAAATATACGATCGGTAACACGCTATATGAAGAGCAACTGACCAAAGAAAAAGA\n+>r24|641736196.fna|214158|214309|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln388_#0/1\n+CAGATACGAGTACCAATTGGCCGTCAACGACTATTGGAAGGAAGTCGGCGGATTGCAGATGCTGCCCGGAACCAACCGCTCCAGCGACCGCTTTGTACGCGCGTCATTCTACATTCATGCCATTCCGCAGACAGCAGACGCGGCGATTGCA\n+>r32|637000026.fna|4140951|4141102|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln422_#0/1\n+TATTCTCAGGGGACTAATTGGTGATATAGTGCTCGATTATGATAGAGGTGAAGGAATTATGTCCTTTAAGACTCAAAAGGTAGGTTATATATCAGGATATTATTTGGGATGTACTACATATTGTTATGCCATGGGATTTTATCCTACATAT\n+>r55|637000026.fna|6212867|6213018|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln342_#0/1\n+CAGGCGTCGGGTGAGCTGGACTTTGAAGAGTTGTGCGAAGCGATCACCAGTCGTTCTACTTGTACGGAAACGGATGTACGTGCGGCTATTTCGGGTATTCTTTACGAAGTGAAGCGTGCGTTGAAGGCAGGAAGAATTGCGAGACTGGGTG\n+>r94|641736196.fna|124792|124943|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln396_#0/1\n+AATCAGTTTCATACGCTATCAATATCCGGGGTCAATAGTTTCCATTTGTTGCTCGCTTGACACGCTTTCAAAGCTAACATCTTCCGCATCAATCAACTGTTTGTTTGCGTAATTGTCGGTCAATATTTCACGCTGTGAAGTTTCGTCACCC\n+>r115|641736196.fna|175677|175828|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln360_#0/1\n+CTGTCAGATACGTGAAAGATTACATTCGATACGTGAGTATGTAATGAAATATCCTGAGTTTGACGATAGAAACGATATTGACCCGTCAATGCGAATGTTCTATATCCAGTCTGTTGAAGCACAAGCAAAAAATCGAAGTTATTCAGATGCA\n+>r121|641736196.fna|7934|8085|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln383_#0/1\n+TGAGCGGTTCGGTCTATACCCTTATGAATAAAATCTTCAGGGATTCAAAAGAACCGCTGTATGGCCGTGCTGACAATATCATGAAGTTAGCTCCTTTTACTACTTCCGTATTGAAAGAAATCATATCGGATCATAAGGCCGACTATACTAA\n+>r145|637000026.fna|1378791|1378942|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+TCCGTCTGATTCGTAGTGTACCAGCATATCCGCCATGACATCGCTCACTGTAGCGCTGAAATCTTTCAGAAGTTTATGGCTGCCCACCCAGTAAGTCTTGCCTTGATAAGACACCTTTATACCTTTGCCTGTGATGCTCTCAAAACTGCTC\n+>r158|637000026.fna|3242194|3242345|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln388_#0/1\n+GGACAGCAGTATCAAACGGTTAAAATACGTGAGATACGCAGATGACTTTCTGATTGGAATTACCGGTAATCTTGAAGACTGCAAAACAGTAAAAGAGGATATTAAGAATTATTTGAATGAAGCTCTTAAACTGGAACTGTCAGACGAAAAG\n+>r160|637000026.fna|3948359|3948510|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln347_#0/1\n+CGGAAGTGACTCATACATATCAGGTGGTTCTTCCAGTTATAAGAACCTGTCTGCAAGTGAAATGAAGAACACATCTTCATTCACAAACTGGAATTTCAGTACGGTTTGGGAAATGGGAAGTGAATACCCGACATTACAGGGCTTATTAAAA\n+>r187|637000026.fna|5220534|5220685|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+GTTACCTTGATCAATGGATTTGCATATTCCAGCAACCACGAGATAGAGGCGTGTCCTGCTTCATGCAAGGCAATCGAGCGTCTTTCCGCTTCTGTGGTAATCTTGGTCTTCTTTTCCAGACCACCGATGATACGGTCTACCGCATCCAGAA\n+>r192|637000026.fna|2858128|2858279|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACGTGGACGGTTACGCACTGCCGGAAAAAATCAGGAACGCATTCCTGGGATTGGAGGAAAAGGAGAAAACACTCATCAGTTACTTCACCCAGCACAATGAACAGTATGCAAAGAAGGTCGGCAAGACTGCCACGCAAAAGACCTATTCCCG\n+>r218|641736196.fna|272465|272616|_from_ 
|NZ_ABFZ02000022_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+ATTTCTTCGTCGGTTTCCGTATGGAAGAAGTACACCACGGCCAACTAGGTGAAGTGACGGATGTGGACACCTCTACCATCAACACTCTGTTCGTGGTAGATTATAAAGGAGAAGAATTGCTGATTCCGGCACAGGAAGATTTCATTATGGA\n+>r222|637000026.fna|2630231|2630382|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_'..b'CGGACTTTCGGTATCTATCTATATGTCTGAGGTCGCCAATCCGAAGGTGCGCAACTGGCTGAAACCGATCATTGAA\n+>r12070|641736196.fna|375596|375747|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln407_#0/1\n+GAATGTTTTTCCATGAATATTTATGTTTTCTGACCGGGAGTCTTTCATGGAATTAATCAATAAATCATACATATGTTCCATTGCTCCCTTAGAAGGAGCTTTGATGTAGCTAACGGTTAAACTGCTGACACCATGTCCTCGAAGTTGCAAT\n+>r12072|637000026.fna|2532372|2532523|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln392_#0/1\n+GGCTCCGGGATTGATGTGCAGCATGTCCAACGTTTTGTCGTATTTCACCTTTAATATATGGGAGTGTCCGCTGATGAAAAGTTTCGGCGGACGGGCCATCAAACTGCCGATAACCGAAGGGTCGTACTTGCCCGGATACCCGCCGATATGC\n+>r12073|637000026.fna|6194446|6194597|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln420_#0/1\n+CTGAATATTATAGTCATATCCCTTCACCGGTTCTTCCAGCTCTTTGAATATCACAAAGGCGCGTTGCCCGTCTTTATTCTTGTCACTGATCCATCCGTTTCCATTGCTCGGGAACATTGTTTTTCCATCATCCAGAGTGAAATAGAAATCC\n+>r12077|641736196.fna|245225|245376|_from_ |NZ_ABFZ02000016_Bacteroides_stercoris_ATCC_43183,|_ln342_#0/1\n+ATGCAGTGTCGCCCACCATGTCCATGTCGCTCTCACGGTCGAAAAGCAATGTATAATAATTCTGTTTGGTAAACAGGTAGTCGTGCTTCCAGAAGCGTTCCTCTATCTTCGAAGGAGTGTCCCGGTTGTAATAAGGCAAAGGAAACTCCGT\n+>r12078|641736196.fna|76492|76643|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln385_#0/1\n+GATCCTTTCAGTTCGTACCACTCGTCATCCGGCAATCCGTTTCCGTTTATATCTTGCATGACCCATACGATACCCGGTTCGGAACTGCCGTCAAAGGCGTTCCCCTGAACACAGAAATCATATTGGTTACCCGAATTGGGAATACTGTGGT\n+>r12088|641736196.fna|108362|108513|_from_ 
|NZ_ABFZ02000016_Bacteroides_stercoris_ATCC_43183,|_ln368_#0/1\n+AGGTTTCCACACTTCCTCGCCAAAATAGTTGTGAGGACCACCGAAGCGGCAGTCCAATATTCCGGCAAGCGCCGCTTCCGGATAGCCGGAATTGGGACTGGCATGCTTGCTGCCGTATTTCCCCACAAAAAGCAGCAACGGGAATCTGCCG\n+>r12120|637000026.fna|4257090|4257241|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln262_#0/1\n+ACGGGCTTCTTCGCCAATGCTCTTCCCATAAAGATAAGACATTTCGGGATTCCATGTTGCAGAAAGGCAAGTCAATGCAGGATAGGCTATACAGGAATCGTTTGTCCATCCTGCCTGATCCCACTCATCCCATAATACTTCGGGACGGATG\n+>r12152|637000026.fna|2685134|2685285|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln353_#0/1\n+CACTCAAATCATCAATCAAGCTGACTTTCTTTAATGTATCCTTCTTAGCATCATAATATTTCATCTCCAGATTATTAGCAATCGAGAAACTAACCGTTCCCGAACCCTCACGTGAAGGGACTCCAAAAGGCTGTCCCGTATAGGGTGAATA\n+>r12163|637000026.fna|5651468|5651619|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln367_#0/1\n+AATGAACAACACCATTAAAAACGGTTGGCTATTCGACTGTGATGATGAGAGCTTCGAGCAAGTAAGCAAGCGCATTGAAGATGCTGTATATGTATATAATCATGTGCGGCCTCATCAAGGGATAAACATGAGGACACCTATGGAAGTGGTC\n+>r12201|641736196.fna|253613|253764|_from_ |NZ_ABFZ02000023_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+CCACTCCGGAAGGTATTATTGGCAATGAAGACTGCGGGCAGATGTCGGCATGGTATATTCTTTCAAGTATGGGACTTTATTCGGTTTGTCCTGGTAGCAACGAATATGTTCTTACTACTCCCTTGTTCGAGAAAGTTGTAGTGCATTTGGC\n+>r12208|637000026.fna|1887139|1887290|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln435_#0/1\n+ACATCGCGGTTGCTTTTCGCTACCCGTACCTTTCCGCGTCTCTTTCCCCAGTCCAGAATCGGAATCTGAACGCCTACCTGCACAATCTGATTATCCTGCAAGTTCCTGTAGACACTGGAAAGTTCACGACTCTCTCCCGTATATCCTACAC\n+>r12218|641736196.fna|422226|422377|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln296_#0/1\n+GTCCAATTGCGCACGTGCCAGCCGGGTGGCATTAACCAGTTTTGTACCGTTGAAGAGTGTCCAACTGAGGCTTGGCGCTATTTCAAACGTCATACTGTTCCGTTTGGTGAAATCTTTCAGGTCATGGGAAGAATATCCTACGGAACCTTTT\n+>r12223|637000026.fna|4646221|4646372|_from_ 
|NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln436_#0/1\n+CGGTACTGCTTTTAGAAAGGAACGGATACGGGGTTGAGTCCATTTCTTTTTGTCATAGAAGAACATCCAGGTCATTTGTAACCATTGGGCTGCTGAGTCTACCTGATGAATCGACTCGTAAATCTTGCTTGAAACATTGGCTAAAAAATCC\n+>r12228|641736196.fna|168782|168933|_from_ |NZ_ABFZ02000023_Bacteroides_stercoris_ATCC_43183,|_ln401_#0/1\n+GGTCTCTACCATTTCCGAAATGTTACCTTTCTCTACACGGAACATTTTATAATCACTGCCCACCTTATGCAGAATACGATCCAAATGTTCACGATTGGTATCTGTTATAAAGATCTGTCCAAAGTTATCACCTGCCACCAGTTTAATTATC\n+>r12236|637000026.fna|5656063|5656214|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln344_#0/1\n+TTCAGTGAGACAGATGTGCGGACCATGTTTCAGTACTATAAAGACGCCGGACAACTGCCGGCAGATACAGACATAGATGCACTGATTCGGGAAATAAAACCGTGGTATGACAATTATTGCTTTGCCAAAGAAAGTCTGGAACGTGATCCGA\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/joined_pathway_coverage_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/joined_pathway_coverage_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance humann2_Coverage
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 1.3149243918 0.6570127063
+HSERMETANA-PWY: L-methionine biosynthesis III 1.1318619128 0.6143434179
+PWY-3841: folate transformations II 1.4268868747 0.6798402001
+UNINTEGRATED 435.5199558332 1.0000000000
+UNMAPPED 2.6377384941 1.0000000000
+VALSYN-PWY: L-valine biosynthesis 0.8032128514 0.5180160607
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/max_reduced_gene_family_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/max_reduced_gene_family_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,1891 @@\n+# Gene Family\tmax\n+UNMAPPED\t8.0\n+UniRef50_A0A015R3S0: TonB-dependent Receptor Plug domain protein (Fragment)\t0.3527336861\n+UniRef50_A0A015R3S0: TonB-dependent Receptor Plug domain protein (Fragment)|g__Bacteroides.s__Bacteroides_stercoris\t0.3527336861\n+UniRef50_A0A016A5N0: Calcineurin-like phosphoesterase family protein\t0.3894080997\n+UniRef50_A0A016A5N0: Calcineurin-like phosphoesterase family protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.3894080997\n+UniRef50_A0A016FAQ7: N-6 DNA Methylase family protein (Fragment)\t0.3524855559\n+UniRef50_A0A016FAQ7: N-6 DNA Methylase family protein (Fragment)|g__Bacteroides.s__Bacteroides_stercoris\t0.3524855559\n+UniRef50_A0A016I611: Prophage LambdaSa1, N-acetylmuramoyl-L-alanine amidase, family 4\t0.1902587519\n+UniRef50_A0A016I611: Prophage LambdaSa1, N-acetylmuramoyl-L-alanine amidase, family 4|g__Bacteroides.s__Bacteroides_stercoris\t0.1902587519\n+UniRef50_A0A016KJM6: Type I restriction modification DNA specificity domain protein\t0.702247191\n+UniRef50_A0A016KJM6: Type I restriction modification DNA specificity domain protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.702247191\n+UniRef50_A0A016LIR2\t6.25\n+UniRef50_A0A016LIR2|g__Bacteroides.s__Bacteroides_stercoris\t6.25\n+UniRef50_A0A024GWJ3: Uridine diphosphate galacturonate 4-epimerase\t1.1074197121\n+UniRef50_A0A024GWJ3: Uridine diphosphate galacturonate 4-epimerase|g__Bacteroides.s__Bacteroides_stercoris\t1.1074197121\n+UniRef50_A0LYR8: Protein translocase subunit SecA\t0.3177629488\n+UniRef50_A0LYR8: Protein translocase subunit SecA|g__Bacteroides.s__Bacteroides_stercoris\t0.3177629488\n+UniRef50_A0M554: Bifunctional protein FolD\t1.3661202186\n+UniRef50_A0M554: Bifunctional protein FolD|g__Bacteroides.s__Bacteroides_stercoris\t1.3661202186\n+UniRef50_A2U077: Methylmalonyl-CoA mutase large subunit\t0.5005005005\n+UniRef50_A2U077: Methylmalonyl-CoA mutase large 
subunit|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.5005005005\n+UniRef50_A3PAU4: Chorismate synthase\t1.0787486516\n+UniRef50_A3PAU4: Chorismate synthase|g__Bacteroides.s__Bacteroides_stercoris\t1.0787486516\n+UniRef50_A3QG19: Homoserine O-succinyltransferase\t1.3020833333\n+UniRef50_A3QG19: Homoserine O-succinyltransferase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.3020833333\n+UniRef50_A4CN78: Queuine tRNA-ribosyltransferase\t1.0193679918\n+UniRef50_A4CN78: Queuine tRNA-ribosyltransferase|g__Bacteroides.s__Bacteroides_stercoris\t1.0193679918\n+UniRef50_A4T8K0: ATP synthase subunit alpha\t0.6973500697\n+UniRef50_A4T8K0: ATP synthase subunit alpha|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6973500697\n+UniRef50_A4X5J8: 4-diphosphocytidyl-2C-methyl-D-erythritol synthase\t0.8250825083\n+UniRef50_A4X5J8: 4-diphosphocytidyl-2C-methyl-D-erythritol synthase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.8250825083\n+UniRef50_A5FAU9: Phosphodiesterase, MJ0936 family\t2.9239766082\n+UniRef50_A5FAU9: Phosphodiesterase, MJ0936 family|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.9239766082\n+UniRef50_A5FFH5: Bacteroides conjugative transposon MobC/BfmC-like protein\t1.0650605689\n+UniRef50_A5FFH5: Bacteroides conjugative transposon MobC/BfmC-like protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0650605689\n+UniRef50_A5FIF9: Potassium-transporting ATPase A chain\t0.6422607579\n+UniRef50_A5FIF9: Potassium-transporting ATPase A chain|g__Bacteroides.s__Bacteroides_stercoris\t0.6422607579\n+UniRef50_A6KXA8: Transposase\t2.3724792408\n+UniRef50_A6KXA8: Transposase|g__Bacteroides.s__Bacteroides_stercoris\t2.3724792408\n+UniRef50_A6KXG9: Adenylyl-sulfate kinase\t2.1929824561\n+UniRef50_A6KXG9: Adenylyl-sulfate kinase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.1929824561\n+UniRef50_A6KY75: Helicase, putative\t0.4734848485\n+UniRef50_A6KY75: Helicase, putative|g__Bacteroides.s__Bacteroides_stercoris\t0.4734848485\n+UniRef50_A6KYD5: Aldose 
1-epimerase\t1.1574074074\n+UniRef50_A6KYD5: Aldose 1-epimerase|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1574074074\n+UniRef50_A6KYK1: Transposa'..b'50_S3YBR0\t1.1074197121\n+UniRef50_S3YBR0|g__Bacteroides.s__Bacteroides_stercoris\t1.1074197121\n+UniRef50_S3YU07\t0.7898894155\n+UniRef50_S3YU07|g__Bacteroides.s__Bacteroides_stercoris\t0.7898894155\n+UniRef50_S3ZKH9\t1.0482180294\n+UniRef50_S3ZKH9|g__Bacteroides.s__Bacteroides_stercoris\t1.0482180294\n+UniRef50_T2N9I9: Conjugative transposon TraJ protein\t1.1574074074\n+UniRef50_T2N9I9: Conjugative transposon TraJ protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1574074074\n+UniRef50_U2IIX4\t0.3293807642\n+UniRef50_U2IIX4|g__Bacteroides.s__Bacteroides_stercoris\t0.3293807642\n+UniRef50_U2JMV5: Beta-galactosidase family protein\t0.3100775194\n+UniRef50_U2JMV5: Beta-galactosidase family protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.3100775194\n+UniRef50_U5Q6L5: RagB/SusD Domain-Containing Protein\t0.6613756614\n+UniRef50_U5Q6L5: RagB/SusD Domain-Containing Protein|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6613756614\n+UniRef50_U5Q7A5: Spore coat polysaccharide biosynthesis protein spsK\t1.4184397163\n+UniRef50_U5Q7A5: Spore coat polysaccharide biosynthesis protein spsK|g__Bacteroides.s__Bacteroides_stercoris\t1.4184397163\n+UniRef50_U5Q855: DNA gyrase subunit B\t0.5518763797\n+UniRef50_U5Q855: DNA gyrase subunit B|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.5518763797\n+UniRef50_UPI000468D7B3: membrane protein\t0.3775009438\n+UniRef50_UPI000468D7B3: membrane protein|g__Bacteroides.s__Bacteroides_stercoris\t0.3775009438\n+UniRef50_W0ERH5: Conjugate transposon protein\t0.8312551953\n+UniRef50_W0ERH5: Conjugate transposon protein|g__Bacteroides.s__Bacteroides_stercoris\t0.8312551953\n+UniRef50_W0ESG7: DNA-binding protein\t6.2893081761\n+UniRef50_W0ESG7: DNA-binding protein|g__Bacteroides.s__Bacteroides_stercoris\t6.2893081761\n+UniRef50_W0EUZ5: Membrane 
protein\t0.5995203837\n+UniRef50_W0EUZ5: Membrane protein|g__Bacteroides.s__Bacteroides_stercoris\t0.5995203837\n+UniRef50_W0EY48: Cell division protein FtsK\t0.4224757076\n+UniRef50_W0EY48: Cell division protein FtsK|g__Bacteroides.s__Bacteroides_stercoris\t0.4224757076\n+UniRef50_W0F1V4: Transcriptional regulator\t0.998003992\n+UniRef50_W0F1V4: Transcriptional regulator|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.998003992\n+UniRef50_W0F5J4: Phosphonate ABC transporter ATP-binding protein\t1.6103059581\n+UniRef50_W0F5J4: Phosphonate ABC transporter ATP-binding protein|g__Bacteroides.s__Bacteroides_stercoris\t1.6103059581\n+UniRef50_W4P379: Two-component system sensor histidine kinase/response\t0.2554278416\n+UniRef50_W4P379: Two-component system sensor histidine kinase/response|g__Bacteroides.s__Bacteroides_stercoris\t0.2554278416\n+UniRef50_W4P8H9: ATP-dependent DNA helicase RecG\t0.5136106831\n+UniRef50_W4P8H9: ATP-dependent DNA helicase RecG|g__Bacteroides.s__Bacteroides_stercoris\t0.5136106831\n+UniRef50_W4PDX5: Immunoreactive 53 kDa antigen PG123\t0.6485084306\n+UniRef50_W4PDX5: Immunoreactive 53 kDa antigen PG123|g__Bacteroides.s__Bacteroides_stercoris\t0.6485084306\n+UniRef50_W4PK35: NADP-dependent malic enzyme\t0.466853408\n+UniRef50_W4PK35: NADP-dependent malic enzyme|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.466853408\n+UniRef50_W4PR50: DNA-directed RNA polymerase beta subunit\t0.546000546\n+UniRef50_W4PR50: DNA-directed RNA polymerase beta subunit|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.546000546\n+UniRef50_W4UNV5: Protein translocase subunit SecA\t0.6307158625\n+UniRef50_W4UNV5: Protein translocase subunit SecA|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6307158625\n+UniRef50_W4USI9: Na(+)/H(+) antiporter\t0.4816955684\n+UniRef50_W4USI9: Na(+)/H(+) antiporter|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.4816955684\n+UniRef50_W6P1M0: Glycoside hydrolase family 2, sugar binding\t0.3156565657\n+UniRef50_W6P1M0: 
Glycoside hydrolase family 2, sugar binding|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.3156565657\n+UniRef50_unknown\t19.7255928786\n+UniRef50_unknown|g__Bacteroides.s__Bacteroides_stercoris\t8.3722394713\n+UniRef50_unknown|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t11.3533534073\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/merged_gene_families_pathways_abundances.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged_gene_families_pathways_abundances.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 1.3149243918
+HSERMETANA-PWY: L-methionine biosynthesis III 1.1318619128
+PWY-3841: folate transformations II 1.4268868747
+UNINTEGRATED 435.5199558332
+UNMAPPED 2.6377384941
+VALSYN-PWY: L-valine biosynthesis 0.8032128514
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/pathway_rename_mapping.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pathway_rename_mapping.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,6 @@
+UNMAPPED name1
+UNINTEGRATED name2
+PWY-3841 name3
+HOMOSER-METSYN-PWY name4
+HSERMETANA-PWY name5
+VALSYN-PWY name6
\ No newline at end of file
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/reduced_uniref50.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reduced_uniref50.fasta Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,1478 @@\n+>UniRef50_B0NQY6 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NQY6_BACSE\n+MYFRAQEVKNTSKSKRGKIRFTLILIMQETIYELYVAKVLIFLINGIERKEFSFGLRIFA\n+QS\n+>UniRef50_F3PHD0 Uncharacterized protein n=2 Tax=Bacteroides RepID=F3PHD0_9BACE\n+MIQTEFQHLLKPVPTAGIGDTKHWYEEYQPLAQTIPSLGTSNTGRWYERYQALEREFFPG\n+IYSKG\n+>UniRef50_B0NP96 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NP96_BACSE\n+MDIRQENAVINKDIAFLYHLCTRGTIIFNILRINSCRMYPFRKRLGTDISALKKRRFFFA\n+YFI\n+>UniRef50_B0NTS9 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NTS9_BACSE\n+MYFKDRMVSDRSPDTSSFFFLFQENRSCRIALKRFSLFNYLIINKMQINNLCYVNR\n+>UniRef50_Q8A9G3 Uncharacterized protein n=1 Tax=Bacteroides thetaiotaomicron (strain ATCC 29148 / DSM 2079 / NCTC 10582 / E50 / VPI-5482) RepID=Q8A9G3_BACTN\n+MTHILYHFAVCYVWQDSSGVIHVSGLDIVAGGYILRFSIMIDSNFYIGCSILFYMSSCLE\n+NDRFKCCHTVLMFFKCLKLH\n+>UniRef50_B0NV53 Uncharacterized protein n=3 Tax=Bacteroides stercoris RepID=B0NV53_BACSE\n+MLQQHVVFLVLQLRAVEPAVANPAVRSWQPQGPQLENGVFIVVRLHLSCCFSTKKNKESA\n+VFVWEKRVRLRQKRMEEAGIFYVELLL\n+>UniRef50_I0Q078 Uncharacterized protein n=2 Tax=Bacteroides RepID=I0Q078_9BACE\n+MFNQEWQNLRLLFKYSIKRATKTINQPLLLKTDLSPSLFIYDIVNHRKVPIITSIRCIKS\n+NREVFIY\n+>UniRef50_W0ESG7 DNA-binding protein n=14 RepID=W0ESG7_9PORP\n+MSNEIREKDHEWVKAFHSNFDRLLALLEKLLEKRQPSAYGDELLTDKEVAFLLKVSRRTL\n+QDYRNNGILPYTQVGGKILYRASDIEKTLMKGYKEAYKYKRN\n+>UniRef50_Q5LIQ7 Shikimate kinase n=97 RepID=AROK_BACFN\n+MIRIFLTGYMGAGKTTLGKALARELHIPFIDLDWYIEERFHKTVGELFSERGEASFRELE\n+KNMLHEVGEFEDVVISTGGGAPCFFDNMEYMNRVGTTVFLDVDPKVLFSRLRVAKQQRPI\n+LQGKKDDELLDFIVQALEKRAPFYRQANYIYCADKLEDRSQIETSVQQLRKLLNLHIAS\n+>UniRef50_B0NW87 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NW87_BACSE\n+MSASIEETVSLELYAPQKGARLISGIGDYGGFTHFDLNRPDSLGSHANPHFGNTNGVTGA\n+WLKQDLIVRVGTLFGHQPDAKTISYSEDGGRRWTMCATVPTEKSRNGHITVAADGSSWI\n+>UniRef50_Q8AB79 Excisionase n=8 Tax=Bacteroidales 
RepID=Q8AB79_BACTN\n+MNKELTFNDLPMVVAQLRDEVVGMKQMIVSLQSQNKPHKANTHIPMSVEEASAYLKMPMA\n+TLYMKLGNGSIPATKPGKRYCLYQDELDKWLETNRKNPVPLTAEEENAAILAGNKRKPKP\n+LNW\n+>UniRef50_Q8A980 Uncharacterized protein n=3 Tax=Bacteroides RepID=Q8A980_BACTN\n+MINKDEEVANRFSDPSKKLVEYVTDFSDMRDEDIELTKPIYENYGNFQLLNETGIIRTDE\n+EIKELYNCKGEFGSTDEVTDMSF\n+>UniRef50_Q8A9S1 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine pyrophosphokinase n=25 Tax=Bacteroides RepID=Q8A9S1_BACTN\n+MTRLWDTARRKRLNCPKVLTLVPVRKHRTEKSSKKRDWTICLIKPLIVHKCIICIGSNYN\n+RKENLLLARRRLVDLFPTIRFTSEQETRPLFFRSPALFSNQVAMFFSEAEEERVRKELKA\n+IEQSAGRRPEDKKEEKVSLDIDLLSFDDRVLKPEDLKREYVVKGLEELKYNQI\n+>UniRef50_Q8A268 Uncharacterized protein n=2 Tax=Bacteroides RepID=Q8A268_BACTN\n+MNTIKTLLLLSFSILLFNCSDEEDKKTEKEFIFSASELKQTEWEGEFLYLTNGEIDSKGS\n+IKIVFYTEKKGVCEYKFDYHIDPETISFEYEISDKFMYIDGPLPIHGNWIQQKYNGNSLE\n+IADSKAAFTNSRMIKLTRVN\n+>UniRef50_B0NTK8 Uncharacterized protein n=3 RepID=B0NTK8_BACSE\n+MGEWIYNSSFDTVMDFAVTVDNVFKRYGQVEALQGVSLSVRPGELFGVIGPDGAGKTSLF\n+RILATLLLADEGKATVCGLDMVKDYKAIRQRVGYMPGRFSLYQDLTVEENLSFFATVFHT\n+TIEENYDLVRDIYRQIEPFRKRRAGALSGGMK\n+>UniRef50_B3ESB4 Transcription elongation factor GreA n=112 RepID=GREA_AMOA5\n+MEKVSYYTEEGLQRLKGELTQLKSEGRAKVAEQLSEARDKGDLSENAEYDAAKEAQEILE\n+RRIAKLEELMINARVINKDNINTSAVSILSKVKIKNKKLGKVSTYTMVSEEEADLKEGKI\n+SIESPIGKGLLGKKAGEVAIVEAPAGKIEFEILDISF\n+>UniRef50_E6SQ70 Uncharacterized protein n=31 Tax=Bacteroidales RepID=E6SQ70_BACT6\n+MIRYKKYQVTGEKSSLRGLWYARPLIEDTFDTEKLAKHMANHNTPYSAGLIKGVLTDMIS\n+CIKELILDGKNVKLDDLAIFSVGIVSKKGAASAEEFKVSDNVKSLKLRARATGELSNAQI\n+NLEGQLKEAALYTVTDSTTEGTPGGGSGPNNGSGDENENPLG\n+>UniRef50_E6SQW3 Uncharacterized protein n=125 RepID=E6SQW3_BACT6\n+MKKIVLLVCLLVATVAAQAQFEKGKWIVNPSVTGLELSHDTGTKKTTFGLEAKGGAFLVD\n+NVALLVNAGARWNDYGGDVDVYSLGVGGRYYIDAVGVYLGANVNVDRWDWGKDNDDTKFS\n+FGLEAGYAFFLSRTVTIEPAAYWNVNSDRSKFGLKVGFGFYF\n+>UniRef50_P0A864 Thiol peroxidase n=1538 
RepID=TPX_ECO57\n+MSQTVHFQGNPVTVANSIPQAGSKAQTFTLVAKDLSDVTLGQFAGKRKVLNIFPSIDTGV\n+CAASVRKFNQLATEIDNTVVLCISADLPFAQSRFCGAEGLNNVITLSTFRNAEFLQAYGV\n+AIADGPLKGLAARAVVVIDENDNVIFSQLVDEITTEPDYEAALAVLKA\n+>UniRef50_Q89YJ6 Histone-like bacteri'..b'ES\n+IQAVTDEENKALANQDNDFLRMLFGDVSQMAFSDLSELLKQARQLRSYLSGKDNKEGITF\n+ISPEQLKAIEESPEELDKLKKALDKLLGAGKKQNKWSNIFETFKTGFADLKSAQGFKEIS\n+GAIGMISGAAGQAAGEIATMFEAMGKDSAANVIGSLGEVLSSISNIGQAFATGGPVAGAF\n+AAVGEIFSLIGKGAQETAKHRQVLEDVMNDTIAQQREYNLLLLEQNLLYEKASTIFGVDS\n+YAKAENAVRVLKDAISDLNKELAGTTEQQKKFAYRKTGSVALDKVFNRNYSQSKDRYSGL\n+ADIEIKTGSYTTGAWFWKKQHDVYTSVLDVYPELIDANGEFNKELAESIINTREMSDEDK\n+AALQGMIDLAEQAEAAFDSLNDYMTDIFGELGGSMSHALVDAFKNGTDAAESFTQSVSEM\n+LETLAEQMIYSVTLGPLLEEAQKEMMTVMKNQNLTDEQRFSQWTNILKGLTNDAVAKQDE\n+AKQLYEAFRQSAGDMGFDVFSPDSTREASQKTGITASQDSVDKIDGMATTMMGHTYSINE\n+NVNRMANGIDSLLNYASSGLSLTTDIERTAKAIESQSRDALNHLANIDNYTSNLVEMREY\n+MYAVKNGIDTLNTKGLTLKR\n+>UniRef50_W4P379 Two-component system sensor histidine kinase/response n=2 Tax=Bacteroides RepID=W4P379_9BACE\n+MFSLKDIIFYLLFLCVGANFTFAASDQITFSHISINEGLSQSTVFSIDQDKRGNMWFATY\n+DGVNKYDGYAFTVYRHDESNPNSIPNDISRIVKADSRGRIWIGTRDGLSCYDEEKDQFKN\n+FFYQKKRGHISVNAIAEITPDQLLINTSEGLTLFDVKSSVFTAAPLNRQMRELPVSTLYR\n+SGNHIYIGTFNKGLFCYSVSERTLQKLTPALDDKQIQAVLQQSPTQVWVATEGHGLFLIN\n+PKTKKAKNYLHSASDSKSISSNYIRSLALDGQNRLWIGTLNDLNIFQEGTDSFISYGSSP\n+TENGSLSQRSVRSIFMDSQGGMWLGTYFGGLNYYHPIRNRFKNIRRIPYKNSLSDNVVSC\n+IVEDKDKNLWIGTNDGGLNLYNTANGQFTHYTLQESERERGMGSNNIKAVYLDEPGGLVY\n+IGTHAGGLNILHRSTGKVEHFDQKNSELINENVYAILPDEEGGLWLGTLGALVRFEPRKQ\n+SFTTVEKEKNGTLFTAKRITTLFRDSEKRLWIGGEEGISVYTQHKGELQRESVFPKSSIT\n+EAFVNCIYESSNGIFWIGTREGVYCFNEKKKKINRYTTAKGLPNNVVYGILEDSSGQIWL\n+STNRGIACFHPETEKFRNFTESDGLQSNQFTSSYCRTSTGQMYFGGIEGITTFRPELLLD\n+NPYAPPVVITRLQLFNKTVRPDDETGILTKNISETKRITLKSSQTAFSLEFVVSNYISGQ\n+HNTFAYQLEGYDKEWYYLTDKRTVSYSNLRQGTYHFHVKAANSDGKWNTTPTTLEIIVLP\n+VWYKTWWALLLFLIAFIAFLTFVFRFFWMRKSMAAELEMERRDKEQQEEINQMKMRFFIN\n+ISHELRTPLTLILAPLQEIINRISDRWTRNQLEYIGRNANRLLHLVNQLMDYRRAELGVF\n+ELKVKKGNAHRLVSENFRYYDKLARHKNIAYSLHSELEEKEMLFDPNY
LELILNNLLSNA\n+FKYTGNGQSITVTLKEDNGWLLLQVSDTGIGIPINKQGRYSNVFIRWKANMLAAASVCRW\n+YNVW\n+>UniRef50_E1WVX1 Uncharacterized protein n=7 RepID=E1WVX1_BACF6\n+MNMNYFSFYNKAKELLIDSLASLWFKGQAREQEYIKRVLTEDEPLFAEPVFQSIFPWEES\n+VYSFEAHSSKLGLLTSSFVNALSNEGIDKDLRFPLDRHPYKHQTESWRTMLSPRPQTIVV\n+TSGTGSGKTECFMIPVLQDLAKTNMKDCVQAIFLYPLNALMKSQQKRIHAWCKALPEKVT\n+YAIYNGETDKENRSDRYTAPHYPQLVTRPQIRRTPPQILFTNPTMLNYMLVRAEDHEILE\n+KSKGKLKWILLDEAHTYIGSSAAELSLQIRRVLDAFGVTIDQVNFAVTSATIGDESDPKT\n+MIKLKTFVSQLTGKPFEDIKIIGGKRIIPELNKGIAEEQLSKINKRFGIRLTYSDIERLR\n+KKLNSSPVLKVKEIGSILDKKIGINVDASLEIIDALGEKVKNLNEGSGFGALLPTRAHFF\n+VRSISGVYVCTNPDCQRHKGYRLPIGSLTTYQNINCPVCKSKMLELATCSSCGSPIVVGE\n+TSTTKGFRMHTNIIDLDNTLFYEQKEDLIDSEDMENIENVEQNEADGFSRFFFAIPEKLC\n+LRKNATCTSHIFNHRNGKIELAPENNESSKGITPLERGESTPVRYQSLRHSGDNHVLCPH\n+CGNNLSELKKLDYLRISATQIGRTLATLLLDNAEAIGSNDAGVVYEGRKYIAFTDSRQGS\n+ARSAMGLNQDVERSWIRASIFHKLADMRLNDVKPGGLTPDEEAEYNAYLSIRGCLPALLL\n+EKFKQLEEKKNGVPVIPSPEEVSWSQISQPLENDSNFRKLYEHVDKARGRKNFRNATDYL\n+KALLVDQFGWIPKRANSLETMGFVRLVYPTLKNAKCPTLLIQKRCTDTDWQNFLKICMDY\n+VIRGGRHYMLSGAYKDYLTQNKYCSPIYPSNSELRKNGNPVSKWFKVNVSQKGVDENQNR\n+LVLLLCAVLGYDDISQISQTKIADINSMLDAAWDFLKQNVLEATDAENQGYMLDLTGDKV\n+KLQLIEKGYLCPVDNVIIDTPFCGYSPRMNGYIGRENFDRFKIQTEFVNPFFPFKFAEQI\n+EENVTEWIEKNLFDQKAAGVFGVMNYRVLASKPIFISAEHSAQQSSEDLDRYEKEFNEGK\n+INILSCSTTMEMGVDISGITEVVMNNVPPKSSNYLQRAGRAGRRSETKALALTVCAPNPI\n+GTHTWNNPDYPITHVTETPLLKLESRQLIQRHVNAMVFASFVADQGGIRVTATLRDFFVK\n+AEGMSFYDKFLNYIDSVISGNVERLQRAYSKLIKGTSLAQITLPDAAQVVKKDIAAVHNV\n+FDAHNGALEKALESLRNESETANAIKAIEKQEENLLKTSMLSYLAENSFLPSAGIPIGLV\n+ECLLGGKEKVDGSSPTLHISQAISSYAPGNPVVKNEWVYEPSGIRLKTKYDDSTSRYIIQ\n+NCTHCGYTTITYGSAKTDCPKCGRHGTMHGIKDISLSIEQRFTEVVEPAAFSVAWDSTPI\n+RKMGTLGGMNFIQPILLEMDAWQPKTNSAKMSIRCSTPRSEILFYNKGASGYGYAFCPYC\n+GRMKSEKSPDSTERMMPHHKHLLASTPCLGGENDGAAVRRHVLLVGRYQTDFVEIKFHDK\n+NNNLIEDSETLYSLGVILSRKLTELLGVNDGEIEFGYDVINHSIFIYDTALGGAGYSLLF\n+REYKDEVLKMALEALERCDCERSCTKCLIDRRSQWYLNYLNRTKALEWLRQEIKARIAPK\n+EILRLIPDSHTVTSDITTEFYQLTRNKDISCIKIFVNDNISQWDAEAFPFKKILTELSLE\n+GVDVAFILPSVPDVKSLSSADS
ATLIAEVFKNDFKGLENTLPAELLPLMVVIMNDGTVKT\n+YFGKNIDTSYSKNWGSGDVFITTQLNSLSYADINRMQLLNTFSSGDTSFMFDYRITEHSS\n+LGHFFDSLKNPEVENWNRIVSNLQGKTVSVEYSDRYLKTPLGCMLLAKMISGLKNEADLN\n+VVSIKVIVTNIVSMDDSDVAVNAIKDFTNGEKRNLFLKNAISELTGIEPEIQDTGYVEHE\n+RCLTVKADNAELCIRPDAGIARGWVPFGRDNAECADCDFREDWNMDLELFNKQQRGAGIL\n+YTISYKQP\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/regrouped_gene_families_to_ec.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_ec.tsv Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,499 @@\n+# Gene Family\thumann2_Abundance\r\n+1.1.1.100\t2.5001243843000003\r\n+1.1.1.100|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.5001243843000003\r\n+1.1.1.132\t1.7137960583\r\n+1.1.1.132|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.7137960583\r\n+1.1.1.133\t1.4184397163\r\n+1.1.1.133|g__Bacteroides.s__Bacteroides_stercoris\t1.4184397163\r\n+1.1.1.136\t0.946969697\r\n+1.1.1.136|g__Bacteroides.s__Bacteroides_stercoris\t0.946969697\r\n+1.1.1.205\t1.5004164697\r\n+1.1.1.205|g__Bacteroides.s__Bacteroides_stercoris\t1.5004164697\r\n+1.1.1.22\t2.6607657553\r\n+1.1.1.22|g__Bacteroides.s__Bacteroides_stercoris\t0.946969697\r\n+1.1.1.22|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.7137960583\r\n+1.1.1.262\t1.0582010582\r\n+1.1.1.262|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0582010582\r\n+1.1.1.290\t2.4081279723\r\n+1.1.1.290|g__Bacteroides.s__Bacteroides_stercoris\t1.2970168612\r\n+1.1.1.290|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1111111111\r\n+1.1.1.37\t1.2626262626\r\n+1.1.1.37|g__Bacteroides.s__Bacteroides_stercoris\t1.2626262626\r\n+1.1.1.44\t0.754147813\r\n+1.1.1.44|g__Bacteroides.s__Bacteroides_stercoris\t0.754147813\r\n+1.1.1.58\t0.7524454477\r\n+1.1.1.58|g__Bacteroides.s__Bacteroides_stercoris\t0.7524454477\r\n+1.1.1.85\t1.0964912281\r\n+1.1.1.85|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0964912281\r\n+1.1.1.86\t1.1185682327\r\n+1.1.1.86|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1185682327\r\n+1.1.1.95\t1.2970168612\r\n+1.1.1.95|g__Bacteroides.s__Bacteroides_stercoris\t1.2970168612\r\n+1.11.1.15\t1.0384215992\r\n+1.11.1.15|g__Bacteroides.s__Bacteroides_stercoris\t1.0384215992\r\n+1.17.4.2\t0.4852013586\r\n+1.17.4.2|g__Bacteroides.s__Bacteroides_stercoris\t0.4852013586\r\n+1.2.1.38\t1.221001221\r\n+1.2.1.38|g__Bacteroides.s__Bacteroides_stercoris\t1.221001221\r\n+1.2.4.4\t0.5688282139\r\n+1.2.4.4|g__Bacteroides.s__Bacteroides_stercoris\t0.5688282139\r\n+1.2.7.1\t1.9194434902\r\n+1.2.7.1|g__Bacteroid
es.s__Bacteroides_stercoris\t1.6260162602\r\n+1.2.7.1|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.29342723\r\n+1.2.7.3\t3.9570470892999996\r\n+1.2.7.3|g__Bacteroides.s__Bacteroides_stercoris\t2.7956069035\r\n+1.2.7.3|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1614401858\r\n+1.2.7.7\t1.6260162602\r\n+1.2.7.7|g__Bacteroides.s__Bacteroides_stercoris\t1.6260162602\r\n+1.2.7.8\t0.693000693\r\n+1.2.7.8|g__Bacteroides.s__Bacteroides_stercoris\t0.693000693\r\n+1.3.1.12\t1.6025641026\r\n+1.3.1.12|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.6025641026\r\n+1.3.1.98\t1.1990407674\r\n+1.3.1.98|g__Bacteroides.s__Bacteroides_stercoris\t1.1990407674\r\n+1.3.5.1\t1.8018018018\r\n+1.3.5.1|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.8018018018\r\n+1.3.99.1\t1.8018018018\r\n+1.3.99.1|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.8018018018\r\n+1.4.1.13\t0.4411317895\r\n+1.4.1.13|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.4411317895\r\n+1.4.3.5\t2.0325203252\r\n+1.4.3.5|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.0325203252\r\n+1.5.1.5\t1.3661202186\r\n+1.5.1.5|g__Bacteroides.s__Bacteroides_stercoris\t1.3661202186\r\n+1.6.5.11\t0.6844626968\r\n+1.6.5.11|g__Bacteroides.s__Bacteroides_stercoris\t0.6844626968\r\n+1.6.5.3\t0.6844626968\r\n+1.6.5.3|g__Bacteroides.s__Bacteroides_stercoris\t0.6844626968\r\n+1.6.99.5\t0.6844626968\r\n+1.6.99.5|g__Bacteroides.s__Bacteroides_stercoris\t0.6844626968\r\n+1.8.1.14\t0.429000429\r\n+1.8.1.14|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.429000429\r\n+1.8.1.4\t0.837520938\r\n+1.8.1.4|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.837520938\r\n+2.1.1.13\t2.6560424967\r\n+2.1.1.133\t0.5787037037\r\n+2.1.1.133|g__Bacteroides.s__Bacteroides_stercoris\t0.5787037037\r\n+2.1.1.13|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.6560424967\r\n+2.1.1.182\t1.5290519878\r\n+2.1.1.182|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.5290519878\r\n+2.1.1.45\t1.5503875969\r\n+2.1.1.45|g__Bacteroides.s__Bacteroides_t
hetaiotaomicron\t1.5503875969\r\n+2.1.2.1\t0.884173298\r\n+2.1.2.10\t1.0615711253\r\n+2.1.2.10|g__Bacteroides.s__Bacteroides_stercoris\t1.0615711253\r\n+2.1.2.1|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.884173298\r\n+2.2.1.7\t0.5688282139\r\n+2.2.1.7|g'..b'502\r\n+5.4.2.2\t1.614205004\r\n+5.4.2.2|g__Bacteroides.s__Bacteroides_stercoris\t0.807102502\r\n+5.4.2.2|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.807102502\r\n+5.4.2.8\t1.614205004\r\n+5.4.2.8|g__Bacteroides.s__Bacteroides_stercoris\t0.807102502\r\n+5.4.2.8|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.807102502\r\n+5.4.2.9\t0.8474576271\r\n+5.4.2.9|g__Bacteroides.s__Bacteroides_stercoris\t0.8474576271\r\n+5.4.99.2\t1.0722557892\r\n+5.4.99.2|g__Bacteroides.s__Bacteroides_stercoris\t0.5717552887\r\n+5.4.99.2|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.5005005005\r\n+5.4.99.5\t1.6025641026\r\n+5.4.99.5|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.6025641026\r\n+5.5.1.1\t0.9633911368\r\n+5.5.1.1|g__Bacteroides.s__Bacteroides_stercoris\t0.9633911368\r\n+5.5.1.4\t0.872600349\r\n+5.5.1.4|g__Bacteroides.s__Bacteroides_stercoris\t0.872600349\r\n+5.5.1.7\t0.9633911368\r\n+5.5.1.7|g__Bacteroides.s__Bacteroides_stercoris\t0.9633911368\r\n+5.99.1.2\t1.7370184109\r\n+5.99.1.2|g__Bacteroides.s__Bacteroides_stercoris\t1.1162859466\r\n+5.99.1.2|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6207324643\r\n+5.99.1.3\t1.7920854921\r\n+5.99.1.3|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.7920854921\r\n+6.1.1.12\t0.6218905473\r\n+6.1.1.12|g__Bacteroides.s__Bacteroides_stercoris\t0.6218905473\r\n+6.1.1.16\t1.5048985637\r\n+6.1.1.16|g__Bacteroides.s__Bacteroides_stercoris\t0.754147813\r\n+6.1.1.16|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7507507508\r\n+6.1.1.19\t0.6082725061\r\n+6.1.1.19|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6082725061\r\n+6.1.1.20\t1.1312217195\r\n+6.1.1.20|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1312217195\r\n+6.1.1.21\t0.8230452675\r\n+6.1.1.21|
g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.8230452675\r\n+6.1.1.22\t0.6218905473\r\n+6.1.1.22|g__Bacteroides.s__Bacteroides_stercoris\t0.6218905473\r\n+6.1.1.23\t0.6218905473\r\n+6.1.1.23|g__Bacteroides.s__Bacteroides_stercoris\t0.6218905473\r\n+6.1.1.9\t0.7960837433\r\n+6.1.1.9|g__Bacteroides.s__Bacteroides_stercoris\t0.3944773176\r\n+6.1.1.9|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.4016064257\r\n+6.2.1.1\t0.6640106242\r\n+6.2.1.16\t0.668002672\r\n+6.2.1.16|g__Bacteroides.s__Bacteroides_stercoris\t0.668002672\r\n+6.2.1.1|g__Bacteroides.s__Bacteroides_stercoris\t0.6640106242\r\n+6.2.1.2\t0.6640106242\r\n+6.2.1.26\t0.668002672\r\n+6.2.1.26|g__Bacteroides.s__Bacteroides_stercoris\t0.668002672\r\n+6.2.1.2|g__Bacteroides.s__Bacteroides_stercoris\t0.6640106242\r\n+6.2.1.3\t2.5842033643\r\n+6.2.1.3|g__Bacteroides.s__Bacteroides_stercoris\t0.668002672\r\n+6.2.1.3|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.9162006923\r\n+6.3.1.2\t0.7390983001\r\n+6.3.1.2|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7390983001\r\n+6.3.2.10\t0.8703220191\r\n+6.3.2.10|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.8703220191\r\n+6.3.2.12\t0.754147813\r\n+6.3.2.12|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.754147813\r\n+6.3.2.17\t0.754147813\r\n+6.3.2.17|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.754147813\r\n+6.3.4.15\t1.675041876\r\n+6.3.4.15|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.675041876\r\n+6.3.4.3\t0.6361323155\r\n+6.3.4.3|g__Bacteroides.s__Bacteroides_stercoris\t0.6361323155\r\n+6.3.4.5\t0.9442870633\r\n+6.3.4.5|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.9442870633\r\n+6.3.4.6\t0.3255208333\r\n+6.3.4.6|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.3255208333\r\n+6.3.5.2\t0.7278020378\r\n+6.3.5.2|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7278020378\r\n+6.3.5.4\t0.6535947712\r\n+6.3.5.4|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.6535947712\r\n+6.3.5.5\t0.3255208333\r\n+6.3.5.5|g__Bacteroides.s__Bacteroides_t
hetaiotaomicron\t0.3255208333\r\n+6.4.1.1\t0.7457121551\r\n+6.4.1.1|g__Bacteroides.s__Bacteroides_stercoris\t0.7457121551\r\n+6.5.1.2\t1.0822510823\r\n+6.5.1.2|g__Bacteroides.s__Bacteroides_stercoris\t1.0822510823\r\n+6.6.1.1\t0.7199424046\r\n+6.6.1.1|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7199424046\r\n+UNGROUPED\t1096.0077936473997\r\n+UNGROUPED|g__Bacteroides.s__Bacteroides_stercoris\t568.1266146241995\r\n+UNGROUPED|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t527.8811790233003\r\n+UNMAPPED\t8.0\r\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/regrouped_gene_families_to_ko.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_ko.tsv Thu May 26 10:20:59 2016 -0400
b
b'@@ -0,0 +1,568 @@\n+# Gene Family\thumann2_Abundance\r\n+K00012\t1.7137960583\r\n+K00012|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.7137960583\r\n+K00024\t1.2626262626\r\n+K00024|g__Bacteroides.s__Bacteroides_stercoris\t1.2626262626\r\n+K00033\t0.754147813\r\n+K00033|g__Bacteroides.s__Bacteroides_stercoris\t0.754147813\r\n+K00041\t0.7524454477\r\n+K00041|g__Bacteroides.s__Bacteroides_stercoris\t0.7524454477\r\n+K00052\t1.0964912281\r\n+K00052|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0964912281\r\n+K00053\t1.1185682327\r\n+K00053|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1185682327\r\n+K00058\t1.2970168612\r\n+K00058|g__Bacteroides.s__Bacteroides_stercoris\t1.2970168612\r\n+K00059\t1.675041876\r\n+K00059|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.675041876\r\n+K00067\t1.4184397163\r\n+K00067|g__Bacteroides.s__Bacteroides_stercoris\t1.4184397163\r\n+K00075\t1.1990407674\r\n+K00075|g__Bacteroides.s__Bacteroides_stercoris\t1.1990407674\r\n+K00088\t1.5004164697\r\n+K00088|g__Bacteroides.s__Bacteroides_stercoris\t1.5004164697\r\n+K00097\t1.0582010582\r\n+K00097|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0582010582\r\n+K00145\t1.221001221\r\n+K00145|g__Bacteroides.s__Bacteroides_stercoris\t1.221001221\r\n+K00156\t1.0649627263\r\n+K00156|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0649627263\r\n+K00175\t1.9785235446499998\r\n+K00175|g__Bacteroides.s__Bacteroides_stercoris\t1.39780345175\r\n+K00175|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1614401858\r\n+K00179\t0.693000693\r\n+K00179|g__Bacteroides.s__Bacteroides_stercoris\t0.693000693\r\n+K00210\t1.6025641026\r\n+K00210|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.6025641026\r\n+K00241\t1.8018018018\r\n+K00241|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.8018018018\r\n+K00275\t2.0325203252\r\n+K00275|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.0325203252\r\n+K00382\t0.837520938\r\n+K00382|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.837520938\r\n+K0
0425\t0.7077140835\r\n+K00425|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7077140835\r\n+K00527\t0.4852013586\r\n+K00527|g__Bacteroides.s__Bacteroides_stercoris\t0.4852013586\r\n+K00560\t1.5503875969\r\n+K00560|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.5503875969\r\n+K00599\t1.5360983103\r\n+K00599|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.5360983103\r\n+K00600\t0.884173298\r\n+K00600|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.884173298\r\n+K00605\t1.0615711253\r\n+K00605|g__Bacteroides.s__Bacteroides_stercoris\t1.0615711253\r\n+K00638\t2.9067268429000004\r\n+K00638|g__Bacteroides.s__Bacteroides_stercoris\t1.7730496454\r\n+K00638|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t4.0404040404\r\n+K00645\t1.3550135501\r\n+K00645|g__Bacteroides.s__Bacteroides_stercoris\t1.3550135501\r\n+K00651\t1.3020833333\r\n+K00651|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.3020833333\r\n+K00652\t0.9661835749\r\n+K00652|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.9661835749\r\n+K00655\t1.4749262537\r\n+K00655|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.4749262537\r\n+K00666\t0.668002672\r\n+K00666|g__Bacteroides.s__Bacteroides_stercoris\t0.668002672\r\n+K00688\t1.2422360248\r\n+K00688|g__Bacteroides.s__Bacteroides_stercoris\t0.8281573499\r\n+K00688|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.4140786749\r\n+K00762\t2.0449897751\r\n+K00762|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.0449897751\r\n+K00773\t1.0193679918\r\n+K00773|g__Bacteroides.s__Bacteroides_stercoris\t1.0193679918\r\n+K00796\t1.4005602241\r\n+K00796|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.4005602241\r\n+K00826\t1.1494252874\r\n+K00826|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1494252874\r\n+K00845\t1.4814814815\r\n+K00845|g__Bacteroides.s__Bacteroides_stercoris\t1.4814814815\r\n+K00848\t0.7645259939\r\n+K00848|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7645259939\r\n+K00859\t4.3010752688\r\n+K00859|g__Bacteroides.s__Bacteroides_stercor
is\t2.1505376344\r\n+K00859|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.1505376344\r\n+K00860\t2.1929824561\r\n+K00860|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.1929824561\r\n+K00874\t1.0822510823\r\n+K00874|g__Bacteroides.s__Bacteroides_stercoris\t1.08'..b'9239766082\r\n+K07133\t0.90136054425\r\n+K07133|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.90136054425\r\n+K07139\t1.872659176\r\n+K07139|g__Bacteroides.s__Bacteroides_stercoris\t1.872659176\r\n+K07240\t1.8018018018\r\n+K07240|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.8018018018\r\n+K07263\t0.64767107415\r\n+K07263|g__Bacteroides.s__Bacteroides_stercoris\t0.64767107415\r\n+K07391\t0.7199424046\r\n+K07391|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.7199424046\r\n+K07405\t0.8110300081\r\n+K07405|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.8110300081\r\n+K07455\t1.2300123001\r\n+K07455|g__Bacteroides.s__Bacteroides_stercoris\t1.2300123001\r\n+K07507\t2.1786492375\r\n+K07507|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.1786492375\r\n+K07568\t1.01690841355\r\n+K07568|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.01690841355\r\n+K07636\t0.9663599439\r\n+K07636|g__Bacteroides.s__Bacteroides_stercoris\t0.9663599439\r\n+K07783\t1.6708437761\r\n+K07783|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.6708437761\r\n+K07787\t0.3356831151\r\n+K07787|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.3356831151\r\n+K08679\t1.1074197121\r\n+K08679|g__Bacteroides.s__Bacteroides_stercoris\t1.1074197121\r\n+K09680\t1.4619883041\r\n+K09680|g__Bacteroides.s__Bacteroides_stercoris\t1.4619883041\r\n+K09702\t1.9120458891\r\n+K09702|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.9120458891\r\n+K09760\t0.7698229407\r\n+K09760|g__Bacteroides.s__Bacteroides_stercoris\t0.7698229407\r\n+K09765\t2.2075055188\r\n+K09765|g__Bacteroides.s__Bacteroides_stercoris\t2.2075055188\r\n+K09808\t0.9034485291500001\r\n+K09808|g__Bacteroides.s__Bacteroides_stercoris\t0.9034485291500001\r\n+K09810\t1.9723
865878\r\n+K09810|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.9723865878\r\n+K09888\t7.0921985816\r\n+K09888|g__Bacteroides.s__Bacteroides_stercoris\t7.0921985816\r\n+K10773\t1.4792899408\r\n+K10773|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.4792899408\r\n+K11065\t2.8248587571\r\n+K11065|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t2.8248587571\r\n+K11068\t1.8518518519\r\n+K11068|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.8518518519\r\n+K11070\t1.5576323988\r\n+K11070|g__Bacteroides.s__Bacteroides_stercoris\t1.5576323988\r\n+K11717\t0.9416195857\r\n+K11717|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.9416195857\r\n+K11720\t0.8771929825\r\n+K11720|g__Bacteroides.s__Bacteroides_stercoris\t0.8771929825\r\n+K11754\t0.8299724572\r\n+K11754|g__Bacteroides.s__Bacteroides_stercoris\t0.9057971014\r\n+K11754|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.754147813\r\n+K11927\t1.0319917441\r\n+K11927|g__Bacteroides.s__Bacteroides_stercoris\t1.0319917441\r\n+K12251\t1.3605442177\r\n+K12251|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.3605442177\r\n+K12257\t0.3497726478\r\n+K12257|g__Bacteroides.s__Bacteroides_stercoris\t0.3497726478\r\n+K12373\t0.536850098475\r\n+K12373|g__Bacteroides.s__Bacteroides_stercoris\t0.4597701149\r\n+K12373|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.5625434263333333\r\n+K13004\t1.0582010582\r\n+K13004|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.0582010582\r\n+K13018\t2.3148148148\r\n+K13018|g__Bacteroides.s__Bacteroides_stercoris\t2.3148148148\r\n+K13378\t0.6844626968\r\n+K13378|g__Bacteroides.s__Bacteroides_stercoris\t0.6844626968\r\n+K13403\t1.3661202186\r\n+K13403|g__Bacteroides.s__Bacteroides_stercoris\t1.3661202186\r\n+K13789\t1.2121212121\r\n+K13789|g__Bacteroides.s__Bacteroides_stercoris\t1.2121212121\r\n+K14155\t0.9803921569\r\n+K14155|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.9803921569\r\n+K16089\t0.71194493835\r\n+K16089|g__Bacteroides.s__Bacteroides_stercoris\t0.4642525534\r\n+K16089
|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.47981866165\r\n+K16211\t0.819000819\r\n+K16211|g__Bacteroides.s__Bacteroides_stercoris\t0.819000819\r\n+K16345\t0.8547008547\r\n+K16345|g__Bacteroides.s__Bacteroides_stercoris\t0.8547008547\r\n+K17103\t2.0833333333\r\n+K17103|g__Bacteroides.s__Bacteroides_stercoris\t2.0833333333\r\n+UNGROUPED\t1.477499435554157\r\n+UNGROUPED|g__Bacteroides.s__Bacteroides_stercoris\t1.9462800052220868\r\n+UNGROUPED|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.1385122672014965\r\n+UNMAPPED\t8.0\r\n'
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/relab_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/relab_renormalized_pathway_abundance.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+UNMAPPED 0.00595649
+UNINTEGRATED 0.983482
+PWY-3841: folate transformations II 0.00322217
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 0.00296934
+HSERMETANA-PWY: L-methionine biosynthesis III 0.00255595
+VALSYN-PWY: L-valine biosynthesis 0.0018138
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/renamed_metacyc_pathways.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/renamed_metacyc_pathways.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+UNMAPPED: name1 2.6377384941
+UNINTEGRATED: name2 435.5199558332
+PWY-3841: NO_NAME 1.4268868747
+HOMOSER-METSYN-PWY: NO_NAME 1.3149243918
+HSERMETANA-PWY: NO_NAME 1.1318619128
+VALSYN-PWY: NO_NAME 0.8032128514
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/split_joined_table_abundances.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_joined_table_abundances.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Abundance
+VALSYN-PWY: L-valine biosynthesis 0.8032128514
+HSERMETANA-PWY: L-methionine biosynthesis III 1.1318619128
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 1.3149243918
+PWY-3841: folate transformations II 1.4268868747
+UNMAPPED 2.6377384941
+UNINTEGRATED 435.5199558332
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/split_joined_table_coverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_joined_table_coverage.tsv Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,7 @@
+# Pathway humann2_Coverage
+VALSYN-PWY: L-valine biosynthesis 0.5180160607
+HSERMETANA-PWY: L-methionine biosynthesis III 0.6143434179
+HOMOSER-METSYN-PWY: L-methionine biosynthesis I 0.6570127063
+PWY-3841: folate transformations II 0.6798402001
+UNMAPPED 1.0000000000
+UNINTEGRATED 1.0000000000
b
diff -r 000000000000 -r 3d6f37e7e3a8 test-data/taxonomic_profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomic_profile.tabular Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,11 @@
+#SampleID Metaphlan2_Analysis
+k__Bacteria 100.0
+k__Bacteria|p__Bacteroidetes 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron 31.55632
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris|t__Bacteroides_stercoris_unclassified 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified 31.55632
b
diff -r 000000000000 -r 3d6f37e7e3a8 tool-data/metaphlan2_bowtie_db.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_bowtie_db.loc.sample Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,5 @@
+# The Bowtie2 database has to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory)
+#
+#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it
+#by downloading it as explained above and uncommenting the following lines.
+#bowtie_db_v20 Default BowTie2 database $METAPHLAN2_DIR/db_v20/mpa_v20_m200
b
diff -r 000000000000 -r 3d6f37e7e3a8 tool-data/metaphlan2_metadata.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_metadata.loc.sample Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,5 @@
+#The metadata file has to be downloaded from https://bitbucket.org/biobakery/metaphlan2/raw/5424bb911dfcdb7212ea0949d4faeb6e69cfa61f/db_v20/mpa_v20_m200.pkl
+#
+#Since MetaPhlAn comes bundled with 1 metadata database, you can use it
+#by downloading it as explained above and uncommenting the following lines.
+#metadata_db_v20 Default MetaPhlAn2 metadata $METAPHLAN2_DIR/db_v20/mpa_v20_m200.pkl
b
diff -r 000000000000 -r 3d6f37e7e3a8 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,19 @@
+<tables>
+    <!-- Locations of HUMAnN2 and MetaPhlAn2 reference databases -->
+    <table name="humann2_protein_database" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/humann2_protein_database.loc" />
+    </table>
+    <table name="humann2_nucleotide_database" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/humann2_nucleotide_database.loc" />
+    </table>
+    <table name="metaphlan2_metadata" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/metaphlan2_metadata.loc" />
+    </table>
+    <table name="metaphlan2_bowtie_db" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/metaphlan2_bowtie_db.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 3d6f37e7e3a8 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu May 26 10:20:59 2016 -0400
b
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bowtie2" version="2.2.5">
+        <repository changeset_revision="30bd7eaeddbf" name="package_bowtie_2_2_5" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="metaphlan2" version="2.2.0">
+        <repository changeset_revision="e9ab4fcb8278" name="package_metaphlan2_2_2_0" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="diamond" version="0.7.10">
+        <repository changeset_revision="c538574b1553" name="package_diamond_0_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="humann2" version="0.6.1">
+        <repository changeset_revision="655e87ac3364" name="package_humann2_0_6_1" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>