Repository 'humann2_regroup_table'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/humann2_regroup_table

Changeset 0:a1747df2bc21 (2017-03-13)
Next changeset 1:1d141730ec69 (2017-08-01)
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit b46aa969c01b7e5f4f133192899fa4da286ecf89-dirty
added:
README.rst
generate_test_data
humann2_macros.xml
humann2_regroup_table.xml
repository_dependencies.xml
test-data/cpm_community_renormalized_pathway_abundance.tsv
test-data/demo_Abundance.tsv
test-data/demo_Coverage.tsv
test-data/demo_genefamilies.tsv
test-data/demo_pathabundance.tsv
test-data/demo_pathcoverage.tsv
test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
test-data/input_sequences.fasta
test-data/joined_pathway_coverage_abundance.tsv
test-data/marker_metadata.json
test-data/marker_sequences.fasta
test-data/max_reduced_gene_family_abundance.tsv
test-data/reduced_uniref50.fasta
test-data/regrouped_gene_families_to_infogo1000.tsv
test-data/regrouped_gene_families_to_ko.tsv
test-data/regrouped_gene_families_to_rxn.tsv
test-data/relab_levelwise_renormalized_pathway_abundance.tsv
test-data/taxonomic_profile.tabular
tool-data/humann2_nucleotide_database.loc.sample
tool-data/humann2_protein_database.loc.sample
tool-data/metaphlan2_database.loc.sample
tool_data_table_conf.xml.sample
transform_json_to_pkl.py
b
diff -r 000000000000 -r a1747df2bc21 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,12 @@
+Galaxy wrappers for HUMAnN2
+===========================
+
+Galaxy should be able to automatically install the dependencies, i.e. the
+HUMAnN2 binaries and its dependencies.
+
+After installation, you must tell Galaxy about the default databases for MetaPhlAn2 and HUMAnN2.
+To do so, use the data managers to install:
+
+- MetaPhlAn2 database
+- HUMAnN2 nucleotide database
+- HUMAnN2 protein database
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 generate_test_data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_test_data Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,124 @@
+#/usr/bin/env bash
+
+# humann2
+humann2 \
+    --input 'test-data/input_sequences.fasta' \
+    -o 'humann2_output' \
+    --annotation-gene-index 8 \
+    --taxonomic-profile 'test-data/taxonomic_profile.tabular' \
+    --metaphlan-options="-t rel_ab" \
+    --nucleotide-database 'test-data/' \
+    --protein-database 'test-data/' \
+    --evalue '1' \
+    --search-mode 'uniref50' \
+    --prescreen-threshold '0.01' \
+    --identity-threshold '50' \
+    --translated-subject-coverage-threshold '50' \
+    --translated-query-coverage-threshold '50' \
+    --translated-alignment 'diamond' \
+    --xipe 'off' \
+    --minpath 'on' \
+    --pick-frames 'on' \
+    --gap-fill 'off' \
+    --output-format 'tsv' \
+    --output-max-decimals '10' \
+    --output-basename 'humann2' \
+    --pathways 'metacyc'
+
+humann2 \
+    --input 'test-data/input_sequences.fasta' \
+    -o 'humann2_output' \
+    --annotation-gene-index 8 \
+    --metaphlan-options="-t rel_ab" \
+    --nucleotide-database 'test-data/' \
+    --protein-database 'test-data/' \
+    --evalue '1' \
+    --search-mode 'uniref90' \
+    --prescreen-threshold '0.01' \
+    --identity-threshold '50' \
+    --translated-subject-coverage-threshold '50' \
+    --translated-query-coverage-threshold '50' \
+    --translated-alignment 'rapsearch' \
+    --xipe 'off' \
+    --minpath 'on' \
+    --pick-frames 'on' \
+    --gap-fill 'off' \
+    --output-format 'tsv' \
+    --output-max-decimals '10' \
+    --output-basename 'humann2' \
+    --pathways 'unipathway'
+
+# humann2_regroup_table
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_infogo1000.tsv' \
+    --function 'sum' \
+    --groups 'uniref90_infogo1000' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_ko.tsv' \
+    --function 'mean' \
+    --groups 'uniref90_ko' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+humann2_regroup_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/regrouped_gene_families_to_rxn.tsv' \
+    --function 'sum' \
+    --groups 'uniref90_rxn' \
+    --ungrouped 'Y' \
+    --protected 'Y'
+
+# humann2_renorm_table
+humann2_renorm_table \
+    --input 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/cpm_community_renormalized_pathway_abundance.tsv' \
+    --units 'cpm' \
+    --mode 'community' \
+    --special 'n'
+
+humann2_renorm_table \
+    --input 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/relab_levelwise_renormalized_pathway_abundance.tsv' \
+    --units 'relab' \
+    --mode 'levelwise' \
+    --special 'y'
+
+# humann2_join_tables
+mkdir join_table_tmp_dir
+cp 'test-data/demo_pathabundance.tsv' join_table_tmp_dir
+cp 'test-data/demo_pathcoverage.tsv' join_table_tmp_dir
+humann2_join_tables \
+    -i 'join_table_tmp_dir' \
+    -o 'test-data/joined_pathway_coverage_abundance.tsv'
+
+# humann2_merge_abundance_tables
+humann2_merge_abundance_tables \
+    --input-genes 'test-data/demo_genefamilies.tsv' \
+    --input-pathways 'test-data/demo_pathabundance.tsv' \
+    -o 'test-data/merged_gene_families_pathways_abundances.tsv' 
+
+# humann2_reduce_table
+humann2_reduce_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/max_reduced_gene_family_abundance.tsv' \
+    --function 'max' \
+    --sort-by 'name'
+
+# humann2_rename_table
+humann2_rename_table \
+    --input 'test-data/demo_genefamilies.tsv' \
+    -o 'test-data/renamed_genefamilies.tsv' \
+    --names 'metacyc-pwy'
+
+# humann2_split_table
+mkdir split_table_tmp_dir
+humann2_split_table \
+    --input 'test-data/joined_pathway_coverage_abundance.tsv' \
+    -o 'split_table_tmp_dir'
+cp 'split_table_tmp_dir/demo_Abundance.tsv' 'test-data/'
+cp 'split_table_tmp_dir/demo_Coverage.tsv' 'test-data/'
b
diff -r 000000000000 -r a1747df2bc21 humann2_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_macros.xml Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@WRAPPER_VERSION@">0.9.9</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@WRAPPER_VERSION@">humann2</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>humann2 --version</version_command>
+    </xml>
+    <token name="@HELP_HEADER@">
+**What it does**
+
+HUMAnN is a pipeline for efficiently and accurately profiling the presence/absence and abundance of microbial pathways in a community from metagenomic or metatranscriptomic sequencing data.
+
+Read more about the tool: http://huttenhower.sph.harvard.edu/humann2/manual.
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1002358</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r a1747df2bc21 humann2_regroup_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann2_regroup_table.xml Mon Mar 13 12:38:02 2017 -0400
[
@@ -0,0 +1,115 @@
+<tool id="humann2_regroup_table" name="Regroup" version="@WRAPPER_VERSION@.0">
+    <description> a HUMAnN2 generated table by features</description>
+    <macros>
+        <import>humann2_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+        humann2_regroup_table
+            --input '$input'
+            -o '$output_table'
+            --function '$function'
+            --precision '$precision'
+            ## Grouping source: a built-in mapping (--groups) or a user-supplied file (--custom, optionally --reversed)
+            #if $built_in.built_in_test == "true":
+                --groups '$built_in.gene_family_groups.groups'
+            #else:
+                --custom '$built_in.custom'
+                $built_in.reversed
+            #end if
+            --ungrouped '$ungrouped'
+            --protected '$protected'
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tsv,tabular" label="Gene/pathway table"/>
+        <param argument="--function" type="select" label="How to combine grouped features?">
+            <option value="sum" selected="true">Sum</option>
+            <option value="mean">Mean</option>
+        </param>
+        <conditional name="built_in">
+            <param name="built_in_test" type="select" label="Use built-in grouping options?" help="">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+            <when value="true">
+                <conditional name="gene_family_groups">
+                    <param name="gene_family_type" type="select" label="Gene family type">
+                        <option value="uniref50">UniRef50 gene families</option>
+                        <option value="uniref90">UniRef90 gene families</option>
+                    </param>
+                    <when value="uniref50">
+                        <param name="groups" type="select" label="Grouping options" help="(--groups)">
+                            <option value="uniref50_go">UniRef50 gene families into GO</option>
+                            <option value="uniref50_ko">UniRef50 gene families into KEGG Orthogroups (KOs)</option>
+                            <option value="uniref50_eggnog">UniRef50 gene families into eggNOG</option>
+                            <option value="uniref50_pfam">UniRef50 gene families into PFAM</option>
+                            <option value="uniref50_level4ec">UniRef50 gene families into level4ec</option>
+                            <option value="uniref50_infogo1000">UniRef50 gene families into InfoGO1000</option>
+                            <option value="uniref50_rxn">UniRef50 gene families into metacyc reactions</option>
+                        </param>
+                    </when>
+                    <when value="uniref90">
+                        <param name="groups" type="select" label="Grouping options" help="(--groups)">
+                            <option value="uniref90_go">UniRef90 gene families into GO</option>
+                            <option value="uniref90_ko">UniRef90 gene families into KEGG Orthogroups (KOs)</option>
+                            <option value="uniref90_eggnog">UniRef90 gene families into eggNOG</option>
+                            <option value="uniref90_pfam">UniRef90 gene families into PFAM</option>
+                            <option value="uniref90_level4ec">UniRef90 gene families into level4ec</option>
+                            <option value="uniref90_infogo1000">UniRef90 gene families into InfoGO1000</option>
+                            <option value="uniref90_rxn">UniRef90 gene families into metacyc reactions</option>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+            <when value="false">
+                <param argument="--custom" type="data" format="tsv" label="Custom groups file"/>
+                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Is the groups file reversed?" help="Mapping from features to groups"/>
+            </when>
+        </conditional>
+        <param argument="--precision" type="integer" value="3" label="Decimal places to round to after applying function"/>
+        <param argument="--ungrouped" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Include an 'UNGROUPED' group to capture features that did not belong to other groups?"/>
+        <param argument="--protected" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Carry through protected features, such as 'UNMAPPED'?"/>
+    </inputs>
+    <outputs>
+        <data format="tsv" name="output_table" label="${tool.name} on ${on_string}: Regrouped table" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <param name="built_in_test" value="true"/>
+            <param name="gene_family_type" value="uniref90"/>
+            <param name="groups" value="uniref90_infogo1000"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output_table" file="regrouped_gene_families_to_infogo1000.tsv"/>
+        </test>
+        <test>
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="mean"/>
+            <param name="built_in_test" value="true"/>
+            <param name="gene_family_type" value="uniref90"/>
+            <param name="groups" value="uniref90_ko"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output_table" file="regrouped_gene_families_to_ko.tsv"/>
+        </test>
+        <test>
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <param name="built_in_test" value="true"/>
+            <param name="gene_family_type" value="uniref90"/>
+            <param name="groups" value="uniref90_rxn"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output_table" file="regrouped_gene_families_to_rxn.tsv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Regroup HUMAnN2 table features is a tool for regrouping table features (abundances or coverage) given a table of feature values and a mapping of groups to component features. It produces a new table with group values in place of feature values.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r a1747df2bc21 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<repositories description="This requires the HUMAnN2 data manager definition to install all required databases.">
+    <repository changeset_revision="9c4ad82be5bd" name="data_manager_metaphlan2_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="1316375a8cbb" name="data_manager_humann2_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r a1747df2bc21 test-data/cpm_community_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cpm_community_renormalized_pathway_abundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,24 @@
+# Pathway demo_Abundance
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 180020
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 180020
+PWY-6305: putrescine biosynthesis IV 161410
+PWY-6305: putrescine biosynthesis IV|unclassified 161410
+PWY490-3: nitrate reduction VI (assimilatory) 143637
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 143637
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 139077
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 117223
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 83200
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 62649.4
+PWY-6700: queuosine biosynthesis 72792.1
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 44461.5
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 18957.3
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 41366.2
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 30998.5
+VALSYN-PWY: L-valine biosynthesis 41366.2
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 30998.5
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 37515.4
+PWY-5097: L-lysine biosynthesis VI 34829.8
+PWY-2942: L-lysine biosynthesis III 33055.4
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 23325.7
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 31729.7
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 16294.6
b
diff -r 000000000000 -r a1747df2bc21 test-data/demo_Abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_Abundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774
+PWY-6305: putrescine biosynthesis IV 12.3568523173
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417
+PWY-2942: L-lysine biosynthesis III 2.5305778153
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173
+PWY-5097: L-lysine biosynthesis VI 2.6664127226
+UNINTEGRATED|unclassified 46.5907339353
+UNINTEGRATED 6378.3363291102
+VALSYN-PWY: L-valine biosynthesis 3.1668118122
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261
+PWY-6700: queuosine biosynthesis 5.5726434156
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863
+UNMAPPED 1548.1865983490
b
diff -r 000000000000 -r a1747df2bc21 test-data/demo_Coverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_Coverage.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Coverage
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.7609346153
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.2181895299
+PWY-6305: putrescine biosynthesis IV 0.9518375285
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.4389950257
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.8805252394
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1.0000000000
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.3826032161
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.5431316496
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.3040136356
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.3826190602
+PWY-2942: L-lysine biosynthesis III 0.2651956314
+PWY-6305: putrescine biosynthesis IV|unclassified 0.7234100116
+PWY-5097: L-lysine biosynthesis VI 0.2856160532
+UNINTEGRATED|unclassified 1.0000000000
+UNINTEGRATED 1.0000000000
+VALSYN-PWY: L-valine biosynthesis 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.8557878872
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 1.0000000000
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.9874950891
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.7183078347
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.3368670323
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.7290147297
+PWY-6700: queuosine biosynthesis 0.6918160989
+PWY490-3: nitrate reduction VI (assimilatory) 0.9568614047
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.9392767343
+UNMAPPED 1.0000000000
b
diff -r 000000000000 -r a1747df2bc21 test-data/demo_genefamilies.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_genefamilies.tsv Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,8420 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0000000000\n+UniRef90_unknown\t1635.4482975639\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_dorei\t883.2117957343\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus\t752.2365018295\n+UniRef90_R6HHA8\t333.3333333333\n+UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei\t333.3333333333\n+UniRef90_R7NYS9\t166.6666666667\n+UniRef90_R7NYS9|g__Bacteroides.s__Bacteroides_vulgatus\t166.6666666667\n+UniRef90_D1K9F5\t66.6666666667\n+UniRef90_D1K9F5|g__Bacteroides.s__Bacteroides_dorei\t66.6666666667\n+UniRef90_G1UMF5\t66.6666666667\n+UniRef90_G1UMF5|g__Bacteroides.s__Bacteroides_vulgatus\t66.6666666667\n+UniRef90_R7NVD8\t56.3492063492\n+UniRef90_R7NVD8|g__Bacteroides.s__Bacteroides_vulgatus\t56.3492063492\n+UniRef90_R7P3A5\t55.5555555556\n+UniRef90_R7P3A5|g__Bacteroides.s__Bacteroides_dorei\t55.5555555556\n+UniRef90_D1K046\t47.6190476190\n+UniRef90_D1K046|g__Bacteroides.s__Bacteroides_dorei\t47.6190476190\n+UniRef90_D4VBJ0\t47.6190476190\n+UniRef90_D4VBJ0|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_R6HVY6\t47.6190476190\n+UniRef90_R6HVY6|g__Bacteroides.s__Bacteroides_dorei\t47.6190476190\n+UniRef90_R6I0Z3\t47.6190476190\n+UniRef90_R6I0Z3|g__Bacteroides.s__Bacteroides_vulgatus\t47.6190476190\n+UniRef90_C3R7K2\t45.7650273224\n+UniRef90_C3R7K2|g__Bacteroides.s__Bacteroides_dorei\t45.7650273224\n+UniRef90_B6VX47\t41.6666666667\n+UniRef90_B6VX47|g__Bacteroides.s__Bacteroides_dorei\t41.6666666667\n+UniRef90_R9HIC6\t41.6666666667\n+UniRef90_R9HIC6|g__Bacteroides.s__Bacteroides_vulgatus\t41.6666666667\n+UniRef90_A7AE97\t37.0370370370\n+UniRef90_A7AE97|g__Bacteroides.s__Bacteroides_dorei\t37.0370370370\n+UniRef90_I9GA88\t35.3063343718\n+UniRef90_I9GA88|g__Bacteroides.s__Bacteroides_vulgatus\t19.7300103842\n+UniRef90_I9GA88|g__Bacteroides.s__Bacteroides_dorei\t15.5763239875\n+UniRef90_A6KZ97\t33.3333333333\n+UniRef90_A6KZ97|g__Bacteroides.s__Bacteroides_dorei\t33.3333333333\n+UniRe
f90_B6VTL0\t33.3333333333\n+UniRef90_B6VTL0|g__Bacteroides.s__Bacteroides_dorei\t33.3333333333\n+UniRef90_B6VXB0\t30.3030303030\n+UniRef90_B6VXB0|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_B6VZR2\t30.3030303030\n+UniRef90_B6VZR2|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_C3Q4T2\t30.3030303030\n+UniRef90_C3Q4T2|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_D1K8B6\t30.3030303030\n+UniRef90_D1K8B6|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_R6HMF6\t30.3030303030\n+UniRef90_R6HMF6|g__Bacteroides.s__Bacteroides_dorei\t30.3030303030\n+UniRef90_B6VVE6\t28.9855072464\n+UniRef90_B6VVE6|g__Bacteroides.s__Bacteroides_dorei\t28.9855072464\n+UniRef90_D1JZ48\t27.7777777778\n+UniRef90_D1JZ48|g__Bacteroides.s__Bacteroides_dorei\t27.7777777778\n+UniRef90_D4VC99\t27.7777777778\n+UniRef90_D4VC99|g__Bacteroides.s__Bacteroides_vulgatus\t27.7777777778\n+UniRef90_G1ULF8\t27.7777777778\n+UniRef90_G1ULF8|g__Bacteroides.s__Bacteroides_dorei\t27.7777777778\n+UniRef90_A6KXM7\t26.1437908497\n+UniRef90_A6KXM7|g__Bacteroides.s__Bacteroides_vulgatus\t26.1437908497\n+UniRef90_B6W1W2\t25.6410256410\n+UniRef90_B6W1W2|g__Bacteroides.s__Bacteroides_dorei\t25.6410256410\n+UniRef90_A6KXY0\t23.8095238095\n+UniRef90_A6KXY0|g__Bacteroides.s__Bacteroides_vulgatus\t23.8095238095\n+UniRef90_D4V534\t23.8095238095\n+UniRef90_D4V534|g__Bacteroides.s__Bacteroides_vulgatus\t23.8095238095\n+UniRef90_I0PXX6\t23.8095238095\n+UniRef90_I0PXX6|g__Bacteroides.s__Bacteroides_dorei\t23.8095238095\n+UniRef90_D1JXM3\t22.9885057471\n+UniRef90_D1JXM3|g__Bacteroides.s__Bacteroides_dorei\t22.9885057471\n+UniRef90_R6HVW3\t22.7272727273\n+UniRef90_R6HVW3|g__Bacteroides.s__Bacteroides_dorei\t22.7272727273\n+UniRef90_D1JXH6\t22.2222222222\n+UniRef90_D1JXH6|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_D1JXW7\t22.2222222222\n+UniRef90_D1JXW7|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_I8VUS4\t22.2222222222\n+UniRef90_I8VUS4|g__Bac
teroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_R6HFI1\t22.2222222222\n+UniRef90_R6HFI1|g__Bacteroides.s__Bacteroides_dorei\t22.2222222222\n+UniRef90_R6HV'..b'6872852234\n+UniRef90_A6KZC6|g__Bacteroides.s__Bacteroides_dorei\t0.6872852234\n+UniRef90_R6HWZ1\t0.6788866259\n+UniRef90_R6HWZ1|g__Bacteroides.s__Bacteroides_vulgatus\t0.6788866259\n+UniRef90_R9H303\t0.6788866259\n+UniRef90_R9H303|g__Bacteroides.s__Bacteroides_dorei\t0.6788866259\n+UniRef90_C3RF19\t0.6740815639\n+UniRef90_C3RF19|g__Bacteroides.s__Bacteroides_vulgatus\t0.6740815639\n+UniRef90_R9H4S0\t0.6734006734\n+UniRef90_R9H4S0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6734006734\n+UniRef90_C3RFH8\t0.6706908115\n+UniRef90_C3RFH8|g__Bacteroides.s__Bacteroides_dorei\t0.6706908115\n+UniRef90_R7NVK1\t0.6680026720\n+UniRef90_R7NVK1|g__Bacteroides.s__Bacteroides_dorei\t0.6680026720\n+UniRef90_C3RCI2\t0.6626905235\n+UniRef90_C3RCI2|g__Bacteroides.s__Bacteroides_dorei\t0.6626905235\n+UniRef90_A6KZV6\t0.6613756614\n+UniRef90_A6KZV6|g__Bacteroides.s__Bacteroides_vulgatus\t0.6613756614\n+UniRef90_R6HUT5\t0.6600660066\n+UniRef90_R6HUT5|g__Bacteroides.s__Bacteroides_vulgatus\t0.6600660066\n+UniRef90_R6HZ28\t0.6600660066\n+UniRef90_R6HZ28|g__Bacteroides.s__Bacteroides_dorei\t0.6600660066\n+UniRef90_R6HSW5\t0.6472491909\n+UniRef90_R6HSW5|g__Bacteroides.s__Bacteroides_dorei\t0.6472491909\n+UniRef90_R6HHG0\t0.6447453256\n+UniRef90_R6HHG0|g__Bacteroides.s__Bacteroides_dorei\t0.6447453256\n+UniRef90_I9IJN0\t0.6422607579\n+UniRef90_I9IJN0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6422607579\n+UniRef90_A6L1H2\t0.6410256410\n+UniRef90_A6L1H2|g__Bacteroides.s__Bacteroides_vulgatus\t0.6410256410\n+UniRef90_R6HHZ5\t0.6410256410\n+UniRef90_R6HHZ5|g__Bacteroides.s__Bacteroides_dorei\t0.6410256410\n+UniRef90_R7P064\t0.6283380459\n+UniRef90_R7P064|g__Bacteroides.s__Bacteroides_vulgatus\t0.6283380459\n+UniRef90_R6HY39\t0.6277463905\n+UniRef90_R6HY39|g__Bacteroides.s__Bacteroides_dorei\t0.6277463905\n+UniRef90_R9H3B8\t0.626566416
0\n+UniRef90_R9H3B8|g__Bacteroides.s__Bacteroides_vulgatus\t0.6265664160\n+UniRef90_D4VAA3\t0.6178560395\n+UniRef90_D4VAA3|g__Bacteroides.s__Bacteroides_vulgatus\t0.6178560395\n+UniRef90_I8VJA2\t0.6138735421\n+UniRef90_I8VJA2|g__Bacteroides.s__Bacteroides_dorei\t0.6138735421\n+UniRef90_R7P010\t0.5957700328\n+UniRef90_R7P010|g__Bacteroides.s__Bacteroides_dorei\t0.5957700328\n+UniRef90_R7NZU3\t0.5889281508\n+UniRef90_R7NZU3|g__Bacteroides.s__Bacteroides_vulgatus\t0.5889281508\n+UniRef90_R6HLC4\t0.5712653528\n+UniRef90_R6HLC4|g__Bacteroides.s__Bacteroides_dorei\t0.5712653528\n+UniRef90_C3RCJ1\t0.5688282139\n+UniRef90_C3RCJ1|g__Bacteroides.s__Bacteroides_dorei\t0.5688282139\n+UniRef90_C3REY0\t0.5621135469\n+UniRef90_C3REY0|g__Bacteroides.s__Bacteroides_dorei\t0.5621135469\n+UniRef90_R9HNV3\t0.5574136009\n+UniRef90_R9HNV3|g__Bacteroides.s__Bacteroides_vulgatus\t0.5574136009\n+UniRef90_R9ILG9\t0.5420054201\n+UniRef90_R9ILG9|g__Bacteroides.s__Bacteroides_vulgatus\t0.5420054201\n+UniRef90_A6KY14\t0.5216484090\n+UniRef90_A6KY14|g__Bacteroides.s__Bacteroides_dorei\t0.5216484090\n+UniRef90_I8WHD1\t0.5167958656\n+UniRef90_I8WHD1|g__Bacteroides.s__Bacteroides_dorei\t0.5167958656\n+UniRef90_A6KZH7\t0.5148005148\n+UniRef90_A6KZH7|g__Bacteroides.s__Bacteroides_dorei\t0.5148005148\n+UniRef90_E5UXW6\t0.5120327701\n+UniRef90_E5UXW6|g__Bacteroides.s__Bacteroides_vulgatus\t0.5120327701\n+UniRef90_A6KXQ8\t0.5096839959\n+UniRef90_A6KXQ8|g__Bacteroides.s__Bacteroides_vulgatus\t0.5096839959\n+UniRef90_A6L536\t0.4642525534\n+UniRef90_A6L536|g__Bacteroides.s__Bacteroides_vulgatus\t0.4642525534\n+UniRef90_J9F9P5\t0.4547521601\n+UniRef90_J9F9P5|g__Bacteroides.s__Bacteroides_dorei\t0.4547521601\n+UniRef90_R7P1T3\t0.4438526409\n+UniRef90_R7P1T3|g__Bacteroides.s__Bacteroides_vulgatus\t0.4438526409\n+UniRef90_R6IJZ8\t0.4251700680\n+UniRef90_R6IJZ8|g__Bacteroides.s__Bacteroides_vulgatus\t0.4251700680\n+UniRef90_C3RF26\t0.4145936982\n+UniRef90_C3RF26|g__Bacteroides.s__Bacteroides_dorei\t0.4145936982\
n+UniRef90_A6L3C1\t0.3766478343\n+UniRef90_A6L3C1|g__Bacteroides.s__Bacteroides_dorei\t0.3766478343\n+UniRef90_C6Z3L0\t0.3144654088\n+UniRef90_C6Z3L0|g__Bacteroides.s__Bacteroides_dorei\t0.3144654088\n+UniRef90_I9RGY5\t0.2213368747\n+UniRef90_I9RGY5|g__Bacteroides.s__Bacteroides_dorei\t0.2213368747\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/demo_pathabundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathabundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+UNMAPPED 1548.1865983490
+UNINTEGRATED 6378.3363291102
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460
+UNINTEGRATED|unclassified 46.5907339353
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238
+PWY-6305: putrescine biosynthesis IV 12.3568523173
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695
+PWY-6700: queuosine biosynthesis 5.5726434156
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+VALSYN-PWY: L-valine biosynthesis 3.1668118122
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961
+PWY-5097: L-lysine biosynthesis VI 2.6664127226
+PWY-2942: L-lysine biosynthesis III 2.5305778153
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 test-data/demo_pathcoverage.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_pathcoverage.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Coverage
+UNMAPPED 1.0000000000
+UNINTEGRATED 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 1.0000000000
+UNINTEGRATED|unclassified 1.0000000000
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.9874950891
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.8805252394
+PWY-6305: putrescine biosynthesis IV 0.9518375285
+PWY-6305: putrescine biosynthesis IV|unclassified 0.7234100116
+PWY490-3: nitrate reduction VI (assimilatory) 0.9568614047
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.7290147297
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.9392767343
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.5431316496
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.7609346153
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.8557878872
+PWY-6700: queuosine biosynthesis 0.6918160989
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.7183078347
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.3826190602
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.3826032161
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+VALSYN-PWY: L-valine biosynthesis 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.5877696572
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.3368670323
+PWY-5097: L-lysine biosynthesis VI 0.2856160532
+PWY-2942: L-lysine biosynthesis III 0.2651956314
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.4389950257
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.2181895299
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.3040136356
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r a1747df2bc21 test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz
b
Binary file test-data/g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz has changed
b
diff -r 000000000000 -r a1747df2bc21 test-data/input_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sequences.fasta Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,155 @@\n+>r2|637000026.fna|5753889|5754040|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln383_#0/1\n+CTCCGTCCGCTGGTAGAACGTCTGGTAGAAGCTCAAAAAGCATTGGCAACCAAATACCTTTCCGAAGCCAAACGACTGATTGCCTCCAACGACAAGAAGGAAGTGGAAGAAGGATTCCTTGCCCTTTATCGTAGCCACAAGTGTCTTCCGA\n+>r3|637000026.fna|1749333|1749484|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln165_#0/1\n+AAGTAAAAGTCTACACACAGGAAGGAACGATAATGGAAAGTACGATTGAAGTAAATCGTCCGATGGAGATAGCAGGATGGAAAATTTATCAGCTTAGTTATGATGAGTCGAAAGGTCGCTGGAGCGATATCAGTGTCTTTGAGCTGGTTCG\n+>r8|637000026.fna|2991814|2991965|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+AGAAAGTTTTGATCAAAGCATGGGATGCTGAAAACGGACCGGTAATTATCGACTCAGGAGAAAGTACCTACAACACAACCGCCAAAAAGTTCAGCCTCAAATATACGATCGGTAACACGCTATATGAAGAGCAACTGACCAAAGAAAAAGA\n+>r24|641736196.fna|214158|214309|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln388_#0/1\n+CAGATACGAGTACCAATTGGCCGTCAACGACTATTGGAAGGAAGTCGGCGGATTGCAGATGCTGCCCGGAACCAACCGCTCCAGCGACCGCTTTGTACGCGCGTCATTCTACATTCATGCCATTCCGCAGACAGCAGACGCGGCGATTGCA\n+>r32|637000026.fna|4140951|4141102|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln422_#0/1\n+TATTCTCAGGGGACTAATTGGTGATATAGTGCTCGATTATGATAGAGGTGAAGGAATTATGTCCTTTAAGACTCAAAAGGTAGGTTATATATCAGGATATTATTTGGGATGTACTACATATTGTTATGCCATGGGATTTTATCCTACATAT\n+>r55|637000026.fna|6212867|6213018|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln342_#0/1\n+CAGGCGTCGGGTGAGCTGGACTTTGAAGAGTTGTGCGAAGCGATCACCAGTCGTTCTACTTGTACGGAAACGGATGTACGTGCGGCTATTTCGGGTATTCTTTACGAAGTGAAGCGTGCGTTGAAGGCAGGAAGAATTGCGAGACTGGGTG\n+>r94|641736196.fna|124792|124943|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln396_#0/1\n+AATCAGTTTCATACGCTATCAATATCCGGGGTCAATAGTTTCCATTTGTTGCTCGCTTGACACGCTTTCAAAGCTAACATCTTCCGCATCAATCAACTGTTTGTTTGCGTAATTGTCGGTCAATATTTCACGCTGTGAAGTTTCGTCACCC\n+>r115|641736196.fna|175677|175828|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln360_#0/1\n+CTGTCAGATACGTGAAAGATTACATTCGATACGTGAGTATGTAATGAAATATCCTGAGTTTGACGATAGAAACGATATTGACCCGTCAATGCGAATGTTCTATATCCAGTCTGTTGAAGCACAAGCAAAAAATCGAAGTTATTCAGATGCA\n+>r121|641736196.fna|7934|8085|_from_ |NZ_ABFZ02000017_Bacteroides_stercoris_ATCC_43183,|_ln383_#0/1\n+TGAGCGGTTCGGTCTATACCCTTATGAATAAAATCTTCAGGGATTCAAAAGAACCGCTGTATGGCCGTGCTGACAATATCATGAAGTTAGCTCCTTTTACTACTTCCGTATTGAAAGAAATCATATCGGATCATAAGGCCGACTATACTAA\n+>r145|637000026.fna|1378791|1378942|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+TCCGTCTGATTCGTAGTGTACCAGCATATCCGCCATGACATCGCTCACTGTAGCGCTGAAATCTTTCAGAAGTTTATGGCTGCCCACCCAGTAAGTCTTGCCTTGATAAGACACCTTTATACCTTTGCCTGTGATGCTCTCAAAACTGCTC\n+>r158|637000026.fna|3242194|3242345|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln388_#0/1\n+GGACAGCAGTATCAAACGGTTAAAATACGTGAGATACGCAGATGACTTTCTGATTGGAATTACCGGTAATCTTGAAGACTGCAAAACAGTAAAAGAGGATATTAAGAATTATTTGAATGAAGCTCTTAAACTGGAACTGTCAGACGAAAAG\n+>r160|637000026.fna|3948359|3948510|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln347_#0/1\n+CGGAAGTGACTCATACATATCAGGTGGTTCTTCCAGTTATAAGAACCTGTCTGCAAGTGAAATGAAGAACACATCTTCATTCACAAACTGGAATTTCAGTACGGTTTGGGAAATGGGAAGTGAATACCCGACATTACAGGGCTTATTAAAA\n+>r187|637000026.fna|5220534|5220685|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln396_#0/1\n+GTTACCTTGATCAATGGATTTGCATATTCCAGCAACCACGAGATAGAGGCGTGTCCTGCTTCATGCAAGGCAATCGAGCGTCTTTCCGCTTCTGTGGTAATCTTGGTCTTCTTTTCCAGACCACCGATGATACGGTCTACCGCATCCAGAA\n+>r192|637000026.fna|2858128|2858279|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACGTGGACGGTTACGCACTGCCGGAAAAAATCAGGAACGCATTCCTGGGATTGGAGGAAAAGGAGAAAACACTCATCAGTTACTTCACCCAGCACAATGAACAGTATGCAAAGAAGGTCGGCAAGACTGCCACGCAAAAGACCTATTCCCG\n+>r218|641736196.fna|272465|272616|_from_ 
|NZ_ABFZ02000022_Bacteroides_stercoris_ATCC_43183,|_ln348_#0/1\n+ATTTCTTCGTCGGTTTCCGTATGGAAGAAGTACACCACGGCCAACTAGGTGAAGTGACGGATGTGGACACCTCTACCATCAACACTCTGTTCGTGGTAGATTATAAAGGAGAAGAATTGCTGATTCCGGCACAGGAAGATTTCATTATGGA\n+>r222|637000026.fna|2630231|2630382|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c'..b'TTCTCTTGAAAAGAAAGCAATGGAATGGGTAGCTTCACTCAATCTGGATGATGAAAAGAAATCAGGTTTTGCGGTGACGACTATTTATAATCATCTTCGCCAGGTTCGAGACTGG\n+>r356|637000026.fna|394123|394274|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+AGTTCGCCACCGATATAGATTTCTGTCTCATTGTATTTATCCTGCTTTAGCGGGTCTAGGCTCATTAATGAATATTTGCTCAGCTTATGTGAGGCAAAGGCTGTCAACCCGGCTTTTGCATATTTGTTGAAGCCTTCGAGCAAAGCAATAC\n+>r358|637000026.fna|4503085|4503236|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln341_#0/1\n+GACCATTATCGCCTCGCTTTCGACCGCAATAATACCTGGAGTCAAAAGTATAATATTATCTGGGATAAGATGTGGAATTTGAATATATTCCCTAATAATGTAATCAGTAAAGAAATCAACTACTATCTGACAAAGCAGAATCTATATGGAT\n+>r364|641736196.fna|256980|257131|_from_ |NZ_ABFZ02000019_Bacteroides_stercoris_ATCC_43183,|_ln319_#0/1\n+CCACCGTAGATGCCGGCGACGTGGTAGAGGTAGTGGGAACCAGGCAGGAAGTGGAAACAGCCGCCAGACGTTTGGGCTATATAGACCGTCCTACCAATCAGACTGATATGATTTTCGTAGGGCTCGGCATCCTTGTAGGCGGGTTGTTCGG\n+>r388|637000026.fna|1186959|1187110|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln349_#0/1\n+ACTGCTAAGTGCAGGATGTATCCTCTTTCTTTTTTGGAGTATCACTCGCCTGGTCAGATCTCTTCTGAAAGAAGAAAAAGAGAATCTGTCAGTAACCGACGTGATTATCATTCTCGGTTCCGGGTTGGTAGGAGCTTTGGCATATACTTTT\n+>r390|637000026.fna|2094824|2094975|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln402_#0/1\n+GTTGGAAGCAGCGTCGAACAGACGGAAAGTATTGCTTCCGATTTCTTTCACCAGATTACCGAACATCAGCATACCGATCAAAGGTACTGCACTTGGGACGAAGAGAGCTACAATCGTAGTCACTACAATCGGGAAAATAATCTTCAGTACA\n+>r392|641736196.fna|89576|89727|_from_ 
|NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln410_#0/1\n+AACAGGCAGGCGCAATATCCCGAAAATGCACCGCCGCAGATACGCAGTCCGGCGGATAGTATCGTTGCCAACCATGTCATAATCGGGCGTCCGGGACTGTATGCCAAGGAGGAAACCCGGTCCAAAGGATTGAAAATATAACCTAAGGAAT\n+>r396|641736196.fna|49107|49258|_from_ |NZ_ABFZ02000013_Bacteroides_stercoris_ATCC_43183,|_ln380_#0/1\n+ATCCGGATGCGCAGTTTGCCGCCGTATCGCGTCTGCCCTACTACTCCACCATCATCAAAGAACGGCTGCCCAAACTACGCACCGTACAGTACGAATACAAACATGAAATATTCCGTGAACTGAATCCGGATGAAATATTGGACAAATACTT\n+>r401|637000026.fna|696947|697098|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+TCGTACGCAACCGGATAATTTCATCATCATCCTGCATCTGCTTCTCCAACGAACGGACAGACTGATCCTGCTGCGTCATTTCCAGTCTCGTATTAAAAAGAAACACATCCCGATTGCTGTCCAGTTGCTGCCGTTTGTTTTCAATCACCCG\n+>r439|637000026.fna|1571176|1571327|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln352_#0/1\n+GCAGCTCATCAGTGATATCCTTGATTTATCAAAAATAGAAGCAGGAACATTGGACCTGCACTATTCCAATGTTGAAATCAATGATTTAATGAAAGACCTGGAAAACATGTGTCAACTGAAACTTAAGTCGGATGCTGTCAAACTGGAATTC\n+>r442|641736196.fna|41704|41855|_from_ |NZ_ABFZ02000018_Bacteroides_stercoris_ATCC_43183,|_ln332_#0/1\n+CACATCGCAGGAGAAATACATCATGGTACTGTCTTTCAATGAAGCGATAGCCATTTGCTTGATATCCTCTACGGGCAGGTTTACGTACGTCCAGTTCTTGCCGTCGTAGCGGTGGCGGTCGTAGTCAATCTCATAGCATTTGTAATACTCG\n+>r452|637000026.fna|5083361|5083512|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln393_#0/1\n+TAGTCCAGTCAATGCCCAGCAGTTGCAGGCGATGGAAGAAGATACTTCGCTCCAGATCGTTCGGCTTGCGGAGGTCTAATATCAGTTCTTTGATCTCGGCGGTGAACGGAACACGCAGACGTTTTTGTATTTTCTCCACGTCTACCAGCAG\n+>r453|637000026.fna|2491055|2491206|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln325_#0/1\n+ACTCCCGCCATGCTCTCAGATATAGTGAATGAACTTAAAGAGGTGCTGGAACACCAGCCTGTAACAAAGGACAAGGAACAAAAGAAGGTCATCCGGGAAAAGAAGAAACAGGTCCGGGAACTTGAGGAGTACCGTGACAAGTTGATAGAAT\n+>r479|641736196.fna|531701|531852|_from_ 
|NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln380_#0/1\n+CCGTGAGCTATCGTCATTGGAACGCCAGTTAGGAAAATTGCAGCGCGACTTGAAGGACAAGAAGAAGAAATACGAGTCGTCTGTCCAGTACCTTTATAAGAACCGTTCCATTGAGGAGAAGCTGATGTTTATTTTCTCTGCACGGAGTCTG\n+>r495|637000026.fna|898444|898595|_from_ |NC_004663_Bacteroides_thetaiotaomicron_VPI-5482,_c|_ln166_#0/1\n+GTAACTGGAGTATCTTACAAAAGACTGAAAAGACAATTTCTCTTACAATATTCCTGTTACTCTTTATCCTTGGAGTATCCATTGGCTCTAATAGCCTGATTGTGAATAATCTCGGTAAGTTTGGATGGCAGGCGATCATTCTTGCCGTATC\n+>r497|641736196.fna|48795|48946|_from_ |NZ_ABFZ02000020_Bacteroides_stercoris_ATCC_43183,|_ln354_#0/1\n+TTGCCATCCCGCTCCCGTGCAAGGACTCCACACCTTTAAGATTCTGTCGAGCCACGGCAAGTTCCGATTGCAAAGAACTCAATTCGACCGAACGTATATCTACCATCCTTTTACCTTTTTCTGCATAATCGCCAAGTGAAAAGTAGCTTTT\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/joined_pathway_coverage_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/joined_pathway_coverage_abundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance demo_Coverage
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 6.3694267516 0.7609346153
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 4.7961630695 0.8557878872
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 10.6471569863 0.9392767343
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 8.9740635174 0.5431316496
+PWY-2942: L-lysine biosynthesis III 2.5305778153 0.2651956314
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 1.7857142857 0.4389950257
+PWY-5097: L-lysine biosynthesis VI 2.6664127226 0.2856160532
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 13.7815543238 0.9874950891
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 13.7815543238 0.8805252394
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 2.8720157961 0.3368670323
+PWY-6305: putrescine biosynthesis IV 12.3568523173 0.9518375285
+PWY-6305: putrescine biosynthesis IV|unclassified 12.3568523173 0.7234100116
+PWY-6700: queuosine biosynthesis 5.5726434156 0.6918160989
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 3.4037771853 0.7183078347
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 1.4512868417 0.3826190602
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 3.1668118122 0.3826032161
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339 0.5877696572
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 2.4290890774 0.2181895299
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 1.2474418751 0.3040136356
+PWY490-3: nitrate reduction VI (assimilatory) 10.9962495261 0.9568614047
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 10.9962495261 0.7290147297
+UNINTEGRATED 6378.3363291102 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 1957.0469180460 1.0000000000
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 2909.3950875399 1.0000000000
+UNINTEGRATED|unclassified 46.5907339353 1.0000000000
+UNMAPPED 1548.1865983490 1.0000000000
+VALSYN-PWY: L-valine biosynthesis 3.1668118122 0.3826032161
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 2.3731126339 0.5877696572
b
diff -r 000000000000 -r a1747df2bc21 test-data/marker_metadata.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_metadata.json Mon Mar 13 12:38:02 2017 -0400
[
b'@@ -0,0 +1,1 @@\n+{"taxonomy": {"k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Micromonosporaceae|g__Salinispora|s__Salinispora_pacifica|t__GCF_000374725": 5438612, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Mononegavirales|f__Rhabdoviridae|g__Nucleorhabdovirus|s__Maize_mosaic_virus|t__PRJNA14920": 12133, "k__Archaea|p__Euryarchaeota|c__Halobacteria|o__Halobacteriales|f__Halobacteriaceae|g__Halorubrum|s__Halorubrum_sp_T3|t__GCF_000296615": 3168011, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae|g__Propionibacterium|s__Propionibacterium_acnes|t__GCF_000145075": 2614131, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000485655": 5022965, "k__Bacteria|p__Chlamydiae|c__Chlamydiia|o__Chlamydiales|f__Chlamydiaceae|g__Chlamydia|s__Chlamydia_trachomatis|t__GCF_000304515": 1042736, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Leuconostocaceae|g__Weissella|s__Weissella_koreensis|t__GCF_000277645": 1728940, "k__Bacteria|p__Spirochaetes|c__Spirochaetia|o__Spirochaetales|f__Leptospiraceae|g__Leptospira|s__Leptospira_interrogans|t__GCF_000244635": 4459519, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Buchnera|s__Buchnera_aphidicola|t__GCF_000007365": 641454, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus|s__Ruminococcus_sp|t__GCF_000209835": 3545606, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_baumannii|t__GCF_000302035": 3973165, "k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_mitis_oralis_pneumoniae|t__GCF_000495335": 2144825, 
"k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rickettsiales|f__Rickettsiaceae|g__Rickettsia|s__Rickettsia_parkeri|t__GCF_000284195": 1300386, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000316745": 5198097, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Escherichia|s__Escherichia_coli|t__GCF_000355055": 5070534, "k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Clostridiaceae|g__Candidatus_Arthromitus|s__Candidatus_Arthromitus_sp_SFB_2|t__GCF_000252685": 1135256, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Lactobacillus_phage_J_1|t__PRJNA227005": 40931, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Vibrionales|f__Vibrionaceae|g__Vibrio|s__Vibrio_cholerae|t__GCF_000305055": 3943387, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Caudovirales|f__Siphoviridae|g__Siphoviridae_noname|s__Bacillus_phage_Fah|t__PRJNA16382": 37974, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_johnsonii|t__GCF_000162055": 3690010, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae|g__Klebsiella|s__Klebsiella_pneumoniae|t__GCF_000409125": 5671251, "k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Viruses_noname|f__Viruses_noname|g__Viruses_noname|s__Geobacillus_virus_E2|t__PRJNA19797": 40863, "k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinobacteria_noname|f__Actinobacteria_noname|g__Actinobacteria_noname|s__actinobacterium_SCGC_AAA278_O22|t__GCF_000372185": 1138490, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Alteromonadales|f__Pseudoalteromonadaceae|g__Pseudoalteromonas|s__Pseudoalteromonas_sp_BSi20429|t__GCF_000238895": 4495777, 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_agalactiae|t__GCF_000427035": 2138694, "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__P'..b'teria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974163|ref|NZ_KE340313.1|:332123-333478": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1356, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096987|ref|NZ_JH636042.1|:382588-383040": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 453, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974745|ref|NZ_KE340318.1|:c38550-38392": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 159, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c349752-348871": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 882, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|514974549|ref|NZ_KE340315.1|:c186066-185032": {"ext": ["GCF_000374365"], "score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 1035, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|384096980|ref|NZ_JH636035.1|:50682-52622": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1941, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, 
"gi|223955876|ref|NZ_DS499677.1|:c358511-356622": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1890, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|223955871|ref|NZ_DS499672.1|:29963-30130": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 168, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|514973000|ref|NZ_KE340309.1|:c227121-226093": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 1029, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|512436172|ref|NZ_KE159460.1|:c266283-265069": {"ext": ["GCF_000159875"], "score": 1.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 1215, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|298384780|ref|NZ_GG774704.1|:496496-497020": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 525, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955871|ref|NZ_DS499672.1|:193538-194158": {"ext": [], "score": 0.0, "clade": "s__Bacteroides_stercoris", "len": 621, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}, "gi|298385318|ref|NZ_GG774705.1|:324177-324773": {"ext": ["GCF_000162515", "GCF_000159875"], "score": 2.0, "clade": "s__Bacteroides_thetaiotaomicron", "len": 597, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron"}, "gi|223955873|ref|NZ_DS499674.1|:c270435-267715": {"ext": ["GCF_000243215"], 
"score": 1.0, "clade": "s__Bacteroides_stercoris", "len": 2721, "taxon": "k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris"}}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/marker_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/marker_sequences.fasta Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,7704 @@\n+>gi|223955874|ref|NZ_DS499675.1|:82124-83086\n+ATGAAAAAATCGATACTGTTAATGGGCCTTGCAGCTCTCATAGCAAGCTGTTCGGACGAT\n+GACCGGAACAACGATACTGATAATCAGCATCTTGTCAGTCCCTCAATGGCTGTGAGCATC\n+AGCAACACCGACGGCCGCCAAAGCCCGTTTACCGGCATTCTTACAATTATGCCATGCGAT\n+GCAAACAGTTCCATATACTATGGAAATTATGTAAAAGGGAAACTGTCTCCTTTCTACGGC\n+TATTACCGGGTAAAGGACGGAAGTTTCCATGACAATTCCATCAATAGGGAGATTTCTCTG\n+CCGATAGGCACTTACAATATGATTTATTGGGGAACACCCCAATACCAAACTCCCATTTAT\n+GCCCACCCTGCCATAAAAGAGGCGGCTCACAGCATAGGTGCCGATATGTCCAAACAGTCT\n+CTCGGCTTGTTCAGAATATCAGCCGATACCATTTACTATCCCGTATTCGACCTGGTGTAC\n+GCCACACAGCCGGTAAACATCGGAAGCGAAAGCCTCAGTGGCTCTTTAAAACGAGTGGTA\n+GCCGGAATAAAGGTTGTCCTGAAGGACAAAGACAATGCCGTACTAAGTTCCAGTATAGAC\n+AGTGTGTCCGTACGCATCACCAACATTTACAGCGAACTCAACTATTATACGGGAAAACCG\n+CAAGGTACACCACGTACAATCGCGTTCCCTCTGATTCGTTCGAACGATGGTACTCAGATG\n+AGCAACAGCACCGTTATGCTGTTCCCGTCCGCCGGAAAACCGGAATTCCAACTAGCAATC\n+ATTCTTAAAAACGGTAATGTGAAAAGCTTCAGACAAGCATTGAGCAGCCCGCTCGATGCC\n+AATGCCAAACTGACATTAACGTTAAGTCTGGGTGATATTTTCTCTGAAGAATCCTCCGGT\n+GACTTTACCATCGATGACTGGAACGAGAAAAACGAGAATATTGATATACCGATAATAGAG\n+TAG\n+>gi|29345410|ref|NC_004663.1|:2392014-2392814\n+ATGAAAATTACAATTATAGGAGCCGGAAACATAGGGAGCGCAGTTGCTGCCTGTCTGGCA\n+AAAGGACATCTCTACAACGAAAAAGATATTATCATTTCTACCCCTCATACAGACAAGCTA\n+GAAAATCTGCACAAACAATTTCCTGCGATACGCATAATGACAGAGAATCAGTATGCCATA\n+TCGGAAGCCGACATTATTATTCTGGCTGTCAAACCATGTATAGTAGACGAAGTATTATCT\n+CCGTTACGATTCTCACGCACCCAAATCCTGGTTTCACTGGTAACCGGAATATCCATTTCT\n+CACCTGGCACATTTATCGGAAACCGAAATGCCTATATTCCGGGTTGTTCCAAACATCGCC\n+ATTACGGAACATTCGAGCCTGACCCTGATAACTTCACGCAAAGCCGGCAAAGAACACCAG\n+CAACTCATAAAACAGACATTTGAAGAAGGAGGAAAGTGTTTGTTCGCAGAAGAGAAACAG\n+CTGGATATCATTTCTGCATTAACTTCCAGTGGAATCGCTTTCGCATTAAAATATATTCAT\n+GCAGCCATGCAAGCCGGCATAGAATTGGGTATCTCCGCTGAAGATGCAATGCGAATGACG\n+GCATATAGCATGGAAGGAGCTACAGAACTGATTTTAAATCACGACACCCACCCGCTGTTA\n+GAAATAGAAAAAGCAGCTACTCCCGGTGGAGCCACCATTAAAGGATTGAATGAACTGGAG\n+CACAGGGGATTTACTTCCGCCGTTATTCATGCCATAAAAAGTAGCGCCACAGTATCGACC\n+GATAAAGAGACTGAAGAATAG\n+>gi|298385318|ref|NZ_GG77
4705.1|:275923-276495\n+ATGATAAATTCAATTAATATACAAATCAGAGAGACCAATACAGATGATTTCGACAGCATA\n+ATGACTGTTGAAAAACAAGCGTTTGGCTATGACAAAGAAGCACAGTTGGTAGCCGATCTG\n+CTGGCTGACAAAACTGCAAAACCAATGGTTTCGTTGCTCGCTTTCTACAAAGGTGAGGCT\n+GTCGGACACATTCTCTTCACCAGAGCCTATTTTGATGGTCAGGGAGCACAACAGATGATG\n+CACATTCTTGCACCTTTAGCTGTCAAGCCGGAATATCAACGGCAGGGTATAGGCGGAATG\n+CTGATACGGGCAGGTATAGAGAGGTTGCAGGAAAAAGGTTCGTGCCTTGTGTTTGTATTG\n+GGGCATAAAGAATATTATCCAAAATATGGTTTTATACCGGATGCAGCCAGGCTGGGTTAT\n+CCTGCTCCTTACCCGATACTGGAACAGTTCTCGGATTATTGGATGGTTCAGGCAATCAGT\n+CCGAAGGGATTTGATGTGGATAAAGGAAAGATCAGGTGTTCGGATGAGTTGAACAAACCT\n+GAACATTGGAGAGATGATGAATCGGACAGATAG\n+>gi|514973850|ref|NZ_KE340312.1|:c10221-7987\n+ATGAAACATTTGTCTGTTATTATATCTGTATTATTTTCAATACTTTGTATCGGATGCTCT\n+AAGGAGGATTTTAACGATGGAAAAACCAATGATGTTCAGAACAAGCATTCTCTCCGGCTT\n+TTGGTCTACACACCGACTTCAGAAACTGTTCTTAGTACGGACCTGCCGGGAAACATAGAG\n+GCTTATCTGTTCAAAGAGGGAGTTCTGTCCGACGTTTACAAAAATCTGACAGTGGATAAG\n+AACGGATACACTACAATCAGCTCGTTAGCCGAAGGAGAGCAAATCTATTTCTTTGTAAAC\n+ACCGGTAATCTGCTGGATGGGATTACACAGGAAATAGGCCGATTGAAAGAGAACGAACTG\n+CTTGCCACTACAATACTCTCTGCCTCTCCTCAAGCAGATGGAGAGAAACCGGTGATGACC\n+GGAAAGGCAGACTTAACCGGAAGCCAGGAGAGTACCACCCAAGTATTGCTTACACGGGCC\n+ATTGCCAGAGTAGACTTAAACATAGCTGACGATGCCGACATACAAATCAATAGGATAAGT\n+ATGGATAACATACATTGTGAAGCATTCCTGCTTCCGCAAAATCCGGTCAGTTCGCCGTCC\n+GGTGCAGCATTGGCAAAAATAGACACTACATTCAATACCCCGTTGAAGCCGGGTGAATAT\n+GCCGGATTGGTCCACCTGTATGAACAAGTGGGGGACGGTATTCCTGTGGAGCTGCATGGC\n+ACCCTTGAAGGCGATCCTGTCACACTTTCACTGGCACTGCCTAACACAATCCATAGAAAC\n+CACATTTATAAAATAAAGTTATTTAGCGGAGACAGTTCTAATTTGCAGGCAAGCATATCC\n+GTGGAAAATGAAAGTTGGGAAGTCGAAGAAACTATCACGGCAAAACCTTCTACCAATATT\n+CTGGTTAATAGTGAACTTTCTACTTTGGCTGAAGGGGCGTATATAAGTGCAACAAAAGAT\n+ACGGTTTACCTCCCAAGCAAGGAGTCAACGTCAATACTTGTATTGGATAAGGTGCCGGAA\n+GATGCGGAATTCACAATTGATGGGACAACAGCCTCCATTACTCCGTATACGGAAACTCGT\n+GCCGACCTTCAAGGGAAAAAGTTCTTGGTACGCAATTCTTGGAAAAAGCCGGGTACTAAG\n+ACGGAATATATGTACCTGAATATGCACAGCAAGAGGCACCCGGACTACTACAGTGGCCGG\n+TTGGTAATTGTTCAGTCCAATGCAACGACCTTCAAGGGAGAATTGTATAATCATCTTACA\n+AAC
ACTCCTCCCTATAACATTCATTTCAATAAATATGTCGACAGTGCTTTAGGCCAGATA\n+GAGGTACCT'..b'CGCTTTTCCTATAGGGGGAATTGGTGCAGGTATGTATTGT\n+CTGGAAGGAACAGGCTATATATCTCATATGTCAGTATGGCATCGACCGGAAGTTTTTCAT\n+GAACCGGGAATGTTTGCTGCTCTGTATGTGAAGGGGGTATGTAATGGGGCTAAGGTACTT\n+GAAGGACCTGTATCTGATTGGAGAAAATTTGGAATGCCCAATTACGGTACAGGAGGCAGT\n+ATGGGATCAATATTAGGACTTCCCCGTTTTGATACGGTTGAATTTGAAGCACGTTTTCCG\n+TTTGCCAAAGTTTCATTAACAGATAAAGATATCCCCGTTAAGGTAACCATTTTGGGGTGG\n+AGTCCTTTTATTCCGGGTGATCCGGATAATTCCAGTTTACCGGTAGGAGGATTGGAATAT\n+AGTTTAGAGAATACTAGCAAAGAGGTTCAGGAAACTATTTTCTCCTACCATGCTCGTAAC\n+TTTCTGAGTTCGGGTAAAGGATTGGATGCTATAAAAACGATGCCTCATGGGTTTATCCTT\n+TCACAGTCAGGTACAGAAACGGAGCCTCATTTGCAGGGTGATTTTGCTATCTTTACGGAT\n+CAGGATTCTTTAAAGATCAATTATTGTTGGTTTCGTGGAGGATGGTTTGATAGTCTTACG\n+ATGGTTTGGAACGCTATTGAGACAGGTTTAATGCCACAATCTCCAGCAATAGAGAAAGGT\n+GCACCAGGAGCTTCTATGTTTGTCCCGGTAACATTGATGCCAGGGGAAAAGAAAACAATC\n+AGAATTTATACGGCGTGGTATGTTCCTAACTCGACTTTAAGGCTTGGAAAGGAACCGGAA\n+GACTGGAATGACAATAATGTCGACTCCGCAAGACTAGCTGTAGAAAAGGCAGATAAGGGT\n+AATTATAAACCTTGGTATAGTAGCCGCTTTACAGGAGTGAATGAGGTTATTGATTATTTT\n+CTGTCTCATTATAAGATTTTGCGCAATCAGACGGAAAGGTTCACAGACTCTTTTTATCGT\n+TCTACCTTACCGCCTGAAGTGATTGAAGCTGTTTCTGCTAACTTAAGTATTTTAAAGTCT\n+CCAACGGTGATGCGGCAATATGATGGACGCTTATGGACTTGGGAAGGATGTGCCGATAAT\n+TGGGGCTCGTGTCATGGCTCATGTACTCATGTCTGGAATTATGCACAAGCTATTCCACAC\n+TTATTTCCTTCCTTGGAACGTTCGTTAAGGCATACTGAATTTGAAGAAGGGCAAGATTTG\n+AAAGGCCATCAAGTGTTTCGTGCCAATTTACCGATTCGTCCTACTCGGCACGACTTTCAC\n+TCAGCTGCTGACGGGCAATTGGGAGGTATAATGAAAGTATATCGTGAGTGGCGAATTTCA\n+GGCGATAATGAGTTCCTTATCTCTATGTATCCAAAAGTAAAAAAGAGTTTGGACTACTGT\n+ATCTCAACTTGGGATCCTCGTAGGGTAGGAAGTATTGAAGAACCACACCATAATACTTAT\n+GATATTGAGTTCTGGGGGCCGGATGGTATGCATAATAGTTTCTATTATGGAGCTTTATCA\n+GCTTTCATTCGTATGAGTGAGTTTCTTGATAAAGACGTTACTGAATATAAGAAACTATTG\n+AAAAAAGGACGAAAATTTACTGAAACAGGCTTGTTTAATGGTGAGTATTTTATCCAAAAG\n+ATAGAGTGGAGGGGATTGAATGCAAAAGATCCGACTGTTGCACAAAGTTTCCATAGTTCT\n+TATTCTCCCGAAGCGAAAGAAATACTGGAGAAGGAAGGTCCTAAATATCAGTATGGGAAC\n+GGTTGTCTGTCTGATGGAGTTTTGGGGTCGTGGCTCTCCCGAATGTGTGGGATGGA
AGAA\n+ACTCTGAATACAGAAAAAGTGAAAAGCCATTTATTATCAGTACATCGATATAATTTTAAA\n+AAAGATCTGACTGATCACGCCAATCCTCAACGTTCCCCTTACGCTTTAGGCAAAGAAGGA\n+GGCTTATTATTAGGAAGTTGGCCTAAAGGGAGCAAGTTGTCATTGCCTTTTGTCTATAGT\n+AATGAAGTCTGGACAGGAATAGAATATCAAGTAGCTTCACATTTGATGCTACAGGGAGAG\n+GTAGAAAAAGGCCTTGAGATCGTGCGTGCCTGTAGACAACGTTATGATGGAAGTGTCCGT\n+AACCCTTTTAATGAGTATGAGTGTGGACATTGGTACGGACGAGCGTTATCTAGTTATGGC\n+TTACTTCAAGGATTGACAGGAGTCCGATATGATGCTGTAGATAAAACACTGTATATTAAT\n+TCAAAAATAGGAGACTTTATTAGCTTTATCAGTACGGAGTCTGGATTTGGTAATGTAGAA\n+CTTCGTTCCGGAAAACCTTTTGTAAAAGTAGTATCCGGTCATATAGAGGTCGACAGATTT\n+ATCGTATCAGGGAAAGTAGTTGAATAA\n+>gi|298386634|ref|NZ_GG774708.1|:c143839-142661\n+ATGGAGCAAAAGAAAATCGTGCTATTCATCCTGATCATTCATCTGGCAGCATTTCTAGCA\n+GGATGCAGCGGAAACAAAAATAGCGGAAATAATGACTCCTCTGATCTATGGAATAAGTTA\n+TCGAGCTACTTCCGCACGCCCGCAGAATACGAAAATGTATATGGGAATTTTCGCTCTCCT\n+TTATTATATTATAATGGGGATACAGTCAGAACCGTTGAAGACTGGCAAAGACGACGAACT\n+GAAATCAAGGACAGATGGATGAGCCTGTTAGGACAATGGCCGCCTGTCATTACCGGACAA\n+ACATTTGAAATTCTGGATACTCTCCACCGTGAAAACTTCATGCAATACCGTGTCCGCTTC\n+TACTGGACTCCCAACGAACAGACTGAAGGTTATTTGCTGGTTCCGGACAAAGAAGGCAAA\n+AAGCCTGCCGTTATCACTACCTTTTATGAACCCGAAACGGCTATCGGATTGGGTGGAAAA\n+CCTTATAGAGACTTTGCATATCAATTGACGAAAAGAGGATTTGTCACATTATCAATAGGA\n+ACAACGAAGACCACAGAGAATCAGACATATTCCATTTATTACCCCAGCATTGAAAATGCA\n+ACTCTCCAACCACTTTCAGCATTAGCTTATGCAGCCGCAAATGCATGGGAAGTATTAGCC\n+AAAGTACAGGACGTCGATTCTACAAGAATAGGCATCACAGGGCATTCTTATGGTGGGAAG\n+TGGGCAATGTTTGCCTCATGCCTATACGAAAAGTTCGCTTGTGCGGCATGGGGAGACCCC\n+GGAATTGTATTCGACGAAACAAAAGAGGGATATATCAATTACTGGGAACCCTGGTATTTG\n+GGATATTATCCGCCACCATGGGAAAATACATGGAGTAAAAATGGGCATGATTATGCTAAA\n+GGCATTTATCCGAAGCTCCGCAAAGAAGGATATGATTTGCATGAATTGCATGCGCTGATG\n+GCACCTCGCCCATTTCTTGTTTCCGGAGGATACTCTGACGGAACAGACCGGTGGATAGCG\n+CTAAACCATACAATAGCGGTCAACCGGCTCTTAGGATACCGCAATAATGTCGCGATGAGC\n+AACAGAGTCAATCATGACCCAACCCCTGAATCAAACGAAATTATATATGATTTTTTTAAA\n+TGGTACTTGCATTCAGCAAATAAATCTACCAAAGAGTAG\n+>gi|223955875|ref|NZ_DS499676.1|:c386522-386337\n+TTGGCTTTTAGTTCTGTCCAGTTTTCTTATATTTTTGTAGAAAAAGTATGGGACAGG
TAT\n+ATAAAAACAGCCTGGGTTTACGTATTCAACGTAATCCCAGGCTGTTTTTACGGGTATTTG\n+CGGGGGCTTGTTTACCGATTGTTTTTTATTCCGCCAAAGTCGCCCTTCTACTTCAGTCCG\n+AAATAA\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/max_reduced_gene_family_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/max_reduced_gene_family_abundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,8420 @@\n+# Gene Family\tmax\n+UNMAPPED\t4531.0\n+UniRef90_A0A015N9J5\t4.4150110375\n+UniRef90_A0A015N9J5|g__Bacteroides.s__Bacteroides_dorei\t4.4150110375\n+UniRef90_A0A015P8C6\t5.7471264368\n+UniRef90_A0A015P8C6|g__Bacteroides.s__Bacteroides_dorei\t5.7471264368\n+UniRef90_A0A015P8Y2\t2.6560424967\n+UniRef90_A0A015P8Y2|g__Bacteroides.s__Bacteroides_dorei\t2.6560424967\n+UniRef90_A0A015PWK0\t3.7986704653\n+UniRef90_A0A015PWK0|g__Bacteroides.s__Bacteroides_dorei\t3.7986704653\n+UniRef90_A0A015QXQ5\t2.1505376344\n+UniRef90_A0A015QXQ5|g__Bacteroides.s__Bacteroides_dorei\t2.1505376344\n+UniRef90_A0A015SXD6\t3.7453183521\n+UniRef90_A0A015SXD6|g__Bacteroides.s__Bacteroides_dorei\t3.7453183521\n+UniRef90_A0A015TX99\t5.4330964066\n+UniRef90_A0A015TX99|g__Bacteroides.s__Bacteroides_dorei\t2.1436227224\n+UniRef90_A0A015TX99|g__Bacteroides.s__Bacteroides_vulgatus\t3.2894736842\n+UniRef90_A0A015TXS0\t1.5503875969\n+UniRef90_A0A015TXS0|g__Bacteroides.s__Bacteroides_dorei\t1.5503875969\n+UniRef90_A0A015U3E0\t1.872659176\n+UniRef90_A0A015U3E0|g__Bacteroides.s__Bacteroides_dorei\t1.872659176\n+UniRef90_A0A015U517\t4.016064257\n+UniRef90_A0A015U517|g__Bacteroides.s__Bacteroides_dorei\t4.016064257\n+UniRef90_A0A015UAM4\t2.8735632184\n+UniRef90_A0A015UAM4|g__Bacteroides.s__Bacteroides_dorei\t2.8735632184\n+UniRef90_A0A015UAR2\t4.2553191489\n+UniRef90_A0A015UAR2|g__Bacteroides.s__Bacteroides_dorei\t4.2553191489\n+UniRef90_A0A015UU70\t7.7519379845\n+UniRef90_A0A015UU70|g__Bacteroides.s__Bacteroides_dorei\t7.7519379845\n+UniRef90_A0A015V1U6\t1.239157373\n+UniRef90_A0A015V1U6|g__Bacteroides.s__Bacteroides_dorei\t1.239157373\n+UniRef90_A0A015V946\t11.4942528736\n+UniRef90_A0A015V946|g__Bacteroides.s__Bacteroides_dorei\t11.4942528736\n+UniRef90_A0A015V960\t2.1030494217\n+UniRef90_A0A015V960|g__Bacteroides.s__Bacteroides_dorei\t2.1030494217\n+UniRef90_A0A015VSY3\t8.4388185654\n+UniRef90_A0A015VSY3|g__Bacteroides.s__Bacteroides_dorei\t5.6258790436\n+UniRef90_A0A015VSY3|g__Bactero
ides.s__Bacteroides_vulgatus\t2.8129395218\n+UniRef90_A0A015VSZ2\t3.2588454376\n+UniRef90_A0A015VSZ2|g__Bacteroides.s__Bacteroides_dorei\t1.3966480447\n+UniRef90_A0A015VSZ2|g__Bacteroides.s__Bacteroides_vulgatus\t1.8621973929\n+UniRef90_A0A015WJ01\t5.7471264368\n+UniRef90_A0A015WJ01|g__Bacteroides.s__Bacteroides_vulgatus\t5.7471264368\n+UniRef90_A0A015X9A0\t2.624671916\n+UniRef90_A0A015X9A0|g__Bacteroides.s__Bacteroides_dorei\t2.624671916\n+UniRef90_A0A015XGT9\t1.7182130584\n+UniRef90_A0A015XGT9|g__Bacteroides.s__Bacteroides_dorei\t1.7182130584\n+UniRef90_A0A015XPQ3\t3.8535645472\n+UniRef90_A0A015XPQ3|g__Bacteroides.s__Bacteroides_vulgatus\t3.8535645472\n+UniRef90_A0A015XQT0\t3.4722222222\n+UniRef90_A0A015XQT0|g__Bacteroides.s__Bacteroides_vulgatus\t3.4722222222\n+UniRef90_A0A015Y5P7\t4.8309178744\n+UniRef90_A0A015Y5P7|g__Bacteroides.s__Bacteroides_dorei\t4.8309178744\n+UniRef90_A0A015YC48\t9.1324200913\n+UniRef90_A0A015YC48|g__Bacteroides.s__Bacteroides_dorei\t9.1324200913\n+UniRef90_A0A015YDI9\t5.291005291\n+UniRef90_A0A015YDI9|g__Bacteroides.s__Bacteroides_dorei\t5.291005291\n+UniRef90_A0A015YDJ3\t1.4814814815\n+UniRef90_A0A015YDJ3|g__Bacteroides.s__Bacteroides_dorei\t1.4814814815\n+UniRef90_A0A016AWW1\t1.3020833333\n+UniRef90_A0A016AWW1|g__Bacteroides.s__Bacteroides_dorei\t1.3020833333\n+UniRef90_A0A016B3G9\t2.5157232704\n+UniRef90_A0A016B3G9|g__Bacteroides.s__Bacteroides_dorei\t2.5157232704\n+UniRef90_A0A016C4A5\t1.996007984\n+UniRef90_A0A016C4A5|g__Bacteroides.s__Bacteroides_dorei\t1.996007984\n+UniRef90_A0A016CX89\t1.4306151645\n+UniRef90_A0A016CX89|g__Bacteroides.s__Bacteroides_dorei\t1.4306151645\n+UniRef90_A0A016GIZ2\t3.0581039755\n+UniRef90_A0A016GIZ2|g__Bacteroides.s__Bacteroides_dorei\t3.0581039755\n+UniRef90_A0A016LN29\t1.7683465959\n+UniRef90_A0A016LN29|g__Bacteroides.s__Bacteroides_dorei\t1.7683465959\n+UniRef90_A0A016NLU2\t2.743484225\n+UniRef90_A0A016NLU2|g__Bacteroides.s__Bacteroides_dorei\t2.743484225\n+UniRef90_A0A017P703\t1.0752688172\n+UniRef9
0_A0A017P703|g__Bacteroides.s__Bacteroides_dorei\t1.0752688172\n+UniRef90_A6KWB7\t3.7243947858\n+UniRef90_A6KWB7|g__Bacteroides.s__Bacteroides_dorei\t3.724'..b'eroides_dorei\t8.7719298246\n+UniRef90_R9ID01\t2.3980815348\n+UniRef90_R9ID01|g__Bacteroides.s__Bacteroides_dorei\t2.3980815348\n+UniRef90_R9ILG9\t0.5420054201\n+UniRef90_R9ILG9|g__Bacteroides.s__Bacteroides_vulgatus\t0.5420054201\n+UniRef90_S0FBR1\t1.2626262626\n+UniRef90_S0FBR1|g__Bacteroides.s__Bacteroides_dorei\t1.2626262626\n+UniRef90_S3ZHA6\t8.3597446144\n+UniRef90_S3ZHA6|g__Bacteroides.s__Bacteroides_dorei\t2.7100271003\n+UniRef90_S3ZHA6|g__Bacteroides.s__Bacteroides_vulgatus\t5.6497175141\n+UniRef90_T4HXA1\t1.2820512821\n+UniRef90_T4HXA1|g__Bacteroides.s__Bacteroides_dorei\t1.2820512821\n+UniRef90_U2CVX4\t7.1377587438\n+UniRef90_U2CVX4|g__Bacteroides.s__Bacteroides_dorei\t7.1377587438\n+UniRef90_U3KI10\t11.558989771\n+UniRef90_U3KI10|unclassified\t11.558989771\n+UniRef90_U3KP22\t11.6979338377\n+UniRef90_U3KP22|unclassified\t11.6979338377\n+UniRef90_U5C7R6\t19.0476190476\n+UniRef90_U5C7R6|g__Bacteroides.s__Bacteroides_vulgatus\t19.0476190476\n+UniRef90_U5FT06\t16.7693829744\n+UniRef90_U5FT06|unclassified\t16.7693829744\n+UniRef90_U6R8C4\t5.7471264368\n+UniRef90_U6R8C4|g__Bacteroides.s__Bacteroides_dorei\t2.8735632184\n+UniRef90_U6R8C4|g__Bacteroides.s__Bacteroides_vulgatus\t2.8735632184\n+UniRef90_U6R9S1\t2.1299254526\n+UniRef90_U6R9S1|g__Bacteroides.s__Bacteroides_vulgatus\t2.1299254526\n+UniRef90_U6RDV1\t5.0890585242\n+UniRef90_U6RDV1|g__Bacteroides.s__Bacteroides_dorei\t5.0890585242\n+UniRef90_U6RFA3\t4.4150110375\n+UniRef90_U6RFA3|g__Bacteroides.s__Bacteroides_vulgatus\t4.4150110375\n+UniRef90_U6RFL8\t5.4644808743\n+UniRef90_U6RFL8|g__Bacteroides.s__Bacteroides_vulgatus\t5.4644808743\n+UniRef90_U6RFX0\t3.3333333333\n+UniRef90_U6RFX0|g__Bacteroides.s__Bacteroides_dorei\t2.6666666667\n+UniRef90_U6RFX0|g__Bacteroides.s__Bacteroides_vulgatus\t0.6666666667\n+UniRef90_U6RGV5\t3.8461538462\n+UniRef90_U6RG
V5|g__Bacteroides.s__Bacteroides_vulgatus\t3.8461538462\n+UniRef90_U6RSC0\t2.4154589372\n+UniRef90_U6RSC0|g__Bacteroides.s__Bacteroides_dorei\t2.4154589372\n+UniRef90_UPI0004695389\t3.294892916\n+UniRef90_UPI0004695389|g__Bacteroides.s__Bacteroides_dorei\t3.294892916\n+UniRef90_UPI0004697FB6\t3.5938903863\n+UniRef90_UPI0004697FB6|g__Bacteroides.s__Bacteroides_vulgatus\t3.5938903863\n+UniRef90_UPI00046A51E2\t2.1459227468\n+UniRef90_UPI00046A51E2|g__Bacteroides.s__Bacteroides_vulgatus\t2.1459227468\n+UniRef90_UPI00046E868E\t5.6497175141\n+UniRef90_UPI00046E868E|g__Bacteroides.s__Bacteroides_dorei\t5.6497175141\n+UniRef90_V3KIQ1\t9.5054275569\n+UniRef90_V3KIQ1|unclassified\t9.5054275569\n+UniRef90_V5V2L3\t8.8892176814\n+UniRef90_V5V2L3|unclassified\t8.8892176814\n+UniRef90_W0ETG2\t2.0449897751\n+UniRef90_W0ETG2|g__Bacteroides.s__Bacteroides_vulgatus\t2.0449897751\n+UniRef90_W0EV06\t2.0325203252\n+UniRef90_W0EV06|g__Bacteroides.s__Bacteroides_dorei\t2.0325203252\n+UniRef90_W0EVY8\t5.1380860629\n+UniRef90_W0EVY8|g__Bacteroides.s__Bacteroides_dorei\t5.1380860629\n+UniRef90_W1IYL7\t12.1580837819\n+UniRef90_W1IYL7|unclassified\t12.1580837819\n+UniRef90_W7DVV5\t11.5904796756\n+UniRef90_W7DVV5|unclassified\t11.5904796756\n+UniRef90_W8YTG4\t12.6386317258\n+UniRef90_W8YTG4|unclassified\t12.6386317258\n+UniRef90_X5M7Z0\t9.0719420554\n+UniRef90_X5M7Z0|unclassified\t9.0719420554\n+UniRef90_X6L320\t8.6463137003\n+UniRef90_X6L320|unclassified\t8.6463137003\n+UniRef90_X7F2P9\t7.3126169751\n+UniRef90_X7F2P9|unclassified\t7.3126169751\n+UniRef90_Y0KIL9\t8.744795425\n+UniRef90_Y0KIL9|unclassified\t8.744795425\n+UniRef90_Z5XKU5\t11.3736016532\n+UniRef90_Z5XKU5|unclassified\t11.3736016532\n+UniRef90_Z5XTI9\t7.4649158985\n+UniRef90_Z5XTI9|unclassified\t7.4649158985\n+UniRef90_Z5XVM9\t10.665534346\n+UniRef90_Z5XVM9|unclassified\t10.665534346\n+UniRef90_Z9JLB1\t8.4767761266\n+UniRef90_Z9JLB1|unclassified\t8.4767761266\n+UniRef90_Z9JRB3\t9.1884719908\n+UniRef90_Z9JRB3|unclassified\t9.18847199
08\n+UniRef90_Z9JXD8\t9.3092450619\n+UniRef90_Z9JXD8|unclassified\t9.3092450619\n+UniRef90_Z9K4C5\t12.7438684783\n+UniRef90_Z9K4C5|unclassified\t12.7438684783\n+UniRef90_unknown\t1635.44829756\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_dorei\t883.211795734\n+UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus\t752.236501829\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/reduced_uniref50.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reduced_uniref50.fasta Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,1478 @@\n+>UniRef50_B0NQY6 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NQY6_BACSE\n+MYFRAQEVKNTSKSKRGKIRFTLILIMQETIYELYVAKVLIFLINGIERKEFSFGLRIFA\n+QS\n+>UniRef50_F3PHD0 Uncharacterized protein n=2 Tax=Bacteroides RepID=F3PHD0_9BACE\n+MIQTEFQHLLKPVPTAGIGDTKHWYEEYQPLAQTIPSLGTSNTGRWYERYQALEREFFPG\n+IYSKG\n+>UniRef50_B0NP96 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NP96_BACSE\n+MDIRQENAVINKDIAFLYHLCTRGTIIFNILRINSCRMYPFRKRLGTDISALKKRRFFFA\n+YFI\n+>UniRef50_B0NTS9 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NTS9_BACSE\n+MYFKDRMVSDRSPDTSSFFFLFQENRSCRIALKRFSLFNYLIINKMQINNLCYVNR\n+>UniRef50_Q8A9G3 Uncharacterized protein n=1 Tax=Bacteroides thetaiotaomicron (strain ATCC 29148 / DSM 2079 / NCTC 10582 / E50 / VPI-5482) RepID=Q8A9G3_BACTN\n+MTHILYHFAVCYVWQDSSGVIHVSGLDIVAGGYILRFSIMIDSNFYIGCSILFYMSSCLE\n+NDRFKCCHTVLMFFKCLKLH\n+>UniRef50_B0NV53 Uncharacterized protein n=3 Tax=Bacteroides stercoris RepID=B0NV53_BACSE\n+MLQQHVVFLVLQLRAVEPAVANPAVRSWQPQGPQLENGVFIVVRLHLSCCFSTKKNKESA\n+VFVWEKRVRLRQKRMEEAGIFYVELLL\n+>UniRef50_I0Q078 Uncharacterized protein n=2 Tax=Bacteroides RepID=I0Q078_9BACE\n+MFNQEWQNLRLLFKYSIKRATKTINQPLLLKTDLSPSLFIYDIVNHRKVPIITSIRCIKS\n+NREVFIY\n+>UniRef50_W0ESG7 DNA-binding protein n=14 RepID=W0ESG7_9PORP\n+MSNEIREKDHEWVKAFHSNFDRLLALLEKLLEKRQPSAYGDELLTDKEVAFLLKVSRRTL\n+QDYRNNGILPYTQVGGKILYRASDIEKTLMKGYKEAYKYKRN\n+>UniRef50_Q5LIQ7 Shikimate kinase n=97 RepID=AROK_BACFN\n+MIRIFLTGYMGAGKTTLGKALARELHIPFIDLDWYIEERFHKTVGELFSERGEASFRELE\n+KNMLHEVGEFEDVVISTGGGAPCFFDNMEYMNRVGTTVFLDVDPKVLFSRLRVAKQQRPI\n+LQGKKDDELLDFIVQALEKRAPFYRQANYIYCADKLEDRSQIETSVQQLRKLLNLHIAS\n+>UniRef50_B0NW87 Uncharacterized protein n=1 Tax=Bacteroides stercoris ATCC 43183 RepID=B0NW87_BACSE\n+MSASIEETVSLELYAPQKGARLISGIGDYGGFTHFDLNRPDSLGSHANPHFGNTNGVTGA\n+WLKQDLIVRVGTLFGHQPDAKTISYSEDGGRRWTMCATVPTEKSRNGHITVAADGSSWI\n+>UniRef50_Q8AB79 Excisionase n=8 Tax=Bacteroidales 
RepID=Q8AB79_BACTN\n+MNKELTFNDLPMVVAQLRDEVVGMKQMIVSLQSQNKPHKANTHIPMSVEEASAYLKMPMA\n+TLYMKLGNGSIPATKPGKRYCLYQDELDKWLETNRKNPVPLTAEEENAAILAGNKRKPKP\n+LNW\n+>UniRef50_Q8A980 Uncharacterized protein n=3 Tax=Bacteroides RepID=Q8A980_BACTN\n+MINKDEEVANRFSDPSKKLVEYVTDFSDMRDEDIELTKPIYENYGNFQLLNETGIIRTDE\n+EIKELYNCKGEFGSTDEVTDMSF\n+>UniRef50_Q8A9S1 2-amino-4-hydroxy-6-hydroxymethyldihydropteridine pyrophosphokinase n=25 Tax=Bacteroides RepID=Q8A9S1_BACTN\n+MTRLWDTARRKRLNCPKVLTLVPVRKHRTEKSSKKRDWTICLIKPLIVHKCIICIGSNYN\n+RKENLLLARRRLVDLFPTIRFTSEQETRPLFFRSPALFSNQVAMFFSEAEEERVRKELKA\n+IEQSAGRRPEDKKEEKVSLDIDLLSFDDRVLKPEDLKREYVVKGLEELKYNQI\n+>UniRef50_Q8A268 Uncharacterized protein n=2 Tax=Bacteroides RepID=Q8A268_BACTN\n+MNTIKTLLLLSFSILLFNCSDEEDKKTEKEFIFSASELKQTEWEGEFLYLTNGEIDSKGS\n+IKIVFYTEKKGVCEYKFDYHIDPETISFEYEISDKFMYIDGPLPIHGNWIQQKYNGNSLE\n+IADSKAAFTNSRMIKLTRVN\n+>UniRef50_B0NTK8 Uncharacterized protein n=3 RepID=B0NTK8_BACSE\n+MGEWIYNSSFDTVMDFAVTVDNVFKRYGQVEALQGVSLSVRPGELFGVIGPDGAGKTSLF\n+RILATLLLADEGKATVCGLDMVKDYKAIRQRVGYMPGRFSLYQDLTVEENLSFFATVFHT\n+TIEENYDLVRDIYRQIEPFRKRRAGALSGGMK\n+>UniRef50_B3ESB4 Transcription elongation factor GreA n=112 RepID=GREA_AMOA5\n+MEKVSYYTEEGLQRLKGELTQLKSEGRAKVAEQLSEARDKGDLSENAEYDAAKEAQEILE\n+RRIAKLEELMINARVINKDNINTSAVSILSKVKIKNKKLGKVSTYTMVSEEEADLKEGKI\n+SIESPIGKGLLGKKAGEVAIVEAPAGKIEFEILDISF\n+>UniRef50_E6SQ70 Uncharacterized protein n=31 Tax=Bacteroidales RepID=E6SQ70_BACT6\n+MIRYKKYQVTGEKSSLRGLWYARPLIEDTFDTEKLAKHMANHNTPYSAGLIKGVLTDMIS\n+CIKELILDGKNVKLDDLAIFSVGIVSKKGAASAEEFKVSDNVKSLKLRARATGELSNAQI\n+NLEGQLKEAALYTVTDSTTEGTPGGGSGPNNGSGDENENPLG\n+>UniRef50_E6SQW3 Uncharacterized protein n=125 RepID=E6SQW3_BACT6\n+MKKIVLLVCLLVATVAAQAQFEKGKWIVNPSVTGLELSHDTGTKKTTFGLEAKGGAFLVD\n+NVALLVNAGARWNDYGGDVDVYSLGVGGRYYIDAVGVYLGANVNVDRWDWGKDNDDTKFS\n+FGLEAGYAFFLSRTVTIEPAAYWNVNSDRSKFGLKVGFGFYF\n+>UniRef50_P0A864 Thiol peroxidase n=1538 
RepID=TPX_ECO57\n+MSQTVHFQGNPVTVANSIPQAGSKAQTFTLVAKDLSDVTLGQFAGKRKVLNIFPSIDTGV\n+CAASVRKFNQLATEIDNTVVLCISADLPFAQSRFCGAEGLNNVITLSTFRNAEFLQAYGV\n+AIADGPLKGLAARAVVVIDENDNVIFSQLVDEITTEPDYEAALAVLKA\n+>UniRef50_Q89YJ6 Histone-like bacteri'..b'ES\n+IQAVTDEENKALANQDNDFLRMLFGDVSQMAFSDLSELLKQARQLRSYLSGKDNKEGITF\n+ISPEQLKAIEESPEELDKLKKALDKLLGAGKKQNKWSNIFETFKTGFADLKSAQGFKEIS\n+GAIGMISGAAGQAAGEIATMFEAMGKDSAANVIGSLGEVLSSISNIGQAFATGGPVAGAF\n+AAVGEIFSLIGKGAQETAKHRQVLEDVMNDTIAQQREYNLLLLEQNLLYEKASTIFGVDS\n+YAKAENAVRVLKDAISDLNKELAGTTEQQKKFAYRKTGSVALDKVFNRNYSQSKDRYSGL\n+ADIEIKTGSYTTGAWFWKKQHDVYTSVLDVYPELIDANGEFNKELAESIINTREMSDEDK\n+AALQGMIDLAEQAEAAFDSLNDYMTDIFGELGGSMSHALVDAFKNGTDAAESFTQSVSEM\n+LETLAEQMIYSVTLGPLLEEAQKEMMTVMKNQNLTDEQRFSQWTNILKGLTNDAVAKQDE\n+AKQLYEAFRQSAGDMGFDVFSPDSTREASQKTGITASQDSVDKIDGMATTMMGHTYSINE\n+NVNRMANGIDSLLNYASSGLSLTTDIERTAKAIESQSRDALNHLANIDNYTSNLVEMREY\n+MYAVKNGIDTLNTKGLTLKR\n+>UniRef50_W4P379 Two-component system sensor histidine kinase/response n=2 Tax=Bacteroides RepID=W4P379_9BACE\n+MFSLKDIIFYLLFLCVGANFTFAASDQITFSHISINEGLSQSTVFSIDQDKRGNMWFATY\n+DGVNKYDGYAFTVYRHDESNPNSIPNDISRIVKADSRGRIWIGTRDGLSCYDEEKDQFKN\n+FFYQKKRGHISVNAIAEITPDQLLINTSEGLTLFDVKSSVFTAAPLNRQMRELPVSTLYR\n+SGNHIYIGTFNKGLFCYSVSERTLQKLTPALDDKQIQAVLQQSPTQVWVATEGHGLFLIN\n+PKTKKAKNYLHSASDSKSISSNYIRSLALDGQNRLWIGTLNDLNIFQEGTDSFISYGSSP\n+TENGSLSQRSVRSIFMDSQGGMWLGTYFGGLNYYHPIRNRFKNIRRIPYKNSLSDNVVSC\n+IVEDKDKNLWIGTNDGGLNLYNTANGQFTHYTLQESERERGMGSNNIKAVYLDEPGGLVY\n+IGTHAGGLNILHRSTGKVEHFDQKNSELINENVYAILPDEEGGLWLGTLGALVRFEPRKQ\n+SFTTVEKEKNGTLFTAKRITTLFRDSEKRLWIGGEEGISVYTQHKGELQRESVFPKSSIT\n+EAFVNCIYESSNGIFWIGTREGVYCFNEKKKKINRYTTAKGLPNNVVYGILEDSSGQIWL\n+STNRGIACFHPETEKFRNFTESDGLQSNQFTSSYCRTSTGQMYFGGIEGITTFRPELLLD\n+NPYAPPVVITRLQLFNKTVRPDDETGILTKNISETKRITLKSSQTAFSLEFVVSNYISGQ\n+HNTFAYQLEGYDKEWYYLTDKRTVSYSNLRQGTYHFHVKAANSDGKWNTTPTTLEIIVLP\n+VWYKTWWALLLFLIAFIAFLTFVFRFFWMRKSMAAELEMERRDKEQQEEINQMKMRFFIN\n+ISHELRTPLTLILAPLQEIINRISDRWTRNQLEYIGRNANRLLHLVNQLMDYRRAELGVF\n+ELKVKKGNAHRLVSENFRYYDKLARHKNIAYSLHSELEEKEMLFDPNY
LELILNNLLSNA\n+FKYTGNGQSITVTLKEDNGWLLLQVSDTGIGIPINKQGRYSNVFIRWKANMLAAASVCRW\n+YNVW\n+>UniRef50_E1WVX1 Uncharacterized protein n=7 RepID=E1WVX1_BACF6\n+MNMNYFSFYNKAKELLIDSLASLWFKGQAREQEYIKRVLTEDEPLFAEPVFQSIFPWEES\n+VYSFEAHSSKLGLLTSSFVNALSNEGIDKDLRFPLDRHPYKHQTESWRTMLSPRPQTIVV\n+TSGTGSGKTECFMIPVLQDLAKTNMKDCVQAIFLYPLNALMKSQQKRIHAWCKALPEKVT\n+YAIYNGETDKENRSDRYTAPHYPQLVTRPQIRRTPPQILFTNPTMLNYMLVRAEDHEILE\n+KSKGKLKWILLDEAHTYIGSSAAELSLQIRRVLDAFGVTIDQVNFAVTSATIGDESDPKT\n+MIKLKTFVSQLTGKPFEDIKIIGGKRIIPELNKGIAEEQLSKINKRFGIRLTYSDIERLR\n+KKLNSSPVLKVKEIGSILDKKIGINVDASLEIIDALGEKVKNLNEGSGFGALLPTRAHFF\n+VRSISGVYVCTNPDCQRHKGYRLPIGSLTTYQNINCPVCKSKMLELATCSSCGSPIVVGE\n+TSTTKGFRMHTNIIDLDNTLFYEQKEDLIDSEDMENIENVEQNEADGFSRFFFAIPEKLC\n+LRKNATCTSHIFNHRNGKIELAPENNESSKGITPLERGESTPVRYQSLRHSGDNHVLCPH\n+CGNNLSELKKLDYLRISATQIGRTLATLLLDNAEAIGSNDAGVVYEGRKYIAFTDSRQGS\n+ARSAMGLNQDVERSWIRASIFHKLADMRLNDVKPGGLTPDEEAEYNAYLSIRGCLPALLL\n+EKFKQLEEKKNGVPVIPSPEEVSWSQISQPLENDSNFRKLYEHVDKARGRKNFRNATDYL\n+KALLVDQFGWIPKRANSLETMGFVRLVYPTLKNAKCPTLLIQKRCTDTDWQNFLKICMDY\n+VIRGGRHYMLSGAYKDYLTQNKYCSPIYPSNSELRKNGNPVSKWFKVNVSQKGVDENQNR\n+LVLLLCAVLGYDDISQISQTKIADINSMLDAAWDFLKQNVLEATDAENQGYMLDLTGDKV\n+KLQLIEKGYLCPVDNVIIDTPFCGYSPRMNGYIGRENFDRFKIQTEFVNPFFPFKFAEQI\n+EENVTEWIEKNLFDQKAAGVFGVMNYRVLASKPIFISAEHSAQQSSEDLDRYEKEFNEGK\n+INILSCSTTMEMGVDISGITEVVMNNVPPKSSNYLQRAGRAGRRSETKALALTVCAPNPI\n+GTHTWNNPDYPITHVTETPLLKLESRQLIQRHVNAMVFASFVADQGGIRVTATLRDFFVK\n+AEGMSFYDKFLNYIDSVISGNVERLQRAYSKLIKGTSLAQITLPDAAQVVKKDIAAVHNV\n+FDAHNGALEKALESLRNESETANAIKAIEKQEENLLKTSMLSYLAENSFLPSAGIPIGLV\n+ECLLGGKEKVDGSSPTLHISQAISSYAPGNPVVKNEWVYEPSGIRLKTKYDDSTSRYIIQ\n+NCTHCGYTTITYGSAKTDCPKCGRHGTMHGIKDISLSIEQRFTEVVEPAAFSVAWDSTPI\n+RKMGTLGGMNFIQPILLEMDAWQPKTNSAKMSIRCSTPRSEILFYNKGASGYGYAFCPYC\n+GRMKSEKSPDSTERMMPHHKHLLASTPCLGGENDGAAVRRHVLLVGRYQTDFVEIKFHDK\n+NNNLIEDSETLYSLGVILSRKLTELLGVNDGEIEFGYDVINHSIFIYDTALGGAGYSLLF\n+REYKDEVLKMALEALERCDCERSCTKCLIDRRSQWYLNYLNRTKALEWLRQEIKARIAPK\n+EILRLIPDSHTVTSDITTEFYQLTRNKDISCIKIFVNDNISQWDAEAFPFKKILTELSLE\n+GVDVAFILPSVPDVKSLSSADS
ATLIAEVFKNDFKGLENTLPAELLPLMVVIMNDGTVKT\n+YFGKNIDTSYSKNWGSGDVFITTQLNSLSYADINRMQLLNTFSSGDTSFMFDYRITEHSS\n+LGHFFDSLKNPEVENWNRIVSNLQGKTVSVEYSDRYLKTPLGCMLLAKMISGLKNEADLN\n+VVSIKVIVTNIVSMDDSDVAVNAIKDFTNGEKRNLFLKNAISELTGIEPEIQDTGYVEHE\n+RCLTVKADNAELCIRPDAGIARGWVPFGRDNAECADCDFREDWNMDLELFNKQQRGAGIL\n+YTISYKQP\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/regrouped_gene_families_to_infogo1000.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_infogo1000.tsv Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,1393 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t15490.72\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t8949.382\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t6476.05\n+UNGROUPED|unclassified\t65.288\n+GO:0000015\t4.177\n+GO:0000015|g__Bacteroides.s__Bacteroides_dorei\t2.506\n+GO:0000015|g__Bacteroides.s__Bacteroides_vulgatus\t1.671\n+GO:0000027\t7.937\n+GO:0000027|g__Bacteroides.s__Bacteroides_dorei\t7.937\n+GO:0000049\t29.232\n+GO:0000049|g__Bacteroides.s__Bacteroides_dorei\t7.205\n+GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus\t22.027\n+GO:0000105\t22.302\n+GO:0000105|g__Bacteroides.s__Bacteroides_dorei\t9.093\n+GO:0000105|g__Bacteroides.s__Bacteroides_vulgatus\t13.209\n+GO:0000107\t5.077\n+GO:0000107|g__Bacteroides.s__Bacteroides_vulgatus\t5.077\n+GO:0000150\t29.481\n+GO:0000150|g__Bacteroides.s__Bacteroides_dorei\t14.86\n+GO:0000150|g__Bacteroides.s__Bacteroides_vulgatus\t14.62\n+GO:0000155\t98.896\n+GO:0000155|g__Bacteroides.s__Bacteroides_dorei\t45.55\n+GO:0000155|g__Bacteroides.s__Bacteroides_vulgatus\t53.346\n+GO:0000160\t71.811\n+GO:0000160|g__Bacteroides.s__Bacteroides_dorei\t43.683\n+GO:0000160|g__Bacteroides.s__Bacteroides_vulgatus\t28.128\n+GO:0000162\t6.418\n+GO:0000162|g__Bacteroides.s__Bacteroides_dorei\t1.115\n+GO:0000162|g__Bacteroides.s__Bacteroides_vulgatus\t5.303\n+GO:0000179\t5.376\n+GO:0000179|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0000179|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0000287\t144.517\n+GO:0000287|g__Bacteroides.s__Bacteroides_dorei\t69.932\n+GO:0000287|g__Bacteroides.s__Bacteroides_vulgatus\t60.803\n+GO:0000287|unclassified\t13.782\n+GO:0000453\t4.823\n+GO:0000453|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0000453|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0000723\t4.363\n+GO:0000723|g__Bacteroides.s__Bacteroides_dorei\t1.992\n+GO:0000723|g__Bacteroides.s__Bacteroides_vulgatus\t2.371\n+GO:0000917\t7.611\n+GO:0000917|g__Bacteroides.s__Bacteroides
_dorei\t5.952\n+GO:0000917|g__Bacteroides.s__Bacteroides_vulgatus\t1.658\n+GO:0000967\t3.115\n+GO:0000967|g__Bacteroides.s__Bacteroides_vulgatus\t3.115\n+GO:0002094\t2.525\n+GO:0002094|g__Bacteroides.s__Bacteroides_vulgatus\t2.525\n+GO:0002100\t5.952\n+GO:0002100|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0002161\t0.731\n+GO:0002161|g__Bacteroides.s__Bacteroides_vulgatus\t0.731\n+GO:0002935\t7.361\n+GO:0002935|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0002935|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0002949\t4.47\n+GO:0002949|g__Bacteroides.s__Bacteroides_dorei\t2.778\n+GO:0002949|g__Bacteroides.s__Bacteroides_vulgatus\t1.692\n+GO:0003684\t9.246\n+GO:0003684|g__Bacteroides.s__Bacteroides_dorei\t5.554\n+GO:0003684|g__Bacteroides.s__Bacteroides_vulgatus\t3.692\n+GO:0003697\t24.977\n+GO:0003697|g__Bacteroides.s__Bacteroides_dorei\t16.618\n+GO:0003697|g__Bacteroides.s__Bacteroides_vulgatus\t8.358\n+GO:0003725\t7.91\n+GO:0003725|g__Bacteroides.s__Bacteroides_dorei\t5.972\n+GO:0003725|g__Bacteroides.s__Bacteroides_vulgatus\t1.938\n+GO:0003727\t12.121\n+GO:0003727|g__Bacteroides.s__Bacteroides_dorei\t6.061\n+GO:0003727|g__Bacteroides.s__Bacteroides_vulgatus\t6.061\n+GO:0003729\t4.739\n+GO:0003729|g__Bacteroides.s__Bacteroides_vulgatus\t4.739\n+GO:0003735\t156.347\n+GO:0003735|g__Bacteroides.s__Bacteroides_dorei\t76.023\n+GO:0003735|g__Bacteroides.s__Bacteroides_vulgatus\t80.324\n+GO:0003743\t6.762\n+GO:0003743|g__Bacteroides.s__Bacteroides_dorei\t4.016\n+GO:0003743|g__Bacteroides.s__Bacteroides_vulgatus\t2.746\n+GO:0003746\t16.907\n+GO:0003746|g__Bacteroides.s__Bacteroides_dorei\t13.849\n+GO:0003746|g__Bacteroides.s__Bacteroides_vulgatus\t3.058\n+GO:0003755\t33.832\n+GO:0003755|g__Bacteroides.s__Bacteroides_dorei\t14.287\n+GO:0003755|g__Bacteroides.s__Bacteroides_vulgatus\t19.544\n+GO:0003796\t3.906\n+GO:0003796|g__Bacteroides.s__Bacteroides_dorei\t3.906\n+GO:0003848\t9.37\n+GO:0003848|g__Bacteroides.s__Bacteroides_vulgatus\t9.37\n+GO:0003852\t3.462
\n+GO:0003852|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+GO:0003852|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+GO:0003856\t4.154\n+GO:0003856|g__Bacteroides.s__Bacteroides_dorei\t4.154\n+G'..b's__Bacteroides_vulgatus\t13.172\n+GO:0048472\t4.796\n+GO:0048472|g__Bacteroides.s__Bacteroides_dorei\t2.398\n+GO:0048472|g__Bacteroides.s__Bacteroides_vulgatus\t2.398\n+GO:0048500\t7.353\n+GO:0048500|g__Bacteroides.s__Bacteroides_dorei\t3.268\n+GO:0048500|g__Bacteroides.s__Bacteroides_vulgatus\t4.085\n+GO:0050380\t1.425\n+GO:0050380|g__Bacteroides.s__Bacteroides_dorei\t1.425\n+GO:0050480\t3.49\n+GO:0050480|g__Bacteroides.s__Bacteroides_vulgatus\t3.49\n+GO:0050511\t8.721\n+GO:0050511|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0050511|g__Bacteroides.s__Bacteroides_vulgatus\t2.907\n+GO:0050570\t5.988\n+GO:0050570|g__Bacteroides.s__Bacteroides_dorei\t5.988\n+GO:0050577\t2.045\n+GO:0050577|g__Bacteroides.s__Bacteroides_vulgatus\t2.045\n+GO:0051073\t6.144\n+GO:0051073|g__Bacteroides.s__Bacteroides_dorei\t3.072\n+GO:0051073|g__Bacteroides.s__Bacteroides_vulgatus\t3.072\n+GO:0051205\t4.522\n+GO:0051205|g__Bacteroides.s__Bacteroides_dorei\t3.39\n+GO:0051205|g__Bacteroides.s__Bacteroides_vulgatus\t1.132\n+GO:0051537\t26.424\n+GO:0051537|g__Bacteroides.s__Bacteroides_dorei\t15.914\n+GO:0051537|g__Bacteroides.s__Bacteroides_vulgatus\t10.51\n+GO:0051539\t133.729\n+GO:0051539|g__Bacteroides.s__Bacteroides_dorei\t41.322\n+GO:0051539|g__Bacteroides.s__Bacteroides_vulgatus\t54.67\n+GO:0051539|unclassified\t37.738\n+GO:0051607\t2.165\n+GO:0051607|g__Bacteroides.s__Bacteroides_vulgatus\t2.165\n+GO:0051775\t3.623\n+GO:0051775|g__Bacteroides.s__Bacteroides_dorei\t3.623\n+GO:0051920\t25.421\n+GO:0051920|g__Bacteroides.s__Bacteroides_dorei\t12.187\n+GO:0051920|g__Bacteroides.s__Bacteroides_vulgatus\t13.234\n+GO:0051989\t7.465\n+GO:0051989|unclassified\t7.465\n+GO:0051991\t8.721\n+GO:0051991|g__Bacteroides.s__Bacteroides_dorei\t5.814\n+GO:0051991|g__Bacteroides.s__Bacteroides_vulgatus\t2.9
07\n+GO:0052381\t4.848\n+GO:0052381|g__Bacteroides.s__Bacteroides_dorei\t4.848\n+GO:0052692\t3.49\n+GO:0052692|g__Bacteroides.s__Bacteroides_dorei\t2.327\n+GO:0052692|g__Bacteroides.s__Bacteroides_vulgatus\t1.163\n+GO:0052717\t5.952\n+GO:0052717|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+GO:0052865\t5.301\n+GO:0052865|g__Bacteroides.s__Bacteroides_dorei\t4.699\n+GO:0052865|g__Bacteroides.s__Bacteroides_vulgatus\t0.602\n+GO:0052908\t5.376\n+GO:0052908|g__Bacteroides.s__Bacteroides_dorei\t2.688\n+GO:0052908|g__Bacteroides.s__Bacteroides_vulgatus\t2.688\n+GO:0061711\t1.086\n+GO:0061711|g__Bacteroides.s__Bacteroides_dorei\t1.086\n+GO:0070006\t5.507\n+GO:0070006|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070040\t7.361\n+GO:0070040|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070040|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070084\t5.507\n+GO:0070084|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+GO:0070181\t3.831\n+GO:0070181|g__Bacteroides.s__Bacteroides_dorei\t2.554\n+GO:0070181|g__Bacteroides.s__Bacteroides_vulgatus\t1.277\n+GO:0070204\t2.54\n+GO:0070204|g__Bacteroides.s__Bacteroides_vulgatus\t2.54\n+GO:0070401\t11.207\n+GO:0070401|g__Bacteroides.s__Bacteroides_dorei\t2.039\n+GO:0070401|g__Bacteroides.s__Bacteroides_vulgatus\t9.168\n+GO:0070402\t3.81\n+GO:0070402|g__Bacteroides.s__Bacteroides_dorei\t1.905\n+GO:0070402|g__Bacteroides.s__Bacteroides_vulgatus\t1.905\n+GO:0070403\t5.0\n+GO:0070403|g__Bacteroides.s__Bacteroides_vulgatus\t5.0\n+GO:0070475\t7.361\n+GO:0070475|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+GO:0070475|g__Bacteroides.s__Bacteroides_vulgatus\t5.258\n+GO:0070626\t2.404\n+GO:0070626|g__Bacteroides.s__Bacteroides_dorei\t0.801\n+GO:0070626|g__Bacteroides.s__Bacteroides_vulgatus\t1.603\n+GO:0070677\t4.823\n+GO:0070677|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+GO:0070677|g__Bacteroides.s__Bacteroides_vulgatus\t1.736\n+GO:0070814\t15.615\n+GO:0070814|g__Bacteroides.s__Bacteroides_dorei\t9.504\n+GO:0070814|g__Bacteroides.s_
_Bacteroides_vulgatus\t6.111\n+GO:0071436\t2.407\n+GO:0071436|g__Bacteroides.s__Bacteroides_vulgatus\t2.407\n+GO:0090071\t3.831\n+GO:0090071|g__Bacteroides.s__Bacteroides_dorei\t3.831\n+GO:0097264\t3.745\n+GO:0097264|g__Bacteroides.s__Bacteroides_dorei\t3.745\n+GO:1990077\t8.016\n+GO:1990077|g__Bacteroides.s__Bacteroides_dorei\t1.517\n+GO:1990077|g__Bacteroides.s__Bacteroides_vulgatus\t6.499\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/regrouped_gene_families_to_ko.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_ko.tsv Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,1203 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t5.228\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t4.38\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t3.959\n+UNGROUPED|unclassified\t10.529\n+K00012\t3.292\n+K00012|g__Bacteroides.s__Bacteroides_dorei\t3.292\n+K00013\t2.538\n+K00013|g__Bacteroides.s__Bacteroides_vulgatus\t2.538\n+K00014\t3.115\n+K00014|g__Bacteroides.s__Bacteroides_dorei\t3.115\n+K00018\t4.662\n+K00018|g__Bacteroides.s__Bacteroides_dorei\t3.497\n+K00018|g__Bacteroides.s__Bacteroides_vulgatus\t1.166\n+K00024\t2.808\n+K00024|g__Bacteroides.s__Bacteroides_vulgatus\t2.808\n+K00031\t3.82\n+K00031|g__Bacteroides.s__Bacteroides_dorei\t2.865\n+K00031|g__Bacteroides.s__Bacteroides_vulgatus\t0.955\n+K00033\t5.232\n+K00033|g__Bacteroides.s__Bacteroides_dorei\t0.747\n+K00033|g__Bacteroides.s__Bacteroides_vulgatus\t4.484\n+K00036\t1.418\n+K00036|g__Bacteroides.s__Bacteroides_vulgatus\t1.418\n+K00041\t4.474\n+K00041|g__Bacteroides.s__Bacteroides_dorei\t2.983\n+K00041|g__Bacteroides.s__Bacteroides_vulgatus\t1.491\n+K00046\t4.255\n+K00046|g__Bacteroides.s__Bacteroides_vulgatus\t4.255\n+K00052\t5.192\n+K00052|g__Bacteroides.s__Bacteroides_dorei\t2.077\n+K00052|g__Bacteroides.s__Bacteroides_vulgatus\t3.115\n+K00053\t3.175\n+K00053|g__Bacteroides.s__Bacteroides_dorei\t1.058\n+K00053|g__Bacteroides.s__Bacteroides_vulgatus\t2.116\n+K00059\t6.173\n+K00059|g__Bacteroides.s__Bacteroides_dorei\t3.086\n+K00059|g__Bacteroides.s__Bacteroides_vulgatus\t3.086\n+K00075\t7.65\n+K00075|g__Bacteroides.s__Bacteroides_dorei\t5.464\n+K00075|g__Bacteroides.s__Bacteroides_vulgatus\t2.186\n+K00097\t5.988\n+K00097|g__Bacteroides.s__Bacteroides_dorei\t5.988\n+K00099\t3.81\n+K00099|g__Bacteroides.s__Bacteroides_dorei\t1.905\n+K00099|g__Bacteroides.s__Bacteroides_vulgatus\t1.905\n+K00100\t0.929\n+K00100|g__Bacteroides.s__Bacteroides_dorei\t0.929\n+K00133\t3.289\n+K00133|g__Bacteroides.s__Bacteroides_dorei\t1.096\n+K00133|g__Bacteroides.s__Bact
eroides_vulgatus\t2.193\n+K00174\t3.017\n+K00174|g__Bacteroides.s__Bacteroides_dorei\t2.033\n+K00174|g__Bacteroides.s__Bacteroides_vulgatus\t4.002\n+K00175\t2.208\n+K00175|g__Bacteroides.s__Bacteroides_dorei\t1.104\n+K00175|g__Bacteroides.s__Bacteroides_vulgatus\t1.104\n+K00179\t3.953\n+K00179|g__Bacteroides.s__Bacteroides_dorei\t1.318\n+K00179|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+K00180\t6.211\n+K00180|g__Bacteroides.s__Bacteroides_dorei\t4.141\n+K00180|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+K00215\t4.566\n+K00215|g__Bacteroides.s__Bacteroides_dorei\t4.566\n+K00240\t16.743\n+K00240|g__Bacteroides.s__Bacteroides_dorei\t10.654\n+K00240|g__Bacteroides.s__Bacteroides_vulgatus\t6.088\n+K00257\t1.244\n+K00257|g__Bacteroides.s__Bacteroides_dorei\t0.622\n+K00257|g__Bacteroides.s__Bacteroides_vulgatus\t0.622\n+K00262\t4.043\n+K00262|g__Bacteroides.s__Bacteroides_dorei\t2.427\n+K00262|g__Bacteroides.s__Bacteroides_vulgatus\t2.83\n+K00297\t3.509\n+K00297|g__Bacteroides.s__Bacteroides_dorei\t1.754\n+K00297|g__Bacteroides.s__Bacteroides_vulgatus\t3.509\n+K00331\t4.016\n+K00331|g__Bacteroides.s__Bacteroides_dorei\t2.008\n+K00331|g__Bacteroides.s__Bacteroides_vulgatus\t2.008\n+K00337\t8.18\n+K00337|g__Bacteroides.s__Bacteroides_dorei\t4.09\n+K00337|g__Bacteroides.s__Bacteroides_vulgatus\t4.09\n+K00338\t5.65\n+K00338|g__Bacteroides.s__Bacteroides_dorei\t2.825\n+K00338|g__Bacteroides.s__Bacteroides_vulgatus\t2.825\n+K00339\t4.938\n+K00339|g__Bacteroides.s__Bacteroides_dorei\t4.938\n+K00342\t5.061\n+K00342|g__Bacteroides.s__Bacteroides_dorei\t1.446\n+K00342|g__Bacteroides.s__Bacteroides_vulgatus\t3.615\n+K00343\t3.023\n+K00343|g__Bacteroides.s__Bacteroides_dorei\t1.512\n+K00343|g__Bacteroides.s__Bacteroides_vulgatus\t1.512\n+K00346\t0.799\n+K00346|g__Bacteroides.s__Bacteroides_dorei\t0.799\n+K00347\t3.604\n+K00347|g__Bacteroides.s__Bacteroides_dorei\t1.802\n+K00347|g__Bacteroides.s__Bacteroides_vulgatus\t1.802\n+K00348\t3.472\n+K00348|g__Bacteroides.s__Bacteroides
_vulgatus\t3.472\n+K00349\t1.852\n+K00349|g__Bacteroides.s__Bacteroides_dorei\t1.852\n+K00351\t1.701\n+K00351|g__Bacteroides.s__Bacteroides_vulgatus\t1.701\n+K00367\t8.889\n+K00367|unclassified\t8.889\n+K00382\t3.221\n+K'..b's__Bacteroides_dorei\t1.705\n+K09691|g__Bacteroides.s__Bacteroides_vulgatus\t0.853\n+K09710\t3.831\n+K09710|g__Bacteroides.s__Bacteroides_dorei\t3.831\n+K09760\t0.88\n+K09760|g__Bacteroides.s__Bacteroides_vulgatus\t0.88\n+K09761\t1.658\n+K09761|g__Bacteroides.s__Bacteroides_dorei\t1.658\n+K09797\t6.633\n+K09797|g__Bacteroides.s__Bacteroides_dorei\t3.317\n+K09797|g__Bacteroides.s__Bacteroides_vulgatus\t3.317\n+K09808\t2.632\n+K09808|g__Bacteroides.s__Bacteroides_dorei\t1.754\n+K09808|g__Bacteroides.s__Bacteroides_vulgatus\t0.877\n+K09810\t5.348\n+K09810|g__Bacteroides.s__Bacteroides_dorei\t3.565\n+K09810|g__Bacteroides.s__Bacteroides_vulgatus\t1.783\n+K09811\t2.594\n+K09811|g__Bacteroides.s__Bacteroides_dorei\t1.297\n+K09811|g__Bacteroides.s__Bacteroides_vulgatus\t1.297\n+K09816\t6.64\n+K09816|g__Bacteroides.s__Bacteroides_dorei\t5.312\n+K09816|g__Bacteroides.s__Bacteroides_vulgatus\t1.328\n+K09903\t3.268\n+K09903|g__Bacteroides.s__Bacteroides_dorei\t3.268\n+K09922\t3.663\n+K09922|g__Bacteroides.s__Bacteroides_vulgatus\t3.663\n+K10206\t3.537\n+K10206|g__Bacteroides.s__Bacteroides_dorei\t1.768\n+K10206|g__Bacteroides.s__Bacteroides_vulgatus\t1.768\n+K10716\t7.905\n+K10716|g__Bacteroides.s__Bacteroides_dorei\t5.27\n+K10716|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+K11071\t5.722\n+K11071|g__Bacteroides.s__Bacteroides_dorei\t1.431\n+K11071|g__Bacteroides.s__Bacteroides_vulgatus\t4.292\n+K11085\t1.149\n+K11085|g__Bacteroides.s__Bacteroides_dorei\t0.575\n+K11085|g__Bacteroides.s__Bacteroides_vulgatus\t0.575\n+K11175\t8.333\n+K11175|g__Bacteroides.s__Bacteroides_dorei\t4.167\n+K11175|g__Bacteroides.s__Bacteroides_vulgatus\t4.167\n+K11527\t1.921\n+K11527|g__Bacteroides.s__Bacteroides_vulgatus\t1.921\n+K11537\t5.937\n+K11537|g__Bacteroides.s__Bacter
oides_dorei\t5.089\n+K11537|g__Bacteroides.s__Bacteroides_vulgatus\t0.848\n+K11720\t8.38\n+K11720|g__Bacteroides.s__Bacteroides_dorei\t2.793\n+K11720|g__Bacteroides.s__Bacteroides_vulgatus\t5.587\n+K11749\t5.622\n+K11749|g__Bacteroides.s__Bacteroides_dorei\t4.819\n+K11749|g__Bacteroides.s__Bacteroides_vulgatus\t0.803\n+K11934\t5.089\n+K11934|g__Bacteroides.s__Bacteroides_dorei\t5.089\n+K11991\t5.952\n+K11991|g__Bacteroides.s__Bacteroides_vulgatus\t5.952\n+K11996\t1.58\n+K11996|g__Bacteroides.s__Bacteroides_dorei\t1.58\n+K12340\t7.229\n+K12340|g__Bacteroides.s__Bacteroides_dorei\t1.606\n+K12340|g__Bacteroides.s__Bacteroides_vulgatus\t5.622\n+K12343\t1.502\n+K12343|g__Bacteroides.s__Bacteroides_vulgatus\t1.502\n+K12373\t1.242\n+K12373|g__Bacteroides.s__Bacteroides_dorei\t1.413\n+K12373|g__Bacteroides.s__Bacteroides_vulgatus\t0.9\n+K12410\t5.0\n+K12410|g__Bacteroides.s__Bacteroides_vulgatus\t5.0\n+K12467\t13.782\n+K12467|unclassified\t13.782\n+K13038\t1.821\n+K13038|g__Bacteroides.s__Bacteroides_dorei\t1.821\n+K13378\t4.107\n+K13378|g__Bacteroides.s__Bacteroides_dorei\t1.369\n+K13378|g__Bacteroides.s__Bacteroides_vulgatus\t2.738\n+K13747\t6.573\n+K13747|g__Bacteroides.s__Bacteroides_dorei\t1.878\n+K13747|g__Bacteroides.s__Bacteroides_vulgatus\t4.695\n+K14441\t3.333\n+K14441|g__Bacteroides.s__Bacteroides_dorei\t3.333\n+K14652\t0.896\n+K14652|g__Bacteroides.s__Bacteroides_dorei\t0.896\n+K15342\t2.165\n+K15342|g__Bacteroides.s__Bacteroides_vulgatus\t2.165\n+K15460\t1.65\n+K15460|g__Bacteroides.s__Bacteroides_vulgatus\t1.65\n+K15532\t2.732\n+K15532|g__Bacteroides.s__Bacteroides_dorei\t2.732\n+K15633\t4.944\n+K15633|g__Bacteroides.s__Bacteroides_dorei\t2.825\n+K15633|g__Bacteroides.s__Bacteroides_vulgatus\t2.119\n+K15923\t2.965\n+K15923|g__Bacteroides.s__Bacteroides_vulgatus\t2.965\n+K16089\t3.058\n+K16089|g__Bacteroides.s__Bacteroides_dorei\t0.51\n+K16089|g__Bacteroides.s__Bacteroides_vulgatus\t2.548\n+K17103\t4.926\n+K17103|g__Bacteroides.s__Bacteroides_dorei\t4.926\n+K17
828\t2.46\n+K17828|g__Bacteroides.s__Bacteroides_dorei\t1.23\n+K17828|g__Bacteroides.s__Bacteroides_vulgatus\t1.23\n+K18220\t2.189\n+K18220|g__Bacteroides.s__Bacteroides_dorei\t2.189\n+K18682\t2.784\n+K18682|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+K18682|g__Bacteroides.s__Bacteroides_vulgatus\t1.392\n+K18785\t1.107\n+K18785|g__Bacteroides.s__Bacteroides_vulgatus\t1.107\n+K19271\t3.724\n+K19271|g__Bacteroides.s__Bacteroides_dorei\t3.724\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/regrouped_gene_families_to_rxn.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/regrouped_gene_families_to_rxn.tsv Mon Mar 13 12:38:02 2017 -0400
b
b'@@ -0,0 +1,1051 @@\n+# Gene Family\tdemo_Abundance-RPKs\n+UNMAPPED\t4531.0\n+UNGROUPED\t17667.167\n+UNGROUPED|g__Bacteroides.s__Bacteroides_dorei\t10095.832\n+UNGROUPED|g__Bacteroides.s__Bacteroides_vulgatus\t7522.359\n+UNGROUPED|unclassified\t48.976\n+1.1.1.271-RXN\t2.045\n+1.1.1.271-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.045\n+1.2.7.8-RXN\t3.953\n+1.2.7.8-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.318\n+1.2.7.8-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+1.3.1.9-RXN\t5.27\n+1.3.1.9-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.635\n+1.3.1.9-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+1.5.1.20-RXN\t7.018\n+1.5.1.20-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.509\n+1.5.1.20-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t3.509\n+1.6.99.5-RXN\t33.721\n+1.6.99.5-RXN|g__Bacteroides.s__Bacteroides_dorei\t11.804\n+1.6.99.5-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t13.172\n+1.6.99.5-RXN|unclassified\t8.745\n+1.7.7.2-RXN\t8.889\n+1.7.7.2-RXN|unclassified\t8.889\n+1.8.1.4-RXN\t3.221\n+1.8.1.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.61\n+1.8.1.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.61\n+2-DEHYDROPANTOATE-REDUCT-RXN\t1.23\n+2-DEHYDROPANTOATE-REDUCT-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.23\n+2-ISOPROPYLMALATESYN-RXN\t3.462\n+2-ISOPROPYLMALATESYN-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.392\n+2-ISOPROPYLMALATESYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.07\n+2.3.1.179-RXN\t7.712\n+2.3.1.179-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.428\n+2.3.1.179-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.284\n+2.3.1.180-RXN\t2.2\n+2.3.1.180-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.1\n+2.3.1.180-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.1\n+2.5.1.64-RXN\t2.54\n+2.5.1.64-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.54\n+2.6.1.37-RXN\t6.061\n+2.6.1.37-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t6.061\n+2.7.7.33-RXN\t4.405\n+2.7.7.33-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.405\n+2.8.1.6-RXN\t2.323\n+2.8.1.6-RXN|g__Bacteroides.s__Bacteroides
_dorei\t2.323\n+2PGADEHYDRAT-RXN\t4.177\n+2PGADEHYDRAT-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.506\n+2PGADEHYDRAT-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.671\n+3-DEHYDROQUINATE-SYNTHASE-RXN\t4.154\n+3-DEHYDROQUINATE-SYNTHASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.154\n+3.1.11.6-RXN\t5.79\n+3.1.11.6-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.136\n+3.1.11.6-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.654\n+3.1.21.2-RXN\t6.775\n+3.1.21.2-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t6.775\n+3.1.22.4-RXN\t4.246\n+3.1.22.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.123\n+3.1.22.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.123\n+3.1.26.11-RXN\t2.575\n+3.1.26.11-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.203\n+3.1.26.11-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.372\n+3.1.26.3-RXN\t1.208\n+3.1.26.3-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.208\n+3.1.26.4-RXN\t1.972\n+3.1.26.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.972\n+3.2.1.23-RXN\t6.884\n+3.2.1.23-RXN|g__Bacteroides.s__Bacteroides_dorei\t5.582\n+3.2.1.23-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.302\n+3.2.1.89-RXN\t5.594\n+3.2.1.89-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.958\n+3.2.1.89-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.636\n+3.4.11.18-RXN\t5.507\n+3.4.11.18-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t5.507\n+3.4.11.4-RXN\t3.565\n+3.4.11.4-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.783\n+3.4.11.4-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.783\n+3.4.21.89-RXN\t10.09\n+3.4.21.89-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.463\n+3.4.21.89-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.627\n+3.4.23.36-RXN\t3.704\n+3.4.23.36-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.852\n+3.4.23.36-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.852\n+3.5.1.27-RXN\t8.772\n+3.5.1.27-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.386\n+3.5.1.27-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.386\n+3.5.1.88-RXN\t8.772\n+3.5.1.88-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.386\n+3.5.1.88-RXN|g__Bacteroides.s
__Bacteroides_vulgatus\t4.386\n+3.6.3.3-RXN\t2.479\n+3.6.3.3-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.862\n+3.6.3.3-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.617\n+3PGAREARR-RXN\t'..b'cteroides_dorei\t3.428\n+RXN1G-94|g__Bacteroides.s__Bacteroides_vulgatus\t4.284\n+RXN1G-962\t5.27\n+RXN1G-962|g__Bacteroides.s__Bacteroides_dorei\t2.635\n+RXN1G-962|g__Bacteroides.s__Bacteroides_vulgatus\t2.635\n+RXN66-469\t8.477\n+RXN66-469|unclassified\t8.477\n+RXN66-477\t8.477\n+RXN66-477|unclassified\t8.477\n+RXN66-480\t8.477\n+RXN66-480|unclassified\t8.477\n+RXN66-483\t8.477\n+RXN66-483|unclassified\t8.477\n+RXN66-484\t8.477\n+RXN66-484|unclassified\t8.477\n+RXNQT-4191\t1.475\n+RXNQT-4191|g__Bacteroides.s__Bacteroides_vulgatus\t1.475\n+S-ADENMETSYN-RXN\t2.513\n+S-ADENMETSYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.513\n+SHIKIMATE-KINASE-RXN\t9.324\n+SHIKIMATE-KINASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.662\n+SHIKIMATE-KINASE-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.662\n+SUCCINATE-DEHYDROGENASE-UBIQUINONE-RXN\t9.188\n+SUCCINATE-DEHYDROGENASE-UBIQUINONE-RXN|unclassified\t9.188\n+SULFATE-ADENYLYLTRANS-RXN\t7.863\n+SULFATE-ADENYLYLTRANS-RXN|g__Bacteroides.s__Bacteroides_dorei\t3.69\n+SULFATE-ADENYLYLTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t4.173\n+SULFOCYS-RXN\t2.347\n+SULFOCYS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.347\n+TDCEACT-RXN\t7.937\n+TDCEACT-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.937\n+TETRAACYLDISACC4KIN-RXN\t3.992\n+TETRAACYLDISACC4KIN-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.998\n+TETRAACYLDISACC4KIN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.994\n+THI-P-SYN-RXN\t1.894\n+THI-P-SYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.894\n+THIAZOLSYN2-RXN\t4.444\n+THIAZOLSYN2-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.963\n+THIAZOLSYN2-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.481\n+THIOREDOXIN-REDUCT-NADPH-RXN\t9.423\n+THIOREDOXIN-REDUCT-NADPH-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t9.423\n+TRANS-RXN-214\t13.436\n+TRANS-RXN-
214|g__Bacteroides.s__Bacteroides_dorei\t6.461\n+TRANS-RXN-214|g__Bacteroides.s__Bacteroides_vulgatus\t6.975\n+TRANSALDOL-RXN\t3.584\n+TRANSALDOL-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.792\n+TRANSALDOL-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.792\n+TRIOSEPISOMERIZATION-RXN\t9.132\n+TRIOSEPISOMERIZATION-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.522\n+TRIOSEPISOMERIZATION-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t7.61\n+TRNA-GUANINE-N7--METHYLTRANSFERASE-RXN\t1.515\n+TRNA-GUANINE-N7--METHYLTRANSFERASE-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.515\n+TRYPSYN-RXN\t18.92\n+TRYPSYN-RXN|g__Bacteroides.s__Bacteroides_dorei\t10.876\n+TRYPSYN-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t8.044\n+UDP-NACMUR-ALA-LIG-RXN\t3.13\n+UDP-NACMUR-ALA-LIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.347\n+UDP-NACMUR-ALA-LIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.782\n+UDP-NACMURALGLDAPAALIG-RXN\t6.667\n+UDP-NACMURALGLDAPAALIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t4.167\n+UDP-NACMURALGLDAPAALIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.5\n+UDP-NACMURALGLDAPLIG-RXN\t3.687\n+UDP-NACMURALGLDAPLIG-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.737\n+UDP-NACMURALGLDAPLIG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.95\n+UDPGLUCEPIM-RXN\t1.068\n+UDPGLUCEPIM-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.068\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN\t13.453\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN|g__Bacteroides.s__Bacteroides_dorei\t7.474\n+UDPNACETYLGLUCOSAMACYLTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t5.979\n+UDPNACETYLGLUCOSAMENOLPYRTRANS-RXN\t0.829\n+UDPNACETYLGLUCOSAMENOLPYRTRANS-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t0.829\n+UDPNACETYLMURAMATEDEHYDROG-RXN\t7.65\n+UDPNACETYLMURAMATEDEHYDROG-RXN|g__Bacteroides.s__Bacteroides_dorei\t5.464\n+UDPNACETYLMURAMATEDEHYDROG-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.186\n+UDPREDUCT-RXN\t3.155\n+UDPREDUCT-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.103\n+UDPREDUCT-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t1.052\n+UNDECAPRENYL-DI
PHOSPHATASE-RXN\t1.425\n+UNDECAPRENYL-DIPHOSPHATASE-RXN|g__Bacteroides.s__Bacteroides_dorei\t1.425\n+XANTHOSINEPHOSPHORY-RXN\t2.813\n+XANTHOSINEPHOSPHORY-RXN|g__Bacteroides.s__Bacteroides_vulgatus\t2.813\n+XMPXAN-RXN\t2.963\n+XMPXAN-RXN|g__Bacteroides.s__Bacteroides_dorei\t2.963\n+XYLISOM-RXN\t0.821\n+XYLISOM-RXN|g__Bacteroides.s__Bacteroides_dorei\t0.821\n'
b
diff -r 000000000000 -r a1747df2bc21 test-data/relab_levelwise_renormalized_pathway_abundance.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/relab_levelwise_renormalized_pathway_abundance.tsv Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,29 @@
+# Pathway demo_Abundance
+UNMAPPED 0.193449
+UNINTEGRATED 0.796985
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_vulgatus 0.584618
+UNINTEGRATED|g__Bacteroides.s__Bacteroides_dorei 0.393252
+UNINTEGRATED|unclassified 0.00936201
+PWY-5423: oleoresin monoterpene volatiles biosynthesis 0.00172203
+PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified 0.00276929
+PWY-6305: putrescine biosynthesis IV 0.00154401
+PWY-6305: putrescine biosynthesis IV|unclassified 0.002483
+PWY490-3: nitrate reduction VI (assimilatory) 0.001374
+PWY490-3: nitrate reduction VI (assimilatory)|unclassified 0.0022096
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 0.00133038
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 0.00180326
+COA-PWY-1: coenzyme A biosynthesis II (mammalian) 0.000795872
+COA-PWY-1: coenzyme A biosynthesis II (mammalian)|g__Bacteroides.s__Bacteroides_vulgatus 0.000963748
+PWY-6700: queuosine biosynthesis 0.000696312
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.00068396
+PWY-6700: queuosine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.000291624
+PWY-7111: pyruvate fermentation to isobutanol (engineered) 0.000395699
+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Bacteroides.s__Bacteroides_vulgatus 0.000476857
+VALSYN-PWY: L-valine biosynthesis 0.000395699
+VALSYN-PWY: L-valine biosynthesis|g__Bacteroides.s__Bacteroides_vulgatus 0.000476857
+PWY-5695: urate biosynthesis/inosine 5'-phosphate degradation 0.000358864
+PWY-5097: L-lysine biosynthesis VI 0.000333173
+PWY-2942: L-lysine biosynthesis III 0.000316201
+PWY-2942: L-lysine biosynthesis III|g__Bacteroides.s__Bacteroides_dorei 0.000358824
+PWY-7219: adenosine ribonucleotides de novo biosynthesis 0.000303519
+PWY-7219: adenosine ribonucleotides de novo biosynthesis|g__Bacteroides.s__Bacteroides_dorei 0.000250663
b
diff -r 000000000000 -r a1747df2bc21 test-data/taxonomic_profile.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxonomic_profile.tabular Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,11 @@
+#SampleID Metaphlan2_Analysis
+k__Bacteria 100.0
+k__Bacteria|p__Bacteroidetes 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides 100.0
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron 31.55632
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_stercoris|t__Bacteroides_stercoris_unclassified 68.44368
+k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_thetaiotaomicron|t__Bacteroides_thetaiotaomicron_unclassified 31.55632
b
diff -r 000000000000 -r a1747df2bc21 tool-data/humann2_nucleotide_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann2_nucleotide_database.loc.sample Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+#02_16_2014 ChocoPhlAn chocophlan /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 tool-data/humann2_protein_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/humann2_protein_database.loc.sample Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+#02_16_2014 Full UniRef50 uniref50_diamond /path/to/data
+#02_16_2014 EC-filtered UniRef50 uniref50_ec_filtered_diamond /path/to/data
+#02_16_2014 GO filtered UniRef50 for rapsearch2 uniref50_GO_filtered_rapsearch2 /path/to/data
+#02_16_2014 Full UniRef90 uniref90_diamond /path/to/data
+#02_16_2014 EC-filtered UniRef90 uniref90_ec_filtered_diamond /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 tool-data/metaphlan2_database.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_database.loc.sample Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,4 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of metagenomics files.  
+#file has this format (white space characters are TAB characters)
+#02_16_2014  MetaPhlAn2 clade-specific marker genes db_v20 /path/to/data
\ No newline at end of file
b
diff -r 000000000000 -r a1747df2bc21 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Mar 13 12:38:02 2017 -0400
b
@@ -0,0 +1,14 @@
+<tables>
+    <table name="metaphlan2_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/metaphlan2_database.loc" />
+    </table>
+    <table name="humann2_nucleotide_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann2_nucleotide_database.loc" />
+    </table>
+    <table name="humann2_protein_database" comment_char="#">
+        <columns>value, name, dbkey, path</columns>
+        <file path="tool-data/humann2_protein_database.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r a1747df2bc21 transform_json_to_pkl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py Mon Mar 13 12:38:02 2017 -0400
[
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import bz2
+import cPickle as pickle
+import json
+
+
+def transform_json_to_pkl(args):
+    with open(args.json_input, 'r') as json_file:
+        json_str = json_file.read()
+        metadata = json.loads(json_str)
+
+        for marker in metadata["markers"]:
+            a_set = set(metadata["markers"][marker]["ext"])
+            metadata["markers"][marker]["ext"] = a_set
+
+    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
+    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+    pkl_output.close()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--json_input', required=True)
+    parser.add_argument('--pkl_output', required=True)
+    args = parser.parse_args()
+
+    transform_json_to_pkl(args)