Repository 'combine_metaphlan2_humann2'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/combine_metaphlan2_humann2

Changeset 0:31394a0c0242 (2016-04-15)
Next changeset 1:e25efca0a49c (2020-09-14)
Commit message:
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/combine_metaphlan2_humann2 commit e6bee6545960c2a1ae3ca3031ec74d7c26d0b0ce-dirty
added:
combine_metaphlan2_humann2.py
combine_metaphlan2_humann2.xml
test-data/gene_families_output.tabular
test-data/humann2_gene_families_input.tabular
test-data/humann2_pathways_input.tabular
test-data/metaphlan2_input.txt
test-data/pathways_output.tabular
b
diff -r 000000000000 -r 31394a0c0242 combine_metaphlan2_humann2.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/combine_metaphlan2_humann2.py Fri Apr 15 09:15:21 2016 -0400
[
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import re
+
+def extract_clade_abundance(metaphlan2_filepath):
+    clade_abundance = {}
+    with open(metaphlan2_filepath, 'r') as metaphlan2_file:
+        for line in metaphlan2_file.readlines():
+            if line.find('g__') == -1:
+                continue
+
+            split_line = line[:-1].split('\t')
+            taxo = split_line[0]
+            abundance = split_line[1]
+
+            genus = taxo[(taxo.find('g__')+3):]
+            if genus.find('|') != -1:
+                genus = genus[:(genus.find('|'))]
+            clade_abundance.setdefault(genus, {'abundance':0, 'species':{}})
+            if taxo.find('t__') != -1:
+                continue
+            elif taxo.find('s__') != -1:
+                species = taxo[(taxo.find('s__')+3):]
+                clade_abundance[genus]['species'].setdefault(species, abundance)
+            else:
+                clade_abundance[genus]['abundance'] = abundance
+    return clade_abundance
+
+def compute_overall_abundance(humann2_file):
+    overall_abundance = 0
+    with open(args.humann2_file, 'r') as humann2_file:
+        for line in humann2_file.readlines():
+            if line.find('|') != -1 or line.startswith('#'):
+                continue
+            split_line = line[:-1].split('\t')
+            overall_abundance += float(split_line[1])
+    return overall_abundance
+
+def format_characteristic_name(name):
+    formatted_name = name
+    formatted_name = formatted_name.replace('/',' ')
+    formatted_name = formatted_name.replace('-',' ')
+    formatted_name = formatted_name.replace("'",'')
+    if formatted_name.find('(') != -1 and formatted_name.find(')') != -1:
+        open_bracket = formatted_name.find('(')
+        close_bracket = formatted_name.find(')')+1
+        formatted_name = formatted_name[:open_bracket] + formatted_name[close_bracket:]
+    return formatted_name
+
+def combine_metaphlan2_humann2(args):
+    clade_abundance = extract_clade_abundance(args.metaphlan2_file)
+    overall_abundance = compute_overall_abundance(args.humann2_file)
+
+    with open(args.output_file, 'w') as output_file:
+        output_file.write('genus\t')
+        output_file.write('genus_abundance\t')
+        output_file.write('species\t')
+        output_file.write('species_abundance\t')
+        output_file.write(args.type + '_id\t')
+        output_file.write(args.type + '_name\t')
+        output_file.write(args.type + '_abundance\n')
+        with open(args.humann2_file, 'r') as humann2_file:
+            for line in humann2_file.readlines():
+                if line.find('|') == -1:
+                    continue
+
+                split_line = line[:-1].split('\t')
+                abundance = 100*float(split_line[1])/overall_abundance
+                annotation = split_line[0].split('|')
+                characteristic = annotation[0].split(':')
+                characteristic_id = characteristic[0]
+                characteristic_name = ''
+                if len(characteristic) > 1:
+                    characteristic_name = format_characteristic_name(characteristic[-1])
+                taxo = annotation[1].split('.')
+                
+                if taxo[0] == 'unclassified':
+                    continue
+                genus = taxo[0][3:]
+                species = taxo[1][3:]
+
+                if not clade_abundance.has_key(genus):
+                    print "no", genus, "found in", args.metaphlan2_file
+                    continue
+                if not clade_abundance[genus]['species'].has_key(species):
+                    print "no", species, "found in", args.metaphlan2_file,
+                    print "for", genus
+                    continue
+                output_file.write(genus + '\t')
+                output_file.write(clade_abundance[genus]['abundance'] + '\t')
+                output_file.write(species + '\t')
+                output_file.write(clade_abundance[genus]['species'][species] + '\t')
+                output_file.write(characteristic_id + '\t')
+                output_file.write(characteristic_name + '\t')
+                output_file.write(str(abundance) + '\n')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--humann2_file', required=True)
+    parser.add_argument('--metaphlan2_file', required=True)
+    parser.add_argument('--output_file', required=True)
+    parser.add_argument('--type', required=True, 
+        choices = ['gene_families','pathways'])
+    args = parser.parse_args()
+
+    combine_metaphlan2_humann2(args)
\ No newline at end of file
b
diff -r 000000000000 -r 31394a0c0242 combine_metaphlan2_humann2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/combine_metaphlan2_humann2.xml Fri Apr 15 09:15:21 2016 -0400
[
@@ -0,0 +1,74 @@
+<tool id="combine_metaphlan2_humann2" name="Combine MetaPhlAn2 and HUMAnN2 outputs" version="0.1.0">
+    <description>to relate genus/species abundances and gene families/pathways abundances</description>
+
+    <requirements>
+    </requirements>
+
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+
+    <version_command></version_command>
+
+    <command><![CDATA[
+        python $__tool_directory__/combine_metaphlan2_humann2.py 
+            --metaphlan2_file $metaphlan2_file
+            --humann2_file $humann2_file
+            --type $type    
+
+            #if str($type) == 'gene_families'
+                --output_file $gene_families_output_file
+            #else
+                --output_file $pathway_output_file
+            #end if
+    ]]></command>
+
+    <inputs>
+        <param name="metaphlan2_file" format="txt,tabular" type="data" label="Input file corresponding to MetaPhlAn2 output" help="The MetaPhlAn2 output file contains relative abundance of clades at different taxonomic levels (--metaphlan2_file)"/>
+        
+        <param name="humann2_file" format="txt,tabular" type="data" label="Input file corresponding to HUMAnN2 output" help="The HUMAnN2 output file contains relative abundance of gene families or pathways with corresponding taxonomic stratification (--humann2_file)"/>
+
+        <param name='type' type="select" label="Type of characteristics in HUMAnN2 file" help="(--type)">
+            <option value="gene_families" selected="true">Gene families</option>
+            <option value="pathways">Pathways</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="gene_families_output_file" format="tabular"
+            label="${tool.name} on ${on_string}: Gene family abundances related to genus/species abundances" >
+            <filter>type=="gene_families"</filter>
+        </data>
+        <data name="pathway_output_file" format="tabular"
+            label="${tool.name} on ${on_string}: Pathway abundances related to genus/species abundances" >
+            <filter>type=="pathways"</filter>
+        </data>
+    </outputs>
+
+    <tests>
+     <test>
+      <param name="metaphlan2_file" value="metaphlan2_input.txt"/>
+      <param name="humann2_file" value="humann2_gene_families_input.tabular"/>
+      <param name='type' value="gene_families"/>
+      <output name="gene_families_output_file" file="gene_families_output.tabular"/>
+     </test>
+     <test>
+      <param name="metaphlan2_file" value="metaphlan2_input.txt"/>
+      <param name="humann2_file" value="humann2_pathways_input.tabular"/>
+      <param name='type' value="pathways"/>
+      <output name="pathway_output_file" file="pathways_output.tabular"/>
+     </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This tool combines MetaPhlAn2 and HUMAnN2 outputs.
+
+For each gene family/pathway and its corresponding taxonomic stratification, you get the relative abundance of this gene family/pathway and the relative abundances of the corresponding species and genus.
+    ]]></help>
+
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 31394a0c0242 test-data/gene_families_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_families_output.tabular Fri Apr 15 09:15:21 2016 -0400
b
b'@@ -0,0 +1,29434 @@\n+genus\tgenus_abundance\tspecies\tspecies_abundance\tgene_families_id\tgene_families_name\tgene_families_abundance\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_P19529\t Replication initiation protein\t0.40728906083\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_P19529\t Replication initiation protein\t0.261480825839\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_P19529\t Replication initiation protein\t0.188582893984\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_Q5HJZ6\t Plasmid recombination enzyme type 3\t0.338670908045\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Q5HJZ6\t Plasmid recombination enzyme type 3\t0.298445310895\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_Q5HJZ6\t Plasmid recombination enzyme type 3\t0.0233463103488\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_P02983\t Tetracycline resistance protein\t0.25596042462\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_P02983\t Tetracycline resistance protein\t0.255300007484\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Q93GF3\t Rep\t0.322388303126\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_Q93GF3\t Rep\t0.0431524470097\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_V6QG63\t Integrase\t0.303352524824\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_W1W6K4\t\t0.247856118482\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_D4FM51\t Plasmid recombination enzyme\t0.214063812113\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Z6ILY0\t\t0.196787157891\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_F0P516\t Replication initiation protein, 
truncated\t0.11963734642\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_F0P516\t Replication initiation protein, truncated\t0.0759957583321\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Q8CU99\t\t0.173070998557\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_P14491\t Protein rlx\t0.172105258916\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_P14491\t Protein rlx\t0.00010650270839\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_P18358\t Transposon Tn552 resolvase\t0.132196365122\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_P18358\t Transposon Tn552 resolvase\t0.0309505071873\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Q5HRN3\t ISSep1 like transposase\t0.125576550449\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_Q5HRN3\t ISSep1 like transposase\t0.0183157071851\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_Q5HRN3\t ISSep1 like transposase\t0.00112799242136\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_Q4L351\t Staphylococcus haemolyticus JCSC1435 DNA, complete genome\t0.130782524407\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tUniRef50_Q3J5T4\t 50S ribosomal protein L11\t0.12410365529\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_N6A8S2\t\t0.113249904113\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_L7WXY9\t\t0.107571179124\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_P18357\t Regulatory protein BlaR1\t0.0712959295517\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_P18357\t Regulatory protein BlaR1\t0.0277440503984\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_P18357\t Regulatory protein 
BlaR1\t0.0084931424066\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_K0LDS3\t\t0.104070365649\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tUniRef50_I3U5U5\t Mobilization protein C\t0.0997088242247\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_I3U5U5\t Mobilization protein C\t0.00114228510186\n+Staphylococcus\t55.38558\tStap'..b'500697096e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_E3D229\t Chromosome partition protein Smc\t3.18199078347e-05\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_G0LRQ0\t Serine aspartate repeat containing protein D\t3.14433408727e-05\n+Deinococcus\t0.30974\tDeinococcus_radiodurans\t0.15196\tUniRef50_Q9RUP8\t Isoleucine  tRNA ligase\t3.12100672473e-05\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tUniRef50_C1DLW9\t\t3.09897383496e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_A3M2F1\t IcmB protein\t3.0951737447e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_F0MM85\t Hemagglutinin hemolysin family protein\t3.06557363026e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_P58402\t Sensor protein EvgS\t3.01027333145e-05\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tUniRef50_Q02GC2\t Type 4 fimbrial biogenesis protein PilY1\t3.00132483779e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_J4VP36\t DNA polymerase III, alpha subunit\t2.97216860599e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_Q9JVX8\t DNA polymerase III subunit alpha\t2.94357339795e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_A0A009XXP0\t RecF RecN SMC N terminal domain protein\t2.92905620962e-05\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_F9YY53\t\t2.92566118618e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_B0V8Z8\t ATP dependent dsDNA exonuclease 
\t2.88552636616e-05\n+Helicobacter\t0.07678\tHelicobacter_pylori\t0.07678\tUniRef50_I9V7J9\t Outer membrane protein HopL\t2.85775736026e-05\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_Q6ABR7\t Cobalamin biosynthesis protein CobN\t2.8553327931e-05\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tUniRef50_A6V8C8\t\t2.82972442692e-05\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_G8VL08\t ATP dependent helicase HrpA\t2.82418370752e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_W1C828\t Molybdate metabolism regulator\t2.79757737727e-05\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tUniRef50_I3GYQ4\t\t2.73839347839e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_Q8FJC7\t DNA translocase FtsK\t2.65486635693e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_G9W1N9\t Enterobactin synthetase component F\t2.6238193233e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_F0N1G5\t Transcription repair coupling factor\t2.58949903961e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_C9WZ56\t Transcription repair coupling factor\t2.48987836093e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_E2QDF8\t Phage lambda related protein, Side tail fiber protein homolog\t2.4770502341e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_L4JUW1\t Adhesin invasin\t2.44170659171e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_R0UE42\t Filamentous hemagglutinin family N terminal domain protein\t2.43934688419e-05\n+Deinococcus\t0.30974\tDeinococcus_radiodurans\t0.15196\tUniRef50_Q9RWI1\t\t2.27141884239e-05\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUniRef50_F9NYB1\t RHS repeat associated core domain protein\t2.15782362763e-05\n+Methanobrevibacter\t6.51285\tMethanobrevibacter_smithii\t6.51285\tUniRef50_A5UKU3\t Adhesin like 
protein\t1.83906316145e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_V5VA04\t Hemolysin type calcium binding domain containing protein\t1.83471865409e-05\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tUniRef50_R0YL20\t Filamentous hemagglutinin family N terminal domain protein\t1.82246511863e-05\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tUniRef50_A3M865\t\t1.69091555151e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_A0A024L957\t\t1.60746007854e-05\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tUniRef50_U6ANV5\t\t1.52169657171e-05\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tUniRef50_M4JLP2\t\t1.50648414743e-05\n'
b
diff -r 000000000000 -r 31394a0c0242 test-data/humann2_gene_families_input.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann2_gene_families_input.tabular Fri Apr 15 09:15:21 2016 -0400
b
b'@@ -0,0 +1,204097 @@\n+# Gene Family\thumann2_Abundance\n+UniRef50_P19529: Replication initiation protein\t8491.6203916670\n+UniRef50_P19529: Replication initiation protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t4033.9801448086\n+UniRef50_P19529: Replication initiation protein|g__Staphylococcus.s__Staphylococcus_aureus\t2589.8276215214\n+UniRef50_P19529: Replication initiation protein|g__Propionibacterium.s__Propionibacterium_acnes\t1867.8126253369\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3\t6541.5278380159\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Staphylococcus.s__Staphylococcus_aureus\t3354.3540695429\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Staphylococcus.s__Staphylococcus_epidermidis\t2955.9410606524\n+UniRef50_Q5HJZ6: Plasmid recombination enzyme type 3|g__Escherichia.s__Escherichia_coli\t231.2327078206\n+UniRef50_P02983: Tetracycline resistance protein\t5063.7609262711\n+UniRef50_P02983: Tetracycline resistance protein|g__Staphylococcus.s__Staphylococcus_aureus\t2535.1510022555\n+UniRef50_P02983: Tetracycline resistance protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t2528.6099240155\n+UniRef50_Q93GF3: Rep\t3620.4854732918\n+UniRef50_Q93GF3: Rep|g__Staphylococcus.s__Staphylococcus_epidermidis\t3193.0835831399\n+UniRef50_Q93GF3: Rep|g__Staphylococcus.s__Staphylococcus_aureus\t427.4018901518\n+UniRef50_V6QG63: Integrase\t3006.6923019119\n+UniRef50_V6QG63: Integrase|g__Staphylococcus.s__Staphylococcus_epidermidis\t3004.5443880088\n+UniRef50_V6QG63: Integrase|unclassified\t2.1479139031\n+UniRef50_W1W6K4\t2456.8919290157\n+UniRef50_W1W6K4|g__Staphylococcus.s__Staphylococcus_epidermidis\t2454.8821878197\n+UniRef50_W1W6K4|unclassified\t2.0097411960\n+UniRef50_D4FM51: Plasmid recombination enzyme\t2120.1874806725\n+UniRef50_D4FM51: Plasmid recombination 
enzyme|g__Staphylococcus.s__Staphylococcus_epidermidis\t2120.1874806725\n+UniRef50_Z6ILY0\t2001.4420730481\n+UniRef50_Z6ILY0|g__Staphylococcus.s__Staphylococcus_epidermidis\t1949.0714679824\n+UniRef50_Z6ILY0|unclassified\t52.3706050657\n+UniRef50_F0P516: Replication initiation protein, truncated\t1937.6411893465\n+UniRef50_F0P516: Replication initiation protein, truncated|g__Staphylococcus.s__Staphylococcus_epidermidis\t1184.9438800291\n+UniRef50_F0P516: Replication initiation protein, truncated|g__Staphylococcus.s__Staphylococcus_aureus\t752.6973093174\n+UniRef50_Q8CU99\t1714.1756039229\n+UniRef50_Q8CU99|g__Staphylococcus.s__Staphylococcus_epidermidis\t1714.1756039229\n+UniRef50_P14491: Protein rlx\t1713.0351893643\n+UniRef50_P14491: Protein rlx|g__Staphylococcus.s__Staphylococcus_epidermidis\t1704.6104696986\n+UniRef50_P14491: Protein rlx|unclassified\t7.3698673450\n+UniRef50_P14491: Protein rlx|g__Staphylococcus.s__Staphylococcus_aureus\t1.0548523207\n+UniRef50_P18358: Transposon Tn552 resolvase\t1615.8824453625\n+UniRef50_P18358: Transposon Tn552 resolvase|g__Staphylococcus.s__Staphylococcus_aureus\t1309.3342380259\n+UniRef50_P18358: Transposon Tn552 resolvase|g__Staphylococcus.s__Staphylococcus_epidermidis\t306.5482073366\n+UniRef50_Q5HRN3: ISSep1-like transposase\t1436.3479542640\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Staphylococcus.s__Staphylococcus_epidermidis\t1243.7685169618\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Staphylococcus.s__Staphylococcus_aureus\t181.4072761300\n+UniRef50_Q5HRN3: ISSep1-like transposase|g__Propionibacterium.s__Propionibacterium_acnes\t11.1721611722\n+UniRef50_Q4L351: Staphylococcus haemolyticus JCSC1435 DNA, complete genome\t1295.3309025030\n+UniRef50_Q4L351: Staphylococcus haemolyticus JCSC1435 DNA, complete genome|g__Staphylococcus.s__Staphylococcus_epidermidis\t1295.3309025030\n+UniRef50_Q3J5T4: 50S ribosomal protein L11\t1229.1802787878\n+UniRef50_Q3J5T4: 50S ribosomal protein 
L11|g__Rhodobacter.s__Rhodobacter_sphaeroides\t1229.1802787878\n+UniRef50_N6A8S2\t1135.8005834263\n+UniRef50_N6A8S2|g__Staphylococcus.s__Staphylococcus_epidermidis\t1121.6796828769\n+UniRef50_N6A8S2|unclassified\t14.1209005495\n+UniRef50_L7WXY9\t1065.4899191617\n+UniRef50_L7WXY9|g__Staphyloc'..b'444DC: hypothetical protein\t0.0017973217\n+UniRef50_UPI00037444DC: hypothetical protein|unclassified\t0.0017973217\n+UniRef50_UPI000378A614: hypothetical protein\t0.0017691421\n+UniRef50_UPI000378A614: hypothetical protein|unclassified\t0.0017691421\n+UniRef50_R4LEH4: Yd repeat-containing protein\t0.0017472130\n+UniRef50_R4LEH4: Yd repeat-containing protein|unclassified\t0.0017472130\n+UniRef50_S4YMU8: Filamentous hemagglutinin\t0.0017432148\n+UniRef50_S4YMU8: Filamentous hemagglutinin|unclassified\t0.0017432148\n+UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40\t0.0016819660\n+UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40|unclassified\t0.0016819660\n+UniRef50_UPI00036DCFC8: hypothetical protein\t0.0016327044\n+UniRef50_UPI00036DCFC8: hypothetical protein|unclassified\t0.0016327044\n+UniRef50_UPI0001BF6B99: 90S preribosome component RRP12\t0.0016313392\n+UniRef50_UPI0001BF6B99: 90S preribosome component RRP12|unclassified\t0.0016313392\n+UniRef50_UPI000365699C: hypothetical protein\t0.0016224188\n+UniRef50_UPI000365699C: hypothetical protein|unclassified\t0.0016224188\n+UniRef50_UPI000344F009: hypothetical protein\t0.0015763429\n+UniRef50_UPI000344F009: hypothetical protein|unclassified\t0.0015763429\n+UniRef50_U6M5E8\t0.0015640805\n+UniRef50_U6M5E8|unclassified\t0.0015640805\n+UniRef50_UPI000349BE1A: hypothetical protein\t0.0015464606\n+UniRef50_UPI000349BE1A: hypothetical protein|unclassified\t0.0015464606\n+UniRef50_UPI0003773ED0: hypothetical protein\t0.0014975465\n+UniRef50_UPI0003773ED0: hypothetical 
protein|unclassified\t0.0014975465\n+UniRef50_R0ISA3\t0.0014873041\n+UniRef50_R0ISA3|unclassified\t0.0014873041\n+UniRef50_F4GI46\t0.0014628012\n+UniRef50_F4GI46|unclassified\t0.0014628012\n+UniRef50_UPI0002D336FD: hypothetical protein\t0.0014421264\n+UniRef50_UPI0002D336FD: hypothetical protein|unclassified\t0.0014421264\n+UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein\t0.0014134994\n+UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein|unclassified\t0.0014134994\n+UniRef50_W7A2A5\t0.0013833667\n+UniRef50_W7A2A5|unclassified\t0.0013833667\n+UniRef50_D2QX58: Peptidase domain protein\t0.0013763922\n+UniRef50_D2QX58: Peptidase domain protein|unclassified\t0.0013763922\n+UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C\t0.0013693947\n+UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C|unclassified\t0.0013693947\n+UniRef50_UPI0003644B17: hypothetical protein, partial\t0.0013488169\n+UniRef50_UPI0003644B17: hypothetical protein, partial|unclassified\t0.0013488169\n+UniRef50_A0A011N6I9\t0.0013414891\n+UniRef50_A0A011N6I9|unclassified\t0.0013414891\n+UniRef50_A0A058ZAA0\t0.0012939818\n+UniRef50_A0A058ZAA0|unclassified\t0.0012939818\n+UniRef50_UPI000468C770: hypothetical protein\t0.0012465542\n+UniRef50_UPI000468C770: hypothetical protein|unclassified\t0.0012465542\n+UniRef50_UPI00036FCEE3: hypothetical protein\t0.0012113651\n+UniRef50_UPI00036FCEE3: hypothetical protein|unclassified\t0.0012113651\n+UniRef50_UPI00035C33AC: hypothetical protein\t0.0010786375\n+UniRef50_UPI00035C33AC: hypothetical protein|unclassified\t0.0010786375\n+UniRef50_N1Q3A9\t0.0009733696\n+UniRef50_N1Q3A9|unclassified\t0.0009733696\n+UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous hemagglutinin/Adhesin\t0.0008849280\n+UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous 
hemagglutinin/Adhesin|unclassified\t0.0008849280\n+UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16\t0.0008849116\n+UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16|unclassified\t0.0008849116\n+UniRef50_A8LV91\t0.0008555354\n+UniRef50_A8LV91|unclassified\t0.0008555354\n+UniRef50_D3E2A1: Adhesin-like protein\t0.0007987026\n+UniRef50_D3E2A1: Adhesin-like protein|unclassified\t0.0007987026\n+UniRef50_U6MJL1\t0.0007379580\n+UniRef50_U6MJL1|unclassified\t0.0007379580\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 31394a0c0242 test-data/humann2_pathways_input.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann2_pathways_input.tabular Fri Apr 15 09:15:21 2016 -0400
b
b'@@ -0,0 +1,2313 @@\n+# Pathway\thumann2_Abundance\n+PWY-3781: aerobic respiration I (cytochrome c)\t1469.4934957509\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Rhodobacter.s__Rhodobacter_sphaeroides\t458.5505069573\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Staphylococcus.s__Staphylococcus_aureus\t314.9603040367\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Staphylococcus.s__Staphylococcus_epidermidis\t290.4852870919\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Escherichia.s__Escherichia_coli\t114.2846087701\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Pseudomonas.s__Pseudomonas_aeruginosa\t74.9156572789\n+PWY-3781: aerobic respiration I (cytochrome c)|unclassified\t36.9962755214\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Helicobacter.s__Helicobacter_pylori\t4.7585206385\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Propionibacterium.s__Propionibacterium_acnes\t3.0760781448\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Deinococcus.s__Deinococcus_radiodurans\t2.4395473535\n+PWY-3781: aerobic respiration I (cytochrome c)|g__Neisseria.s__Neisseria_meningitidis\t1.4220887339\n+PWY66-389: phytol degradation\t979.4805701095\n+PWY66-389: phytol degradation|g__Staphylococcus.s__Staphylococcus_aureus\t310.4884820709\n+PWY66-389: phytol degradation|g__Rhodobacter.s__Rhodobacter_sphaeroides\t295.0258373174\n+PWY66-389: phytol degradation|g__Escherichia.s__Escherichia_coli\t131.9572220185\n+PWY66-389: phytol degradation|g__Pseudomonas.s__Pseudomonas_aeruginosa\t36.1586640369\n+PWY66-389: phytol degradation|g__Acinetobacter.s__Acinetobacter_baumannii\t17.2278325323\n+PWY66-389: phytol degradation|g__Clostridium.s__Clostridium_beijerinckii\t6.7292789582\n+PWY66-389: phytol degradation|g__Propionibacterium.s__Propionibacterium_acnes\t4.2717229621\n+PWY66-389: phytol degradation|g__Deinococcus.s__Deinococcus_radiodurans\t3.1970501558\n+PWY66-389: phytol degradation|unclassified\t1.0145146064\n+PWY-5173: superpathway of 
acetyl-CoA biosynthesis\t960.5984271439\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Staphylococcus.s__Staphylococcus_epidermidis\t369.2127925716\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Rhodobacter.s__Rhodobacter_sphaeroides\t184.8066347027\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Staphylococcus.s__Staphylococcus_aureus\t155.9950294019\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Streptococcus.s__Streptococcus_mutans\t132.6919877720\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Escherichia.s__Escherichia_coli\t60.4553922962\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Pseudomonas.s__Pseudomonas_aeruginosa\t10.6937687177\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Streptococcus.s__Streptococcus_agalactiae\t4.8481259198\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Acinetobacter.s__Acinetobacter_baumannii\t1.4045480082\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|unclassified\t1.2013427395\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|g__Propionibacterium.s__Propionibacterium_acnes\t1.1988764961\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)\t912.1509918013\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Staphylococcus.s__Staphylococcus_epidermidis\t254.0802490614\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Rhodobacter.s__Rhodobacter_sphaeroides\t219.4700532096\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Staphylococcus.s__Staphylococcus_aureus\t184.0094002055\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Escherichia.s__Escherichia_coli\t134.5806848081\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Streptococcus.s__Streptococcus_mutans\t105.3626945686\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|g__Methanobrevibacter.s__Methanobrevibacter_smithii\t36.6811294465\n+PWY-7111: pyruvate fermentation to isobutanol 
(engineered)|g__Pseudomonas.s__Pseudomonas_aeruginosa\t16.8691584603\n+PWY-7111: pyruvate fermentation to isobutanol (engineered)|'..b"y of quinolone and alkylquinolone biosynthesis|g__Pseudomonas.s__Pseudomonas_aeruginosa\t8.7871569624\n+PWY-5181: toluene degradation III (aerobic) (via p-cresol)\t9.4768220350\n+PWY-6760: xylose degradation III\t8.0498341681\n+LYSINE-DEG1-PWY: L-lysine degradation XI (mammalian)\t7.8134292537\n+PWY-6672: cis-genanyl-CoA degradation\t6.9084670612\n+PWY-6672: cis-genanyl-CoA degradation|g__Pseudomonas.s__Pseudomonas_aeruginosa\t2.4674381202\n+PWY-6672: cis-genanyl-CoA degradation|g__Acinetobacter.s__Acinetobacter_baumannii\t1.9793128710\n+PWY-6581: spirilloxanthin and 2,2'-diketo-spirilloxanthin biosynthesis\t6.8207617638\n+PWY-6581: spirilloxanthin and 2,2'-diketo-spirilloxanthin biosynthesis|unclassified\t0.9325005511\n+PWY-5183: superpathway of aerobic toluene degradation\t6.2092354547\n+PWY-6383: mono-trans, poly-cis decaprenyl phosphate biosynthesis\t4.8092790379\n+TRIGLSYN-PWY: diacylglycerol and triacylglycerol biosynthesis\t4.5946669738\n+ARGDEG-IV-PWY: L-arginine degradation VIII (arginine oxidase pathway)\t4.1314165669\n+ARGDEG-IV-PWY: L-arginine degradation VIII (arginine oxidase pathway)|unclassified\t0.0472292096\n+PWY-7385: 1,3-propanediol biosynthesis (engineered)\t3.2727699123\n+PWY-6565: superpathway of polyamine biosynthesis III\t3.0724774533\n+PWY-6565: superpathway of polyamine biosynthesis III|unclassified\t0.6506157163\n+PWY-7337: 10-cis-heptadecenoyl-CoA degradation (yeast)\t2.6524638102\n+PWY-7338: 10-trans-heptadecenoyl-CoA degradation (reductase-dependent, yeast)\t2.6524638102\n+PWY-6263: superpathway of menaquinol-8 biosynthesis II\t2.5059205125\n+PWY-6562: norspermidine biosynthesis\t2.1886041799\n+PWY-6562: norspermidine biosynthesis|unclassified\t0.7022305715\n+PWY-5514: UDP-N-acetyl-D-galactosamine biosynthesis II\t2.1199921193\n+PWY-2201: folate transformations I\t2.0735386067\n+PWY-2201: folate 
transformations I|unclassified\t1.1580586462\n+UDPNACETYLGALSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis II\t1.4187052686\n+UDPNACETYLGALSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis II|g__Propionibacterium.s__Propionibacterium_acnes\t1.0503113260\n+PWY-7371: 1,4-dihydroxy-6-naphthoate biosynthesis II\t1.2424871748\n+DHGLUCONATE-PYR-CAT-PWY: glucose degradation (oxidative)\t0.9351240467\n+PWY-7373: superpathway of demethylmenaquinol-6 biosynthesis II\t0.7814745906\n+PWY-5420: catechol degradation II (meta-cleavage pathway)\t0.3907944440\n+THREOCAT-PWY: superpathway of L-threonine metabolism\t0.3633843000\n+ILEUDEG-PWY: L-isoleucine degradation I\t0.3384217460\n+ILEUDEG-PWY: L-isoleucine degradation I|unclassified\t0.3102350409\n+PWY-5109: 2-methylbutanoate biosynthesis\t0.3377296788\n+LYSINE-AMINOAD-PWY: L-lysine biosynthesis IV\t0.3065393268\n+PWY-6486: D-galacturonate degradation II\t0.3033074635\n+PWY-6486: D-galacturonate degradation II|unclassified\t0.0612811943\n+PWY-5419: catechol degradation to 2-oxopent-4-enoate II\t0.2239292649\n+PWY-7374: 1,4-dihydroxy-6-naphthoate biosynthesis I\t0.2225632096\n+PWY-5654: 2-amino-3-carboxymuconate semialdehyde degradation to 2-oxopentenoate\t0.2218668573\n+PWY6666-2: dopamine degradation\t0.1687667991\n+PWY6666-2: dopamine degradation|unclassified\t0.1687667991\n+PWY-7413: dTDP-6-deoxy-&alpha;-D-allose biosynthesis\t0.1226474527\n+PWY-7413: dTDP-6-deoxy-&alpha;-D-allose biosynthesis|unclassified\t0.1188476851\n+PWY-5742: L-arginine degradation IX (arginine:pyruvate transaminase pathway)\t0.1109282575\n+PWY-5742: L-arginine degradation IX (arginine:pyruvate transaminase pathway)|unclassified\t0.0472292096\n+PWY-6117: spermine and spermidine degradation I\t0.1027051153\n+PWY-6117: spermine and spermidine degradation I|unclassified\t0.0997625171\n+PWY-6834: spermidine biosynthesis III\t0.0875565721\n+PWY-6834: spermidine biosynthesis III|unclassified\t0.0845731084\n+PWY-5079: L-phenylalanine degradation 
III\t0.0714741288\n+PWY-5079: L-phenylalanine degradation III|unclassified\t0.0652061920\n+PWY-7654: (8E,10E)-dodeca-8,10-dienol biosynthesis\t0.0481757443\n+PWY-7654: (8E,10E)-dodeca-8,10-dienol biosynthesis|unclassified\t0.0458917789\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 31394a0c0242 test-data/metaphlan2_input.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metaphlan2_input.txt Fri Apr 15 09:15:21 2016 -0400
b
b'@@ -0,0 +1,85 @@\n+#SampleID\tMetaphlan2_Analysis\n+#clade_name\trelative_abundance\tcoverage\taverage_genome_length_in_the_clade\testimated_number_of_reads_from_the_clade\n+k__Bacteria\t93.48715\t0.138259210304\t3060646\t423162\n+k__Archaea\t6.51285\t0.00963193275713\t1717866\t16546\n+k__Bacteria|p__Firmicutes\t73.45294\t0.108630391421\t2645593\t287392\n+k__Bacteria|p__Proteobacteria\t19.36391\t0.0286375092603\t3135748\t89800\n+k__Archaea|p__Euryarchaeota\t6.51285\t0.00963193275713\t1954104\t18822\n+k__Bacteria|p__Actinobacteria\t0.36055\t0.000533224419409\t3230214\t1722\n+k__Bacteria|p__Deinococcus_Thermus\t0.30974\t0.000458085203848\t2916300\t1336\n+k__Bacteria|p__Firmicutes|c__Bacilli\t72.514\t0.107241784241\t2708875\t290505\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria\t13.59711\t0.0201089279307\t3686133\t74124\n+k__Archaea|p__Euryarchaeota|c__Methanobacteria\t6.51285\t0.00963193275713\t1658190\t15972\n+k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria\t5.64659\t0.00835080616008\t3693177\t30841\n+k__Bacteria|p__Firmicutes|c__Clostridia\t0.93894\t0.00138860718019\t2874996\t3992\n+k__Bacteria|p__Actinobacteria|c__Actinobacteria\t0.36055\t0.000533224419409\t3230214\t1722\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci\t0.30974\t0.000458085203848\t2916300\t1336\n+k__Bacteria|p__Proteobacteria|c__Epsilonproteobacteria\t0.07678\t0.000113544968539\t2003141\t227\n+k__Bacteria|p__Proteobacteria|c__Betaproteobacteria\t0.04343\t6.42302010405e-05\t3060202\t197\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales\t55.38558\t0.0819103700977\t3407631\t279120\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales\t17.12842\t0.0253314141428\t2010119\t50919\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales\t11.54704\t0.017077045274\t4079009\t69657\n+k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales\t6.51285\t0.00963193275713\t1658190\t15972\n+k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rhodobacterales\t
5.64659\t0.00835080616008\t4758592\t39738\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales\t2.05008\t0.00303188265663\t4126474\t12511\n+k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales\t0.93894\t0.00138860718019\t3127383\t4343\n+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales\t0.36055\t0.000533224419409\t4860922\t2592\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci|o__Deinococcales\t0.30974\t0.000458085203848\t3351957\t1535\n+k__Bacteria|p__Proteobacteria|c__Epsilonproteobacteria|o__Campylobacterales\t0.07678\t0.000113544968539\t2228417\t253\n+k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Neisseriales\t0.04343\t6.42302010405e-05\t3161861\t203\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Staphylococcaceae\t55.38558\t0.0819103700977\t2461214\t201599\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae\t17.12842\t0.0253314141428\t2193606\t55567\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobacteriaceae\t11.54704\t0.017077045274\t4079009\t69657\n+k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae\t6.51285\t0.00963193275713\t2073038\t19967\n+k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rhodobacterales|f__Rhodobacteraceae\t5.64659\t0.00835080616008\t5037498\t42067\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Pseudomonadaceae\t1.92917\t0.00285306704708\t5434751\t15506\n+k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Clostridiaceae\t0.93894\t0.00138860718019\t3351243\t4654\n+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae\t0.36055\t0.000533224419409\t3386340\t1806\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci|o__Deinococcales|f__Deinococcaceae\t0.30974\t0.000458085203848\t3443517\t1577\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae\t0.12091\t0.00017881
5609551\t2818198\t504\n+k__Bacteria|p__Proteobacteria|c__Epsilonproteobacteria|o__Campylobacterales|f__Helicobacteraceae\t0.07678\t0.000113544968539\t2208224\t251\n+k__Bacteria|p__Proteobacteria|c__B'..b'ruginosa\t0.81333\t0.00120284715161\t6695839\t8054\n+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae|g__Propionibacterium|s__Propionibacterium_acnes\t0.36055\t0.000533224419409\t2505029\t1336\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci|o__Deinococcales|f__Deinococcaceae|g__Deinococcus|s__Deinococcus_unclassified\t0.15778\t0.000233347738069\t3443517\t804\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci|o__Deinococcales|f__Deinococcaceae|g__Deinococcus|s__Deinococcus_radiodurans\t0.15196\t0.000224737465779\t3060986\t688\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_baumannii\t0.12091\t0.000178815609551\t3966061\t709\n+k__Bacteria|p__Proteobacteria|c__Epsilonproteobacteria|o__Campylobacterales|f__Helicobacteraceae|g__Helicobacter|s__Helicobacter_pylori\t0.07678\t0.000113544968539\t1635217\t186\n+k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria|s__Neisseria_meningitidis\t0.04343\t6.42302010405e-05\t2200434\t141\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Staphylococcaceae|g__Staphylococcus|s__Staphylococcus_aureus|t__Staphylococcus_aureus_unclassified\t28.27762\t0.041820097544\t2866570\t119880\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales|f__Staphylococcaceae|g__Staphylococcus|s__Staphylococcus_epidermidis|t__Staphylococcus_epidermidis_unclassified\t27.10796\t0.0400902725537\t2525605\t101252\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_mutans|t__Streptococcus_mutans_unclassified\t16.30896\t0.0241195042226\t1908490\t46032\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteriales|f__Enterobact
eriaceae|g__Escherichia|s__Escherichia_coli|t__Escherichia_coli_unclassified\t6.69956\t0.00990806034098\t5208306\t51604\n+k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii|t__Methanobrevibacter_smithii_unclassified\t6.51285\t0.00963193275713\t1883713\t18144\n+k__Bacteria|p__Proteobacteria|c__Alphaproteobacteria|o__Rhodobacterales|f__Rhodobacteraceae|g__Rhodobacter|s__Rhodobacter_sphaeroides|t__Rhodobacter_sphaeroides_unclassified\t5.64659\t0.00835080616008\t4259863\t35573\n+k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Clostridiaceae|g__Clostridium|s__Clostridium_beijerinckii|t__Clostridium_beijerinckii_unclassified\t0.93894\t0.00138860718019\t5906224\t8201\n+k__Bacteria|p__Firmicutes|c__Bacilli|o__Lactobacillales|f__Streptococcaceae|g__Streptococcus|s__Streptococcus_agalactiae|t__Streptococcus_agalactiae_unclassified\t0.81946\t0.00121190992023\t2093917\t2538\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Pseudomonadaceae|g__Pseudomonas|s__Pseudomonas_aeruginosa|t__Pseudomonas_aeruginosa_unclassified\t0.81333\t0.00120284715161\t6695839\t8054\n+k__Bacteria|p__Actinobacteria|c__Actinobacteria|o__Actinomycetales|f__Propionibacteriaceae|g__Propionibacterium|s__Propionibacterium_acnes|t__Propionibacterium_acnes_unclassified\t0.36055\t0.000533224419409\t2505029\t1336\n+k__Bacteria|p__Deinococcus_Thermus|c__Deinococci|o__Deinococcales|f__Deinococcaceae|g__Deinococcus|s__Deinococcus_radiodurans|t__GCF_000008565\t0.15196\t0.000224737465779\t3060986\t688\n+k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadales|f__Moraxellaceae|g__Acinetobacter|s__Acinetobacter_baumannii|t__Acinetobacter_baumannii_unclassified\t0.12091\t0.000178815609551\t3966061\t709\n+k__Bacteria|p__Proteobacteria|c__Epsilonproteobacteria|o__Campylobacterales|f__Helicobacteraceae|g__Helicobacter|s__Helicobacter_pylori|t__Helicobacter_pylori_unclassified\t0
.07678\t0.000113544968539\t1635217\t186\n+k__Bacteria|p__Proteobacteria|c__Betaproteobacteria|o__Neisseriales|f__Neisseriaceae|g__Neisseria|s__Neisseria_meningitidis|t__Neisseria_meningitidis_unclassified\t0.04343\t6.42302010405e-05\t2200434\t141\n+#estimated total number of reads from known clades: 439708\n'
b
diff -r 000000000000 -r 31394a0c0242 test-data/pathways_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pathways_output.tabular Fri Apr 15 09:15:21 2016 -0400
b
b'@@ -0,0 +1,1533 @@\n+genus\tgenus_abundance\tspecies\tspecies_abundance\tpathways_id\tpathways_name\tpathways_abundance\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tPWY-3781\t aerobic respiration I \t0.368556895131\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tPWY-3781\t aerobic respiration I \t0.253147232386\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tPWY-3781\t aerobic respiration I \t0.233475601635\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-3781\t aerobic respiration I \t0.0918554879572\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-3781\t aerobic respiration I \t0.0602129571868\n+Helicobacter\t0.07678\tHelicobacter_pylori\t0.07678\tPWY-3781\t aerobic respiration I \t0.00382462905467\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tPWY-3781\t aerobic respiration I \t0.00247237718207\n+Deinococcus\t0.30974\tDeinococcus_radiodurans\t0.15196\tPWY-3781\t aerobic respiration I \t0.00196076982686\n+Neisseria\t0.04343\tNeisseria_meningitidis\t0.04343\tPWY-3781\t aerobic respiration I \t0.00114299428398\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tPWY66-389\t phytol degradation\t0.249553035467\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tPWY66-389\t phytol degradation\t0.237125038432\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY66-389\t phytol degradation\t0.106059732351\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY66-389\t phytol degradation\t0.0290622837557\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tPWY66-389\t phytol degradation\t0.0138467548757\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tPWY66-389\t phytol degradation\t0.0054086128391\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tPWY66-389\t phytol degradation\t0.00343336868651\n+Deinococcus\t0.30974\tDeinococcus_radiodurans\t0.15196\tPWY66-389\t phytol 
degradation\t0.00256960762473\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.296752306253\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.148537093411\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.125379958848\n+Streptococcus\t17.12842\tStreptococcus_mutans\t16.30896\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.106650295398\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.0485906161711\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.00859504489919\n+Streptococcus\t17.12842\tStreptococcus_agalactiae\t0.81946\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.00389664869866\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.00112889604332\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tPWY-5173\t superpathway of acetyl CoA biosynthesis\t0.000963588944608\n+Staphylococcus\t55.38558\tStaphylococcus_epidermidis\t27.10796\tPWY-7111\t pyruvate fermentation to isobutanol \t0.204215296434\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tPWY-7111\t pyruvate fermentation to isobutanol \t0.176397583599\n+Staphylococcus\t55.38558\tStaphylococcus_aureus\t28.27762\tPWY-7111\t pyruvate fermentation to isobutanol \t0.14789632153\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-7111\t pyruvate fermentation to isobutanol \t0.108168322976\n+Streptococcus\t17.12842\tStreptococcus_mutans\t16.30896\tPWY-7111\t pyruvate fermentation to isobutanol \t0.0846845592441\n+Methanobrevibacter\t6.51285\tMethanobrevibacter_smithii\t6.51285\tPWY-7111\t pyruvate fermentation to isobutanol \t0.0294822118253\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-7111\t 
pyruvate fermentation to isobutanol \t0.0135584729954\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tPWY-7111\t pyruvate fermentation to isobutanol \t0.0129564304194\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tPWY-7111\t pyruvate fermentation to isobutanol \t0.00466772463498\n+Streptococcus\t17.12'..b'371335506\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-5656\t mannosylglycerate biosynthesis I\t0.0189326674698\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-7165\t L ascorbate biosynthesis VI \t0.00330800105556\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tLPSSYN-PWY\t superpathway of lipopolysaccharide biosynthesis\t0.018571618591\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tFUCCAT-PWY\t fucose degradation\t0.0258996873857\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-7446\t sulfoglycolysis\t0.0275758331758\n+Methanobrevibacter\t6.51285\tMethanobrevibacter_smithii\t6.51285\tP241-PWY\t coenzyme B biosynthesis\t0.0254264295134\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-6948\t sitosterol degradation to androstenedione\t0.00329315802192\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tCENTFERM-PWY\t pyruvate fermentation to butanoate\t0.0077654210394\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tPWY-5367\t petroselinate biosynthesis\t0.00470335180994\n+Streptococcus\t17.12842\tStreptococcus_agalactiae\t0.81946\tPWY-5367\t petroselinate biosynthesis\t0.00385663493416\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY0-41\t allantoin degradation IV \t0.0222930161173\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-6731\t starch degradation III\t0.016278474061\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tFUC-RHAMCAT-PWY\t superpathway of fucose and rhamnose degradation\t0.0214779190288\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-7409\t phospholipid remodeling 
\t0.0186794588374\n+Rhodobacter\t5.64659\tRhodobacter_sphaeroides\t5.64659\tPWY-3941\t &beta; alanine biosynthesis II\t0.00804419432078\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY0-1241\t ADP L glycero &beta; D manno heptose biosynthesis\t0.0202085072369\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tRHAMCAT-PWY\t L rhamnose degradation I\t0.0188827745801\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tRHAMCAT-PWY\t L rhamnose degradation I\t0.00223834731648\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tKDO-NAGLIPASYN-PWY\t superpathway of 2 lipid A biosynthesis\t0.012366617493\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY0-1338\t polymyxin resistance\t0.0203425769976\n+Methanobrevibacter\t6.51285\tMethanobrevibacter_smithii\t6.51285\tP261-PWY\t coenzyme M biosynthesis I\t0.0184374760723\n+Methanobrevibacter\t6.51285\tMethanobrevibacter_smithii\t6.51285\tPWY-5198\t factor 420 biosynthesis\t0.0165329714607\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tHCAMHPDEG-PWY\t 3 phenylpropanoate and 3 propanoate degradation to 2 oxopent 4 enoate\t0.0112800262634\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-6690\t cinnamate and 3 hydroxycinnamate degradation to 2 oxopent 4 enoate\t0.0112800262634\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-7090\t UDP 2,3 diacetamido 2,3 dideoxy &alpha; D mannuronate biosynthesis\t0.00812186950085\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-6467\t Kdo transfer to lipid IVA III \t0.00739415124056\n+Escherichia\t11.54704\tEscherichia_coli\t6.69956\tPWY-6309\t L tryptophan degradation XI \t0.00644240092358\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-6309\t L tryptophan degradation XI \t0.00336506734394\n+Clostridium\t0.93894\tClostridium_beijerinckii\t0.93894\tP562-PWY\t myo inositol degradation I\t0.00411946456846\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-6660\t 2 heptyl 3 hydroxy 4 quinolone 
biosynthesis\t0.00736993141727\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY66-388\t fatty acid &alpha; oxidation III\t0.00731134431452\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-6662\t superpathway of quinolone and alkylquinolone biosynthesis\t0.00706261848576\n+Pseudomonas\t1.92917\tPseudomonas_aeruginosa\t0.81333\tPWY-6672\t cis genanyl CoA degradation\t0.00198318684357\n+Acinetobacter\t0.12091\tAcinetobacter_baumannii\t0.12091\tPWY-6672\t cis genanyl CoA degradation\t0.00159085944767\n+Propionibacterium\t0.36055\tPropionibacterium_acnes\t0.36055\tUDPNACETYLGALSYN-PWY\t UDP N acetyl D glucosamine biosynthesis II\t0.000844180685352\n'