Repository 'compare_humann2_output'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/compare_humann2_output

Changeset 0:9959fa526f1a (2016-04-20)
Next changeset 1:c1aca37cb1fc (2016-04-20)
Commit message:
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit c16428041ae3d60b61b6570035c9268726730543-dirty
added:
compare_humann2_output.py
compare_humann2_output.xml
test-data/humann2_fasta_pathabundance_relab_renormalized.csv
test-data/humann2_m8_pathabundance_cmp_renormalized.tsv
test-data/log_output.txt
test-data/more_abundant_output.tabular
test-data/similar_output.tabular
test-data/specific_to_sample1_output.txt
test-data/specific_to_sample2_output.txt
b
diff -r 000000000000 -r 9959fa526f1a compare_humann2_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/compare_humann2_output.py Wed Apr 20 08:30:08 2016 -0400
[
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import re
+
+def extract_abundances(filepath, nb_charact_to_extract):
+    abundances = {}
+    more_abund_charact = []
+    abund_sum = 0
+    with open(filepath, 'r') as abundance_file:
+        for line in abundance_file.readlines()[1:]:
+            split_line = line[:-1].split('\t')
+            charact_id = split_line[0]
+            abund = float(split_line[1])
+            abundances[charact_id] = 100*abund
+            abund_sum += abundances[charact_id]
+
+            if len(more_abund_charact) < nb_charact_to_extract:
+                more_abund_charact.append(charact_id)
+            else:
+                best_pos = None
+                for i in range(len(more_abund_charact)-1,-1,-1):
+                    if abundances[more_abund_charact[i]] < abund:
+                        best_pos = i
+                    else:
+                        break
+                if best_pos != None:
+                    tmp_more_abund_charact = more_abund_charact
+                    more_abund_charact = tmp_more_abund_charact[:best_pos]
+                    more_abund_charact += [charact_id]
+                    more_abund_charact += tmp_more_abund_charact[best_pos:-1]
+    return abundances, more_abund_charact
+
+def format_characteristic_name(all_name):
+    if all_name.find(':') != -1:
+        charact_id = all_name.split(':')[0]
+        charact_name = all_name.split(':')[1][1:]
+    else:
+        charact_id = all_name
+        charact_name = ''
+
+    charact_name = charact_name.replace('/',' ')
+    charact_name = charact_name.replace('-',' ')
+    charact_name = charact_name.replace("'",'')
+    if charact_name.find('(') != -1 and charact_name.find(')') != -1:
+        open_bracket = charact_name.find('(')
+        close_bracket = charact_name.find(')')+1
+        charact_name = charact_name[:open_bracket] + charact_name[close_bracket:]
+    return charact_id,charact_name
+
+def write_more_abundant_charat(abundances,more_abund_charact, output_filepath):
+    with open(output_filepath,'w') as output_file:
+        output_file.write('id\tname\t')
+        output_file.write('\t'.join(abundances.keys()) + '\n')
+
+        for mac in more_abund_charact:
+            charact_id,charact_name = format_characteristic_name(mac)
+            output_file.write(charact_id + '\t' + charact_name)
+            for sample in abundances:
+                abund = abundances[sample].get(mac, 0)
+                output_file.write('\t' + str(abund))
+            output_file.write('\n')
+
+def extract_similar_characteristics(abundances, sim_output_filepath,
+    specific_output_files):
+    sim_characteristics = set(abundances[abundances.keys()[0]].keys())
+    for sample in abundances.keys()[1:]:
+        sim_characteristics.intersection_update(abundances[sample].keys())
+    print 'Similar between all samples:', len(sim_characteristics)
+
+    with open(sim_output_filepath, 'w') as sim_output_file:
+        sim_output_file.write('id\tname\t' + '\t'.join(abundances.keys()) + '\n')
+        for charact in list(sim_characteristics):
+            charact_id,charact_name = format_characteristic_name(charact)
+            sim_output_file.write(charact_id + '\t' + charact_name)
+            for sample in abundances.keys():
+                sim_output_file.write('\t' + str(abundances[sample][charact]))
+            sim_output_file.write('\n')
+
+    print 'Specific to samples:'
+    diff_characteristics = {}
+    for i in range(len(abundances.keys())):
+        sample = abundances.keys()[i]
+        print ' ', sample, ""
+        print '    All:', len(abundances[sample].keys())
+        diff_characteristics[sample] = set(abundances[sample].keys())
+        diff_characteristics[sample].difference_update(sim_characteristics)
+        print '    Number of specific characteristics:', 
+        print len(diff_characteristics[sample])
+        print '    Percentage of specific characteristics:',
+        print 100*len(diff_characteristics[sample])/(1.*len(abundances[sample].keys()))
+
+        relative_abundance = 0
+        with open(specific_output_files[i], 'w') as output_file:
+            output_file.write('id\tname\tabundances\n')
+            for charact in list(diff_characteristics[sample]):
+                charact_id,charact_name = format_characteristic_name(charact)
+                output_file.write(charact_id + '\t' + charact_name + '\t')
+                output_file.write(str(abundances[sample][charact]) + '\n')
+                relative_abundance += abundances[sample][charact]
+        print '    Relative abundance of specific characteristics(%):', relative_abundance
+
+    return sim_characteristics
+
+def compare_humann2_output(args):
+    abundances = {}
+    more_abund_charact = []
+
+    for i in range(len(args.sample_name)):
+        abundances[args.sample_name[i]], mac = extract_abundances(args.charact_input_file[i],
+            args.most_abundant_characteristics_to_extract)
+        more_abund_charact += mac
+
+    write_more_abundant_charat(abundances, list(set(more_abund_charact)), 
+        args.more_abundant_output_file)
+    sim_characteristics = extract_similar_characteristics(abundances, 
+        args.similar_output_file, args.specific_output_file)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--sample_name', required=True, action='append')
+    parser.add_argument('--charact_input_file', required=True, action='append')
+    parser.add_argument('--most_abundant_characteristics_to_extract', required=True,
+        type = int)
+    parser.add_argument('--more_abundant_output_file', required=True)
+    parser.add_argument('--similar_output_file', required=True)
+    parser.add_argument('--specific_output_file', required=True,action='append')
+    args = parser.parse_args()
+
+    if len(args.sample_name) != len(args.charact_input_file):
+        raise ValueError("Same number of values (in same order) are expected for --sample_name and --charact_input_file")
+    if len(args.sample_name) != len(args.specific_output_file):
+        raise ValueError("Same number of values (in same order) are expected for --sample_name and --specific_output_file")
+
+    compare_humann2_output(args)
\ No newline at end of file
b
diff -r 000000000000 -r 9959fa526f1a compare_humann2_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/compare_humann2_output.xml Wed Apr 20 08:30:08 2016 -0400
[
@@ -0,0 +1,86 @@
+<tool id="compare_humann2_output" name="Compare outputs of HUMAnN2 for several samples" version="0.1.0">
+    <description>and extract information</description>
+
+    <requirements>
+    </requirements>
+
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+
+    <version_command></version_command>
+
+    <command><![CDATA[
+        mkdir specifics 
+        &&
+
+        python $__tool_directory__/compare_humann2_output.py
+            #for $sample in $samples:
+                --sample_name "${sample.sample_name}"
+                --charact_input_file "${sample.input}"
+                --specific_output_file "specifics/specific_to_${sample.sample_name}.txt"
+            #end for
+
+            --most_abundant_characteristics_to_extract $charact_nb
+            --more_abundant_output_file $more_abundant_output_file
+            --similar_output_file $similar_output_file
+            > $log
+                
+    ]]></command>
+
+    <inputs>
+        <repeat name="samples" title="Add sample and input file (HUMAnN2 output after normalization)" >
+            <param name="sample_name" type="text" label="Name of the sample" help="(--sample_name)"/>
+            <param name="input" format="txt,tabular" type="data" label="Input file corresponding to HUMAnN2 output" help="The HUMAnN2 output file contains relative abundance of gene families or pathways (after normalization, --charact_input_file)"/>
+        </repeat>
+
+        <param name="charact_nb" type="integer" value="10" label="Number of most abundant characteristics to extract for each sample" help="(--most_abundant_characteristics_to_extract)"/>
+    </inputs>
+
+    <outputs>
+        <data name="more_abundant_output_file" format="tabular"
+            label="${tool.name} on ${on_string}: More abundant characteristics for each sample" />
+        <data name="similar_output_file" format="tabular"
+            label="${tool.name} on ${on_string}: Similar characteristics and the relative abundances for all samples" />
+        <data name="log" format="txt"
+            label="${tool.name} on ${on_string}: Log" />
+        <collection name="specific_files" type="list">
+            <discover_datasets pattern="__designation_and_ext__" directory="specifics"/>
+        </collection>
+
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="samples_0|sample_name" value="sample1"/>
+            <param name="samples_0|input" value="humann2_m8_pathabundance_cmp_renormalized.tsv"/>
+            <param name="samples_1|sample_name" value="sample2"/>
+            <param name="samples_1|input" value="humann2_fasta_pathabundance_relab_renormalized.csv"/>
+            <param name="charact_nb" value="10"/>
+            <output name="more_abundant_output_file" file="more_abundant_output.tabular"/>
+            <output name="similar_output_file" file="similar_output.tabular"/>
+            <output name="log" file="log_output.txt"/>
+            <output_collection name="specific_files" type="list">
+                <element name="specific_to_sample1" file="specific_to_sample1_output.txt" />
+                <element name="specific_to_sample2" file="specific_to_sample2_output.txt" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This tool compares HUMAnN2 outputs with gene families or pathways and their relative abundances between several samples. Several files are produced:
+
+  * Similar gene families or pathways between the samples and the relative abundances of these similar characteristics
+
+  * Most abundant gene families or pathways for each sample and the corresponding relative abundance in all samples
+  
+  * Specific gene families and pathways for each sample and the relative abundances of these specific characteristics
+
+    ]]></help>
+
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 9959fa526f1a test-data/humann2_fasta_pathabundance_relab_renormalized.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann2_fasta_pathabundance_relab_renormalized.csv Wed Apr 20 08:30:08 2016 -0400
b
b"@@ -0,0 +1,267 @@\n+# Pathway\thumann2_Abundance\n+PYRIDOXSYN-PWY: pyridoxal 5'-phosphate biosynthesis I\t0.0245442\n+PYRIDOXSYN-PWY: pyridoxal 5'-phosphate biosynthesis I|unclassified\t0.0341296\n+PWY-3841: folate transformations II\t0.0238544\n+PWY-3841: folate transformations II|unclassified\t0.0227477\n+1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis\t0.0212689\n+1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|unclassified\t0.00834716\n+PWY-7208: superpathway of pyrimidine nucleobases salvage\t0.0206908\n+PWY-7208: superpathway of pyrimidine nucleobases salvage|unclassified\t0.0175918\n+COA-PWY-1: coenzyme A biosynthesis II (mammalian)\t0.019437\n+PWY-7221: guanosine ribonucleotides de novo biosynthesis\t0.0188123\n+CALVIN-PWY: Calvin-Benson-Bassham cycle\t0.0179034\n+CALVIN-PWY: Calvin-Benson-Bassham cycle|unclassified\t0.0240344\n+PWY66-422: D-galactose degradation V (Leloir pathway)\t0.0175745\n+PWY66-422: D-galactose degradation V (Leloir pathway)|g__Bacteroides.s__Bacteroides_stercoris\t0.0149592\n+PWY66-422: D-galactose degradation V (Leloir pathway)|unclassified\t0.0104266\n+PWY-1042: glycolysis IV (plant cytosol)\t0.0175553\n+PWY-1042: glycolysis IV (plant cytosol)|unclassified\t0.0193529\n+PWY-5100: pyruvate fermentation to acetate and lactate II\t0.0170667\n+PWY-5100: pyruvate fermentation to acetate and lactate II|unclassified\t0.0126625\n+DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I\t0.0169441\n+DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I|unclassified\t0.0250835\n+PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I\t0.0169262\n+HOMOSER-METSYN-PWY: L-methionine biosynthesis I\t0.0158217\n+HOMOSER-METSYN-PWY: L-methionine biosynthesis I|unclassified\t0.0109381\n+PWY-7219: adenosine ribonucleotides de novo biosynthesis\t0.0157923\n+PWY-7219: adenosine ribonucleotides de novo biosynthesis|unclassified\t0.0333171\n+PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I\t0.0155872\n+PWY-7229: 
superpathway of adenosine nucleotides de novo biosynthesis I|unclassified\t0.0261337\n+PWY-6125: superpathway of guanosine nucleotides de novo biosynthesis II\t0.0155645\n+PWY-5505: L-glutamate and L-glutamine biosynthesis\t0.0153682\n+PWY-5505: L-glutamate and L-glutamine biosynthesis|unclassified\t0.0196045\n+PWY-5505: L-glutamate and L-glutamine biosynthesis|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t0.0144739\n+PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II\t0.0150984\n+PWY-6126: superpathway of adenosine nucleotides de novo biosynthesis II|unclassified\t0.0244065\n+PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II\t0.0148474\n+PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II|unclassified\t0.0201368\n+PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II\t0.0148474\n+PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II|unclassified\t0.0201368\n+PWY-6703: preQ0 biosynthesis\t0.0139768\n+PWY-6703: preQ0 biosynthesis|unclassified\t0.013368\n+PWY-5659: GDP-mannose biosynthesis\t0.013223\n+PWY-5659: GDP-mannose biosynthesis|unclassified\t0.00106616\n+SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I\t0.012955\n+SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I|unclassified\t0.00288758\n+ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine)\t0.0129401\n+ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine)|unclassified\t0.0159474\n+VALSYN-PWY: L-valine biosynthesis\t0.0129401\n+VALSYN-PWY: L-valine biosynthesis|unclassified\t0.0159474\n+PWY-6936: seleno-amino acid biosynthesis\t0.012799\n+PWY-6936: seleno-amino acid biosynthesis|unclassified\t0.0132349\n+PWY-5030: L-histidine degradation III\t0.0122713\n+PWY-5030: L-histidine degradation III|unclassified\t0.0120381\n+HSERMETANA-PWY: L-methionine biosynthesis III\t0.0121379\n+HSERMETANA-PWY: L-methionine biosynthesis III|unclassified\t0.00515875\n+PWY0-162: superpathway of pyrimidine 
ribonucleotides de novo biosynthesis\t0.0119161\n+PWY0-162: superpathway of pyrimidine ribonucleotides de novo biosynthesis|unclassif"..b"ryotes)\t0.00104748\n+PWY-5754: 4-hydroxybenzoate biosynthesis I (eukaryotes)|unclassified\t0.00229464\n+P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle)\t0.00103843\n+P185-PWY: formaldehyde assimilation III (dihydroxyacetone cycle)|unclassified\t0.00115303\n+PWY-6595: superpathway of guanosine nucleotides degradation (plants)\t0.00102956\n+PWY-6595: superpathway of guanosine nucleotides degradation (plants)|unclassified\t0.00225538\n+PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle\t0.000992505\n+PWY-5464: superpathway of cytosolic glycolysis (plants), pyruvate dehydrogenase and TCA cycle|unclassified\t0.00172005\n+THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes)\t0.000973654\n+THISYNARA-PWY: superpathway of thiamin diphosphate biosynthesis III (eukaryotes)|unclassified\t0.0021303\n+PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing)\t0.000904748\n+PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing)|unclassified\t0.00183333\n+THRESYN-PWY: superpathway of L-threonine biosynthesis\t0.000904039\n+THRESYN-PWY: superpathway of L-threonine biosynthesis|unclassified\t0.00188356\n+PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing)\t0.000751931\n+PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing)|unclassified\t0.0014413\n+FAO-PWY: fatty acid &beta;-oxidation I\t0.000714249\n+FAO-PWY: fatty acid &beta;-oxidation I|unclassified\t0.00148771\n+PWY-5136: fatty acid &beta;-oxidation II (peroxisome)\t0.000698245\n+PWY-5136: fatty acid &beta;-oxidation II (peroxisome)|unclassified\t0.00144209\n+PWY-6471: peptidoglycan biosynthesis IV (Enterococcus faecium)\t0.000698193\n+PWY-6471: peptidoglycan biosynthesis IV (Enterococcus 
faecium)|unclassified\t0.00142191\n+PWY-7007: methyl ketone biosynthesis\t0.000659038\n+PWY-7007: methyl ketone biosynthesis|unclassified\t0.00122854\n+GLCMANNANAUT-PWY: superpathway of N-acetylglucosamine, N-acetylmannosamine and N-acetylneuraminate degradation\t0.000651572\n+GLCMANNANAUT-PWY: superpathway of N-acetylglucosamine, N-acetylmannosamine and N-acetylneuraminate degradation|unclassified\t0.00123299\n+PWY-6769: rhamnogalacturonan type I degradation I (fungi)\t0.000634893\n+PWY-6769: rhamnogalacturonan type I degradation I (fungi)|unclassified\t0.00139081\n+PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine)\t0.000615914\n+PWY-6309: L-tryptophan degradation XI (mammalian, via kynurenine)|unclassified\t0.00134924\n+PWY-7094: fatty acid salvage\t0.000597229\n+PWY-7094: fatty acid salvage|unclassified\t0.00125407\n+GLUDEG-I-PWY: GABA shunt\t0.000537709\n+GLUDEG-I-PWY: GABA shunt|unclassified\t0.001173\n+PWY1F-823: leucopelargonidin and leucocyanidin biosynthesis\t0.000501896\n+PWY1F-823: leucopelargonidin and leucocyanidin biosynthesis|unclassified\t0.00109947\n+PWY-7234: inosine-5'-phosphate biosynthesis III\t0.00045368\n+PWY-7234: inosine-5'-phosphate biosynthesis III|unclassified\t0.000993843\n+PWY-5173: superpathway of acetyl-CoA biosynthesis\t0.000372086\n+PWY-5173: superpathway of acetyl-CoA biosynthesis|unclassified\t0.000785602\n+PWY-6731: starch degradation III\t0.000336222\n+PWY-6731: starch degradation III|unclassified\t0.000708616\n+P241-PWY: coenzyme B biosynthesis\t0.000322755\n+P241-PWY: coenzyme B biosynthesis|unclassified\t0.000707035\n+PWY-7398: coumarins biosynthesis (engineered)\t0.000273932\n+PWY-7398: coumarins biosynthesis (engineered)|unclassified\t0.000600083\n+PWY-4041: &gamma;-glutamyl cycle\t0.000269421\n+PWY-4041: &gamma;-glutamyl cycle|unclassified\t0.000590201\n+GLYOXYLATE-BYPASS: glyoxylate cycle\t0.000182141\n+GLYOXYLATE-BYPASS: glyoxylate cycle|unclassified\t0.000395503\n+GLYCOL-GLYOXDEG-PWY: superpathway of 
glycol metabolism and degradation\t0.000175574\n+GLYCOL-GLYOXDEG-PWY: superpathway of glycol metabolism and degradation|unclassified\t0.000384618\n+SO4ASSIM-PWY: sulfate reduction I (assimilatory)\t6.96955e-05\n+SO4ASSIM-PWY: sulfate reduction I (assimilatory)|unclassified\t0.00015138\n"
b
diff -r 000000000000 -r 9959fa526f1a test-data/humann2_m8_pathabundance_cmp_renormalized.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humann2_m8_pathabundance_cmp_renormalized.tsv Wed Apr 20 08:30:08 2016 -0400
b
@@ -0,0 +1,17 @@
+# Pathway humann2_Abundance
+ARGININE-SYN4-PWY: L-ornithine de novo  biosynthesis 353741
+ARGININE-SYN4-PWY: L-ornithine de novo  biosynthesis|unclassified 353741
+HSERMETANA-PWY: L-methionine biosynthesis III 150089
+HSERMETANA-PWY: L-methionine biosynthesis III|unclassified 150089
+DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I 142538
+DTDPRHAMSYN-PWY: dTDP-L-rhamnose biosynthesis I|unclassified 142538
+KETOGLUCONMET-PWY: ketogluconate metabolism 96153.8
+KETOGLUCONMET-PWY: ketogluconate metabolism|unclassified 96153.8
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I 82608.9
+PWY-1269: CMP-3-deoxy-D-manno-octulosonate biosynthesis I|unclassified 82608.9
+PWY-7357: thiamin formation from pyrithiamine and oxythiamine (yeast) 72056.1
+PWY-7357: thiamin formation from pyrithiamine and oxythiamine (yeast)|unclassified 72056.1
+PWY-6151: S-adenosyl-L-methionine cycle I 69841.3
+PWY-6151: S-adenosyl-L-methionine cycle I|unclassified 69841.3
+PWY-6897: thiamin salvage II 32971.5
+PWY-6897: thiamin salvage II|unclassified 32971.5
b
diff -r 000000000000 -r 9959fa526f1a test-data/log_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/log_output.txt Wed Apr 20 08:30:08 2016 -0400
b
@@ -0,0 +1,12 @@
+Similar between all samples: 15
+Specific to samples:
+  sample1 
+    All: 16
+    Number of specific characteristics: 1
+    Percentage of specific characteristics: 6.25
+    Relative abundance of specific characteristics(%): 6984130.0
+  sample2 
+    All: 266
+    Number of specific characteristics: 251
+    Percentage of specific characteristics: 94.3609022556
+    Relative abundance of specific characteristics(%): 186.11689025
b
diff -r 000000000000 -r 9959fa526f1a test-data/more_abundant_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_abundant_output.tabular Wed Apr 20 08:30:08 2016 -0400
b
@@ -0,0 +1,21 @@
+id name sample1 sample2
+PWY-7208 superpathway of pyrimidine nucleobases salvage|unclassified 0 1.75918
+PWY-7208 superpathway of pyrimidine nucleobases salvage 0 2.06908
+PYRIDOXSYN-PWY pyridoxal 5 phosphate biosynthesis I 0 2.45442
+ARGININE-SYN4-PWY L ornithine de novo  biosynthesis 35374100.0 0.809135
+PYRIDOXSYN-PWY pyridoxal 5 phosphate biosynthesis I|unclassified 0 3.41296
+HSERMETANA-PWY L methionine biosynthesis III|unclassified 15008900.0 0.515875
+DTDPRHAMSYN-PWY dTDP L rhamnose biosynthesis I 14253800.0 1.69441
+HSERMETANA-PWY L methionine biosynthesis III 15008900.0 1.21379
+DTDPRHAMSYN-PWY dTDP L rhamnose biosynthesis I|unclassified 14253800.0 2.50835
+PWY-3841 folate transformations II 0 2.38544
+ARGININE-SYN4-PWY L ornithine de novo  biosynthesis|unclassified 35374100.0 1.66731
+1CMET2-PWY N10 formyl tetrahydrofolate biosynthesis 0 2.12689
+1CMET2-PWY N10 formyl tetrahydrofolate biosynthesis|unclassified 0 0.834716
+PWY-3841 folate transformations II|unclassified 0 2.27477
+PWY-1269 CMP 3 deoxy D manno octulosonate biosynthesis I 8260890.0 0.755658
+PWY-1269 CMP 3 deoxy D manno octulosonate biosynthesis I|unclassified 8260890.0 0.644897
+KETOGLUCONMET-PWY ketogluconate metabolism 9615380.0 0.141222
+KETOGLUCONMET-PWY ketogluconate metabolism|unclassified 9615380.0 0.309365
+COA-PWY-1 coenzyme A biosynthesis II  0 1.9437
+PWY-7221 guanosine ribonucleotides de novo biosynthesis 0 1.88123
b
diff -r 000000000000 -r 9959fa526f1a test-data/similar_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/similar_output.tabular Wed Apr 20 08:30:08 2016 -0400
b
@@ -0,0 +1,16 @@
+id name sample1 sample2
+PWY-6897 thiamin salvage II|unclassified 3297150.0 0.909424
+PWY-1269 CMP 3 deoxy D manno octulosonate biosynthesis I 8260890.0 0.755658
+ARGININE-SYN4-PWY L ornithine de novo  biosynthesis 35374100.0 0.809135
+HSERMETANA-PWY L methionine biosynthesis III|unclassified 15008900.0 0.515875
+PWY-6897 thiamin salvage II 3297150.0 0.415143
+PWY-7357 thiamin formation from pyrithiamine and oxythiamine  7205610.0 0.41402
+HSERMETANA-PWY L methionine biosynthesis III 15008900.0 1.21379
+DTDPRHAMSYN-PWY dTDP L rhamnose biosynthesis I|unclassified 14253800.0 2.50835
+PWY-7357 thiamin formation from pyrithiamine and oxythiamine |unclassified 7205610.0 0.906963
+ARGININE-SYN4-PWY L ornithine de novo  biosynthesis|unclassified 35374100.0 1.66731
+PWY-6151 S adenosyl L methionine cycle I 6984130.0 0.977587
+PWY-1269 CMP 3 deoxy D manno octulosonate biosynthesis I|unclassified 8260890.0 0.644897
+KETOGLUCONMET-PWY ketogluconate metabolism 9615380.0 0.141222
+KETOGLUCONMET-PWY ketogluconate metabolism|unclassified 9615380.0 0.309365
+DTDPRHAMSYN-PWY dTDP L rhamnose biosynthesis I 14253800.0 1.69441
b
diff -r 000000000000 -r 9959fa526f1a test-data/specific_to_sample1_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/specific_to_sample1_output.txt Wed Apr 20 08:30:08 2016 -0400
b
@@ -0,0 +1,2 @@
+id name abundances
+PWY-6151 S adenosyl L methionine cycle I|unclassified 6984130.0
b
diff -r 000000000000 -r 9959fa526f1a test-data/specific_to_sample2_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/specific_to_sample2_output.txt Wed Apr 20 08:30:08 2016 -0400
[
b'@@ -0,0 +1,252 @@\n+id\tname\tabundances\n+PWY-6731\tstarch degradation III\t0.0336222\n+GLUDEG-I-PWY\tGABA shunt\t0.0537709\n+PWY-6123\tinosine 5 phosphate biosynthesis I\t0.831965\n+PWY-6769\trhamnogalacturonan type I degradation I |unclassified\t0.139081\n+PWY-6471\tpeptidoglycan biosynthesis IV \t0.0698193\n+PWY-7222\tguanosine deoxyribonucleotides de novo biosynthesis II\t1.48474\n+PPGPPMET-PWY\tppGpp biosynthesis\t0.203621\n+PWY-7234\tinosine 5 phosphate biosynthesis III\t0.045368\n+PWY-5686\tUMP biosynthesis|unclassified\t1.7892\n+PWY-3841\tfolate transformations II\t2.38544\n+P185-PWY\tformaldehyde assimilation III \t0.103843\n+ILEUSYN-PWY\tL isoleucine biosynthesis I \t1.29401\n+PWY-3001\tsuperpathway of L isoleucine biosynthesis I\t0.142024\n+PWY-5754\t4 hydroxybenzoate biosynthesis I |unclassified\t0.229464\n+PWY-6737\tstarch degradation V|unclassified\t0.316335\n+PWY-5104\tL isoleucine biosynthesis IV\t0.831272\n+PWY-7242\tD fructuronate degradation\t1.03935\n+OANTIGEN-PWY\tO antigen building blocks biosynthesis \t0.321981\n+PWY-5667\tCDP diacylglycerol biosynthesis I|unclassified\t0.149856\n+PWY-6606\tguanosine nucleotides degradation II|unclassified\t0.247734\n+PWY66-422\tD galactose degradation V |unclassified\t1.04266\n+PWY66-400\tglycolysis VI \t0.938218\n+PWY66-389\tphytol degradation\t0.548391\n+PWY-5097\tL lysine biosynthesis VI\t1.04893\n+1CMET2-PWY\tN10 formyl tetrahydrofolate biosynthesis|unclassified\t0.834716\n+PWY-6386\tUDP N acetylmuramoyl pentapeptide biosynthesis II \t0.0904748\n+PWY4LZ-257\tsuperpathway of fermentation \t0.147873\n+PWY-6549\tL glutamine biosynthesis III\t0.508899\n+GLCMANNANAUT-PWY\tsuperpathway of N acetylglucosamine, N acetylmannosamine and N acetylneuraminate degradation|unclassified\t0.123299\n+PWY-7111\tpyruvate fermentation to isobutanol |unclassified\t1.73067\n+PEPTIDOGLYCANSYN-PWY\tpeptidoglycan biosynthesis I \t0.850699\n+PWY-7115\tC4 photosynthetic carbon assimilation cycle, NAD ME 
type\t0.187881\n+P241-PWY\tcoenzyme B biosynthesis|unclassified\t0.0707035\n+PWY-6124\tinosine 5 phosphate biosynthesis II|unclassified\t2.22455\n+PWY-5173\tsuperpathway of acetyl CoA biosynthesis|unclassified\t0.0785602\n+PWY-7316\tdTDP N acetylviosamine biosynthesis\t0.542069\n+PWY-5101\tL isoleucine biosynthesis II|unclassified\t0.617859\n+PEPTIDOGLYCANSYN-PWY\tpeptidoglycan biosynthesis I |unclassified\t1.16221\n+PWY-6769\trhamnogalacturonan type I degradation I \t0.0634893\n+GLUCUROCAT-PWY\tsuperpathway of &beta; D glucuronide and D glucuronate degradation\t0.838546\n+PWY-7219\tadenosine ribonucleotides de novo biosynthesis|unclassified\t3.33171\n+PWY4LZ-257\tsuperpathway of fermentation |unclassified\t0.318099\n+ARGSYN-PWY\tL arginine biosynthesis I \t1.16946\n+PWY-5100\tpyruvate fermentation to acetate and lactate II|unclassified\t1.26625\n+ARO-PWY\tchorismate biosynthesis I\t0.39449\n+ARGSYNBSUB-PWY\tL arginine biosynthesis II \t1.02161\n+PWY-6936\tseleno amino acid biosynthesis\t1.2799\n+PWY-6595\tsuperpathway of guanosine nucleotides degradation |unclassified\t0.225538\n+ANAGLYCOLYSIS-PWY\tglycolysis III \t1.03937\n+PWY0-162\tsuperpathway of pyrimidine ribonucleotides de novo biosynthesis|unclassified\t0.295685\n+PWY-841\tsuperpathway of purine nucleotides de novo biosynthesis I\t0.531622\n+PWY-5505\tL glutamate and L glutamine biosynthesis|g__Bacteroides.s__Bacteroides_thetaiotaomicron\t1.44739\n+THRESYN-PWY\tsuperpathway of L threonine biosynthesis|unclassified\t0.188356\n+PWY-5941\tglycogen degradation II \t0.127166\n+PWY-7094\tfatty acid salvage\t0.0597229\n+CITRULBIO-PWY\tL citrulline biosynthesis|unclassified\t0.49408\n+PWY-7221\tguanosine ribonucleotides de novo biosynthesis\t1.88123\n+ARGSYNBSUB-PWY\tL arginine biosynthesis II |unclassified\t1.31054\n+PWY-6630\tsuperpathway of L tyrosine biosynthesis\t0.319673\n+RIBOSYN2-PWY\tflavin biosynthesis I |unclassified\t0.213182\n+PWY-2942\tL lysine biosynthesis III|unclassified\t1.10762\n+PWY-7094\tfatty 
acid salvage|unclassified\t0.125407\n+PWY-1042\tglycolysis IV |unclassified\t1.93529\n+VALSYN-PWY\tL valine biosynthesis\t1.29401\n+PWY-6277\tsuperpathway of 5 aminoimidazole ribonucleotide biosynthesis|unclassified\t0.521078'..b'on\t0.88\n+THISYNARA-PWY\tsuperpathway of thiamin diphosphate biosynthesis III |unclassified\t0.21303\n+PWY-6123\tinosine 5 phosphate biosynthesis I|unclassified\t1.82252\n+PWY-6121\t5 aminoimidazole ribonucleotide biosynthesis I\t0.258418\n+COA-PWY-1\tcoenzyme A biosynthesis II \t1.9437\n+PWY-6936\tseleno amino acid biosynthesis|unclassified\t1.32349\n+PWY-6163\tchorismate biosynthesis from 3 dehydroquinate\t0.410329\n+PWY-5667\tCDP diacylglycerol biosynthesis I\t0.384585\n+PWY-7184\tpyrimidine deoxyribonucleotides de novo biosynthesis I\t0.770993\n+PWY-3841\tfolate transformations II|unclassified\t2.27477\n+CITRULBIO-PWY\tL citrulline biosynthesis\t0.974607\n+RHAMCAT-PWY\tL rhamnose degradation I\t0.539026\n+PWY-724\tsuperpathway of L lysine, L threonine and L methionine biosynthesis II|unclassified\t0.29625\n+GLUTORN-PWY\tL ornithine biosynthesis|unclassified\t0.900908\n+PWY-5265\tpeptidoglycan biosynthesis II |unclassified\t0.209837\n+PWY-5464\tsuperpathway of cytosolic glycolysis , pyruvate dehydrogenase and TCA cycle|unclassified\t0.172005\n+PWY-6606\tguanosine nucleotides degradation II\t0.113088\n+OANTIGEN-PWY\tO antigen building blocks biosynthesis |unclassified\t0.620846\n+PWY-7388\toctanoyl [acyl carrier protein] biosynthesis |unclassified\t0.641891\n+P185-PWY\tformaldehyde assimilation III |unclassified\t0.115303\n+METHGLYUT-PWY\tsuperpathway of methylglyoxal degradation\t0.226728\n+PWY-6737\tstarch degradation V\t0.201137\n+PWY-724\tsuperpathway of L lysine, L threonine and L methionine biosynthesis II\t0.142842\n+NONOXIPENT-PWY\tpentose phosphate pathway |unclassified\t1.8885\n+DAPLYSINESYN-PWY\tL lysine biosynthesis I\t0.35429\n+PWY-6628\tsuperpathway of L phenylalanine biosynthesis\t0.49241\n+THISYNARA-PWY\tsuperpathway of 
thiamin diphosphate biosynthesis III \t0.0973654\n+PWY-6385\tpeptidoglycan biosynthesis III \t0.820786\n+PWY1F-823\tleucopelargonidin and leucocyanidin biosynthesis\t0.0501896\n+ASPASN-PWY\tsuperpathway of L aspartate and L asparagine biosynthesis\t1.09751\n+P105-PWY\tTCA cycle IV |unclassified\t0.340536\n+TRNA-CHARGING-PWY\ttRNA charging|unclassified\t0.526713\n+GALACT-GLUCUROCAT-PWY\tsuperpathway of hexuronide and hexuronate degradation\t0.391566\n+PWY-5173\tsuperpathway of acetyl CoA biosynthesis\t0.0372086\n+FAO-PWY\tfatty acid &beta; oxidation I\t0.0714249\n+P241-PWY\tcoenzyme B biosynthesis\t0.0322755\n+COMPLETE-ARO-PWY\tsuperpathway of aromatic amino acid biosynthesis|unclassified\t0.862669\n+HISDEG-PWY\tL histidine degradation I\t0.519408\n+COMPLETE-ARO-PWY\tsuperpathway of aromatic amino acid biosynthesis\t0.439273\n+DAPLYSINESYN-PWY\tL lysine biosynthesis I|unclassified\t0.314147\n+PWY-5103\tL isoleucine biosynthesis III\t0.831677\n+GLUDEG-I-PWY\tGABA shunt|unclassified\t0.1173\n+GLCMANNANAUT-PWY\tsuperpathway of N acetylglucosamine, N acetylmannosamine and N acetylneuraminate degradation\t0.0651572\n+P441-PWY\tsuperpathway of N acetylneuraminate degradation|unclassified\t0.204428\n+PWY-5686\tUMP biosynthesis\t0.980672\n+PWY-6595\tsuperpathway of guanosine nucleotides degradation \t0.102956\n+PWY66-422\tD galactose degradation V \t1.75745\n+PWY-5505\tL glutamate and L glutamine biosynthesis|unclassified\t1.96045\n+PWY-6305\tputrescine biosynthesis IV\t0.333385\n+HISDEG-PWY\tL histidine degradation I|unclassified\t0.770332\n+PWY-5659\tGDP mannose biosynthesis|unclassified\t0.106616\n+FAO-PWY\tfatty acid &beta; oxidation I|unclassified\t0.148771\n+METHGLYUT-PWY\tsuperpathway of methylglyoxal degradation|unclassified\t0.496676\n+DENOVOPURINE2-PWY\tsuperpathway of purine nucleotides de novo biosynthesis II\t0.14462\n+PWY-5857\tubiquinol 10 biosynthesis \t0.15637\n+PWY-5101\tL isoleucine biosynthesis II\t0.492461\n+RIBOSYN2-PWY\tflavin biosynthesis I 
\t0.11474\n+SO4ASSIM-PWY\tsulfate reduction I |unclassified\t0.015138\n+BRANCHED-CHAIN-AA-SYN-PWY\tsuperpathway of branched amino acid biosynthesis\t1.02856\n+PWY-5265\tpeptidoglycan biosynthesis II \t0.106882\n+ASPASN-PWY\tsuperpathway of L aspartate and L asparagine biosynthesis|unclassified\t1.92205\n+PWY-7234\tinosine 5 phosphate biosynthesis III|unclassified\t0.0993843\n+GLYCOLYSIS\tglycolysis I |unclassified\t1.37456\n'