Previous changeset 2:fdfb35745104 (2022-10-19) Next changeset 4:662a334004b4 (2023-11-04) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/combine_metaphlan2_humann2 commit b84cbcbffa25c55acd1a31df76e4a4f78772cbd7 |
modified:
test-data/humann2_gene_families_input.tabular |
added:
combine_metaphlan_humann.py combine_metaphlan_humann.xml test-data/humann36_gene_families_input.tabular test-data/humann36_pathways_input.tabular test-data/metaphlan4_input.txt |
removed:
combine_metaphlan2_humann2.py combine_metaphlan2_humann2.xml |
b |
diff -r fdfb35745104 -r 01ac9954c27f combine_metaphlan2_humann2.py --- a/combine_metaphlan2_humann2.py Wed Oct 19 14:44:00 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,107 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse - - -def extract_clade_abundance(metaphlan2_fp): - clade_abund = {} - with open(metaphlan2_fp, "r") as metaphlan2_f: - for line in metaphlan2_f.readlines(): - if line.find("g__") == -1: - continue - - split_line = line[:-1].split("\t") - taxo = split_line[0] - abundance = split_line[1] - - genus = taxo[(taxo.find("g__") + 3):] - if genus.find("|") != -1: - genus = genus[: (genus.find("|"))] - clade_abund.setdefault(genus, {"abundance": 0, "species": {}}) - if taxo.find("t__") != -1: - continue - elif taxo.find("s__") != -1: - species = taxo[(taxo.find("s__") + 3):] - clade_abund[genus]["species"].setdefault(species, abundance) - else: - clade_abund[genus]["abundance"] = abundance - return clade_abund - - -def compute_overall_abundance(humann2_fp): - overall_abundance = 0 - with open(humann2_fp, "r") as humann2_f: - for line in humann2_f.readlines(): - if line.find("|") != -1 or line.startswith("#"): - continue - split_line = line[:-1].split("\t") - overall_abundance += float(split_line[1]) - return overall_abundance - - -def format_characteristic_name(name): - formatted_n = name - formatted_n = formatted_n.replace("/", " ") - formatted_n = formatted_n.replace("-", " ") - formatted_n = formatted_n.replace("'", "") - if formatted_n.find("(") != -1 and formatted_n.find(")") != -1: - open_bracket = formatted_n.find("(") - close_bracket = formatted_n.find(")") + 1 - formatted_n = formatted_n[:open_bracket] + formatted_n[close_bracket:] - return formatted_n - - -def combine_metaphlan2_humann2(args): - clade_abund = extract_clade_abundance(args.metaphlan2_fp) - overall_abund = compute_overall_abundance(args.humann2_fp) - - with open(args.output_fp, "w") as output_f: - s = "genus\tgenus_abundance\tspecies\tspecies_abundance\t" - s = "%s\t%s_id\t%s_name\t%s_abundance\n" % (s, args.type, args.type, args.type) - output_f.write(s) - with open(args.humann2_fp, "r") as humann2_f: - for line in 
humann2_f.readlines(): - if line.find("|") == -1: - continue - - split_line = line[:-1].split("\t") - abundance = 100 * float(split_line[1]) / overall_abund - annotation = split_line[0].split("|") - charact = annotation[0].split(":") - charact_id = charact[0] - char_name = "" - if len(charact) > 1: - char_name = format_characteristic_name(charact[-1]) - taxo = annotation[1].split(".") - - if taxo[0] == "unclassified": - continue - genus = taxo[0][3:] - species = taxo[1][3:] - - if genus not in clade_abund: - print("no %s found in %s" % (genus, args.metaphlan2_fp)) - continue - if species not in clade_abund[genus]["species"]: - print( - "no %s found in %s for % s" - % (species, args.metaphlan2_fp, genus) - ) - continue - - s = "%s\t%s\t" % (genus, clade_abund[genus]["abundance"]) - s += "%s\t%s\t" % (species, clade_abund[genus]["species"][species]) - s += "%s\t%s\t%s\n" % (charact_id, char_name, abundance) - output_f.write(s) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--humann2_fp", required=True) - parser.add_argument("--metaphlan2_fp", required=True) - parser.add_argument("--output_fp", required=True) - parser.add_argument("--type", required=True, choices=["gene_families", "pathways"]) - args = parser.parse_args() - - combine_metaphlan2_humann2(args) |
b |
diff -r fdfb35745104 -r 01ac9954c27f combine_metaphlan2_humann2.xml --- a/combine_metaphlan2_humann2.xml Wed Oct 19 14:44:00 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,81 +0,0 @@ -<tool id="combine_metaphlan2_humann2" name="Combine MetaPhlAn2 and HUMAnN2 outputs" version="0.2.0"> - <description>to relate genus/species abundances and gene families/pathways abundances</description> - <requirements> - </requirements> - <stdio> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> - <version_command></version_command> - <command><![CDATA[ - python $__tool_directory__/combine_metaphlan2_humann2.py - --metaphlan2_fp '$metaphlan2_file' - --humann2_fp '$humann2_file' - --type '$type' - - #if str($type) == 'gene_families' - --output_fp '$gene_families_output_file' - #else - --output_fp '$pathway_output_file' - #end if - ]]></command> - <inputs> - <param name="metaphlan2_file" format="txt,tabular" type="data" label="Input file corresponding to MetaPhlAn2 output" help="The MetaPhlAn2 output file contains relative abundance of clades at different taxonomic levels (--metaphlan2_file)"/> - <param name="humann2_file" format="txt,tabular" type="data" label="Input file corresponding to HUMAnN2 output" help="The HUMAnN2 output file contains relative abundance of gene families or pathways with corresponding taxonomic stratification (--humann2_file)"/> - <param name='type' type="select" label="Type of characteristics in HUMAnN2 file" help="(--type)"> - <option value="gene_families" selected="true">Gene families</option> - <option value="pathways">Pathways</option> - </param> - </inputs> - <outputs> - <data name="gene_families_output_file" format="tabular" - label="${tool.name} on ${on_string}: Gene family abundances related to genus/species abundances" > - <filter>type=="gene_families"</filter> - </data> - <data name="pathway_output_file" format="tabular" - label="${tool.name} on ${on_string}: Pathway abundances related to genus/species abundances" > - <filter>type=="pathways"</filter> - </data> - </outputs> - <tests> - <test> - <param name="metaphlan2_file" value="metaphlan2_input.txt"/> - <param name="humann2_file" 
value="humann2_gene_families_input.tabular"/> - <param name='type' value="gene_families"/> - <output name="gene_families_output_file"> - <assert_contents> - <has_n_columns n="8"/> - <has_n_lines n="29434"/> - <has_text text="Staphylococcus_epidermidis"/> - <has_text text="Putative transposon Tn552 DNA invertase bin3"/> - <has_size value="3467947"/> - </assert_contents> - </output> - </test> - <test> - <param name="metaphlan2_file" value="metaphlan2_input.txt"/> - <param name="humann2_file" value="humann2_pathways_input.tabular"/> - <param name='type' value="pathways"/> - <output name="pathway_output_file"> - <assert_contents> - <has_n_columns n="8"/> - <has_n_lines n="1533"/> - <has_text text="Rhodobacter_sphaeroides"/> - <has_text text="superpathway of acetyl CoA biosynthesis"/> - <has_size value="186363"/> - </assert_contents> - </output> - </test> - </tests> - <help><![CDATA[ -**What it does** - -This tool combine MetaPhlAn2 outputs and HUMANnN2 outputs. - -For each gene families/pathways and the corresponding taxonomic stratification, -you get relative abundance of this gene family/pathway and the relative abundance -of corresponding species and genus. - ]]></help> - <citations> - </citations> -</tool> \ No newline at end of file |
b |
diff -r fdfb35745104 -r 01ac9954c27f combine_metaphlan_humann.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/combine_metaphlan_humann.py Thu Jul 20 10:07:12 2023 +0000 |
[ |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Relate MetaPhlAn genus/species abundances to HUMAnN stratified abundances.

For every taxonomically stratified HUMAnN row (``feature|g__X.s__Y``) one
output row is written that carries the genus and species abundances read
from the MetaPhlAn table next to the feature's relative abundance.
"""

import argparse


def _find_abundance_column(header_line):
    """Return the index of ``relative_abundance`` in a header line, or None.

    The leading ``#`` of the header is ignored, so the index refers to the
    columns of the data rows that follow.
    """
    fields = header_line.rstrip("\r\n").lstrip("#").split("\t")
    try:
        return fields.index("relative_abundance")
    except ValueError:
        return None


def extract_clade_abundance(metaphlan_fp):
    """Parse a MetaPhlAn table into per-genus and per-species abundances.

    Parameters:
        metaphlan_fp: path to the tab-separated MetaPhlAn output.

    Returns:
        dict mapping genus name to
        ``{"abundance": <str or 0>, "species": {species_name: <str>}}``.
        Abundances are kept as the strings found in the file; a genus only
        seen through its species keeps the default ``0``.
    """
    clade_abund = {}
    abundance_col = None
    is_metaphlan_v4 = False
    with open(metaphlan_fp, "r") as metaphlan_f:
        for line in metaphlan_f:
            if "SGB" in line:
                # New versions of metaphlan against a recent DB contain a
                # header line with the DB name, which contains SGB
                is_metaphlan_v4 = True
            if line.startswith("#"):
                # Prefer the explicit column header when the file has one:
                # it is reliable whatever the database name looks like.
                if abundance_col is None:
                    abundance_col = _find_abundance_column(line)
                continue

            # rstrip instead of line[:-1]: the last line may lack a newline
            # and must not lose the final character of its value.
            split_line = line.rstrip("\r\n").split("\t")
            taxo = split_line[0]
            if "g__" not in taxo:
                continue

            if abundance_col is not None:
                abundance = split_line[abundance_col]
            elif is_metaphlan_v4:
                # Column order in new metaphlan versions:
                # clade_name NCBI_tax_id relative_abundance additional_species
                abundance = split_line[2]
            else:
                # Column order in the old metaphlan versions:
                # clade_name relative_abundance coverage
                # average_genome_length_in_the_clade
                # estimated_number_of_reads_from_the_clade
                abundance = split_line[1]

            genus = taxo[(taxo.find("g__") + 3):].split("|", 1)[0]
            clade_abund.setdefault(genus, {"abundance": 0, "species": {}})
            if "t__" in taxo:
                # Strain-level rows are not reported.
                continue
            if "s__" in taxo:
                species = taxo[(taxo.find("s__") + 3):]
                clade_abund[genus]["species"].setdefault(species, abundance)
            else:
                clade_abund[genus]["abundance"] = abundance
    return clade_abund


def compute_overall_abundance(humann_fp):
    """Sum the abundance of every unstratified row of a HUMAnN table.

    Rows containing ``|`` (taxonomic stratification), comment rows starting
    with ``#`` and blank rows are ignored.

    Parameters:
        humann_fp: path to the tab-separated HUMAnN output.

    Returns:
        The sum of the second column over the remaining rows (``0`` when
        there are none).
    """
    overall_abundance = 0
    with open(humann_fp, "r") as humann_f:
        for line in humann_f:
            if "|" in line or line.startswith("#") or not line.strip():
                continue
            split_line = line.rstrip("\r\n").split("\t")
            overall_abundance += float(split_line[1])
    return overall_abundance


def format_characteristic_name(name):
    """Simplify a gene family / pathway name for the output table.

    ``/`` and ``-`` become spaces, apostrophes are dropped and the first
    bracketed group ``(...)`` is removed.
    """
    formatted_n = name.replace("/", " ").replace("-", " ").replace("'", "")
    open_bracket = formatted_n.find("(")
    if open_bracket != -1:
        # Look for the closing bracket *after* the opening one so that a
        # stray ")" earlier in the name cannot garble the result.
        close_bracket = formatted_n.find(")", open_bracket)
        if close_bracket != -1:
            formatted_n = (
                formatted_n[:open_bracket] + formatted_n[close_bracket + 1:]
            )
    return formatted_n


def combine_metaphlan_humann(args):
    """Write the combined MetaPhlAn / HUMAnN table.

    Parameters:
        args: namespace with ``metaphlan_fp``, ``humann_fp``, ``output_fp``
            (file paths) and ``type`` (used to name the last three columns).

    Stratified rows whose genus or species is absent from the MetaPhlAn
    table are reported on stdout and skipped; ``unclassified`` rows are
    skipped silently.

    Raises:
        ZeroDivisionError: if a stratified row has to be written while the
            sum of the unstratified abundances is zero.
    """
    clade_abund = extract_clade_abundance(args.metaphlan_fp)
    overall_abund = compute_overall_abundance(args.humann_fp)

    with open(args.output_fp, "w") as output_f:
        # NOTE: the prefix already ends with a tab, so the header carries an
        # empty fifth field (8 header fields for 7 data fields). Kept as-is
        # because the tool's tests assert 8 columns on the output.
        s = "genus\tgenus_abundance\tspecies\tspecies_abundance\t"
        s = "%s\t%s_id\t%s_name\t%s_abundance\n" % (
            s, args.type, args.type, args.type
        )
        output_f.write(s)
        with open(args.humann_fp, "r") as humann_f:
            for line in humann_f:
                if "|" not in line:
                    continue

                split_line = line.rstrip("\r\n").split("\t")
                abundance = 100 * float(split_line[1]) / overall_abund
                annotation = split_line[0].split("|")
                charact = annotation[0].split(":")
                charact_id = charact[0]
                char_name = ""
                if len(charact) > 1:
                    char_name = format_characteristic_name(charact[-1])
                taxo = annotation[1].split(".")

                if taxo[0] == "unclassified":
                    continue
                # Drop the "g__" / "s__" prefixes.
                genus = taxo[0][3:]
                species = taxo[1][3:]

                if genus not in clade_abund:
                    print("no %s found in %s" % (genus, args.metaphlan_fp))
                    continue
                if species not in clade_abund[genus]["species"]:
                    print(
                        "No %s found in %s for %s"
                        % (species, args.metaphlan_fp, genus)
                    )
                    continue

                s = "%s\t%s\t" % (genus, clade_abund[genus]["abundance"])
                s += "%s\t%s\t" % (
                    species, clade_abund[genus]["species"][species]
                )
                s += "%s\t%s\t%s\n" % (charact_id, char_name, abundance)
                output_f.write(s)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--humann_fp", required=True)
    parser.add_argument("--metaphlan_fp", required=True)
    parser.add_argument("--output_fp", required=True)
    parser.add_argument(
        "--type", required=True, choices=["gene_families", "pathways"]
    )
    args = parser.parse_args()

    combine_metaphlan_humann(args)
b |
diff -r fdfb35745104 -r 01ac9954c27f combine_metaphlan_humann.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/combine_metaphlan_humann.xml Thu Jul 20 10:07:12 2023 +0000 |
[ |
@@ -0,0 +1,109 @@ +<tool id="combine_metaphlan_humann" name="Combine MetaPhlAn and HUMAnN outputs" version="0.3.0" profile="22.01"> + <description>to relate genus/species abundances and gene families/pathways abundances</description> + <requirements> + <requirement type="package" version="3.10">python</requirement> + </requirements> + <version_command></version_command> + <command detect_errors="aggressive"><![CDATA[ + python '$__tool_directory__/combine_metaphlan_humann.py' + --metaphlan_fp '$metaphlan_file' + --humann_fp '$humann_file' + --type '$type' + + #if str($type) == 'gene_families' + --output_fp '$gene_families_output_file' + #else + --output_fp '$pathway_output_file' + #end if + ]]></command> + <inputs> + <param argument="--metaphlan_file" format="txt,tabular" type="data" label="Input file corresponding to MetaPhlAn output" help="The MetaPhlAn output file contains relative abundance of clades at different taxonomic levels"/> + <param argument="--humann_file" format="txt,tabular" type="data" label="Input file corresponding to HUMAnN output" help="The HUMAnN output file contains relative abundance of gene families or pathways with corresponding taxonomic stratification"/> + <param argument='--type' type="select" label="Type of characteristics in HUMAnN file"> + <option value="gene_families" selected="true">Gene families</option> + <option value="pathways">Pathways</option> + </param> + </inputs> + <outputs> + <data name="gene_families_output_file" format="tabular" + label="${tool.name} on ${on_string}: Gene family abundances related to genus/species abundances" > + <filter>type=="gene_families"</filter> + </data> + <data name="pathway_output_file" format="tabular" + label="${tool.name} on ${on_string}: Pathway abundances related to genus/species abundances" > + <filter>type=="pathways"</filter> + </data> + </outputs> + <tests> + <!-- Using the output of old (i.e. 
v2) Humann and metaphlan --> + <test expect_num_outputs="1"> + <param name="metaphlan_file" value="metaphlan2_input.txt"/> + <param name="humann_file" value="humann2_gene_families_input.tabular"/> + <param name='type' value="gene_families"/> + <output name="gene_families_output_file"> + <assert_contents> + <has_n_columns n="8"/> + <has_n_lines n="3043"/> + <has_text text="Staphylococcus_epidermidis"/> + <has_text text="Putative transposon Tn552 DNA invertase bin3"/> + <has_size value="374787"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="metaphlan_file" value="metaphlan2_input.txt"/> + <param name="humann_file" value="humann2_pathways_input.tabular"/> + <param name='type' value="pathways"/> + <output name="pathway_output_file"> + <assert_contents> + <has_n_columns n="8"/> + <has_n_lines n="1533"/> + <has_text text="Rhodobacter_sphaeroides"/> + <has_text text="superpathway of acetyl CoA biosynthesis"/> + <has_size value="186363"/> + </assert_contents> + </output> + </test> + + <!-- Using the output of v3 Humann and v4 metaphlan --> + <test expect_num_outputs="1"> + <param name="metaphlan_file" value="metaphlan4_input.txt"/> + <param name="humann_file" value="humann36_gene_families_input.tabular"/> + <param name='type' value="gene_families"/> + <output name="gene_families_output_file"> + <assert_contents> + <has_n_columns n="8"/> + <has_n_lines n="2242"/> + <has_text text="Acetivibrio_thermocellus"/> + <has_size value="213828"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <param name="metaphlan_file" value="metaphlan4_input.txt"/> + <param name="humann_file" value="humann36_pathways_input.tabular"/> + <param name='type' value="pathways"/> + <output name="pathway_output_file"> + <assert_contents> + <has_n_columns n="8"/> + <has_n_lines n="49"/> + <has_text text="Acetivibrio_thermocellus"/> + <has_text text="preQ0 biosynthesis"/> + <has_size value="6102"/> + </assert_contents> + </output> + 
</test> + </tests> + <help><![CDATA[ +**What it does** + +This tool combines MetaPhlAn outputs and HUMAnN outputs. + +For each gene family/pathway and the corresponding taxonomic stratification, +you get the relative abundance of this gene family/pathway and the relative abundance +of the corresponding species and genus. + ]]></help> + <citations> + <citation type="doi">10.7554/eLife.65088</citation> + </citations> +</tool> |
b |
diff -r fdfb35745104 -r 01ac9954c27f test-data/humann2_gene_families_input.tabular --- a/test-data/humann2_gene_families_input.tabular Wed Oct 19 14:44:00 2022 +0000 +++ b/test-data/humann2_gene_families_input.tabular Thu Jul 20 10:07:12 2023 +0000 |
b |
b'@@ -4998,199100 +4998,3 @@\n UniRef50_Q9KX08: Putative ribosome biogenesis GTPase RsgA|g__Staphylococcus.s__Staphylococcus_aureus\t68.5175802082\n UniRef50_B8IA33: Fumarylacetoacetate (FAA) hydrolase\t141.3231115250\n UniRef50_B8IA33: Fumarylacetoacetate (FAA) hydrolase|g__Rhodobacter.s__Rhodobacter_sphaeroides\t139.6536624432\n-UniRef50_B8IA33: Fumarylacetoacetate (FAA) hydrolase|g__Pseudomonas.s__Pseudomonas_aeruginosa\t1.6694490818\n-UniRef50_Q5HKL3: RarD protein\t141.2937913397\n-UniRef50_Q5HKL3: RarD protein|g__Staphylococcus.s__Staphylococcus_aureus\t75.4443334388\n-UniRef50_Q5HKL3: RarD protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t65.8494579009\n-UniRef50_P98056: Cytochrome c oxidase subunit 1 homolog\t141.2789852789\n-UniRef50_P98056: Cytochrome c oxidase subunit 1 homolog|g__Rhodobacter.s__Rhodobacter_sphaeroides\t141.2789852789\n-UniRef50_J9YR02: BioY family protein\t141.2515684687\n-UniRef50_J9YR02: BioY family protein|g__Streptococcus.s__Streptococcus_mutans\t141.2515684687\n-UniRef50_D3QDA3: Peptidase, M16 family\t141.2203427703\n-UniRef50_D3QDA3: Peptidase, M16 family|g__Staphylococcus.s__Staphylococcus_aureus\t74.7770938901\n-UniRef50_D3QDA3: Peptidase, M16 family|g__Staphylococcus.s__Staphylococcus_epidermidis\t66.4432488801\n-UniRef50_A6UJ92\t141.2154495421\n-UniRef50_A6UJ92|g__Rhodobacter.s__Rhodobacter_sphaeroides\t141.2154495421\n-UniRef50_Q59928: Acetylornithine aminotransferase\t141.1553501126\n-UniRef50_Q59928: Acetylornithine aminotransferase|g__Staphylococcus.s__Staphylococcus_epidermidis\t91.6696965661\n-UniRef50_Q59928: Acetylornithine aminotransferase|g__Streptococcus.s__Streptococcus_mutans\t49.4856535465\n-UniRef50_Q6GH28: Putative oligopeptide transport ATP-binding protein oppF2\t141.1374136621\n-UniRef50_Q6GH28: Putative oligopeptide transport ATP-binding protein oppF2|g__Staphylococcus.s__Staphylococcus_aureus\t76.9039310117\n-UniRef50_Q6GH28: Putative oligopeptide transport ATP-binding protein 
oppF2|g__Staphylococcus.s__Staphylococcus_epidermidis\t64.2334826504\n-UniRef50_Q18GT4: Ketol-acid reductoisomerase\t141.1077114723\n-UniRef50_Q18GT4: Ketol-acid reductoisomerase|g__Rhodobacter.s__Rhodobacter_sphaeroides\t134.2898797519\n-UniRef50_Q18GT4: Ketol-acid reductoisomerase|g__Acinetobacter.s__Acinetobacter_baumannii\t6.7608317134\n-UniRef50_Q18GT4: Ketol-acid reductoisomerase|unclassified\t0.0570000070\n-UniRef50_Q38YN7: Putative Holliday junction resolvase\t140.8793311776\n-UniRef50_Q38YN7: Putative Holliday junction resolvase|g__Staphylococcus.s__Staphylococcus_aureus\t140.8793311776\n-UniRef50_G7ZR43\t140.8561513628\n-UniRef50_G7ZR43|g__Staphylococcus.s__Staphylococcus_aureus\t133.2227176893\n-UniRef50_G7ZR43|g__Staphylococcus.s__Staphylococcus_epidermidis\t7.6334336734\n-UniRef50_Q2YXZ9: Probable CtpA-like serine protease\t140.8504434391\n-UniRef50_Q2YXZ9: Probable CtpA-like serine protease|g__Staphylococcus.s__Staphylococcus_epidermidis\t88.2267049391\n-UniRef50_Q2YXZ9: Probable CtpA-like serine protease|g__Staphylococcus.s__Staphylococcus_aureus\t52.6237385000\n-UniRef50_H8LDC1: Helix-turn-helix domain protein\t140.8006300981\n-UniRef50_H8LDC1: Helix-turn-helix domain protein|g__Staphylococcus.s__Staphylococcus_epidermidis\t103.1895152246\n-UniRef50_H8LDC1: Helix-turn-helix domain protein|g__Staphylococcus.s__Staphylococcus_aureus\t37.6111148735\n-UniRef50_Q4L675: Anthranilate synthase component II\t140.7381662526\n-UniRef50_Q4L675: Anthranilate synthase component II|g__Staphylococcus.s__Staphylococcus_epidermidis\t88.4762794479\n-UniRef50_Q4L675: Anthranilate synthase component II|g__Staphylococcus.s__Staphylococcus_aureus\t52.2618868047\n-UniRef50_Q3IV03\t140.6498028101\n-UniRef50_Q3IV03|g__Rhodobacter.s__Rhodobacter_sphaeroides\t133.9618423122\n-UniRef50_Q3IV03|unclassified\t6.6879604980\n-UniRef50_F4LW30: Galactitol-specific enzyme IIC component of PTS\t140.6201005299\n-UniRef50_F4LW30: Galactitol-specific enzyme IIC component of 
PTS|g__Staphylococcus.s__Staphylococcus_aureus\t140.6201005299\n-UniRef50_K0LNC6: Na+transporting ATP synthase\t140.6088040788\n-UniRef'..b'444DC: hypothetical protein\t0.0017973217\n-UniRef50_UPI00037444DC: hypothetical protein|unclassified\t0.0017973217\n-UniRef50_UPI000378A614: hypothetical protein\t0.0017691421\n-UniRef50_UPI000378A614: hypothetical protein|unclassified\t0.0017691421\n-UniRef50_R4LEH4: Yd repeat-containing protein\t0.0017472130\n-UniRef50_R4LEH4: Yd repeat-containing protein|unclassified\t0.0017472130\n-UniRef50_S4YMU8: Filamentous hemagglutinin\t0.0017432148\n-UniRef50_S4YMU8: Filamentous hemagglutinin|unclassified\t0.0017432148\n-UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40\t0.0016819660\n-UniRef50_UPI000443E2D6: PREDICTED: tetratricopeptide repeat protein 40|unclassified\t0.0016819660\n-UniRef50_UPI00036DCFC8: hypothetical protein\t0.0016327044\n-UniRef50_UPI00036DCFC8: hypothetical protein|unclassified\t0.0016327044\n-UniRef50_UPI0001BF6B99: 90S preribosome component RRP12\t0.0016313392\n-UniRef50_UPI0001BF6B99: 90S preribosome component RRP12|unclassified\t0.0016313392\n-UniRef50_UPI000365699C: hypothetical protein\t0.0016224188\n-UniRef50_UPI000365699C: hypothetical protein|unclassified\t0.0016224188\n-UniRef50_UPI000344F009: hypothetical protein\t0.0015763429\n-UniRef50_UPI000344F009: hypothetical protein|unclassified\t0.0015763429\n-UniRef50_U6M5E8\t0.0015640805\n-UniRef50_U6M5E8|unclassified\t0.0015640805\n-UniRef50_UPI000349BE1A: hypothetical protein\t0.0015464606\n-UniRef50_UPI000349BE1A: hypothetical protein|unclassified\t0.0015464606\n-UniRef50_UPI0003773ED0: hypothetical protein\t0.0014975465\n-UniRef50_UPI0003773ED0: hypothetical protein|unclassified\t0.0014975465\n-UniRef50_R0ISA3\t0.0014873041\n-UniRef50_R0ISA3|unclassified\t0.0014873041\n-UniRef50_F4GI46\t0.0014628012\n-UniRef50_F4GI46|unclassified\t0.0014628012\n-UniRef50_UPI0002D336FD: hypothetical protein\t0.0014421264\n-UniRef50_UPI0002D336FD: 
hypothetical protein|unclassified\t0.0014421264\n-UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein\t0.0014134994\n-UniRef50_C0N8P3: Type I secretion target GGXGXDXXX repeat protein domain protein|unclassified\t0.0014134994\n-UniRef50_W7A2A5\t0.0013833667\n-UniRef50_W7A2A5|unclassified\t0.0013833667\n-UniRef50_D2QX58: Peptidase domain protein\t0.0013763922\n-UniRef50_D2QX58: Peptidase domain protein|unclassified\t0.0013763922\n-UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C\t0.0013693947\n-UniRef50_UPI0004446091: PREDICTED: LOW QUALITY PROTEIN: histone-lysine N-methyltransferase 2C|unclassified\t0.0013693947\n-UniRef50_UPI0003644B17: hypothetical protein, partial\t0.0013488169\n-UniRef50_UPI0003644B17: hypothetical protein, partial|unclassified\t0.0013488169\n-UniRef50_A0A011N6I9\t0.0013414891\n-UniRef50_A0A011N6I9|unclassified\t0.0013414891\n-UniRef50_A0A058ZAA0\t0.0012939818\n-UniRef50_A0A058ZAA0|unclassified\t0.0012939818\n-UniRef50_UPI000468C770: hypothetical protein\t0.0012465542\n-UniRef50_UPI000468C770: hypothetical protein|unclassified\t0.0012465542\n-UniRef50_UPI00036FCEE3: hypothetical protein\t0.0012113651\n-UniRef50_UPI00036FCEE3: hypothetical protein|unclassified\t0.0012113651\n-UniRef50_UPI00035C33AC: hypothetical protein\t0.0010786375\n-UniRef50_UPI00035C33AC: hypothetical protein|unclassified\t0.0010786375\n-UniRef50_N1Q3A9\t0.0009733696\n-UniRef50_N1Q3A9|unclassified\t0.0009733696\n-UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous hemagglutinin/Adhesin\t0.0008849280\n-UniRef50_A0A031GKF7: Polymorphic membrane protein, Filamentous hemagglutinin/Adhesin|unclassified\t0.0008849280\n-UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, scaffold_16\t0.0008849116\n-UniRef50_D8M1X0: Singapore isolate B (sub-type 7) whole genome shotgun sequence assembly, 
scaffold_16|unclassified\t0.0008849116\n-UniRef50_A8LV91\t0.0008555354\n-UniRef50_A8LV91|unclassified\t0.0008555354\n-UniRef50_D3E2A1: Adhesin-like protein\t0.0007987026\n-UniRef50_D3E2A1: Adhesin-like protein|unclassified\t0.0007987026\n-UniRef50_U6MJL1\t0.0007379580\n-UniRef50_U6MJL1|unclassified\t0.0007379580\n\\ No newline at end of file\n' |
b |
diff -r fdfb35745104 -r 01ac9954c27f test-data/humann36_gene_families_input.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/humann36_gene_families_input.tabular Thu Jul 20 10:07:12 2023 +0000 |
b |
b'@@ -0,0 +1,6393 @@\n+# Gene Family\thumann_Abundance-RPKs\n+UNMAPPED\t94157.0000000000\n+UniRef90_A3DCI4\t42213.2758828385\n+UniRef90_A3DCI4|g__Acetivibrio.s__Acetivibrio_thermocellus\t42205.5707425926\n+UniRef90_A3DCI4|unclassified\t7.7051402458\n+UniRef90_A3DCB9\t39287.6314397701\n+UniRef90_A3DCB9|g__Acetivibrio.s__Acetivibrio_thermocellus\t39273.2031556915\n+UniRef90_A3DCB9|unclassified\t14.4282840786\n+UniRef90_A3DC67\t33187.2752874343\n+UniRef90_A3DC67|g__Acetivibrio.s__Acetivibrio_thermocellus\t33173.4939517261\n+UniRef90_A3DC67|unclassified\t13.7813357082\n+UniRef90_A3DBR3\t23099.5863265035\n+UniRef90_A3DBR3|g__Acetivibrio.s__Acetivibrio_thermocellus\t23099.5863265035\n+UniRef90_A3DI60\t19908.5320286869\n+UniRef90_A3DI60|g__Acetivibrio.s__Acetivibrio_thermocellus\t19908.5320286869\n+UniRef90_G2JC59\t17973.0999765074\n+UniRef90_G2JC59|g__Acetivibrio.s__Acetivibrio_thermocellus\t17967.4502589933\n+UniRef90_G2JC59|unclassified\t5.6497175141\n+UniRef90_A3DEF8\t8784.6701267235\n+UniRef90_A3DEF8|g__Acetivibrio.s__Acetivibrio_thermocellus\t8784.6701267235\n+UniRef90_B5Y8J9\t6444.1520775882\n+UniRef90_B5Y8J9|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t6444.1520775882\n+UniRef90_G2JC80\t6352.4725274725\n+UniRef90_G2JC80|g__Acetivibrio.s__Acetivibrio_thermocellus\t6333.3333333333\n+UniRef90_G2JC80|unclassified\t19.1391941392\n+UniRef90_A3DHD7\t6300.0000000000\n+UniRef90_A3DHD7|g__Acetivibrio.s__Acetivibrio_thermocellus\t6300.0000000000\n+UniRef90_UPI00003C9096\t5440.2852049911\n+UniRef90_UPI00003C9096|g__Acetivibrio.s__Acetivibrio_thermocellus\t5440.2852049911\n+UniRef90_A3DBD1\t5228.5275293678\n+UniRef90_A3DBD1|g__Acetivibrio.s__Acetivibrio_thermocellus\t5228.5275293678\n+UniRef90_B5Y7J1\t4886.8772665304\n+UniRef90_B5Y7J1|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t4877.5749409490\n+UniRef90_B5Y7J1|unclassified\t9.3023255814\n+UniRef90_A3DK33\t4847.6911046572\n+UniRef90_A3DK33|g__Acetivibrio.s__Acetivibrio_thermocellus\t4847.6911046572\n+Un
iRef90_A3DI53\t4497.2048074566\n+UniRef90_A3DI53|g__Acetivibrio.s__Acetivibrio_thermocellus\t4497.2048074566\n+UniRef90_A3DDB7\t4259.5094204382\n+UniRef90_A3DDB7|g__Acetivibrio.s__Acetivibrio_thermocellus\t4259.5094204382\n+UniRef90_A3DIZ6\t4066.5364815483\n+UniRef90_A3DIZ6|g__Acetivibrio.s__Acetivibrio_thermocellus\t4066.5364815483\n+UniRef90_A3DF72\t3886.8805031285\n+UniRef90_A3DF72|g__Acetivibrio.s__Acetivibrio_thermocellus\t3886.8805031285\n+UniRef90_A3DEF4\t3725.9575790837\n+UniRef90_A3DEF4|g__Acetivibrio.s__Acetivibrio_thermocellus\t3725.9575790837\n+UniRef90_A3DJT8\t3675.2912070884\n+UniRef90_A3DJT8|g__Acetivibrio.s__Acetivibrio_thermocellus\t3675.2912070884\n+UniRef90_A3DEF7\t3565.1931988761\n+UniRef90_A3DEF7|g__Acetivibrio.s__Acetivibrio_thermocellus\t3550.6874875446\n+UniRef90_A3DEF7|unclassified\t14.5057113315\n+UniRef90_W5SYI9\t3113.2956007559\n+UniRef90_W5SYI9|unclassified\t3113.2956007559\n+UniRef90_Q8GEF9\t2946.4796211538\n+UniRef90_Q8GEF9|unclassified\t2946.4796211538\n+UniRef90_A3DHZ3\t2882.3529411765\n+UniRef90_A3DHZ3|g__Acetivibrio.s__Acetivibrio_thermocellus\t2882.3529411765\n+UniRef90_A3DC76\t2725.8973504340\n+UniRef90_A3DC76|g__Acetivibrio.s__Acetivibrio_thermocellus\t2725.8973504340\n+UniRef90_A3DE79\t2702.1266968326\n+UniRef90_A3DE79|g__Acetivibrio.s__Acetivibrio_thermocellus\t2702.1266968326\n+UniRef90_A3DBK1\t2694.5061274276\n+UniRef90_A3DBK1|g__Acetivibrio.s__Acetivibrio_thermocellus\t2694.5061274276\n+UniRef90_A3DF83\t2692.7087960061\n+UniRef90_A3DF83|g__Acetivibrio.s__Acetivibrio_thermocellus\t2692.7087960061\n+UniRef90_B5Y935\t2440.8093147270\n+UniRef90_B5Y935|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t2440.8093147270\n+UniRef90_A3DEM9\t2396.7775807888\n+UniRef90_A3DEM9|g__Acetivibrio.s__Acetivibrio_thermocellus\t2396.7775807888\n+UniRef90_B5Y742\t2374.1258357332\n+UniRef90_B5Y742|g__Coprothermobacter.s__Coprothermobacter_proteolyticus\t2374.1258357332\n+UniRef90_UPI0001C1468A\t2330.3621184267\n+UniRef90_UPI0001C1468A|g__A
cetivibrio.s__Acetivibrio_thermocellus\t2330.3621184267\n+UniRef90_A3DDQ7\t2277.0867600003\n+UniRef90_A3DDQ7|g__Acetivibrio.s__Acetivibrio_thermocellu'..b'93046292\n+UniRef90_A0A376TLE6\t2.7874315825\n+UniRef90_A0A376TLE6|unclassified\t2.7874315825\n+UniRef90_UPI000DDE62FE\t2.7216667800\n+UniRef90_UPI000DDE62FE|unclassified\t2.7216667800\n+UniRef90_A0A357AIJ3\t2.6461389890\n+UniRef90_A0A357AIJ3|unclassified\t2.6461389890\n+UniRef90_UPI0009875C6E\t2.6315789474\n+UniRef90_UPI0009875C6E|unclassified\t2.6315789474\n+UniRef90_A0A3D3QTT5\t2.4975222993\n+UniRef90_A0A3D3QTT5|unclassified\t2.4975222993\n+UniRef90_UPI000B6039CE\t2.4834437086\n+UniRef90_UPI000B6039CE|unclassified\t2.4834437086\n+UniRef90_A0A1B1YNW6\t2.4739225771\n+UniRef90_A0A1B1YNW6|unclassified\t2.4739225771\n+UniRef90_W5S5F3\t2.4341148999\n+UniRef90_W5S5F3|unclassified\t2.4341148999\n+UniRef90_W5S9S1\t2.4341148999\n+UniRef90_W5S9S1|unclassified\t2.4341148999\n+UniRef90_UPI000C7DA0AE\t2.4268007336\n+UniRef90_UPI000C7DA0AE|unclassified\t2.4268007336\n+UniRef90_A0A3B8J649\t2.4002809077\n+UniRef90_A0A3B8J649|unclassified\t2.4002809077\n+UniRef90_A0A162MND7\t2.3923444976\n+UniRef90_A0A162MND7|unclassified\t2.3923444976\n+UniRef90_UPI0008D9B061\t2.3800084950\n+UniRef90_UPI0008D9B061|unclassified\t2.3800084950\n+UniRef90_W4V9N2\t2.3649971475\n+UniRef90_W4V9N2|unclassified\t2.3649971475\n+UniRef90_M1YXY6\t2.2653263486\n+UniRef90_M1YXY6|unclassified\t2.2653263486\n+UniRef90_O27365\t2.2372806123\n+UniRef90_O27365|unclassified\t2.2372806123\n+UniRef90_A0A2K9E423\t2.2175274658\n+UniRef90_A0A2K9E423|unclassified\t2.2175274658\n+UniRef90_A0A2K2FN24\t2.2095097179\n+UniRef90_A0A2K2FN24|unclassified\t2.2095097179\n+UniRef90_R6CBA5\t2.1551724138\n+UniRef90_R6CBA5|unclassified\t2.1551724138\n+UniRef90_A0A328PEH5\t2.1486843000\n+UniRef90_A0A328PEH5|unclassified\t2.1486843000\n+UniRef90_A0A3D4VQP6\t2.1276595745\n+UniRef90_A0A3D4VQP6|unclassified\t2.1276595745\n+UniRef90_O26139\t2.1196420160\n+UniRef90_O26139|unclassified\t2.119
6420160\n+UniRef90_A0A140LCX8\t2.1093529946\n+UniRef90_A0A140LCX8|unclassified\t2.1093529946\n+UniRef90_X1J9F7\t1.8298059965\n+UniRef90_X1J9F7|unclassified\t1.8298059965\n+UniRef90_A0A117KRB1\t1.8119740034\n+UniRef90_A0A117KRB1|unclassified\t1.8119740034\n+UniRef90_A0A1F9CUU1\t1.6058644812\n+UniRef90_A0A1F9CUU1|unclassified\t1.6058644812\n+UniRef90_A5D2N8\t1.5956362082\n+UniRef90_A5D2N8|unclassified\t1.5956362082\n+UniRef90_X1LZV2\t1.5948963317\n+UniRef90_X1LZV2|unclassified\t1.5948963317\n+UniRef90_UPI000DD697AB\t1.5851270470\n+UniRef90_UPI000DD697AB|unclassified\t1.5851270470\n+UniRef90_A0A1B2CU56\t1.5777772956\n+UniRef90_A0A1B2CU56|unclassified\t1.5777772956\n+UniRef90_A0A372HJC7\t1.4739212069\n+UniRef90_A0A372HJC7|unclassified\t1.4739212069\n+UniRef90_F4LVJ1\t1.2944593157\n+UniRef90_F4LVJ1|unclassified\t1.2944593157\n+UniRef90_A0A2S8DIE5\t1.2835673811\n+UniRef90_A0A2S8DIE5|unclassified\t1.2835673811\n+UniRef90_A0A101HUR7\t1.1696034784\n+UniRef90_A0A101HUR7|unclassified\t1.1696034784\n+UniRef90_A4J2K8\t1.1696034784\n+UniRef90_A4J2K8|unclassified\t1.1696034784\n+UniRef90_A5D3F7\t1.1696034784\n+UniRef90_A5D3F7|unclassified\t1.1696034784\n+UniRef90_A0A3B8J8R7\t1.1013215859\n+UniRef90_A0A3B8J8R7|unclassified\t1.1013215859\n+UniRef90_A0A2K9EDL4\t1.0729613734\n+UniRef90_A0A2K9EDL4|unclassified\t1.0729613734\n+UniRef90_A0A1M4SS56\t1.0460251046\n+UniRef90_A0A1M4SS56|unclassified\t1.0460251046\n+UniRef90_E3N9M8\t1.0460251046\n+UniRef90_E3N9M8|unclassified\t1.0460251046\n+UniRef90_UPI000BB7D2FD\t0.8818342152\n+UniRef90_UPI000BB7D2FD|unclassified\t0.8818342152\n+UniRef90_UPI000D6F978D\t0.8503401361\n+UniRef90_UPI000D6F978D|unclassified\t0.8503401361\n+UniRef90_A0A1C0ACE1\t0.8031316965\n+UniRef90_A0A1C0ACE1|unclassified\t0.8031316965\n+UniRef90_R9R0L9\t0.8012635864\n+UniRef90_R9R0L9|unclassified\t0.8012635864\n+UniRef90_A0A037Z7M6\t0.7685924790\n+UniRef90_A0A037Z7M6|unclassified\t0.7685924790\n+UniRef90_A0A0L6ZD62\t0.7685924790\n+UniRef90_A0A0L6ZD62|unclassified\t0.768592479
0\n+UniRef90_UPI0009B7FD86\t0.6327050825\n+UniRef90_UPI0009B7FD86|unclassified\t0.6327050825\n+UniRef90_UPI000AB0D734\t0.5923196517\n+UniRef90_UPI000AB0D734|unclassified\t0.5923196517\n+UniRef90_A0A2S8DNM3\t0.5555555556\n+UniRef90_A0A2S8DNM3|unclassified\t0.5555555556\n+UniRef90_A0A2L0NH00\t0.2389765701\n+UniRef90_A0A2L0NH00|unclassified\t0.2389765701\n' |
b |
diff -r fdfb35745104 -r 01ac9954c27f test-data/humann36_pathways_input.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/humann36_pathways_input.tabular Thu Jul 20 10:07:12 2023 +0000 |
b |
b"@@ -0,0 +1,116 @@\n+# Pathway\thumann_Abundance\n+UNMAPPED\t8349.9735860182\n+UNINTEGRATED\t48556.8350742608\n+UNINTEGRATED|g__Acetivibrio.s__Acetivibrio_thermocellus\t29162.0390929203\n+UNINTEGRATED|unclassified\t3307.1774531381\n+PWY-6609: adenine and adenosine salvage III\t277.0510927865\n+PWY-6609: adenine and adenosine salvage III|g__Acetivibrio.s__Acetivibrio_thermocellus\t66.3688569288\n+PWY-6609: adenine and adenosine salvage III|unclassified\t11.9769595308\n+PWY-1042: glycolysis IV\t194.2219430842\n+PWY-5100: pyruvate fermentation to acetate and lactate II\t129.0053457248\n+PWY-5100: pyruvate fermentation to acetate and lactate II|g__Acetivibrio.s__Acetivibrio_thermocellus\t69.7497669984\n+PWY-7221: guanosine ribonucleotides de novo biosynthesis\t121.7671397650\n+PWY-7221: guanosine ribonucleotides de novo biosynthesis|g__Acetivibrio.s__Acetivibrio_thermocellus\t108.5388331853\n+PWY-7221: guanosine ribonucleotides de novo biosynthesis|unclassified\t13.6910424631\n+PWY-6703: preQ0 biosynthesis\t103.6377362280\n+PWY-6703: preQ0 biosynthesis|g__Acetivibrio.s__Acetivibrio_thermocellus\t98.9807370179\n+PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I\t83.3982268723\n+PWY-7228: superpathway of guanosine nucleotides de novo biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t48.4938466340\n+RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants)\t77.2232042652\n+RIBOSYN2-PWY: flavin biosynthesis I (bacteria and plants)|g__Acetivibrio.s__Acetivibrio_thermocellus\t53.2211975186\n+VALSYN-PWY: L-valine biosynthesis\t77.0829759735\n+VALSYN-PWY: L-valine biosynthesis|g__Acetivibrio.s__Acetivibrio_thermocellus\t77.0829759735\n+PWY-7208: superpathway of pyrimidine nucleobases salvage\t77.0563435419\n+PWY-7208: superpathway of pyrimidine nucleobases salvage|g__Acetivibrio.s__Acetivibrio_thermocellus\t45.1294786723\n+PWY-7208: superpathway of pyrimidine nucleobases salvage|unclassified\t17.1808137447\n+ILEUSYN-PWY: L-isoleucine biosynthesis 
I (from threonine)\t70.6881932740\n+ILEUSYN-PWY: L-isoleucine biosynthesis I (from threonine)|g__Acetivibrio.s__Acetivibrio_thermocellus\t70.6881932740\n+PWY-5981: CDP-diacylglycerol biosynthesis III\t70.6859344494\n+PWY-3841: folate transformations II (plants)\t68.9410605440\n+PWY-3841: folate transformations II (plants)|g__Acetivibrio.s__Acetivibrio_thermocellus\t65.3076297526\n+PWY-5103: L-isoleucine biosynthesis III\t66.6594375601\n+PWY-5103: L-isoleucine biosynthesis III|g__Acetivibrio.s__Acetivibrio_thermocellus\t66.6594375601\n+PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I\t60.8146639896\n+PWY-7229: superpathway of adenosine nucleotides de novo biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t43.7454671075\n+PWY0-1319: CDP-diacylglycerol biosynthesis II\t60.4369653934\n+PWY-5667: CDP-diacylglycerol biosynthesis I\t60.4369653934\n+COA-PWY-1: superpathway of coenzyme A biosynthesis III (mammals)\t53.6643541116\n+COA-PWY-1: superpathway of coenzyme A biosynthesis III (mammals)|g__Acetivibrio.s__Acetivibrio_thermocellus\t44.5540259674\n+ANAGLYCOLYSIS-PWY: glycolysis III (from glucose)\t53.4593266289\n+TRNA-CHARGING-PWY: tRNA charging\t50.1452719795\n+TRNA-CHARGING-PWY: tRNA charging|g__Acetivibrio.s__Acetivibrio_thermocellus\t40.2804860544\n+UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I\t50.0293370404\n+UDPNAGSYN-PWY: UDP-N-acetyl-D-glucosamine biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t43.2002393401\n+P41-PWY: pyruvate fermentation to acetate and (S)-lactate I\t49.9950777370\n+P41-PWY: pyruvate fermentation to acetate and (S)-lactate I|g__Acetivibrio.s__Acetivibrio_thermocellus\t45.0063828591\n+COA-PWY: coenzyme A biosynthesis I (prokaryotic)\t46.0146775357\n+COA-PWY: coenzyme A biosynthesis I (prokaryotic)|g__Acetivibrio.s__Acetivibrio_thermocellus\t37.7428378807\n+PWY-7851: coenzyme A biosynthesis II (eukaryotic)\t46.0146775357\n+PWY-7851: coenzyme A biosynthesis II 
(eukaryotic)|g__Acetivibrio.s__Acetivibrio_thermocellus\t37.7428378807\n+PWY-5695: inosine 5'-phosphate degradation\t44.9825390520\n+PW"..b"9529719544\n+NONMEVIPP-PWY: methylerythritol phosphate pathway I\t33.6589833555\n+NONMEVIPP-PWY: methylerythritol phosphate pathway I|g__Acetivibrio.s__Acetivibrio_thermocellus\t29.0719820809\n+PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing)\t33.0594389993\n+PWY-6387: UDP-N-acetylmuramoyl-pentapeptide biosynthesis I (meso-diaminopimelate containing)|g__Acetivibrio.s__Acetivibrio_thermocellus\t20.8795281789\n+SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I\t32.8269248922\n+SER-GLYSYN-PWY: superpathway of L-serine and glycine biosynthesis I|unclassified\t12.3167606082\n+PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II\t30.5171500072\n+PWY-6122: 5-aminoimidazole ribonucleotide biosynthesis II|g__Acetivibrio.s__Acetivibrio_thermocellus\t23.3388473782\n+PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis\t30.5171500072\n+PWY-6277: superpathway of 5-aminoimidazole ribonucleotide biosynthesis|g__Acetivibrio.s__Acetivibrio_thermocellus\t23.3388473782\n+PYRIDNUCSYN-PWY: NAD de novo biosynthesis I (from aspartate)\t29.9045452545\n+PYRIDNUCSYN-PWY: NAD de novo biosynthesis I (from aspartate)|g__Acetivibrio.s__Acetivibrio_thermocellus\t20.7694090097\n+PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing)\t29.7827132121\n+PWY-6386: UDP-N-acetylmuramoyl-pentapeptide biosynthesis II (lysine-containing)|g__Acetivibrio.s__Acetivibrio_thermocellus\t24.3303646214\n+PWY-6124: inosine-5'-phosphate biosynthesis II\t29.6777862771\n+PWY-6124: inosine-5'-phosphate biosynthesis II|g__Acetivibrio.s__Acetivibrio_thermocellus\t27.8207756981\n+PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate containing)\t29.4351322618\n+PEPTIDOGLYCANSYN-PWY: peptidoglycan biosynthesis I (meso-diaminopimelate 
containing)|g__Acetivibrio.s__Acetivibrio_thermocellus\t21.3306301878\n+PWY-2942: L-lysine biosynthesis III\t28.9072019154\n+PWY-2942: L-lysine biosynthesis III|g__Acetivibrio.s__Acetivibrio_thermocellus\t28.9072019154\n+PWY0-1296: purine ribonucleosides degradation\t28.8114422948\n+PWY0-1296: purine ribonucleosides degradation|g__Acetivibrio.s__Acetivibrio_thermocellus\t21.7825853640\n+PANTOSYN-PWY: superpathway of coenzyme A biosynthesis I (bacteria)\t28.6286133554\n+PANTOSYN-PWY: superpathway of coenzyme A biosynthesis I (bacteria)|g__Acetivibrio.s__Acetivibrio_thermocellus\t25.9318707800\n+PWY-6123: inosine-5'-phosphate biosynthesis I\t28.5956903171\n+PWY-6123: inosine-5'-phosphate biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t27.1410372715\n+PWY-6385: peptidoglycan biosynthesis III (mycobacteria)\t28.0459769653\n+PWY-6385: peptidoglycan biosynthesis III (mycobacteria)|g__Acetivibrio.s__Acetivibrio_thermocellus\t20.5915247014\n+PWY-7953: UDP-N-acetylmuramoyl-pentapeptide biosynthesis III (meso-diaminopimelate containing)\t27.3969889419\n+PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing)\t25.5073725776\n+PWY0-1586: peptidoglycan maturation (meso-diaminopimelate containing)|g__Acetivibrio.s__Acetivibrio_thermocellus\t25.5073725776\n+PANTO-PWY: phosphopantothenate biosynthesis I\t23.5721171877\n+PANTO-PWY: phosphopantothenate biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t21.8182330339\n+PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II\t22.4257853098\n+PWY-7220: adenosine deoxyribonucleotides de novo biosynthesis II|g__Acetivibrio.s__Acetivibrio_thermocellus\t19.9656642609\n+PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II\t22.4257853098\n+PWY-7222: guanosine deoxyribonucleotides de novo biosynthesis II|g__Acetivibrio.s__Acetivibrio_thermocellus\t19.9656642609\n+PWY-6700: queuosine biosynthesis I (de novo)\t20.5232364544\n+PWY-6700: queuosine biosynthesis I (de 
novo)|g__Acetivibrio.s__Acetivibrio_thermocellus\t19.1237279391\n+PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I\t18.5007041182\n+PWY-6147: 6-hydroxymethyl-dihydropterin diphosphate biosynthesis I|g__Acetivibrio.s__Acetivibrio_thermocellus\t18.3787528306\n" |
b |
diff -r fdfb35745104 -r 01ac9954c27f test-data/metaphlan4_input.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metaphlan4_input.txt Thu Jul 20 10:07:12 2023 +0000 |
b |
@@ -0,0 +1,20 @@ +#mpa_vOct22_CHOCOPhlAnSGB_202212 +#/usr/local/tools/_conda/envs/__metaphlan@4.0.6/bin/metaphlan /data/dnb08/galaxy_db/files/3/4/6/dataset_3461ff07-1b4d-4e57-b746-40d9d8066f01.dat,/data/dnb08/galaxy_db/files/e/a/b/dataset_eab63cfc-85ed-4f05-a3eb-f87cb309da27.dat --input_type fastq --read_min_len 70 --bt2_ps very-sensitive --min_mapq_val 5 --bowtie2db /data/db/data_managers/metaphlan/data/mpa_vOct22_CHOCOPhlAnSGB_202212 --index mpa_vOct22_CHOCOPhlAnSGB_202212 -t rel_ab --tax_lev a --min_cu_len 2000 --add_viruses --stat_q 0.1 --perc_nonzero 0.33 --avoid_disqm --sample_id_key SampleID --sample_id Metaphlan_Analysis -o /data/jwd05e/main/059/485/59485896/outputs/galaxy_dataset_dbfb8cc7-14d1-4318-a3b7-2a988fa2ff7f.dat --bowtie2out bowtie2out -s /data/jwd05e/main/059/485/59485896/outputs/galaxy_dataset_1afff43f-1b78-4574-b961-e7c7e7b902f2.dat --biom /data/jwd05e/main/059/485/59485896/outputs/galaxy_dataset_f4980675-ac4a-44fa-9e21-4c33652e5e87.dat --nproc 1 +#465754 reads processed +#SampleID Metaphlan_Analysis +#clade_name NCBI_tax_id relative_abundance additional_species +k__Bacteria 2 100.0 +k__Bacteria|p__Firmicutes 2|1239 68.23371 +k__Bacteria|p__Coprothermobacterota 2|2138240 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia 2|1239|186801 68.23371 +k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria 2|2138240|2138243 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia|o__Eubacteriales 2|1239|186801|186802 68.23371 +k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria|o__Coprothermobacterales 2|2138240|2138243|2138246 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae 2|1239|186801|186802|216572 68.23371 +k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria|o__Coprothermobacterales|f__Coprothermobacteraceae 2|2138240|2138243|2138246|2138247 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Acetivibrio 2|1239|186801|186802|216572|35829 68.23371 
+k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria|o__Coprothermobacterales|f__Coprothermobacteraceae|g__Coprothermobacter 2|2138240|2138243|2138246|2138247|68335 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Acetivibrio|s__Acetivibrio_thermocellus 2|1239|186801|186802|216572|35829|1515 68.23371 +k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria|o__Coprothermobacterales|f__Coprothermobacteraceae|g__Coprothermobacter|s__Coprothermobacter_proteolyticus 2|2138240|2138243|2138246|2138247|68335|35786 31.76629 +k__Bacteria|p__Firmicutes|c__Clostridia|o__Eubacteriales|f__Oscillospiraceae|g__Acetivibrio|s__Acetivibrio_thermocellus|t__SGB8476 2|1239|186801|186802|216572|35829|1515| 68.23371 +k__Bacteria|p__Coprothermobacterota|c__Coprothermobacteria|o__Coprothermobacterales|f__Coprothermobacteraceae|g__Coprothermobacter|s__Coprothermobacter_proteolyticus|t__SGB8555 2|2138240|2138243|2138246|2138247|68335|35786| 31.76629 |