# HG changeset patch # User bebatut # Date 1464855789 14400 # Node ID bcd414bb721b3068155f37c38a587fc219a5b75a # Parent 3d6f37e7e3a84fc6c0adce445c385a825d99cae4 planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit 79571d981d7d56657699be8aa24a40a36a8d0ab5-dirty diff -r 3d6f37e7e3a8 -r bcd414bb721b README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Thu Jun 02 04:23:09 2016 -0400 @@ -0,0 +1,14 @@ +Galaxy wrappers for HUMAnN2 +=========================== + +Galaxy should be able to automatically install the dependencies, i.e. the +HUMAnN2 binaries and its dependencies. + +After installation, you must tell Galaxy about the default database with +clade-specific marker genes used for taxonomic profiling (MetaPhlAn2), and where +to find it: + +* Put the ``metaphlan2_db.loc`` file in the ``tool-data/`` folder, after uncommenting the last line +* Download the whole MetaPhlAn2 source code: https://bitbucket.org/biobakery/metaphlan2/get/2.5.0.zip +* Unzip it +* Move ``db_v20`` folder into ``dependency_dir/metaphlan2/2.5.0/bebatut/package_metaphlan2_2_5_0/...`` folder diff -r 3d6f37e7e3a8 -r bcd414bb721b humann2.xml --- a/humann2.xml Thu May 26 10:20:59 2016 -0400 +++ b/humann2.xml Thu Jun 02 04:23:09 2016 -0400 @@ -25,46 +25,49 @@ #end for && - #end if + #end if `mkdir protein_db` && #if $protein_db.protein_db_selector == "history" - diamond makedb - --in $protein_db.history_protein_db + diamond makedb + --in $protein_db.history_protein_db --db protein_db/protein_db + && #end if - && + #if $taxonomic_profile.taxonomic_profile_test == "false" + #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history" + `mkdir ref_db` + && + bowtie2-build $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_sequences ref_db/ref_db + && + python $__tool_directory__/transform_json_to_pkl.py + --json_input $metaphlan2_db_metadata + --pkl_output ref_db/metadata.pkl + && + #end if + #end if - humann2 + humann2 -i 
"$input_file" - + #set $metaphlan_option = "-t rel_ab" #if $taxonomic_profile.taxonomic_profile_test == "true": --taxonomic-profile $taxonomic_profile.taxonomic_profile_file #else - - #if $taxonomic_profile.mpa_pkl.mpa_pkl_selector == "cached" - #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data]) - #set $mpa_pkl_db = $taxonomic_profile.mpa_pkl.cached_mpa_pkl.value - #set $metaphlan_option += " --mpa_pkl " + $mpa_pkl_table[$mpa_pkl_db] + #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "cached" + #set $table = dict([(_[0], _[2]) for _ in $taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.input.options.tool_data_table.data]) + #set $db_choice = $taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.value + #set $metaphlan_option += " --bowtie2db " + $table[$db_choice] + #set $metaphlan_option += " --mpa_pkl " + $table[$db_choice] + ".pkl" #else - #set $metaphlan_option += " --mpa_pkl " + $taxonomic_profile.mpa_pkl.history_mpa_pkl + #set $metaphlan_option += " --bowtie2db " + "ref_db/ref_db" + #set $metaphlan_option += " --mpa_pkl " + "ref_db/metadata.pkl" #end if - - #if $taxonomic_profile.bowtie2db.bowtie2db_selector == "cached" - #set $bowtie2_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.bowtie2db.cached_bowtie2db.input.options.tool_data_table.data]) - #set $bowtie2db_choice = $taxonomic_profile.bowtie2db.cached_bowtie2db.value - #set $metaphlan_option += " --bowtie2db " + $bowtie2_table[$bowtie2db_choice] - #else - #set $metaphlan_option += " --bowtie2db " + $taxonomic_profile.bowtie2db.history_bowtie2db - #end if - #end if - --metaphlan-options="$metaphlan_option" --evalue $e_value @@ -95,59 +98,43 @@ --output-format $output_format --output-max-decimals $output_max_dec --output-basename "humann2" - $remove_statified_output + $remove_statified_output ]]> - - + - + - - + + - - + + - - - - - - - - - - - - - - - - - + + - + @@ -159,7 +146,7 @@ - + 
@@ -255,9 +242,29 @@ The input is a single file corresponding either to filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), alignment file (sam, bam or blastm8 format) or gene table file (tsv or biom format). -A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. +A file with a taxonomic profile (obtained with MetaPhlAn2) can also be provided to avoid the first step of taxonomic profiling needed to select pangenomes in the protein database. Otherwise, default MetaPhlAn2 or custom databases can be used for taxonomic profiling. For custom databases, a fasta file with marker gene sequences is required and also a json file containing metadata: + +:: -HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). Custom databases can also be used after upload. Nucleotide database have to be provided as a dataset. + { + "taxonomy": { + "taxonomy of genome1": genome1_length, + "taxonomy of genome2": genome2_length, + ... + }, + "markers": { + "marker1_name": { + "clade": the clade that the marker belongs to, + "ext": [list of external genomes where the marker appears], + "len": length of the marker, + "score": score of the marker, + "taxon": the taxon of the marker + } + ... + } + } + +For functional profiling, HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload. Nucleotide databases have to be provided as a dataset. 
**Outputs** @@ -270,4 +277,4 @@ ]]> - \ No newline at end of file + diff -r 3d6f37e7e3a8 -r bcd414bb721b humann2_macros.xml --- a/humann2_macros.xml Thu May 26 10:20:59 2016 -0400 +++ b/humann2_macros.xml Thu Jun 02 04:23:09 2016 -0400 @@ -3,7 +3,7 @@ bowtie2 - metaphlan2 + metaphlan2 diamond humann2 @@ -19,4 +19,4 @@ 10.1371/journal.pcbi.1002358 - \ No newline at end of file + diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_bowtie_db.loc.sample --- a/tool-data/metaphlan2_bowtie_db.loc.sample Thu May 26 10:20:59 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -# Bowtie2 db have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory) -# -#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it -#by downloading it as explained above and uncommenting the following lines. -#bowtie_db_v20 Defaut BowTie2 database $METAPHLAN2_DIR/db_v20/mpa_v20_m200 diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_db.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/metaphlan2_db.loc.sample Thu Jun 02 04:23:09 2016 -0400 @@ -0,0 +1,5 @@ +# Bowtie2 db have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory) +# +#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it +#by downloading it as explained above and uncommenting the following lines. 
+#mpa_v20_m200 Defaut database with clade-specific marker genes $METAPHLAN2_DIR/db_v20/mpa_v20_m200 diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_metadata.loc.sample --- a/tool-data/metaphlan2_metadata.loc.sample Thu May 26 10:20:59 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#Metadata have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/raw/5424bb911dfcdb7212ea0949d4faeb6e69cfa61f/db_v20/mpa_v20_m200.pkl -# -#Since MetaPhlAn comes bundled with 1 metadata database, you can use it -#by downloading it as explained above and uncommenting the following lines. -#metadata_db_v20 Defaut MetaPhlAn2 metadata $METAPHLAN2_DIR/db_v20/mpa_v20_m200.pkl diff -r 3d6f37e7e3a8 -r bcd414bb721b tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Thu May 26 10:20:59 2016 -0400 +++ b/tool_data_table_conf.xml.sample Thu Jun 02 04:23:09 2016 -0400 @@ -1,19 +1,6 @@ - - - value, name, path - -
- +
value, name, path - -
- - value, name, path - -
- - value, name, path - +
# Patch headers carried over verbatim from the changeset line this script
# was embedded in (they are not part of the script itself):
#   diff -r 3d6f37e7e3a8 -r bcd414bb721b tool_dependencies.xml
#   --- a/tool_dependencies.xml Thu May 26 10:20:59 2016 -0400
#   +++ b/tool_dependencies.xml Thu Jun 02 04:23:09 2016 -0400
#   @@ -3,8 +3,8 @@ - - + +
#   diff -r 3d6f37e7e3a8 -r bcd414bb721b transform_json_to_pkl.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/transform_json_to_pkl.py Thu Jun 02 04:23:09 2016 -0400
#   @@ -0,0 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert a JSON metadata file into a bz2-compressed pickle.

The JSON document must contain a top-level ``"markers"`` mapping whose
values each carry an ``"ext"`` list; every such list is turned into a
``set`` before the whole document is pickled.
"""

import argparse
import bz2
import json

try:
    import cPickle as pickle  # Python 2: C-accelerated pickler
except ImportError:
    import pickle  # Python 3: the C accelerator is used automatically

# Protocol 2 is what ``pickle.HIGHEST_PROTOCOL`` resolves to on Python 2, so
# the output there is unchanged. Pinning it keeps the file loadable by a
# Python 2 reader even when this script is run with Python 3, whose highest
# protocol Python 2 cannot read.
_PICKLE_PROTOCOL = 2


def transform_json_to_pkl(args):
    """Read ``args.json_input`` and write it as a bz2 pickle.

    Args:
        args: object exposing ``json_input`` (path of the JSON file to read)
            and ``pkl_output`` (path of the bz2-compressed pickle to write),
            e.g. the namespace returned by ``argparse``.

    Raises:
        KeyError: if the document has no ``"markers"`` entry, or a marker
            has no ``"ext"`` entry.
        ValueError: if the input is not valid JSON.
        IOError/OSError: if either file cannot be opened.
    """
    with open(args.json_input, 'r') as json_file:
        metadata = json.load(json_file)

    # JSON has no set type: the "ext" lists are converted in place.
    for marker_info in metadata["markers"].values():
        marker_info["ext"] = set(marker_info["ext"])

    # The context manager closes the archive even if pickling fails.
    with bz2.BZ2File(args.pkl_output, 'w') as pkl_output:
        pickle.dump(metadata, pkl_output, _PICKLE_PROTOCOL)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--json_input', required=True)
    parser.add_argument('--pkl_output', required=True)

    args = parser.parse_args()

    transform_json_to_pkl(args)