changeset 1:bcd414bb721b draft default tip

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit 79571d981d7d56657699be8aa24a40a36a8d0ab5-dirty
author bebatut
date Thu, 02 Jun 2016 04:23:09 -0400
parents 3d6f37e7e3a8
children
files README.rst humann2.xml humann2_macros.xml tool-data/metaphlan2_bowtie_db.loc.sample tool-data/metaphlan2_db.loc.sample tool-data/metaphlan2_metadata.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml transform_json_to_pkl.py
diffstat 9 files changed, 113 insertions(+), 82 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Thu Jun 02 04:23:09 2016 -0400
@@ -0,0 +1,14 @@
+Galaxy wrappers for HUMAnN2
+===========================
+
+Galaxy should be able to automatically install the dependencies, i.e. the
+HUMAnN2 binaries and its dependencies.
+
+After installation, you must tell Galaxy about the default database with
+clade-specific marker genes used for taxonomic profiling (MetaPhlAn2), and where
+to find it:
+
+* Put the ``metaphlan2_db.loc`` file in the ``tool-data/`` folder, after uncommenting the last line
+* Download the whole MetaPhlAn2 source code: https://bitbucket.org/biobakery/metaphlan2/get/2.5.0.zip
+* Unzip it
+* Move the ``db_v20`` folder into the ``dependency_dir/metaphlan2/2.5.0/bebatut/package_metaphlan2_2_5_0/...`` folder
--- a/humann2.xml	Thu May 26 10:20:59 2016 -0400
+++ b/humann2.xml	Thu Jun 02 04:23:09 2016 -0400
@@ -25,46 +25,49 @@
             #end for
 
             &&
-        #end if        
+        #end if
 
         `mkdir protein_db`
 
         &&
 
         #if $protein_db.protein_db_selector == "history"
-            diamond makedb 
-                --in $protein_db.history_protein_db 
+            diamond makedb
+                --in $protein_db.history_protein_db
                 --db protein_db/protein_db
+             &&
         #end if
 
-        &&
+        #if $taxonomic_profile.taxonomic_profile_test == "false"
+          #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"
+              `mkdir ref_db`
+              &&
+              bowtie2-build $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_sequences ref_db/ref_db
+              &&
+              python $__tool_directory__/transform_json_to_pkl.py
+                  --json_input $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_metadata
+                  --pkl_output ref_db/metadata.pkl
+              &&
+          #end if
+        #end if
 
-        humann2 
+        humann2
             -i "$input_file"
-            
+
             #set $metaphlan_option = "-t rel_ab"
             #if $taxonomic_profile.taxonomic_profile_test == "true":
                 --taxonomic-profile $taxonomic_profile.taxonomic_profile_file
             #else
-
-                #if $taxonomic_profile.mpa_pkl.mpa_pkl_selector == "cached"
-                    #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data])
-                    #set $mpa_pkl_db = $taxonomic_profile.mpa_pkl.cached_mpa_pkl.value
-                    #set $metaphlan_option += " --mpa_pkl " + $mpa_pkl_table[$mpa_pkl_db]
+                #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "cached"
+                    #set $table = dict([(_[0], _[2]) for _ in $taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.input.options.tool_data_table.data])
+                    #set $db_choice = $taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.value
+                    #set $metaphlan_option += " --bowtie2db " + $table[$db_choice]
+                    #set $metaphlan_option += " --mpa_pkl " + $table[$db_choice] + ".pkl"
                 #else
-                    #set $metaphlan_option += " --mpa_pkl " + $taxonomic_profile.mpa_pkl.history_mpa_pkl
+                    #set $metaphlan_option += " --bowtie2db ref_db/ref_db"
+                    #set $metaphlan_option += " --mpa_pkl ref_db/metadata.pkl"
                 #end if
-
-                #if $taxonomic_profile.bowtie2db.bowtie2db_selector == "cached"
-                    #set $bowtie2_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.bowtie2db.cached_bowtie2db.input.options.tool_data_table.data])
-                    #set $bowtie2db_choice = $taxonomic_profile.bowtie2db.cached_bowtie2db.value
-                    #set $metaphlan_option += " --bowtie2db " + $bowtie2_table[$bowtie2db_choice]
-                #else
-                    #set $metaphlan_option += " --bowtie2db " + $taxonomic_profile.bowtie2db.history_bowtie2db
-                #end if
-
             #end if
-
             --metaphlan-options="$metaphlan_option"
 
             --evalue $e_value
@@ -95,59 +98,43 @@
             --output-format $output_format
             --output-max-decimals $output_max_dec
             --output-basename "humann2"
-            $remove_statified_output            
+            $remove_statified_output
     ]]></command>
 
     <inputs>
-        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom" 
+        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom"
             label="Input sequence file" help=""/>
 
         <conditional name="taxonomic_profile">
             <param name='taxonomic_profile_test' type='select' label="Use a custom taxonomic profile?" help="The file must have been created by MetaPhlan2">
                 <option value="true">Yes</option>
                 <option value="false" selected="true">No</option>
-            </param> 
+            </param>
             <when value="true">
-                <param name="taxonomic_profile_file" type="data" format="tabular,txt" label="Taxonomic profile 
-                    file" help=""/>
+                <param name="taxonomic_profile_file" type="data" format="tabular,txt" label="Taxonomic profile file" help=""/>
             </when>
             <when value="false">
-                <conditional name="mpa_pkl">
-                    <param name="mpa_pkl_selector" type="select" label="Metadata for MetaPhlAn2" help="">
+                <conditional name="metaphlan2_db_choice">
+                    <param name="metaphlan2_db_selector" type="select" label="Database with clade-specific marker genes" help="">
                         <option value="cached" selected="true">Locally cached</option>
                         <option value="history">From history</option>
                     </param>
 
                     <when value="cached">
-                        <param name="cached_mpa_pkl" label="Cached metadata" type="select">
-                        <options from_data_table="metaphlan2_metadata" />
+                        <param name="cached_metaphlan2_db" label="Cached database with clade-specific marker genes" type="select" >
+                          <options from_data_table="metaphlan2_db" />
                         </param>
                     </when>
                     <when value="history">
-                        <param name="history_mpa_pkl" type="data" format="fasta" label="Metadata from history"/>
-                    </when>
-                </conditional>
-
-                <conditional name="bowtie2db">
-                    <param name="bowtie2db_selector" type="select" label="BowTie2 database for MetaPhlAn2" help="">
-                        <option value="cached" selected="true">Locally cached</option>
-                        <option value="history">From history</option>
-                    </param>
-
-                    <when value="cached">
-                        <param name="cached_bowtie2db" label="Cached BowTie2 database" type="select" >
-                        <options from_data_table="metaphlan2_bowtie_db" />
-                        </param>
-                    </when>
-                    <when value="history">
-                        <param name="history_bowtie2db" type="data" format="fasta" label="BowTie2 database from history"/>
+                        <param name="metaphlan2_db_sequences" type="data" format="fasta" label="Database with clade-specific marker genes from history" help="(--bowtie2db)"/>
+                        <param name="metaphlan2_db_metadata" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history" help="(--mpa_pkl)"/>
                     </when>
                 </conditional>
             </when>
         </conditional>
 
         <conditional name="nucleotide_db">
-            <param name="nucleotide_db_selector" type="select" label="Nucleotide database" help="">
+            <param name="nucleotide_db_selector" type="select" label="Nucleotide database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">
                 <option value="cached" selected="true">Locally cached</option>
                 <option value="history">From history (as collection)</option>
             </param>
@@ -159,7 +146,7 @@
         </conditional>
 
         <conditional name="protein_db">
-            <param name="protein_db_selector" type="select" label="Protein database" help="">
+            <param name="protein_db_selector" type="select" label="Protein database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">
                 <option value="cached" selected="true">Locally cached</option>
                 <option value="history">From history</option>
             </param>
@@ -255,9 +242,29 @@
 
 The input is a single file corresponding either to filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), alignment file (sam, bam or blastm8 format) or gene table file (tsv or biom format).
 
-A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. 
+A file with a taxonomic profile (obtained with MetaPhlAn2) can also be provided to skip the first step of taxonomic profiling, which is needed to select pangenomes in the protein database. Otherwise, the default MetaPhlAn2 database or a custom database can be used for taxonomic profiling. A custom database requires a fasta file with the marker gene sequences and a json file containing the metadata:
+
+::
 
-HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). Custom databases can also be used after upload. Nucleotide database have to be provided as a dataset.
+  {
+    "taxonomy": {
+            "taxonomy of genome1": genome1_length,
+            "taxonomy of genome2": genome2_length,
+            ...
+        },
+    "markers": {
+            "marker1_name": {
+                "clade": the clade that the marker belongs to,
+                "ext": [list of external genomes where the marker appears],
+                "len": length of the marker,
+                "score": score of the marker,
+                "taxon": the taxon of the marker
+            }
+            ...
+        }
+  }
+
+For functional profiling, HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload. Nucleotide databases have to be provided as a dataset.
 
 **Outputs**
 
@@ -270,4 +277,4 @@
     ]]></help>
 
     <expand macro="citations"/>
-</tool>
\ No newline at end of file
+</tool>
--- a/humann2_macros.xml	Thu May 26 10:20:59 2016 -0400
+++ b/humann2_macros.xml	Thu Jun 02 04:23:09 2016 -0400
@@ -3,7 +3,7 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="2.2.5">bowtie2</requirement>
-            <requirement type="package" version="2.2.0">metaphlan2</requirement>
+            <requirement type="package" version="2.5.0">metaphlan2</requirement>
             <requirement type="package" version="0.7.10">diamond</requirement>
             <requirement type="package" version="0.6.1">humann2</requirement>
         </requirements>
@@ -19,4 +19,4 @@
             <citation type="doi">10.1371/journal.pcbi.1002358</citation>
         </citations>
     </xml>
-</macros>
\ No newline at end of file
+</macros>
--- a/tool-data/metaphlan2_bowtie_db.loc.sample	Thu May 26 10:20:59 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-# Bowtie2 db have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory)
-#
-#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it
-#by downloading it as explained above and uncommenting the following lines.
-#bowtie_db_v20	Defaut BowTie2 database	$METAPHLAN2_DIR/db_v20/mpa_v20_m200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_db.loc.sample	Thu Jun 02 04:23:09 2016 -0400
@@ -0,0 +1,5 @@
+# The Bowtie2 database has to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory)
+#
+#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it
+#by downloading it as explained above and uncommenting the following lines.
+#mpa_v20_m200	Default database with clade-specific marker genes	$METAPHLAN2_DIR/db_v20/mpa_v20_m200
--- a/tool-data/metaphlan2_metadata.loc.sample	Thu May 26 10:20:59 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#Metadata have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/raw/5424bb911dfcdb7212ea0949d4faeb6e69cfa61f/db_v20/mpa_v20_m200.pkl
-#
-#Since MetaPhlAn comes bundled with 1 metadata database, you can use it
-#by downloading it as explained above and uncommenting the following lines.
-#metadata_db_v20	Defaut MetaPhlAn2 metadata	$METAPHLAN2_DIR/db_v20/mpa_v20_m200.pkl
--- a/tool_data_table_conf.xml.sample	Thu May 26 10:20:59 2016 -0400
+++ b/tool_data_table_conf.xml.sample	Thu Jun 02 04:23:09 2016 -0400
@@ -1,19 +1,6 @@
 <tables>
-    <!-- Locations of public ribosomal databases -->
-    <table name="humann2_protein_database" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/humann2_protein_database.loc" />
-    </table>
-    <table name="humann2_nucleotide_database" comment_char="#">
+    <table name="metaphlan2_db" comment_char="#">
         <columns>value, name, path</columns>
-        <file path="tool-data/humann2_nucleotide_database.loc" />
-    </table>
-    <table name="metaphlan2_metadata" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/metaphlan2_metadata.loc" />
-    </table>
-    <table name="metaphlan2_bowtie_db" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/metaphlan2_bowtie_db.loc" />
+        <file path="tool-data/metaphlan2_db.loc" />
     </table>
 </tables>
--- a/tool_dependencies.xml	Thu May 26 10:20:59 2016 -0400
+++ b/tool_dependencies.xml	Thu Jun 02 04:23:09 2016 -0400
@@ -3,8 +3,8 @@
     <package name="bowtie2" version="2.2.5">
         <repository changeset_revision="30bd7eaeddbf" name="package_bowtie_2_2_5" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    <package name="metaphlan2" version="2.2.0">
-        <repository changeset_revision="e9ab4fcb8278" name="package_metaphlan2_2_2_0" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="metaphlan2" version="2.5.0">
+        <repository changeset_revision="43a80f92815f" name="package_metaphlan2_2_5_0" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="diamond" version="0.7.10">
         <repository changeset_revision="c538574b1553" name="package_diamond_0_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py	Thu Jun 02 04:23:09 2016 -0400
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import cPickle as pickle
+import bz2
+import json
+import argparse
+
+def transform_json_to_pkl(args):
+    with open(args.json_input, 'r') as json_file:
+        json_str = json_file.read()
+        metadata = json.loads(json_str)
+
+        for marker in metadata["markers"]:
+            metadata["markers"][marker]["ext"] = set(metadata["markers"][marker]["ext"])
+
+    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
+    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+    pkl_output.close()
+
+if __name__ == '__main__':  # command-line entry point
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--json_input', required=True)
+    parser.add_argument('--pkl_output', required=True)
+    # Both options are mandatory; they are read as args.json_input / args.pkl_output.
+    args = parser.parse_args()
+
+    transform_json_to_pkl(args)