Repository 'humann2'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/humann2

Changeset 1:bcd414bb721b (2016-06-02)
Previous changeset 0:3d6f37e7e3a8 (2016-05-26)
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit 79571d981d7d56657699be8aa24a40a36a8d0ab5-dirty
modified:
humann2.xml
humann2_macros.xml
tool_data_table_conf.xml.sample
tool_dependencies.xml
added:
README.rst
tool-data/metaphlan2_db.loc.sample
transform_json_to_pkl.py
removed:
tool-data/metaphlan2_bowtie_db.loc.sample
tool-data/metaphlan2_metadata.loc.sample
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Jun 02 04:23:09 2016 -0400
b
@@ -0,0 +1,14 @@
+Galaxy wrappers for HUMAnN2
+===========================
+
+Galaxy should be able to automatically install the dependencies, i.e. the
+HUMAnN2 binaries and its dependencies.
+
+After installation, you must tell Galaxy about the default database with
+clade-specific marker genes used for taxonomic profiling (MetaPhlAn2), and where
+to find it:
+
+* Put the ``metaphlan2_db.loc`` file in the ``tool-data/`` folder, after uncommenting its last line
+* Download the whole MetaPhlAn2 source code: https://bitbucket.org/biobakery/metaphlan2/get/2.5.0.zip
+* Unzip it
+* Move the ``db_v20`` folder into the ``dependency_dir/metaphlan2/2.5.0/bebatut/package_metaphlan2_2_5_0/...`` folder
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b humann2.xml
--- a/humann2.xml Thu May 26 10:20:59 2016 -0400
+++ b/humann2.xml Thu Jun 02 04:23:09 2016 -0400
[
b'@@ -25,46 +25,49 @@\n             #end for\n \n             &&\n-        #end if        \n+        #end if\n \n         `mkdir protein_db`\n \n         &&\n \n         #if $protein_db.protein_db_selector == "history"\n-            diamond makedb \n-                --in $protein_db.history_protein_db \n+            diamond makedb\n+                --in $protein_db.history_protein_db\n                 --db protein_db/protein_db\n+             &&\n         #end if\n \n-        &&\n+        #if $taxonomic_profile.taxonomic_profile_test == "false"\n+          #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"\n+              `mkdir ref_db`\n+              &&\n+              bowtie2-build $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_sequences ref_db/ref_db\n+              &&\n+              python $__tool_directory__/transform_json_to_pkl.py\n+                  --json_input $metaphlan2_db_metadata\n+                  --pkl_output ref_db/metadata.pkl\n+              &&\n+          #end if\n+        #end if\n \n-        humann2 \n+        humann2\n             -i "$input_file"\n-            \n+\n             #set $metaphlan_option = "-t rel_ab"\n             #if $taxonomic_profile.taxonomic_profile_test == "true":\n                 --taxonomic-profile $taxonomic_profile.taxonomic_profile_file\n             #else\n-\n-                #if $taxonomic_profile.mpa_pkl.mpa_pkl_selector == "cached"\n-                    #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data])\n-                    #set $mpa_pkl_db = $taxonomic_profile.mpa_pkl.cached_mpa_pkl.value\n-                    #set $metaphlan_option += " --mpa_pkl " + $mpa_pkl_table[$mpa_pkl_db]\n+                #if $taxonomic_profile.metaphlan2_db_choice.metaphlan2_db_selector == "cached"\n+                    #set $table = dict([(_[0], _[2]) for _ in 
$taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.input.options.tool_data_table.data])\n+                    #set $db_choice = $taxonomic_profile.metaphlan2_db_choice.cached_metaphlan2_db.value\n+                    #set $metaphlan_option += " --bowtie2db " + $table[$db_choice]\n+                    #set $metaphlan_option += " --mpa_pkl " + $table[$db_choice] + ".pkl"\n                 #else\n-                    #set $metaphlan_option += " --mpa_pkl " + $taxonomic_profile.mpa_pkl.history_mpa_pkl\n+                    #set $metaphlan_option += " --bowtie2db " + ref_db/ref_db\n+                    #set $metaphlan_option += " --mpa_pkl " + ref_db/metadata.pkl\n                 #end if\n-\n-                #if $taxonomic_profile.bowtie2db.bowtie2db_selector == "cached"\n-                    #set $bowtie2_table = dict([(_[0], _[2]) for _ in $taxonomic_profile.bowtie2db.cached_bowtie2db.input.options.tool_data_table.data])\n-                    #set $bowtie2db_choice = $taxonomic_profile.bowtie2db.cached_bowtie2db.value\n-                    #set $metaphlan_option += " --bowtie2db " + $bowtie2_table[$bowtie2db_choice]\n-                #else\n-                    #set $metaphlan_option += " --bowtie2db " + $taxonomic_profile.bowtie2db.history_bowtie2db\n-                #end if\n-\n             #end if\n-\n             --metaphlan-options="$metaphlan_option"\n \n             --evalue $e_value\n@@ -95,59 +98,43 @@\n             --output-format $output_format\n             --output-max-decimals $output_max_dec\n             --output-basename "humann2"\n-            $remove_statified_output            \n+            $remove_statified_output\n     ]]></command>\n \n     <inputs>\n-        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom" \n+        <param name="input_file" type="data" format="fastq,fasta,sam,bam,biom"\n             label="Input sequence file" help=""/>\n \n         <conditional name="taxonomic_profile">\n             <param 
name=\'taxonomic_profile_test\' type=\'select\' label="Use a custom taxonomic profile?" help="The file must have been created by MetaPhlan2">\n           '..b'hen value="cached">\n-                        <param name="cached_bowtie2db" label="Cached BowTie2 database" type="select" >\n-                        <options from_data_table="metaphlan2_bowtie_db" />\n-                        </param>\n-                    </when>\n-                    <when value="history">\n-                        <param name="history_bowtie2db" type="data" format="fasta" label="BowTie2 database from history"/>\n+                        <param name="metaphlan2_db_sequences" type="data" format="fasta" label="Database with clade-specific marker genes from history" help="(--bowtie2db)"/>\n+                        <param name="metaphlan2_db_metadata" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history" help="(--mpa_pkl)"/>\n                     </when>\n                 </conditional>\n             </when>\n         </conditional>\n \n         <conditional name="nucleotide_db">\n-            <param name="nucleotide_db_selector" type="select" label="Nucleotide database" help="">\n+            <param name="nucleotide_db_selector" type="select" label="Nucleotide database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">\n                 <option value="cached" selected="true">Locally cached</option>\n                 <option value="history">From history (as collection)</option>\n             </param>\n@@ -159,7 +146,7 @@\n         </conditional>\n \n         <conditional name="protein_db">\n-            <param name="protein_db_selector" type="select" label="Protein database" help="">\n+            <param name="protein_db_selector" type="select" label="Protein database" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases">\n          
       <option value="cached" selected="true">Locally cached</option>\n                 <option value="history">From history</option>\n             </param>\n@@ -255,9 +242,29 @@\n \n The input is a single file corresponding either to filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), alignment file (sam, bam or blastm8 format) or gene table file (tsv or biom format).\n \n-A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. \n+A file with a taxonomic profile (obtained with MetaPhlan2) can also be provided to avoid first step of taxonomic profiling needed to select pangenomes in protein database. Otherwise, default MetaPhlAn2 or custom databases can be used for taxonomic profiling. For custom databases, a fasta file with marker gene sequences is required and also a json file containing metadata:\n+\n+::\n \n-HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). Custom databases can also be used after upload. Nucleotide database have to be provided as a dataset.\n+  {\n+    "taxonomy": {\n+            "taxonomy of genome1": genome1_length,\n+            "taxonomy of genome2": genome2_length,\n+            ...\n+        }\n+    "markers": {\n+            "marker1_name": {\n+                "clade": the clade that the marker belongs to,\n+                "ext": [list of external genomes where the marker appears],\n+                "len": length of the marker,\n+                "score": score of the marker,\n+                "taxon": the taxon of the marker\n+            }\n+            ...\n+        }\n+  }\n+\n+For functional profiling, HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded database before using them (using the dedicated tool). 
Custom databases can also be used after upload. Nucleotide database have to be provided as a dataset.\n \n **Outputs**\n \n@@ -270,4 +277,4 @@\n     ]]></help>\n \n     <expand macro="citations"/>\n-</tool>\n\\ No newline at end of file\n+</tool>\n'
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b humann2_macros.xml
--- a/humann2_macros.xml Thu May 26 10:20:59 2016 -0400
+++ b/humann2_macros.xml Thu Jun 02 04:23:09 2016 -0400
b
@@ -3,7 +3,7 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="2.2.5">bowtie2</requirement>
-            <requirement type="package" version="2.2.0">metaphlan2</requirement>
+            <requirement type="package" version="2.5.0">metaphlan2</requirement>
             <requirement type="package" version="0.7.10">diamond</requirement>
             <requirement type="package" version="0.6.1">humann2</requirement>
         </requirements>
@@ -19,4 +19,4 @@
             <citation type="doi">10.1371/journal.pcbi.1002358</citation>
         </citations>
     </xml>
-</macros>
\ No newline at end of file
+</macros>
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_bowtie_db.loc.sample
--- a/tool-data/metaphlan2_bowtie_db.loc.sample Thu May 26 10:20:59 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-# Bowtie2 db have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory)
-#
-#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it
-#by downloading it as explained above and uncommenting the following lines.
-#bowtie_db_v20 Defaut BowTie2 database $METAPHLAN2_DIR/db_v20/mpa_v20_m200
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_db.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/metaphlan2_db.loc.sample Thu Jun 02 04:23:09 2016 -0400
b
@@ -0,0 +1,5 @@
+# The Bowtie2 database has to be downloaded from https://bitbucket.org/biobakery/metaphlan2/src/5424bb911dfc/db_v20/?at=default (whole directory)
+#
+#Since MetaPhlAn comes bundled with 1 Bowtie2 database, you can use it
+#by downloading it as explained above and uncommenting the following line.
+#mpa_v20_m200 Defaut database with clade-specific marker genes $METAPHLAN2_DIR/db_v20/mpa_v20_m200
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b tool-data/metaphlan2_metadata.loc.sample
--- a/tool-data/metaphlan2_metadata.loc.sample Thu May 26 10:20:59 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-#Metadata have to be downloaded from https://bitbucket.org/biobakery/metaphlan2/raw/5424bb911dfcdb7212ea0949d4faeb6e69cfa61f/db_v20/mpa_v20_m200.pkl
-#
-#Since MetaPhlAn comes bundled with 1 metadata database, you can use it
-#by downloading it as explained above and uncommenting the following lines.
-#metadata_db_v20 Defaut MetaPhlAn2 metadata $METAPHLAN2_DIR/db_v20/mpa_v20_m200.pkl
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Thu May 26 10:20:59 2016 -0400
+++ b/tool_data_table_conf.xml.sample Thu Jun 02 04:23:09 2016 -0400
b
@@ -1,19 +1,6 @@
 <tables>
-    <!-- Locations of public ribosomal databases -->
-    <table name="humann2_protein_database" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/humann2_protein_database.loc" />
-    </table>
-    <table name="humann2_nucleotide_database" comment_char="#">
+    <table name="metaphlan2_db" comment_char="#">
         <columns>value, name, path</columns>
-        <file path="tool-data/humann2_nucleotide_database.loc" />
-    </table>
-    <table name="metaphlan2_metadata" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/metaphlan2_metadata.loc" />
-    </table>
-    <table name="metaphlan2_bowtie_db" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/metaphlan2_bowtie_db.loc" />
+        <file path="tool-data/metaphlan2_db.loc" />
     </table>
 </tables>
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b tool_dependencies.xml
--- a/tool_dependencies.xml Thu May 26 10:20:59 2016 -0400
+++ b/tool_dependencies.xml Thu Jun 02 04:23:09 2016 -0400
b
@@ -3,8 +3,8 @@
     <package name="bowtie2" version="2.2.5">
         <repository changeset_revision="30bd7eaeddbf" name="package_bowtie_2_2_5" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    <package name="metaphlan2" version="2.2.0">
-        <repository changeset_revision="e9ab4fcb8278" name="package_metaphlan2_2_2_0" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="metaphlan2" version="2.5.0">
+        <repository changeset_revision="43a80f92815f" name="package_metaphlan2_2_5_0" owner="bebatut" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="diamond" version="0.7.10">
         <repository changeset_revision="c538574b1553" name="package_diamond_0_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
b
diff -r 3d6f37e7e3a8 -r bcd414bb721b transform_json_to_pkl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transform_json_to_pkl.py Thu Jun 02 04:23:09 2016 -0400
[
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import cPickle as pickle
+import bz2
+import json
+import argparse
+
+def transform_json_to_pkl(args):
+    with open(args.json_input, 'r') as json_file:
+        json_str = json_file.read()
+        metadata = json.loads(json_str)
+
+        for marker in metadata["markers"]:
+            metadata["markers"][marker]["ext"] = set(metadata["markers"][marker]["ext"])
+
+    pkl_output = bz2.BZ2File(args.pkl_output, 'w')
+    pickle.dump(metadata, pkl_output, pickle.HIGHEST_PROTOCOL)
+    pkl_output.close()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--json_input', required=True)
+    parser.add_argument('--pkl_output', required=True)
+    
+    args = parser.parse_args()
+
+    transform_json_to_pkl(args)