changeset 3:f8a9a5eaca8a draft

Updated to version 0.32.3: add support for FASTQ pairs (dataset collections)
author pjbriggs
date Wed, 23 Sep 2015 08:59:23 -0400
parents a60283899c6d
children 14d05f2d511d
files README.rst install_tool_deps.sh package_trimmomatic.sh run_planemo_tests.sh trimmomatic.xml
diffstat 5 files changed, 204 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Wed Apr 22 09:36:27 2015 -0400
+++ b/README.rst	Wed Sep 23 08:59:23 2015 -0400
@@ -58,6 +58,9 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+0.32.3     - Add support for FASTQ R1/R2 pairs using dataset collections (input
+             can be dataset collection, in which case tool also outputs dataset
+             collections) and improve order and naming of output files.
 0.32.2     - Use ``GALAXY_SLOTS`` to set the appropriate number of threads to use
              at runtime (default is 6).
 0.32.1     - Remove ``trimmomatic_adapters.loc.sample`` and hard-code adapter files
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_tool_deps.sh	Wed Sep 23 08:59:23 2015 -0400
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Install Trimmomatic 0.32 (jar file plus adapters) under DIR/trimmomatic/0.32
+# and write an env.sh there, for testing from the command line.
+# Usage: install_tool_deps.sh DIR  (a relative DIR is taken from the current dir)
+#
+# Stop at the first failing command instead of carrying on half-installed
+set -e
+TOP_DIR=$1
+if [ -z "$TOP_DIR" ] ; then
+    echo "Usage: $(basename "$0") DIR" >&2
+    exit 1
+fi
+case "$TOP_DIR" in
+    /*) ;;
+    *) TOP_DIR="$(pwd)/$TOP_DIR" ;;
+esac
+mkdir -p "$TOP_DIR"
+cd "$TOP_DIR"
+INSTALL_DIR="$TOP_DIR/trimmomatic/0.32"
+mkdir -p "$INSTALL_DIR"
+# Work in a scratch directory that is removed on exit, even after a failure
+wd=$(mktemp -d)
+trap 'rm -rf "$wd"' EXIT
+pushd "$wd"
+wget -q http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.32.zip
+unzip -qq Trimmomatic-0.32.zip
+mv Trimmomatic-0.32/trimmomatic-0.32.jar "$INSTALL_DIR/"
+# cp (not mv) so a rerun over an existing adapters directory still succeeds
+cp -R Trimmomatic-0.32/adapters "$INSTALL_DIR/"
+popd
+# Make setup file (unquoted EOF: $INSTALL_DIR is expanded as the file is written)
+cat > "$INSTALL_DIR/env.sh" <<EOF
+#!/bin/sh
+# Source this to setup trimmomatic/0.32
+echo Setting up Trimmomatic 0.32
+export TRIMMOMATIC_DIR="$INSTALL_DIR"
+export TRIMMOMATIC_ADAPTERS_DIR="$INSTALL_DIR/adapters"
+#
+EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/package_trimmomatic.sh	Wed Sep 23 08:59:23 2015 -0400
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# Package Trimmomatic tool files into tgz file for upload to
+# Galaxy toolshed (run from the directory containing the files)
+#
+TGZ=trimmomatic.tgz
+# Refuse to overwrite an archive left over from an earlier run
+if [ -f "$TGZ" ] ; then
+    echo "$TGZ: already exists, please remove" >&2
+    exit 1
+fi
+# Go by tar's exit status: a partial archive can exist even when tar fails
+if tar cvzf "$TGZ" \
+    README.rst \
+    trimmomatic.xml \
+    trimmomatic.sh \
+    tool_dependencies.xml \
+    test-data ; then
+    echo "Created $TGZ"
+else
+    echo "Failed to create $TGZ" >&2
+    rm -f "$TGZ"
+    exit 1
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run_planemo_tests.sh	Wed Sep 23 08:59:23 2015 -0400
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Install dependencies and set up environment for
+# trimmomatic tool, then run tests using planemo
+#
+# Note that any arguments supplied to the script are
+# passed directly to the "planemo test..." invocation
+#
+# e.g. --install_galaxy (to get planemo to create a
+#                        Galaxy instance to run tests)
+#
+#      --galaxy_root DIR (to run tests using existing
+#                         Galaxy instance)
+#
+# List of dependencies
+TOOL_DEPENDENCIES="trimmomatic/0.32"
+# Where to find them
+TOOL_DEPENDENCIES_DIR="$(pwd)/test.tool_dependencies.trimmomatic"
+if [ ! -d "$TOOL_DEPENDENCIES_DIR" ] ; then
+    echo "WARNING $TOOL_DEPENDENCIES_DIR not found" >&2
+    echo Creating tool dependencies dir
+    mkdir -p "$TOOL_DEPENDENCIES_DIR"
+    echo Installing tool dependencies
+    "$(dirname "$0")/install_tool_deps.sh" "$TOOL_DEPENDENCIES_DIR"
+fi
+# Load dependencies (the list is split on whitespace, one entry per word)
+for dep in $TOOL_DEPENDENCIES ; do
+    env_file="$TOOL_DEPENDENCIES_DIR/$dep/env.sh"
+    if [ -e "$env_file" ] ; then
+        . "$env_file"
+    else
+        echo "ERROR no env.sh file found for $dep" >&2
+        exit 1
+    fi
+done
+# Run the planemo tests; "$@" hands each argument through unsplit (e.g. a
+# --galaxy_root DIR whose path contains spaces)
+planemo test "$@" "$(dirname "$0")/trimmomatic.xml"
+#
--- a/trimmomatic.xml	Wed Apr 22 09:36:27 2015 -0400
+++ b/trimmomatic.xml	Wed Sep 23 08:59:23 2015 -0400
@@ -1,12 +1,31 @@
-<tool id="trimmomatic" name="Trimmomatic" version="0.32.2">
+<tool id="trimmomatic" name="Trimmomatic" version="0.32.3">
   <description>flexible read trimming tool for Illumina NGS data</description>
-  <command interpreter="bash">trimmomatic.sh
+  <requirements>
+    <requirement type="package" version="0.32">trimmomatic</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" />
+  </stdio>
+  <command interpreter="bash"><![CDATA[
+  trimmomatic.sh
   -mx8G
   -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar
   #if $paired_end.is_paired_end
-    PE -threads \${GALAXY_SLOTS:-6} -phred33 $fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired 
+    PE -threads \${GALAXY_SLOTS:-6} -phred33
+    #set $paired_input_type = $paired_end.paired_input_type_conditional.paired_input_type
+    #if $paired_input_type == "pair_of_files"
+      "${paired_end.paired_input_type_conditional.fastq_r1_in}"
+      "${paired_end.paired_input_type_conditional.fastq_r2_in}"
+      "${fastq_out_r1_paired}" "${fastq_out_r1_unpaired}"
+      "${fastq_out_r2_paired}" "${fastq_out_r2_unpaired}"
+    #else
+      "${paired_end.paired_input_type_conditional.fastq_pair.forward}"
+      "${paired_end.paired_input_type_conditional.fastq_pair.reverse}"
+      "${fastq_out_paired.forward}" "${fastq_out_unpaired.forward}"
+      "${fastq_out_paired.reverse}" "${fastq_out_unpaired.reverse}"
+    #end if
   #else
-    SE -threads \${GALAXY_SLOTS:-6} -phred33 $fastq_in $fastq_out
+    SE -threads \${GALAXY_SLOTS:-6} -phred33 "$fastq_in" "$fastq_out"
   #end if
   ## ILLUMINACLIP option
   #if $illuminaclip.do_illuminaclip
@@ -35,10 +54,7 @@
       HEADCROP:$op.operation.headcrop
     #end if
   #end for
-  </command>
-  <requirements>
-    <requirement type="package" version="0.32">trimmomatic</requirement>
-  </requirements>
+  ]]></command>
   <inputs>
     <conditional name="paired_end">
     <param name="is_paired_end" type="boolean" label="Paired end data?" truevalue="yes" falsevalue="no" checked="on" />
@@ -46,10 +62,23 @@
       <param name="fastq_in" type="data" format="fastqsanger" label="Input FASTQ file" />
     </when>
     <when value="yes">
-      <param name="fastq_r1_in" type="data" format="fastqsanger"
-	     label="Input FASTQ file (R1/first of pair)" />
-      <param name="fastq_r2_in" type="data" format="fastqsanger"
-	     label="Input FASTQ file (R2/second of pair)" />
+      <conditional name="paired_input_type_conditional">
+        <param name="paired_input_type" type="select" label="Input Type">
+          <option value="pair_of_files" selected="true">Pair of datasets</option>
+          <option value="collection">Dataset collection pair</option>
+        </param>
+        <when value="pair_of_files">
+ 	  <param name="fastq_r1_in" type="data" format="fastqsanger"
+		 label="Input FASTQ file (R1/first of pair)" />
+ 	  <param name="fastq_r2_in" type="data" format="fastqsanger"
+		 label="Input FASTQ file (R2/second of pair)" />
+	</when>
+        <when value="collection">
+          <param name="fastq_pair" format="fastqsanger" type="data_collection"
+ 		 collection_type="paired"
+ 		 label="Select FASTQ dataset collection with R1/R2 pair" />
+        </when>
+      </conditional>
     </when>
     </conditional>
     <conditional name="illuminaclip">
@@ -101,21 +130,37 @@
     </repeat>
   </inputs>
   <outputs>
-    <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${on_string} (R1 paired)">
+    <data format="fastqsanger" name="fastq_out_r1_paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r1_in.name} (R1 paired)">
       <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter>
     </data>
-    <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${on_string} (R1 unpaired)">
+    <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r2_in.name} (R2 paired)">
       <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter>
     </data>
-    <data format="fastqsanger" name="fastq_out_r2_paired" label="${tool.name} on ${on_string} (R2 paired)">
+    <data format="fastqsanger" name="fastq_out_r1_unpaired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r1_in.name} (R1 unpaired)">
       <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter>
     </data>
-    <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${on_string} (R2 unpaired)">
+    <data format="fastqsanger" name="fastq_out_r2_unpaired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_r2_in.name} (R2 unpaired)">
       <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "pair_of_files"</filter>
     </data>
-    <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${on_string}">
+    <data format="fastqsanger" name="fastq_out" label="${tool.name} on ${paired_end.fastq_in.name}">
       <filter>not paired_end['is_paired_end']</filter>
     </data>
+    <collection name="fastq_out_paired" type="paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.name}: paired">
+      <data name="forward" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.forward.name} (R1 paired)" />
+      <data name="reverse" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.reverse.name} (R2 paired)" />
+      <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "collection"</filter>
+    </collection>
+    <collection name="fastq_out_unpaired" type="paired" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.name}: unpaired">
+      <data name="forward" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.forward.name} (R1 unpaired)" />
+      <data name="reverse" format="fastqsanger" label="${tool.name} on ${paired_end.paired_input_type_conditional.fastq_pair.reverse.name} (R2 unpaired)" />
+      <filter>paired_end['is_paired_end']</filter>
+      <filter>paired_end['paired_input_type_conditional']['paired_input_type'] == "collection"</filter>
+    </collection>
   </outputs>
   <tests>
     <test>
@@ -159,8 +204,28 @@
       -->
       <output name="fastq_out" file="trimmomatic_se_out2.fastq" />
     </test>
+    <test>
+      <!-- Paired-end input given as a "paired" dataset collection; outputs are checked as collections -->
+      <param name="is_paired_end" value="yes" />
+      <param name="paired_input_type" value="collection" />
+      <param name="fastq_pair">
+        <collection type="paired">
+          <element name="forward" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
+          <element name="reverse" value="Illumina_SG_R2.fastq" ftype="fastqsanger"/>
+        </collection>
+      </param>
+      <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
+      <output_collection name="fastq_out_paired" type="paired">
+	<element name="forward" file="trimmomatic_pe_r1_paired_out1.fastq" />
+	<element name="reverse" file="trimmomatic_pe_r2_paired_out1.fastq" />
+      </output_collection>
+      <output_collection name="fastq_out_unpaired" type="paired">
+	<element name="forward" file="trimmomatic_pe_r1_unpaired_out1.fastq" />
+	<element name="reverse" file="trimmomatic_pe_r2_unpaired_out1.fastq" />
+      </output_collection>
+    </test>
   </tests>
-  <help>
+  <help><![CDATA[
 .. class:: infomark
 
 **What it does**
@@ -191,6 +256,14 @@
 
 .. class:: infomark
 
+**Inputs**
+
+For single-end data this Trimmomatic tool accepts a single FASTQ file; for
+paired-end data it will accept either two FASTQ files (R1 and R2), or a
+dataset collection containing the R1/R2 FASTQ pair.
+
+.. class:: infomark
+
 **Outputs**
 
 For paired-end data a particular strength of Trimmomatic is that it retains the
@@ -201,6 +274,12 @@
  * Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where
    one of the pair failed the filtering steps.
 
+.. class:: warningmark
+
+If the input consists of a dataset collection with the R1/R2 FASTQ pair then
+the outputs will also include two dataset collections: one for the 'paired'
+outputs and one for the 'unpaired' (as described above).
+
 Retaining the same order and number of reads in the filtered output fastq files is
 essential for many downstream analysis tools.
 
@@ -228,7 +307,7 @@
 
 Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you
 use it.
-  </help>
+  ]]></help>
   <citations>
     <!--
     See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set