diff trim_galore_wrapper.xml @ 4:2c1f0fe810f7 draft

Uploaded
author bgruening
date Wed, 15 Apr 2015 11:32:11 -0400
parents eb546ac2aab2
children f11ff7be8c78
line wrap: on
line diff
--- a/trim_galore_wrapper.xml	Fri Jul 19 09:49:25 2013 -0400
+++ b/trim_galore_wrapper.xml	Wed Apr 15 11:32:11 2015 -0400
@@ -1,62 +1,63 @@
-<tool id="trim_galore" name="Trim Galore" version="0.2.8.1">
-    <!-- Wrapper compatible with Trim Galore version 0.2.8 -->
+<tool id="trim_galore" name="Trim Galore" version="0.3.7.0">
+    <!-- Wrapper compatible with Trim Galore version 0.3.7 -->
     <description>adaptive quality and adapter trimmer</description>
     <version_command interpreter="perl">trim_galore --version</version_command>
     <requirements>
-        <requirement type="package" version="1.1">cutadapt</requirement>
+        <requirement type="package" version="1.8">cutadapt</requirement>
     </requirements>
-    <command interpreter="perl">
-        #from glob import glob
-        #import tempfile, os
+    <macros>
+        <macro name="paired_adapter_trimming">
+            <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" />
+            <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed off read 1">
+                <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
+            </param>
+            <param name="adapter2" type="text" optional="True" value="" label="Adapter sequence to be trimmed off read 2">
+                <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
+            </param>
 
-        ##
-        ##  Creating a temporary directory where trim_galore will store all result files
-        ##
+            <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 1">
+                <help>Instructs Trim Galore to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed. 
+                    This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
+                    (--three_prime_clip_R1)</help>
+            </param>
+            <param name="three_prime_clip_R2" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 2">
+                <help>Instructs Trim Galore to remove N bp from the 3' end of read 2 after
+                    adapter/quality trimming has been performed. This may remove some unwanted bias from
+                    the 3' end that is not directly related to adapter sequence or basecall quality. (--three_prime_clip_R2)</help>
+            </param>
+        </macro>
+    </macros>
+    <command>
+<![CDATA[
 
-        #set $temp_dir = os.path.abspath(tempfile.mkdtemp())
-
-
-        ## trim_galore removes .fastq and .fq file extensions of input files. 
-        ## That is essential if Galaxy provides links to files (these can have real extensions), but that behaviour is causing an inconsitency in output filenaming.
-        ## Fix: link every file to $TMP without file extension
+        ## trim_galore removes .fastq and .fq file extensions of input files.
+        ## This is essential if Galaxy provides links to files (with real extensions)
+        ## but that behaviour is causing an inconsistency in output filenaming.
+        ## We work around this by linking every file to cwd without file extension
 
         #if $singlePaired.sPaired == "single":
-            #set $input_singles_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
-            #set $input_singles_tmp = $input_singles_tmp_handle.name
-            #silent $input_singles_tmp_handle.close()
-            #silent os.system("ln -s %s %s" % (str($singlePaired.input_singles), $input_singles_tmp))
+            ln -s "${singlePaired.input_singles}" ./input_singles;
+        #elif $singlePaired.sPaired == "paired":
+            ln -s "${singlePaired.input_mate1}" ./input_mate1;
+            ln -s "${singlePaired.input_mate2}" ./input_mate2;
         #else:
-            #set $input_mate1_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
-            #set $input_mate2_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir )
-            
-            #set $input_mate1_tmp = $input_mate1_tmp_handle.name
-            #silent $input_mate1_tmp_handle.close()
-
-            #set $input_mate2_tmp = $input_mate2_tmp_handle.name
-            #silent $input_mate2_tmp_handle.close()
-
-            #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate1), $input_mate1_tmp))
-            #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate2), $input_mate2_tmp))
+            ln -s "${singlePaired.input_mate_pairs.forward}" ./input_mate1;
+            ln -s "${singlePaired.input_mate_pairs.reverse}" ./input_mate2;
         #end if
 
-        trim_galore
+        perl $__tool_directory__/trim_galore
 
-        ##
-        ##  Input parameters
-        ##
-
+        ## we only support fastqsanger
+        --phred33
 
         #if $params.settingsType == "custom":
 
             ## default 20
             --quality $params.quality
-            ## default 'AGATCGGAAGAGC'
-            #if $params.adapter.strip() != '':
-                --adapter $params.adapter
-            #end if
+
             ## default 1
             --stringency $params.stringency
-            
+
             ## default 0.1
             -e $params.error_rate
 
@@ -66,7 +67,7 @@
             #if int($params.clip_R1) > 0:
                 --clip_R1 $params.clip_R1
             #end if
-            
+
             #if int($params.clip_R2) > 0:
                 --clip_R2 $params.clip_R2
             #end if
@@ -79,128 +80,91 @@
 
         #end if
 
-        ##
         ## RBBS specific options.
-        ##
-
         #if $rrbs.settingsType == "custom":
-
             $rrbs.rrbs
             $rrbs.non_directional
+        #end if
 
+        --output_dir ./
+        --suppress_warn
+
+        #if $params.settingsType == "custom" and not $params.report:
+            --no_report_file
         #end if
 
-        --output_dir $temp_dir
-        --suppress_warn
 
+        ## default 'AGATCGGAAGAGC'
+        #if $singlePaired.adapter.strip() != '':
+           --adapter $singlePaired.adapter
+        #end if
+
+        #if $singlePaired.three_prime_clip_R1:
+            --three_prime_clip_R1 $singlePaired.three_prime_clip_R1
+        #end if
 
         #if $singlePaired.sPaired == "single":
-
-            #if $singlePaired.input_singles.ext == "fastqillumina":
-                --phred64
-            #elif $singlePaired.input_singles.ext == "fastqsanger":
-                --phred33
-            #end if
-
-            #if $params.settingsType == "custom":
-                #if not $params.report:
-                    --no_report_file
-                #end if
-            #end if
-
             ## input sequence
-            $input_singles_tmp
+            ./input_singles
         #else:
-            --paired 
-            #if $singlePaired.input_mate1.ext == "fastqillumina":
-                --phred64
-            #elif $singlePaired.input_mate1.ext == "fastqsanger":
-                --phred33
-            #end if
+            --paired
 
             $singlePaired.trim1
-            #if $singlePaired.adapter2.strip() != '':
+
+            #if $singlePaired.adapter2 and $singlePaired.adapter2.strip() != '':
                 --adapter2 $singlePaired.adapter2
             #end if
 
-            #if $params.settingsType == "custom":
-                #if not $params.report:
-                    --no_report_file
-                #end if
+            #if $singlePaired.three_prime_clip_R2:
+                --three_prime_clip_R2 $singlePaired.three_prime_clip_R2
             #end if
 
             ## input sequences
-            $input_mate1_tmp
-            $input_mate2_tmp
+            ./input_mate1
+            ./input_mate2
 
         #end if
 
-        &amp;&amp;
-
-        ##
-        ##  Trim Galore! run is finished. Move the result files to the proper place
-        ##
-
-
-        #if $singlePaired.sPaired == "single":
-            #set $single_end_path =  os.path.join($temp_dir, os.path.basename(str($input_singles_tmp)) + '_trimmed.fq')
-            mv $single_end_path $trimmed_reads_single;
-
-            #if $params.settingsType == "custom":
-                #if $params.report:
-                    #set $report_path =  os.path.join($temp_dir, os.path.basename(str($input_singles_tmp)) + '_trimming_report.txt')
-                    mv $report_path $report_file;
-                #end if
-            #end if
+        ## Chain the report step only when a report is requested; a bare trailing '&&' is a shell syntax error
 
-        #else:
-            #set $paired_end_path_1 =  os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_val_1.fq')
-            #set $paired_end_path_2 =  os.path.join($temp_dir, os.path.basename(str($input_mate2_tmp)) + '_val_2.fq')
-            mv $paired_end_path_1 $trimmed_reads_pair1;
-            mv $paired_end_path_2 $trimmed_reads_pair2;
-
-            #if $params.settingsType == "custom":
-                #if $params.retain_unpaired.settingsType == "retain_unpaired_output":
-                    #set $unpaired_path_1 =  os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_unpaired_1.fq')
-                    #set $unpaired_path_2 =  os.path.join($temp_dir, os.path.basename(str($input_mate2_tmp)) + '_unpaired_2.fq')
-                    mv $unpaired_path_1 $unpaired_reads_1;
-                    mv $unpaired_path_2 $unpaired_reads_2;
-                #end if
-
-                #if $params.report:
-                    #set $report_path =  os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_trimming_report.txt')
-                    mv $report_path $report_file;
-                #end if
-
-            #end if
+        ##  Trim Galore! run is finished. Move the report files to the proper place
+        #if $params.settingsType == "custom" and $params.report:
+            && cat ./*_trimming_report.txt > $report_file;
         #end if
 
-        ## delete the temp_dir
-        rm -rf $temp_dir
-
+]]>
     </command>
     <inputs>
-
         <!-- Input Parameters -->
         <conditional name="singlePaired">
-            <param name="sPaired" type="select" label="Is this library mate-paired?">
-              <option value="single">Single-end</option>
-              <option value="paired">Paired-end</option>
+            <param name="sPaired" type="select" label="Is this library paired- or single-end?">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
+                <option value="paired_collection">Paired Collection</option>
             </param>
             <when value="single">
-                <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <param name="input_singles" type="data" format="fastqsanger" label="Reads in FASTQ format" />
+                <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed">
+                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
+                </param>
+                <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end">
+                    <help>Instructs Trim Galore to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed. 
+                        This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
+                        (--three_prime_clip_R1)</help>
+                </param>
             </when>
             <when value="paired">
-                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
-                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
-                <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" />
-                <param name="adapter2" type="text" value="" label="Optional adapter sequence to be trimmed off read 2">
-                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
-                </param>
+                <param name="input_mate1" type="data" format="fastqsanger" label="Reads in FASTQ format" />
+                <param name="input_mate2" type="data" format="fastqsanger" label="Reads in FASTQ format" />
+                <expand macro="paired_adapter_trimming" />
+            </when>
+            <when value="paired_collection">
+                <param name="input_mate_pairs" format="fastqsanger" type="data_collection" collection_type="paired" 
+                    label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
+                <expand macro="paired_adapter_trimming" />
             </when>
         </conditional>
 
-
         <conditional name="params">
             <param name="settingsType" type="select" label="Trim galore! advanced settings" help="You can use the default settings or set custom values for any of Trim Galore's parameters.">
               <option value="default">Use Defaults</option>
@@ -209,16 +173,14 @@
             <when value="default" />
             <!-- Full/advanced params. -->
             <when value="custom">
-                <param name="quality" type="integer" value="20" label="Trim low-quality ends from reads in addition to adapter removal." help="For more information please see below." />
-                <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed">
-                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
-                </param>
+                <param name="quality" type="integer" value="20" label="Trim low-quality ends from reads in addition to adapter removal"
+                    help="For more information please see below." />
                 <param name="stringency" type="integer" value="1" label="Overlap with adapter sequence required to trim a sequence" />
                 <param name="error_rate" type="float" value="0.1" label="Maximum allowed error rate" />
                 <param name="min_length" type="integer" value="20" label="Discard reads that became shorter than length INT" />
 
-                <param name="clip_R1" type="integer" value="0" label="nstructs Trim Galore to remove INT bp from the 5' end of read 1" />
-                <param name="clip_R2" type="integer" value="0" label="nstructs Trim Galore to remove INT bp from the 5' end of read 2" />
+                <param name="clip_R1" type="integer" value="0" label="Instructs Trim Galore to remove INT bp from the 5' end of read 1" />
+                <param name="clip_R2" type="integer" value="0" label="Instructs Trim Galore to remove INT bp from the 5' end of read 2" />
 
                 <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Generate a report file" help="" />
 
@@ -246,95 +208,144 @@
             <when value="default" />
             <!-- Full/advanced params. -->
             <when value="custom">
-                <param name="rrbs" type="boolean" truevalue="--rrbs" falsevalue="" checked="True" label="Specifies that the input file was an MspI digested RRBS sample" />
-                <param name="non_directional" type="boolean" truevalue="--non_directional" falsevalue="" checked="False" label="Selecting this option for non-directional RRBS libraries" />
+                <param name="rrbs" type="boolean" truevalue="--rrbs" falsevalue="" checked="True"
+                    label="Specifies that the input file was an MspI digested RRBS sample" />
+                <param name="non_directional" type="boolean" truevalue="--non_directional" falsevalue="" checked="False"
+                    label="Selecting this option for non-directional RRBS libraries" />
             </when>  <!-- full -->
-      </conditional>  <!-- params -->
+        </conditional>  <!-- params -->
 
     </inputs>
     <outputs>
 
-        <data format="fastq" name="trimmed_reads_single" label="${tool.name} on ${on_string}: trimmed reads">
+        <data format="fastqsanger" name="trimmed_reads_single" from_work_dir="input_singles_trimmed.fq" label="${tool.name} on ${on_string}: trimmed reads">
           <filter>singlePaired['sPaired'] == "single"</filter>
-          <actions>
-                <action type="format">
-                  <option type="from_param" name="singlePaired.input_singles" param_attribute="ext" />
-                </action>
-          </actions>
-        </data>
-
-        <data format="fastq" name="trimmed_reads_pair1" label="${tool.name} on ${on_string}: trimmed reads pair 1">
-            <filter>singlePaired['sPaired'] == "paired"</filter>
-            <actions>
-                <action type="format">
-                    <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
-                </action>
-          </actions>
-        </data>
-
-        <data format="fastq" name="trimmed_reads_pair2" label="${tool.name} on ${on_string}: trimmed reads pair 2">
-            <filter>singlePaired['sPaired'] == "paired"</filter>
-            <actions>
-                <action type="format">
-                    <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
-                </action>
-            </actions>
         </data>
 
-        <data format="fastq" name="unpaired_reads_1" label="${tool.name} on ${on_string}: unpaired reads (1)">
-          <filter>
+        <collection name="trimmed_reads_paired_collection" type="paired" label="${tool.name} on ${on_string}: trimmed reads">
+            <filter>singlePaired['sPaired'] == "paired_collection"</filter>
+            <data name="forward" format="fastqsanger" from_work_dir="input_mate1_val_1.fq" />
+            <data name="reverse" format="fastqsanger" from_work_dir="input_mate2_val_2.fq" />
+        </collection>
+
+        <collection name="trimmed_reads_unpaired_collection" type="paired" label="${tool.name} on ${on_string}: unpaired reads">
+            <filter>
             ((
-              params['settingsType'] == "custom" and
-              params['retain_unpaired']['settingsType'] == "retain_unpaired_output"
+                params['settingsType'] == "custom" and
+                params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and
+                singlePaired['sPaired'] == "paired_collection"
             ))
-          </filter>
-          <actions>
-                <action type="format">
-                  <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
-                </action>
-          </actions>
+            </filter>
+            <data name="forward" format="fastqsanger" from_work_dir="input_mate1_unpaired_1.fq" />
+            <data name="reverse" format="fastqsanger" from_work_dir="input_mate2_unpaired_2.fq" />
+        </collection>
+
+
+        <data format="fastqsanger" name="trimmed_reads_pair1" from_work_dir="input_mate1_val_1.fq"
+            label="${tool.name} on ${on_string}: trimmed reads pair 1">
+            <filter>singlePaired['sPaired'] == "paired"</filter>
         </data>
 
-        <data format="fastq" name="unpaired_reads_2" label="${tool.name} on ${on_string}: unpaired reads (2)">
-          <filter>
+        <data format="fastqsanger" name="trimmed_reads_pair2" from_work_dir="input_mate2_val_2.fq"
+            label="${tool.name} on ${on_string}: trimmed reads pair 2">
+            <filter>singlePaired['sPaired'] == "paired"</filter>
+        </data>
+
+        <data format="fastqsanger" name="unpaired_reads_1" from_work_dir="input_mate1_unpaired_1.fq"
+            label="${tool.name} on ${on_string}: unpaired reads (1)">
+            <filter>
             ((
-              params['settingsType'] == "custom" and
-              params['retain_unpaired']['settingsType'] == "retain_unpaired_output"
+                params['settingsType'] == "custom" and
+                params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and
+                singlePaired['sPaired'] == "paired"
             ))
-          </filter>
-          <actions>
-                <action type="format">
-                  <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
-                </action>
-          </actions>
+            </filter>
+        </data>
+
+        <data format="fastqsanger" name="unpaired_reads_2" from_work_dir="input_mate2_unpaired_2.fq"
+            label="${tool.name} on ${on_string}: unpaired reads (2)">
+            <filter>
+            ((
+                params['settingsType'] == "custom" and
+                params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and
+                singlePaired['sPaired'] == "paired"
+            ))
+            </filter>
         </data>
 
         <data format="txt" name="report_file" label="${tool.name} on ${on_string}: report file">
-          <filter>
+            <filter>
             ((
               params['settingsType'] == "custom" and
               params['report'] == True
             ))
-          </filter>
+            </filter>
         </data>
 
     </outputs>
     <tests>
-    </tests>
+        <test>
+            <!-- Trim entire sequences; keep empty reads -->
+            <param name="input_singles" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="sPaired" value="single" />
+            <param name="settingsType" value="custom" />
+            <param name="report" value="true" />
+            <output name="trimmed_reads_single" file="sanger_full_range_results1.fastqsanger" ftype="fastqsanger"/>
+            <output name="report_file" file="sanger_full_range_report_results1.txt" ftype="txt" lines_diff="2" />
+        </test>
+
+        <test>
+            <!-- Trim entire sequences; keep empty reads -->
+            <param name="input_mate1" value="bwa-mem-fastq1.fq" ftype="fastqsanger" />
+            <param name="input_mate2" value="bwa-mem-fastq2.fq" ftype="fastqsanger" />
+            <param name="sPaired" value="paired" />
+            <param name="settingsType" value="custom" />
+            <param name="report" value="true" />
+            <output name="trimmed_reads_pair1" file="paired_example_pair1_results2.fastqsanger" ftype="fastqsanger"/>
+            <output name="trimmed_reads_pair2" file="paired_example_pair2_results2.fastqsanger" ftype="fastqsanger"/>
+            <output name="report_file" file="paired_example_results2.txt" ftype="txt" lines_diff="8" />
+        </test>
 
+        <test>
+            <!-- Trim entire sequences; keep empty reads -->
+            <param name="input_mate_pairs">
+                <collection type="paired">
+                    <element name="forward" value="bwa-mem-fastq1.fq" />
+                    <element name="reverse" value="bwa-mem-fastq2.fq" />
+                </collection>
+            </param>
+            <param name="sPaired" value="paired_collection" />
+            <param name="settingsType" value="custom" />
+            <param name="report" value="true" />
+            <param name="retain_unpaired" value="retain_unpaired_output" />
+
+            <output name="report_file" file="paired_collection_example_results3.txt" ftype="txt" lines_diff="8" />
+
+            <output_collection name="trimmed_reads_paired_collection" type="paired">
+                <element name="forward" file="paired_collection_example_pair1_results3.fastqsanger" ftype="fastqsanger"/>
+                <element name="reverse" file="paired_collection_example_pair2_results3.fastqsanger" ftype="fastqsanger"/>
+            </output_collection>
+
+            <output_collection name="trimmed_reads_unpaired_collection" type="paired">
+                <element name="forward" file="paired_collection_example_unpair1_results3.fastqsanger" ftype="fastqsanger"/>
+                <element name="reverse" file="paired_collection_example_unpair2_results3.fastqsanger" ftype="fastqsanger"/>
+            </output_collection>
+        </test>
+    </tests>
     <help>
+<![CDATA[
 
 **What it does**
 
-TrimGalore!_ is a wrapper script that makes use of the publically available 
+TrimGalore_ is a wrapper script that makes use of the publicly available
 adapter trimming tool Cutadapt.
 
+.. _TrimGalore: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+
 
-.. _TrimGalore!: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+It is developed by Felix Krueger at the Babraham Institute.
 
 
-It is developed by Krueger F at the Babraham Institute.
-
-
-  </help>
+]]>
+    </help>
 </tool>