view trimmomatic.xml @ 0:3358c3d30143 draft

Uploaded initial version.
author pjbriggs
date Mon, 01 Dec 2014 10:40:07 -0500
parents
children 2bd7cdbb6228
line wrap: on
line source

<tool id="trimmomatic" name="Trimmomatic" version="0.32.1">
  <description>flexible read trimming tool for Illumina NGS data</description>
  <command interpreter="bash">trimmomatic.sh
  ## Cheetah template producing the argument list for the trimmomatic.sh
  ## wrapper script. Backslash-escaped dollar signs are left for the shell
  ## to expand at job run time; all other dollar references are Galaxy
  ## parameters substituted by Cheetah.
  -mx8G
  -jar \$TRIMMOMATIC_DIR/trimmomatic-0.32.jar
  ## Mode and input/output files. Input datasets are always referenced
  ## through their enclosing "paired_end" conditional so that the template
  ## does not depend on non-prefixed parameter access.
  ## The thread count follows the slots allocated to the job (GALAXY_SLOTS)
  ## and falls back to 6 when that variable is not set.
  #if $paired_end.is_paired_end
    PE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_r1_in $paired_end.fastq_r2_in $fastq_out_r1_paired $fastq_out_r1_unpaired $fastq_out_r2_paired $fastq_out_r2_unpaired
  #else
    SE -threads \${GALAXY_SLOTS:-6} -phred33 $paired_end.fastq_in $fastq_out
  #end if
  ## ILLUMINACLIP option: when requested it is emitted before any other step.
  ## The adapter file name comes from the select list and is resolved against
  ## the TRIMMOMATIC_ADAPTERS_DIR environment variable.
  #if $illuminaclip.do_illuminaclip
    ILLUMINACLIP:\$TRIMMOMATIC_ADAPTERS_DIR/$illuminaclip.adapter_fasta:$illuminaclip.seed_mismatches:$illuminaclip.palindrome_clip_threshold:$illuminaclip.simple_clip_threshold
  #end if
  ## Other operations: one step per entry of the "operations" repeat, emitted
  ## in the order the user listed them.
  #for $op in $operations
    #set $op_name = str( $op.operation.name )
    #if $op_name == "SLIDINGWINDOW"
      ## SLIDINGWINDOW:window size:required average quality
      SLIDINGWINDOW:$op.operation.window_size:$op.operation.required_quality
    #elif $op_name == "MINLEN"
      ## MINLEN:minimum read length
      MINLEN:$op.operation.minlen
    #elif $op_name == "LEADING"
      ## LEADING:quality threshold
      LEADING:$op.operation.leading
    #elif $op_name == "TRAILING"
      ## TRAILING:quality threshold
      TRAILING:$op.operation.trailing
    #elif $op_name == "CROP"
      ## CROP:number of bases to keep
      CROP:$op.operation.crop
    #elif $op_name == "HEADCROP"
      ## HEADCROP:number of bases to remove
      HEADCROP:$op.operation.headcrop
    #end if
  #end for
  </command>
  <requirements>
    <!-- Package dependency on Trimmomatic 0.32. The command template reads the
         TRIMMOMATIC_DIR and TRIMMOMATIC_ADAPTERS_DIR environment variables;
         presumably this package is what defines them (TODO confirm against
         the dependency definition). -->
    <requirement type="package" version="0.32">trimmomatic</requirement>
  </requirements>
  <inputs>
    <!-- Single-end / paired-end switch: decides which FASTQ input(s) are asked for. -->
    <conditional name="paired_end">
      <param name="is_paired_end" type="boolean" label="Paired end data?"
             truevalue="yes" falsevalue="no" checked="on" />
      <when value="no">
        <param name="fastq_in" type="data" format="fastqsanger"
               label="Input FASTQ file" />
      </when>
      <when value="yes">
        <param name="fastq_r1_in" type="data" format="fastqsanger"
               label="Input FASTQ file (R1/first of pair)" />
        <param name="fastq_r2_in" type="data" format="fastqsanger"
               label="Input FASTQ file (R2/second of pair)" />
      </when>
    </conditional>

    <!-- Optional ILLUMINACLIP step; its settings are only shown when enabled.
         Each option value is an adapter FASTA file name. -->
    <conditional name="illuminaclip">
      <param name="do_illuminaclip" type="boolean"
             label="Perform initial ILLUMINACLIP step?"
             help="Cut adapter and other illumina-specific sequences from the read"
             truevalue="yes" falsevalue="no" checked="off" />
      <when value="yes">
        <param name="adapter_fasta" type="select" label="Adapter sequences to use">
          <option value="TruSeq2-SE.fa">TruSeq2 (single-ended, for Illumina GAII)</option>
          <option value="TruSeq3-SE.fa">TruSeq3 (single-ended, for MiSeq and HiSeq)</option>
          <option value="TruSeq2-PE.fa">TruSeq2 (paired-ended, for Illumina GAII)</option>
          <option value="TruSeq3-PE.fa">TruSeq3 (paired-ended, for MiSeq and HiSeq)</option>
          <option value="TruSeq3-PE-2.fa">TruSeq3 (additional seqs) (paired-ended, for MiSeq and HiSeq)</option>
          <option value="NexteraPE-PE.fa">Nextera (paired-ended)</option>
        </param>
        <param name="seed_mismatches" type="integer" value="2"
               label="Maximum mismatch count which will still allow a full match to be performed" />
        <param name="palindrome_clip_threshold" type="integer" value="30"
               label="How accurate the match between the two 'adapter ligated' reads must be for PE palindrome read alignment" />
        <param name="simple_clip_threshold" type="integer" value="10"
               label="How accurate the match between any adapter etc. sequence must be against a read" />
      </when>
    </conditional>

    <!-- One or more trimming operations (at least one is required). Each entry
         selects an operation and exposes only that operation's settings. -->
    <repeat name="operations" title="Trimmomatic Operation" min="1">
      <conditional name="operation">
        <param name="name" type="select" label="Select Trimmomatic operation to perform">
          <option selected="true" value="SLIDINGWINDOW">Sliding window trimming (SLIDINGWINDOW)</option>
          <option value="MINLEN">Drop reads below a specified length (MINLEN)</option>
          <option value="LEADING">Cut bases off the start of a read, if below a threshold quality (LEADING)</option>
          <option value="TRAILING">Cut bases off the end of a read, if below a threshold quality (TRAILING)</option>
          <option value="CROP">Cut the read to a specified length (CROP)</option>
          <option value="HEADCROP">Cut the specified number of bases from the start of the read (HEADCROP)</option>
        </param>
        <when value="SLIDINGWINDOW">
          <param name="window_size" type="integer" value="4"
                 label="Number of bases to average across" />
          <param name="required_quality" type="integer" value="20"
                 label="Average quality required" />
        </when>
        <when value="MINLEN">
          <param name="minlen" type="integer" value="20"
                 label="Minimum length of reads to be kept" />
        </when>
        <when value="LEADING">
          <param name="leading" type="integer" value="3"
                 label="Minimum quality required to keep a base"
                 help="Bases at the start of the read with quality below the threshold will be removed" />
        </when>
        <when value="TRAILING">
          <param name="trailing" type="integer" value="3"
                 label="Minimum quality required to keep a base"
                 help="Bases at the end of the read with quality below the threshold will be removed" />
        </when>
        <!-- CROP and HEADCROP have no default value: the field starts out empty. -->
        <when value="CROP">
          <param name="crop" type="integer" value=""
                 label="Number of bases to keep from the start of the read" />
        </when>
        <when value="HEADCROP">
          <param name="headcrop" type="integer" value=""
                 label="Number of bases to remove from the start of the read" />
        </when>
      </conditional>
    </repeat>
  </inputs>
  <outputs>
    <!-- Paired-end mode: four outputs (paired and unpaired reads for each of
         R1 and R2), kept only when is_paired_end is set. -->
    <data name="fastq_out_r1_paired" format="fastqsanger"
          label="${tool.name} on ${on_string} (R1 paired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data name="fastq_out_r1_unpaired" format="fastqsanger"
          label="${tool.name} on ${on_string} (R1 unpaired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data name="fastq_out_r2_paired" format="fastqsanger"
          label="${tool.name} on ${on_string} (R2 paired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <data name="fastq_out_r2_unpaired" format="fastqsanger"
          label="${tool.name} on ${on_string} (R2 unpaired)">
      <filter>paired_end['is_paired_end']</filter>
    </data>
    <!-- Single-end mode: one output, kept only when is_paired_end is not set. -->
    <data name="fastq_out" format="fastqsanger"
          label="${tool.name} on ${on_string}">
      <filter>not paired_end['is_paired_end']</filter>
    </data>
  </outputs>
  <tests>
    <!--
    **NB** applies to every test below: outputs have to be specified in the
    order that they appear in the tool (which is the order they will be
    written to the history) - the test framework seems to use the order and
    ignores the "name" attribute.
    -->

    <!-- Single-end example: one SLIDINGWINDOW operation with its default settings -->
    <test>
      <param name="is_paired_end" value="no" />
      <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
      <output name="fastq_out" file="trimmomatic_se_out1.fastq" />
    </test>

    <!-- Paired-end example: one SLIDINGWINDOW operation with its default settings -->
    <test>
      <param name="is_paired_end" value="yes" />
      <param name="fastq_r1_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="fastq_r2_in" value="Illumina_SG_R2.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="SLIDINGWINDOW" />
      <output name="fastq_out_r1_paired" file="trimmomatic_pe_r1_paired_out1.fastq" />
      <output name="fastq_out_r1_unpaired" file="trimmomatic_pe_r1_unpaired_out1.fastq" />
      <output name="fastq_out_r2_paired" file="trimmomatic_pe_r2_paired_out1.fastq" />
      <output name="fastq_out_r2_unpaired" file="trimmomatic_pe_r2_unpaired_out1.fastq" />
    </test>

    <!-- Single-end example (cropping): CROP with an explicit length of 10 -->
    <test>
      <param name="is_paired_end" value="no" />
      <param name="fastq_in" value="Illumina_SG_R1.fastq" ftype="fastqsanger" />
      <param name="operations_0|operation|name" value="CROP" />
      <param name="operations_0|operation|crop" value="10" />
      <output name="fastq_out" file="trimmomatic_se_out2.fastq" />
    </test>
  </tests>
  <help>
.. class:: infomark

**What it does**

Trimmomatic performs a variety of useful trimming tasks for Illumina paired-end and
single-end data.

This tool allows the following trimming steps to be performed:

 * **ILLUMINACLIP:** Cut adapter and other Illumina-specific sequences from the read
 * **SLIDINGWINDOW:** Perform a sliding window trimming, cutting once the average
   quality within the window falls below a threshold
 * **MINLEN:** Drop the read if it is below a specified length
 * **LEADING:** Cut bases off the start of a read, if below a threshold quality
 * **TRAILING:** Cut bases off the end of a read, if below a threshold quality
 * **CROP:** Cut the read to a specified length
 * **HEADCROP:** Cut the specified number of bases from the start of the read

If ILLUMINACLIP is requested then it is always performed first; subsequent options
can be mixed and matched and will be performed in the order that they have been
specified.

.. class:: warningmark

Note that trimming operation order is important.

-------------

.. class:: infomark

**Outputs**

For paired-end data a particular strength of Trimmomatic is that it retains the
pairing of reads (from R1 and R2) in the filtered output files:

 * Two FASTQ files (R1-paired and R2-paired) contain one read from each pair where
   both have survived filtering.
 * Additionally two FASTQ files (R1-unpaired and R2-unpaired) contain reads where
   one of the pair failed the filtering steps.

Retaining the same order and number of reads in the filtered output fastq files is
essential for many downstream analysis tools.

For single-end data the output is a single FASTQ file containing just the filtered
reads.

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core Facility at the
University of Manchester. It runs the Trimmomatic program, which has been developed
within Bjorn Usadel's group at RWTH Aachen University.

Trimmomatic website (including documentation):

 * http://www.usadellab.org/cms/index.php?page=trimmomatic 

The reference for Trimmomatic is:

 * Lohse M, Bolger AM, Nagel A, Fernie AR, Lunn JE, Stitt M, Usadel B. RobiNA: a
   user-friendly, integrated software solution for RNA-Seq-based transcriptomics.
   Nucleic Acids Res. 2012 Jul;40(Web Server issue):W622-7.

Please kindly acknowledge both this Galaxy tool and the Trimmomatic program if you
use it.
  </help>
</tool>