view trim_galore.xml @ 8:f1e71aeaa923 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/trim_galore commit d48f42b1c267c8ebfc50161ea764baed8ee66701-dirty
author bgruening
date Fri, 18 Mar 2016 07:56:01 -0400
parents 8352713cf939
children 1bfc7254232e
line wrap: on
line source

<tool id="trim_galore" name="Trim Galore!" version="0.4.2">
    <!-- Wrapper compatible with Trim Galore! version 0.4 -->
    <description>adaptive quality and adapter trimmer</description>
    <macros>
        <macro name="adapter_trimming">
            <!-- Shared adapter choice: each preset option uses the Trim Galore! flag itself as
                 its value, while "user" reveals a free-text adapter field. Macros that expand
                 this one can add parameters to the "user" case through the yield slot. -->
            <conditional name="trimming">
                <param name="trimming_select" label="Trimming reads?" type="select">
                    <option value="">Automatic detection</option>
                    <option value="--illumina">Illumina universal</option>
                    <option value="--nextera">Nextera transposase</option>
                    <option value="--small_rna">Illumina small RNA adapters</option>
                    <option value="user">User defined adapter trimming</option>
                </param>
                <!-- The presets take no additional parameters. -->
                <when value="" />
                <when value="--illumina" />
                <when value="--nextera" />
                <when value="--small_rna" />
                <!-- Free-text adapter, restricted to DNA letters by the validator. -->
                <when value="user">
                    <param name="adapter" label="Adapter sequence to be trimmed off" type="text" value="AGATCGGAAGAGC">
                        <validator message="Adapter sequence must contain DNA characters only (A,C,T,G or N)" type="regex">^[ACTGNactgn]*$</validator>
                    </param>
                    <yield />
                </when>
            </conditional>
        </macro>
        <macro name="paired_adapter_trimming">
            <!-- Options shared by both paired-end layouts: adapter selection (with an optional
                 second adapter for read 2), the trim1 switch and 3' clipping for each mate. -->
            <expand macro="adapter_trimming">
                <!-- Extra parameter placed in the user-defined adapter case. -->
                <param name="adapter2" type="text" optional="True" value="" label="Adapter sequence to be trimmed off read 2">
                    <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator>
                </param>
            </expand>
            <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" />
            <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 1">
                <help>Instructs Trim Galore! to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed.
                    This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
                    (--three_prime_clip_R1)</help>
            </param>
            <!-- The label names read 2, in line with this parameter's help text. -->
            <param name="three_prime_clip_R2" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 2">
                <help>Instructs Trim Galore! to remove N bp from the 3' end of read 2 after
                    adapter/quality trimming has been performed. This may remove some unwanted bias from
                    the 3' end that is not directly related to adapter sequence or basecall quality. (--three_prime_clip_R2)</help>
            </param>
        </macro>
    </macros>
    <requirements>
        <!-- conda dependency -->
        <!-- NOTE(review): cutadapt is pinned at two versions below; presumably one entry per
             dependency resolver. Confirm both are still required. -->
        <requirement version="1.8.3" type="package">cutadapt</requirement>
        <requirement version="1.8" type="package">cutadapt</requirement>
    </requirements>
    <!-- Reports the version of the bundled trim_galore script. The path is quoted so a tool
         directory containing whitespace does not split into several shell words. -->
    <version_command>
        perl "$__tool_directory__/trim_galore" --version
    </version_command>
    <command>
<![CDATA[

        ## trim_galore removes .fastq and .fq file extensions of input files.
        ## This is essential if Galaxy provides links to files (with real extensions)
        ## but that behaviour is causing an inconsistency in output filenaming.
        ## We work around this by linking every file to cwd without file extension
        ## (inputs whose path ends in .gz keep that suffix on the link).
        ## The links are chained with the shell AND operator so that a failed link
        ## aborts the job instead of letting trim_galore run on a missing file.

        #if $singlePaired.sPaired == "single":
            #if str($singlePaired.input_singles).endswith(".gz"):
                ln -s "${singlePaired.input_singles}" ./input_singles.gz &&
            #else
                ln -s "${singlePaired.input_singles}" ./input_singles &&
            #end if
        #elif $singlePaired.sPaired == "paired":
            #if str($singlePaired.input_mate1).endswith(".gz"):
                ln -s "${singlePaired.input_mate1}" ./input_mate1.gz &&
                ln -s "${singlePaired.input_mate2}" ./input_mate2.gz &&
            #else
                ln -s "${singlePaired.input_mate1}" ./input_mate1 &&
                ln -s "${singlePaired.input_mate2}" ./input_mate2 &&
            #end if
        #else:
            ## paired collection
            #if str($singlePaired.input_mate_pairs.forward).endswith(".gz"):
                ln -s "${singlePaired.input_mate_pairs.forward}" ./input_mate1.gz &&
                ln -s "${singlePaired.input_mate_pairs.reverse}" ./input_mate2.gz &&
            #else
                ln -s "${singlePaired.input_mate_pairs.forward}" ./input_mate1 &&
                ln -s "${singlePaired.input_mate_pairs.reverse}" ./input_mate2 &&
            #end if
        #end if

        ## the script path is quoted so a tool directory with whitespace stays one word
        perl "$__tool_directory__/trim_galore"

        ## we only support fastqsanger
        --phred33

        #if $params.settingsType == "custom":

            ## default 20
            --quality $params.quality

            ## default 1
            --stringency $params.stringency

            ## default 0.1
            -e $params.error_rate

            ## default 20
            --length $params.min_length

            #if $params.clip_R1:
                --clip_R1 $params.clip_R1
            #end if

            #if $params.clip_R2:
                --clip_R2 $params.clip_R2
            #end if

            #if $params.retain_unpaired.retain_unpaired_select == "retain_unpaired_output":
                --retain_unpaired
                --length_1 $params.retain_unpaired.length_1
                --length_2 $params.retain_unpaired.length_2
            #end if

        #end if

        ## RRBS specific options.
        #if $rrbs.settingsType == "custom":
            $rrbs.rrbs
            $rrbs.non_directional
        #end if

        --output_dir ./
        --suppress_warn

        #if $params.settingsType == "custom" and not $params.report:
            --no_report_file
        #end if

        ## adapter selection: either a user supplied sequence or the preset flag itself
        #if $singlePaired.trimming.trimming_select == 'user':
            ## default 'AGATCGGAAGAGC'
            #if $singlePaired.trimming.adapter.strip() != '':
               --adapter $singlePaired.trimming.adapter
            #end if
        #else:
            $singlePaired.trimming.trimming_select
        #end if


        #if $singlePaired.three_prime_clip_R1:
            --three_prime_clip_R1 $singlePaired.three_prime_clip_R1
        #end if

        #if $singlePaired.sPaired == "single":
            ## input sequence
            #if str($singlePaired.input_singles).endswith(".gz"):
                ./input_singles.gz
                --dont_gzip
            #else
                ./input_singles
            #end if
        #else:
            --paired

            $singlePaired.trim1

            #if $singlePaired.trimming.trimming_select == 'user':
                #if $singlePaired.trimming.adapter2 and $singlePaired.trimming.adapter2.strip() != '':
                    --adapter2 $singlePaired.trimming.adapter2
                #end if
            #end if

            #if $singlePaired.three_prime_clip_R2:
                --three_prime_clip_R2 $singlePaired.three_prime_clip_R2
            #end if

            ## input sequences
            #if $singlePaired.sPaired == "paired":
                #if str($singlePaired.input_mate1).endswith(".gz"):
                    ./input_mate1.gz
                    ./input_mate2.gz
                    --dont_gzip
                #else
                    ./input_mate1
                    ./input_mate2
                #end if
            #else:
                #if str($singlePaired.input_mate_pairs.forward).endswith(".gz"):
                    ./input_mate1.gz
                    ./input_mate2.gz
                    --dont_gzip
                #else
                    ./input_mate1
                    ./input_mate2
                #end if
            #end if

        #end if

        ##  Trim Galore is finished, rename the output if compressed
        ##  so the names match the from_work_dir paths declared in the outputs
        &&
        if [ -f input_singles.gz_trimmed.fq ] ; then mv input_singles.gz_trimmed.fq input_singles_trimmed.fq ; fi
        &&
        if [ -f input_mate1.gz_val_1.fq ] ; then mv input_mate1.gz_val_1.fq input_mate1_val_1.fq ; fi
        &&
        if [ -f input_mate2.gz_val_2.fq ] ; then mv input_mate2.gz_val_2.fq input_mate2_val_2.fq ; fi
        &&
        if [ -f input_mate1.gz_unpaired_1.fq ] ; then mv input_mate1.gz_unpaired_1.fq input_mate1_unpaired_1.fq ; fi
        &&
        if [ -f input_mate2.gz_unpaired_2.fq ] ; then mv input_mate2.gz_unpaired_2.fq input_mate2_unpaired_2.fq ; fi

        ##  Trim Galore! run is finished. Move the report files to the proper place
        ##  (all per-input reports are concatenated into the single report dataset)
        #if $params.settingsType == "custom" and $params.report:
            &&
            cat ./*_trimming_report.txt > "$report_file"
        #end if

]]>
    </command>
    <inputs>
        <!-- Input Parameters -->
        <conditional name="singlePaired">
            <!-- Library layout. Every case declares its read input(s) and pulls in the shared
                 adapter options through a macro. -->
            <param name="sPaired" type="select" label="Is this library paired- or single-end?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="single">
                <param name="input_singles" type="data" format="fastqsanger" label="Reads in FASTQ format" />
                <expand macro="adapter_trimming"/>

                <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end">
                    <help>Instructs Trim Galore! to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed.
                        This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
                        (--three_prime_clip_R1)</help>
                </param>
            </when>
            <when value="paired">
                <!-- Distinct labels so the two mate inputs can be told apart in the form. -->
                <param name="input_mate1" type="data" format="fastqsanger" label="Reads in FASTQ format (read 1)" />
                <param name="input_mate2" type="data" format="fastqsanger" label="Reads in FASTQ format (read 2)" />
                <expand macro="paired_adapter_trimming" />
            </when>
            <when value="paired_collection">
                <param name="input_mate_pairs" format="fastqsanger" type="data_collection" collection_type="paired"
                    label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
                <expand macro="paired_adapter_trimming" />
            </when>
        </conditional>

        <conditional name="params">
            <!-- Advanced settings: either Trim Galore!'s defaults or the full parameter list. -->
            <param name="settingsType" label="Trim Galore! advanced settings" type="select"
                help="You can use the default settings or set custom values for any of Trim Galore!'s parameters.">
                <option value="default">Use defaults</option>
                <option value="custom">Full parameter list</option>
            </param>
            <!-- Nothing to configure when the defaults are used. -->
            <when value="default" />
            <when value="custom">
                <!-- Trimming thresholds. -->
                <param name="quality" label="Trim low-quality ends from reads in addition to adapter removal"
                    help="For more information please see below." type="integer" value="20" />
                <param name="stringency" label="Overlap with adapter sequence required to trim a sequence" type="integer" value="1" />
                <param name="error_rate" label="Maximum allowed error rate" type="float" value="0.1" />
                <param name="min_length" label="Discard reads that became shorter than length INT" type="integer" value="20" />

                <!-- Optional 5' clipping, one value per mate. -->
                <param name="clip_R1" label="Instructs Trim Galore! to remove INT bp from the 5' end of read 1" type="integer" min="0" optional="True" />
                <param name="clip_R2" label="Instructs Trim Galore! to remove INT bp from the 5' end of read 2" type="integer" min="0" optional="True" />

                <param name="report" label="Generate a report file" help="" type="boolean" checked="False" truevalue="true" falsevalue="false" />

                <!-- Whether reads whose partner was discarded are written out, and with which length cutoffs. -->
                <conditional name="retain_unpaired">
                    <param name="retain_unpaired_select" label="specify if you would like to retain unpaired reads" type="select">
                        <option value="no_output">Do not output unpaired reads</option>
                        <option value="retain_unpaired_output">Output unpaired reads</option>
                    </param>
                    <when value="no_output" />
                    <when value="retain_unpaired_output">
                        <param name="length_1" label="Unpaired single-end read length cutoff needed for read 1 to be written" type="integer" value="35" />
                        <param name="length_2" label="Unpaired single-end read length cutoff needed for read 2 to be written" type="integer" value="35" />
                    </when>
                </conditional>

            </when>
        </conditional>

        <conditional name="rrbs">
            <!-- RRBS handling; the two switches only exist in the custom case. -->
            <param name="settingsType" label="RRBS specific settings" type="select">
                <option value="default">Use defaults (no RRBS)</option>
                <option value="custom">Full parameter list</option>
            </param>
            <when value="default" />
            <when value="custom">
                <!-- Each boolean carries its command-line flag as the true value. -->
                <param name="rrbs" label="Specifies that the input file was an MspI digested RRBS sample"
                    type="boolean" checked="True" truevalue="--rrbs" falsevalue="" />
                <param name="non_directional" label="Selecting this option for non-directional RRBS libraries"
                    type="boolean" checked="False" truevalue="--non_directional" falsevalue="" />
            </when>  <!-- custom -->
        </conditional>  <!-- rrbs -->

    </inputs>
    <outputs>
        <!-- Single-end result. -->
        <data name="trimmed_reads_single" format="fastqsanger" from_work_dir="input_singles_trimmed.fq"
            label="${tool.name} on ${on_string}: trimmed reads">
            <filter>singlePaired['sPaired'] == "single"</filter>
        </data>

        <!-- Paired collection results: validated pairs, plus unpaired reads when requested. -->
        <collection name="trimmed_reads_paired_collection" type="paired"
            label="${tool.name} on ${on_string}: paired reads">
            <data name="forward" from_work_dir="input_mate1_val_1.fq" format="fastqsanger" />
            <data name="reverse" from_work_dir="input_mate2_val_2.fq" format="fastqsanger" />
            <filter>singlePaired['sPaired'] == "paired_collection"</filter>
        </collection>

        <collection name="trimmed_reads_unpaired_collection" type="paired"
            label="${tool.name} on ${on_string}: unpaired reads">
            <data name="forward" from_work_dir="input_mate1_unpaired_1.fq" format="fastqsanger" />
            <data name="reverse" from_work_dir="input_mate2_unpaired_2.fq" format="fastqsanger" />
            <filter>params['settingsType'] == "custom"</filter>
            <filter>params['retain_unpaired']['retain_unpaired_select'] == "retain_unpaired_output"</filter>
            <filter>singlePaired['sPaired'] == "paired_collection"</filter>
        </collection>

        <!-- Paired-end results delivered as individual datasets. -->
        <data name="trimmed_reads_pair1" format="fastqsanger" from_work_dir="input_mate1_val_1.fq"
            label="${tool.name} on ${on_string}: trimmed reads pair 1">
            <filter>singlePaired['sPaired'] == "paired"</filter>
        </data>

        <data name="trimmed_reads_pair2" format="fastqsanger" from_work_dir="input_mate2_val_2.fq"
            label="${tool.name} on ${on_string}: trimmed reads pair 2">
            <filter>singlePaired['sPaired'] == "paired"</filter>
        </data>

        <!-- Unpaired reads of a paired-end run, only when retaining them was requested. -->
        <data name="unpaired_reads_1" format="fastqsanger" from_work_dir="input_mate1_unpaired_1.fq"
            label="${tool.name} on ${on_string}: unpaired reads (1)">
            <filter>params['settingsType'] == "custom"</filter>
            <filter>params['retain_unpaired']['retain_unpaired_select'] == "retain_unpaired_output"</filter>
            <filter>singlePaired['sPaired'] == "paired"</filter>
        </data>

        <data name="unpaired_reads_2" format="fastqsanger" from_work_dir="input_mate2_unpaired_2.fq"
            label="${tool.name} on ${on_string}: unpaired reads (2)">
            <filter>params['settingsType'] == "custom"</filter>
            <filter>params['retain_unpaired']['retain_unpaired_select'] == "retain_unpaired_output"</filter>
            <filter>singlePaired['sPaired'] == "paired"</filter>
        </data>

        <!-- Trimming report, available in custom mode with the report option switched on. -->
        <data name="report_file" format="txt" label="${tool.name} on ${on_string}: report file">
            <filter>params['settingsType'] == "custom"</filter>
            <filter>params['report'] == True</filter>
        </data>

    </outputs>
    <tests>
        <!-- Single-end, custom settings, with report. -->
        <test>
            <param name="sPaired" value="single" />
            <param name="input_singles" ftype="fastqsanger" value="sanger_full_range_original_sanger.fastqsanger" />
            <param name="settingsType" value="custom" />
            <param name="report" value="true" />
            <output name="trimmed_reads_single" ftype="fastqsanger" file="sanger_full_range_results1.fastqsanger" />
            <output name="report_file" ftype="txt" file="sanger_full_range_report_results1.txt" lines_diff="8" />
        </test>

        <!-- Single-end with the Illumina universal adapter preset. -->
        <test>
            <param name="sPaired" value="single" />
            <param name="input_singles" ftype="fastqsanger" value="sanger_full_range_original_sanger.fastqsanger" />
            <param name="trimming_select" value="--illumina" />
            <output name="trimmed_reads_single" ftype="fastqsanger" file="sanger_full_range_results2.fastqsanger" />
        </test>

        <!-- Single-end with an adapter value supplied.
             NOTE(review): "adapter" is declared only in the "user" case of trimming_select, which
             this test does not select explicitly; confirm the adapter value is actually applied. -->
        <test>
            <param name="sPaired" value="single" />
            <param name="input_singles" ftype="fastqsanger" value="sanger_full_range_original_sanger.fastqsanger" />
            <param name="adapter" value="AAAGAGC" />
            <output name="trimmed_reads_single" ftype="fastqsanger" file="sanger_full_range_results3.fastqsanger" />
        </test>

        <!-- Paired-end datasets, custom settings, with report. -->
        <test>
            <param name="sPaired" value="paired" />
            <param name="input_mate1" ftype="fastqsanger" value="bwa-mem-fastq1.fq" />
            <param name="input_mate2" ftype="fastqsanger" value="bwa-mem-fastq2.fq" />
            <param name="settingsType" value="custom" />
            <param name="report" value="true" />
            <output name="trimmed_reads_pair1" ftype="fastqsanger" file="paired_example_pair1_results2.fastqsanger" />
            <output name="trimmed_reads_pair2" ftype="fastqsanger" file="paired_example_pair2_results2.fastqsanger" />
            <output name="report_file" ftype="txt" file="paired_example_results2.txt" lines_diff="24" />
        </test>

        <!-- Paired collection, custom settings, report and unpaired reads retained. -->
        <test>
            <param name="sPaired" value="paired_collection" />
            <param name="input_mate_pairs">
                <collection type="paired">
                    <element name="forward" ftype="fastqsanger" value="bwa-mem-fastq1.fq" />
                    <element name="reverse" ftype="fastqsanger" value="bwa-mem-fastq2.fq" />
                </collection>
            </param>
            <param name="settingsType" value="custom" />
            <param name="report" value="true" />
            <param name="retain_unpaired_select" value="retain_unpaired_output" />

            <output name="report_file" ftype="txt" file="paired_collection_example_results3.txt" lines_diff="24" />

            <output_collection name="trimmed_reads_paired_collection" type="paired">
                <element name="forward" ftype="fastqsanger" file="paired_collection_example_pair1_results3.fastqsanger" />
                <element name="reverse" ftype="fastqsanger" file="paired_collection_example_pair2_results3.fastqsanger" />
            </output_collection>

            <output_collection name="trimmed_reads_unpaired_collection" type="paired">
                <element name="forward" ftype="fastqsanger" file="paired_collection_example_unpair1_results3.fastqsanger" />
                <element name="reverse" ftype="fastqsanger" file="paired_collection_example_unpair2_results3.fastqsanger" />
            </output_collection>

        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

`Trim Galore!`_ is a wrapper script to automate quality and adapter trimming as well as quality control, with some added functionality to remove biased methylation positions for RRBS sequence files (for directional, non-directional (or paired-end) sequencing). Its main features are:

 * For adapter trimming, Trim Galore! uses the first 13 bp of Illumina standard adapters ('AGATCGGAAGAGC') by default (suitable for both ends of paired-end libraries), but accepts other adapter sequences, too
 * For MspI-digested RRBS libraries, Trim Galore! performs quality and adapter trimming in two subsequent steps. This allows it to remove 2 additional bases that contain a cytosine which was artificially introduced in the end-repair step during the library preparation
 * For any kind of FASTQ file other than MspI-digested RRBS, Trim Galore! can perform single-pass adapter and quality trimming
 * The Phred quality of basecalls and the stringency for adapter removal can be specified individually
 * Trim Galore! can remove sequences if they become too short during the trimming process. For paired-end files Trim Galore! removes entire sequence pairs if one (or both) of the two reads became shorter than the set length cutoff. Reads of a read-pair that are longer than a given threshold but for which the partner read has become too short can optionally be written out to single-end files. This ensures that the information of a read pair is not lost entirely if only one read is of good quality
 * Trim Galore! can trim paired-end files by 1 additional bp from the 3' end of all reads to avoid problems with invalid alignments with Bowtie 1

.. _Trim Galore!: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/

It is developed by Felix Krueger at the Babraham Institute.
]]>
    </help>
    <citations />
</tool>