Mercurial > repos > bgruening > trim_galore
diff trim_galore_wrapper.xml @ 4:2c1f0fe810f7 draft
Uploaded
author | bgruening |
---|---|
date | Wed, 15 Apr 2015 11:32:11 -0400 |
parents | eb546ac2aab2 |
children | f11ff7be8c78 |
line wrap: on
line diff
--- a/trim_galore_wrapper.xml Fri Jul 19 09:49:25 2013 -0400 +++ b/trim_galore_wrapper.xml Wed Apr 15 11:32:11 2015 -0400 @@ -1,62 +1,63 @@ -<tool id="trim_galore" name="Trim Galore" version="0.2.8.1"> - <!-- Wrapper compatible with Trim Galore version 0.2.8 --> +<tool id="trim_galore" name="Trim Galore" version="0.3.7.0"> + <!-- Wrapper compatible with Trim Galore version 0.3.7 --> <description>adaptive quality and adapter trimmer</description> <version_command interpreter="perl">trim_galore --version</version_command> <requirements> - <requirement type="package" version="1.1">cutadapt</requirement> + <requirement type="package" version="1.8">cutadapt</requirement> </requirements> - <command interpreter="perl"> - #from glob import glob - #import tempfile, os + <macros> + <macro name="paired_adapter_trimming"> + <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" /> + <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed off read 1"> + <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator> + </param> + <param name="adapter2" type="text" optional="True" value="" label="Adapter sequence to be trimmed off read 2"> + <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator> + </param> - ## - ## Creating a temporary directory where trim_galore will store all result files - ## + <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 1"> + <help>Instructs Trim Galore to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed. + This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. 
+ (--three_prime_clip_R1)</help> + </param> + <param name="three_prime_clip_R2" type="integer" value="" optional="True" label="Remove N bp from the 3' end of read 2"> + <help>Instructs Trim Galore to remove N bp from the 3' end of read 2 after + adapter/quality trimming has been performed. This may remove some unwanted bias from + the 3' end that is not directly related to adapter sequence or basecall quality. (--three_prime_clip_R2)</help> + </param> + </macro> + </macros> + <command> +<![CDATA[ - #set $temp_dir = os.path.abspath(tempfile.mkdtemp()) - - - ## trim_galore removes .fastq and .fq file extensions of input files. - ## That is essential if Galaxy provides links to files (these can have real extensions), but that behaviour is causing an inconsistency in output filenaming. - ## Fix: link every file to $TMP without file extension + ## trim_galore removes .fastq and .fq file extensions of input files. + ## This is essential if Galaxy provides links to files (with real extensions) + ## but that behaviour is causing an inconsistency in output filenaming. 
+ ## We work around this by linking every file to cwd without file extension #if $singlePaired.sPaired == "single": - #set $input_singles_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) - #set $input_singles_tmp = $input_singles_tmp_handle.name - #silent $input_singles_tmp_handle.close() - #silent os.system("ln -s %s %s" % (str($singlePaired.input_singles), $input_singles_tmp)) + ln -s "${singlePaired.input_singles}" ./input_singles; + #elif $singlePaired.sPaired == "paired": + ln -s "${singlePaired.input_mate1}" ./input_mate1; + ln -s "${singlePaired.input_mate2}" ./input_mate2; #else: - #set $input_mate1_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) - #set $input_mate2_tmp_handle = tempfile.NamedTemporaryFile( dir=$temp_dir ) - - #set $input_mate1_tmp = $input_mate1_tmp_handle.name - #silent $input_mate1_tmp_handle.close() - - #set $input_mate2_tmp = $input_mate2_tmp_handle.name - #silent $input_mate2_tmp_handle.close() - - #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate1), $input_mate1_tmp)) - #silent os.system("ln -s %s %s" % (str($singlePaired.input_mate2), $input_mate2_tmp)) + ln -s "${singlePaired.input_mate_pairs.forward}" ./input_mate1; + ln -s "${singlePaired.input_mate_pairs.reverse}" ./input_mate2; #end if - trim_galore + perl $__tool_directory__/trim_galore - ## - ## Input parameters - ## - + ## we only support fastqsanger + --phred33 #if $params.settingsType == "custom": ## default 20 --quality $params.quality - ## default 'AGATCGGAAGAGC' - #if $params.adapter.strip() != '': - --adapter $params.adapter - #end if + ## default 1 --stringency $params.stringency - + ## default 0.1 -e $params.error_rate @@ -66,7 +67,7 @@ #if int($params.clip_R1) > 0: --clip_R1 $params.clip_R1 #end if - + #if int($params.clip_R2) > 0: --clip_R2 $params.clip_R2 #end if @@ -79,128 +80,91 @@ #end if - ## ## RRBS specific options. 
- ## - #if $rrbs.settingsType == "custom": - $rrbs.rrbs $rrbs.non_directional + #end if + --output_dir ./ + --suppress_warn + + #if $params.settingsType == "custom" and not $params.report: + --no_report_file #end if - --output_dir $temp_dir - --suppress_warn + ## default 'AGATCGGAAGAGC' + #if $singlePaired.adapter.strip() != '': + --adapter $singlePaired.adapter + #end if + + #if $singlePaired.three_prime_clip_R1: + --three_prime_clip_R1 $singlePaired.three_prime_clip_R1 + #end if #if $singlePaired.sPaired == "single": - - #if $singlePaired.input_singles.ext == "fastqillumina": - --phred64 - #elif $singlePaired.input_singles.ext == "fastqsanger": - --phred33 - #end if - - #if $params.settingsType == "custom": - #if not $params.report: - --no_report_file - #end if - #end if - ## input sequence - $input_singles_tmp + ./input_singles #else: - --paired - #if $singlePaired.input_mate1.ext == "fastqillumina": - --phred64 - #elif $singlePaired.input_mate1.ext == "fastqsanger": - --phred33 - #end if + --paired $singlePaired.trim1 - #if $singlePaired.adapter2.strip() != '': + + #if $singlePaired.adapter2 and $singlePaired.adapter2.strip() != '': --adapter2 $singlePaired.adapter2 #end if - #if $params.settingsType == "custom": - #if not $params.report: - --no_report_file - #end if + #if $singlePaired.three_prime_clip_R2: + --three_prime_clip_R2 $singlePaired.three_prime_clip_R2 #end if ## input sequences - $input_mate1_tmp - $input_mate2_tmp + ./input_mate1 + ./input_mate2 #end if - && - - ## - ## Trim Galore! run is finished. 
Move the result files to the proper place - ## - - - #if $singlePaired.sPaired == "single": - #set $single_end_path = os.path.join($temp_dir, os.path.basename(str($input_singles_tmp)) + '_trimmed.fq') - mv $single_end_path $trimmed_reads_single; - - #if $params.settingsType == "custom": - #if $params.report: - #set $report_path = os.path.join($temp_dir, os.path.basename(str($input_singles_tmp)) + '_trimming_report.txt') - mv $report_path $report_file; - #end if - #end if + && - #else: - #set $paired_end_path_1 = os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_val_1.fq') - #set $paired_end_path_2 = os.path.join($temp_dir, os.path.basename(str($input_mate2_tmp)) + '_val_2.fq') - mv $paired_end_path_1 $trimmed_reads_pair1; - mv $paired_end_path_2 $trimmed_reads_pair2; - - #if $params.settingsType == "custom": - #if $params.retain_unpaired.settingsType == "retain_unpaired_output": - #set $unpaired_path_1 = os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_unpaired_1.fq') - #set $unpaired_path_2 = os.path.join($temp_dir, os.path.basename(str($input_mate2_tmp)) + '_unpaired_2.fq') - mv $unpaired_path_1 $unpaired_reads_1; - mv $unpaired_path_2 $unpaired_reads_2; - #end if - - #if $params.report: - #set $report_path = os.path.join($temp_dir, os.path.basename(str($input_mate1_tmp)) + '_trimming_report.txt') - mv $report_path $report_file; - #end if - - #end if + ## Trim Galore! run is finished. 
Move the report files to the proper place + #if $params.settingsType == "custom" and $params.report: + cat ./*_trimming_report.txt > $report_file; #end if - ## delete the temp_dir - rm -rf $temp_dir - +]]> </command> <inputs> - <!-- Input Parameters --> <conditional name="singlePaired"> - <param name="sPaired" type="select" label="Is this library mate-paired?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> + <param name="sPaired" type="select" label="Is this library paired- or single-end?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + <option value="paired_collection">Paired Collection</option> </param> <when value="single"> - <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> + <param name="input_singles" type="data" format="fastqsanger" label="Reads in FASTQ format" /> + <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed"> + <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator> + </param> + <param name="three_prime_clip_R1" type="integer" value="" optional="True" label="Remove N bp from the 3' end"> + <help>Instructs Trim Galore to remove N bp from the 3' end of read 1 after adapter/quality trimming has been performed. + This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. + (--three_prime_clip_R1)</help> + </param> </when> <when value="paired"> - <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> - <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." 
/> - <param name="trim1" type="boolean" truevalue="--trim1" falsevalue="" checked="False" label="Trims 1 bp off every read from its 3' end." help="" /> - <param name="adapter2" type="text" value="" label="Optional adapter sequence to be trimmed off read 2"> - <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator> - </param> + <param name="input_mate1" type="data" format="fastqsanger" label="Reads in FASTQ format" /> + <param name="input_mate2" type="data" format="fastqsanger" label="Reads in FASTQ format" /> + <expand macro="paired_adapter_trimming" /> + </when> + <when value="paired_collection"> + <param name="input_mate_pairs" format="fastqsanger" type="data_collection" collection_type="paired" + label="Select a paired collection" help="See help section for an explanation of dataset collections"/> + <expand macro="paired_adapter_trimming" /> </when> </conditional> - <conditional name="params"> <param name="settingsType" type="select" label="Trim galore! advanced settings" help="You can use the default settings or set custom values for any of Trim Galore's parameters."> <option value="default">Use Defaults</option> @@ -209,16 +173,14 @@ <when value="default" /> <!-- Full/advanced params. --> <when value="custom"> - <param name="quality" type="integer" value="20" label="Trim low-quality ends from reads in addition to adapter removal." help="For more information please see below." /> - <param name="adapter" type="text" value="AGATCGGAAGAGC" label="Adapter sequence to be trimmed"> - <validator type="regex" message="Adapter sequence must contain DNA characters only (A,C,T,G or N)">^[ACTGNactgn]*$</validator> - </param> + <param name="quality" type="integer" value="20" label="Trim low-quality ends from reads in addition to adapter removal" + help="For more information please see below." 
/> <param name="stringency" type="integer" value="1" label="Overlap with adapter sequence required to trim a sequence" /> <param name="error_rate" type="float" value="0.1" label="Maximum allowed error rate" /> <param name="min_length" type="integer" value="20" label="Discard reads that became shorter than length INT" /> - <param name="clip_R1" type="integer" value="0" label="nstructs Trim Galore to remove INT bp from the 5' end of read 1" /> - <param name="clip_R2" type="integer" value="0" label="nstructs Trim Galore to remove INT bp from the 5' end of read 2" /> + <param name="clip_R1" type="integer" value="0" label="Instructs Trim Galore to remove INT bp from the 5' end of read 1" /> + <param name="clip_R2" type="integer" value="0" label="Instructs Trim Galore to remove INT bp from the 5' end of read 2" /> <param name="report" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Generate a report file" help="" /> @@ -246,95 +208,144 @@ <when value="default" /> <!-- Full/advanced params. 
--> <when value="custom"> - <param name="rrbs" type="boolean" truevalue="--rrbs" falsevalue="" checked="True" label="Specifies that the input file was an MspI digested RRBS sample" /> - <param name="non_directional" type="boolean" truevalue="--non_directional" falsevalue="" checked="False" label="Selecting this option for non-directional RRBS libraries" /> + <param name="rrbs" type="boolean" truevalue="--rrbs" falsevalue="" checked="True" + label="Specifies that the input file was an MspI digested RRBS sample" /> + <param name="non_directional" type="boolean" truevalue="--non_directional" falsevalue="" checked="False" + label="Selecting this option for non-directional RRBS libraries" /> </when> <!-- full --> - </conditional> <!-- params --> + </conditional> <!-- params --> </inputs> <outputs> - <data format="fastq" name="trimmed_reads_single" label="${tool.name} on ${on_string}: trimmed reads"> + <data format="fastqsanger" name="trimmed_reads_single" from_work_dir="input_singles_trimmed.fq" label="${tool.name} on ${on_string}: trimmed reads"> <filter>singlePaired['sPaired'] == "single"</filter> - <actions> - <action type="format"> - <option type="from_param" name="singlePaired.input_singles" param_attribute="ext" /> - </action> - </actions> - </data> - - <data format="fastq" name="trimmed_reads_pair1" label="${tool.name} on ${on_string}: trimmed reads pair 1"> - <filter>singlePaired['sPaired'] == "paired"</filter> - <actions> - <action type="format"> - <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" /> - </action> - </actions> - </data> - - <data format="fastq" name="trimmed_reads_pair2" label="${tool.name} on ${on_string}: trimmed reads pair 2"> - <filter>singlePaired['sPaired'] == "paired"</filter> - <actions> - <action type="format"> - <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" /> - </action> - </actions> </data> - <data format="fastq" name="unpaired_reads_1" label="${tool.name} on 
${on_string}: unpaired reads (1)"> - <filter> + <collection name="trimmed_reads_paired_collection" type="paired" label="${tool.name} on ${on_string}: trimmed reads"> + <filter>singlePaired['sPaired'] == "paired_collection"</filter> + <data name="forward" format="fastqsanger" from_work_dir="input_mate1_val_1.fq" /> + <data name="reverse" format="fastqsanger" from_work_dir="input_mate2_val_2.fq" /> + </collection> + + <collection name="trimmed_reads_unpaired_collection" type="paired" label="${tool.name} on ${on_string}: unpaired reads"> + <filter> (( - params['settingsType'] == "custom" and - params['retain_unpaired']['settingsType'] == "retain_unpaired_output" + params['settingsType'] == "custom" and + params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and + singlePaired['sPaired'] == "paired_collection" )) - </filter> - <actions> - <action type="format"> - <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" /> - </action> - </actions> + </filter> + <data name="forward" format="fastqsanger" from_work_dir="input_mate1_unpaired_1.fq" /> + <data name="reverse" format="fastqsanger" from_work_dir="input_mate2_unpaired_2.fq" /> + </collection> + + + <data format="fastqsanger" name="trimmed_reads_pair1" from_work_dir="input_mate1_val_1.fq" + label="${tool.name} on ${on_string}: trimmed reads pair 1"> + <filter>singlePaired['sPaired'] == "paired"</filter> </data> - <data format="fastq" name="unpaired_reads_2" label="${tool.name} on ${on_string}: unpaired reads (2)"> - <filter> + <data format="fastqsanger" name="trimmed_reads_pair2" from_work_dir="input_mate2_val_2.fq" + label="${tool.name} on ${on_string}: trimmed reads pair 2"> + <filter>singlePaired['sPaired'] == "paired"</filter> + </data> + + <data format="fastqsanger" name="unpaired_reads_1" from_work_dir="input_mate1_val_1.fq" + label="${tool.name} on ${on_string}: unpaired reads (1)"> + <filter> (( - params['settingsType'] == "custom" and - 
params['retain_unpaired']['settingsType'] == "retain_unpaired_output" + params['settingsType'] == "custom" and + params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and + singlePaired['sPaired'] == "paired" )) - </filter> - <actions> - <action type="format"> - <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" /> - </action> - </actions> + </filter> + </data> + + <data format="fastqsanger" name="unpaired_reads_2" from_work_dir="input_mate2_val_2.fq" + label="${tool.name} on ${on_string}: unpaired reads (2)"> + <filter> + (( + params['settingsType'] == "custom" and + params['retain_unpaired']['settingsType'] == "retain_unpaired_output" and + singlePaired['sPaired'] == "paired" + )) + </filter> </data> <data format="txt" name="report_file" label="${tool.name} on ${on_string}: report file"> - <filter> + <filter> (( params['settingsType'] == "custom" and params['report'] == True )) - </filter> + </filter> </data> </outputs> <tests> - </tests> + <test> + <!-- Trim entire sequences; keep empty reads --> + <param name="input_singles" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="sPaired" value="single" /> + <param name="settingsType" value="custom" /> + <param name="report" value="true" /> + <output name="trimmed_reads_single" file="sanger_full_range_results1.fastqsanger" ftype="fastqsanger"/> + <output name="report_file" file="sanger_full_range_report_results1.txt" ftype="txt" lines_diff="2" /> + </test> + + <test> + <!-- Trim entire sequences; keep empty reads --> + <param name="input_mate1" value="bwa-mem-fastq1.fq" ftype="fastqsanger" /> + <param name="input_mate2" value="bwa-mem-fastq2.fq" ftype="fastqsanger" /> + <param name="sPaired" value="paired" /> + <param name="settingsType" value="custom" /> + <param name="report" value="true" /> + <output name="trimmed_reads_pair1" file="paired_example_pair1_results2.fastqsanger" ftype="fastqsanger"/> + <output 
name="trimmed_reads_pair2" file="paired_example_pair2_results2.fastqsanger" ftype="fastqsanger"/> + <output name="report_file" file="paired_example_results2.txt" ftype="txt" lines_diff="8" /> + </test> + <test> + <!-- Trim entire sequences; keep empty reads --> + <param name="input_mate_pairs"> + <collection type="paired"> + <element name="forward" value="bwa-mem-fastq1.fq" /> + <element name="reverse" value="bwa-mem-fastq2.fq" /> + </collection> + </param> + <param name="sPaired" value="paired_collection" /> + <param name="settingsType" value="custom" /> + <param name="report" value="true" /> + <param name="retain_unpaired" value="retain_unpaired_output" /> + + <output name="report_file" file="paired_collection_example_results3.txt" ftype="txt" lines_diff="8" /> + + <output_collection name="trimmed_reads_paired_collection" type="paired"> + <element name="forward" file="paired_collection_example_pair1_results3.fastqsanger" ftype="fastqsanger"/> + <element name="reverse" file="paired_collection_example_pair2_results3.fastqsanger" ftype="fastqsanger"/> + </output_collection> + + <output_collection name="trimmed_reads_unpaired_collection" type="paired"> + <element name="forward" file="paired_collection_example_unpair1_results3.fastqsanger" ftype="fastqsanger"/> + <element name="reverse" file="paired_collection_example_unpair2_results3.fastqsanger" ftype="fastqsanger"/> + </output_collection> + </test> + </tests> <help> +<![CDATA[ **What it does** -TrimGalore!_ is a wrapper script that makes use of the publically available +TrimGalore_ is a wrapper script that makes use of the publically available adapter trimming tool Cutadapt. +.. _TrimGalore: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + -.. _TrimGalore!: http://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ +It is developed by Felix Krueger at the Babraham Institute. -It is developed by Krueger F at the Babraham Institute. - - - </help> +]]> + </help> </tool>