comparison dada2_plotComplexity.xml @ 0:ab2030f217a9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:51:48 -0500
parents
children 1728e5ee871a
comparison
equal deleted inserted replaced
-1:000000000000 0:ab2030f217a9
1 <tool id="dada2_plotComplexity" name="dada2: plotComplexity" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
2 <description>Plot sequence complexity profile</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[
10 ## Name the input files by linking: every dataset is symlinked into forward/ (and into reverse/ for paired data) under a file name derived from its element identifier
11 #import re
12 mkdir forward &&
13 #if $batch_cond.paired_cond.paired_select != "single"
14 mkdir reverse &&
15 #end if
16 ## batch mode links a single dataset or pair, joint mode loops over all datasets; characters other than word characters, dash and dot in the identifiers are replaced by an underscore
17 #if $batch_cond.batch_select == "batch":
18 #set elid = re.sub('[^\w\-\.]', '_', str($batch_cond.paired_cond.reads.element_identifier))
19 #if $batch_cond.paired_cond.paired_select != "paired"
20 ln -s '$batch_cond.paired_cond.reads' forward/'$elid' &&
21 #else
22 ln -s '$batch_cond.paired_cond.reads.forward' forward/'$elid' &&
23 ln -s '$batch_cond.paired_cond.reads.reverse' reverse/'$elid' &&
24 #end if
25 #if $batch_cond.paired_cond.paired_select == "separate"
26 ln -s '$batch_cond.paired_cond.sdaer' reverse/'$elid' &&
27 #end if
28 #else
29 #for $read in $batch_cond.paired_cond.reads:
30 #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
31 #if $batch_cond.paired_cond.paired_select != "paired"
32 ln -s '$read' forward/'$elid' &&
33 #else
34 ln -s '$read.forward' forward/'$elid' &&
35 ln -s '$read.reverse' reverse/'$elid' &&
36 #end if
37 #end for
38 #if $batch_cond.paired_cond.paired_select == "separate"
39 #for $read in $batch_cond.paired_cond.sdaer:
40 #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
41 ln -s '$read' reverse/'$elid' &&
42 #end for
43 #end if
44 #end if
45 ## the plots are created by the R script that is generated from the dada2_script configfile
46 Rscript --slave '$dada2_script'
47 ]]></command>
48 <configfiles>
49 <configfile name="dada2_script"><![CDATA[
50 #import re
51 library(ggplot2, quietly=TRUE)
52 library(dada2, quietly=TRUE)
53 ## aggregate is only offered in joint mode; in batch mode it is fixed to FALSE
54 #if $batch_cond.batch_select != "batch"
55 agg <- $batch_cond.aggregate
56 #else
57 agg <- FALSE
58 #end if
59 ## an empty window parameter is passed as NULL, i.e. the whole sequence is used
60 #if str($window) == ""
61 wndw <- NULL
62 #else
63 wndw <- $window
64 #end if
65 ## forward (or single end) reads: plot the complexity of all files linked into forward/ and save it as output.pdf
66 fwd_files <- list.files("forward", full.names=TRUE)
67 qp <- plotComplexity(fwd_files, kmerSize=$kmerSize, window=wndw, by=$by, n=$n, bins=$bins, aggregate = agg)
68 ggsave('output.pdf', qp, width = 20,height = 15,units = c("cm"))
69 ## paired data only: the same plot for the files linked into reverse/, saved as output_rev.pdf
70 #if $batch_cond.paired_cond.paired_select != "single"
71 rev_files <- list.files("reverse", full.names=TRUE)
72 qp <- plotComplexity(rev_files, kmerSize=$kmerSize, window=wndw, by=$by, n=$n, bins=$bins, aggregate = agg)
73 ggsave('output_rev.pdf', qp, width = 20,height = 15,units = c("cm"))
74 #end if
75 ]]></configfile>
76 </configfiles>
77 <inputs>
78 <conditional name="batch_cond"> <!-- NOTE(review): the fastq_input macro comes from macros.xml (not shown here); it presumably provides the nested paired_cond conditional (paired_select, reads, sdaer) that the command and the tests refer to - confirm -->
79 <param name="batch_select" type="select" label="Processing mode" help="Joint processing processes all reads at once in a single job creating a single output (two in the case of paired data). Batch processes the samples in separate jobs and creates separate output for each">
80 <option value="joint">Joint</option>
81 <option value="batch">Batch</option>
82 </param>
83 <when value="joint">
84 <expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/>
85 <param argument="aggregate" type="boolean" label="Aggregate data" checked="True" truevalue="TRUE" falsevalue="FALSE" help="Create a single plot for all data sets (default) or a separate plot for each data set"/>
86 </when>
87 <when value="batch">
88 <expand macro="fastq_input" multiple="False" collection_type="paired" argument_fwd="fl" argument_rev="fl"/>
89 </when>
90 </conditional>
91 <param argument="kmerSize" type="integer" value="2" label="kmer size" help="kmer: also known as oligonucleotide words"/>
92 <param argument="window" type="integer" value="" optional="true" label="width (nucleotides) of the moving window" help="If not specified (default) the whole sequence is used"/>
93 <param argument="by" type="integer" value="5" label="step size (nucleotides)" help="between each moving window tested"/>
94 <param argument="n" type="integer" value="100000" label="sample number" help="number of records to sample from the fastq file"/>
95 <param argument="bins" type="integer" value="100" label="number of bins to use for the histogram" help=""/>
96 </inputs>
97 <outputs> <!-- single end data yields one pdf (output); paired data yields one pdf each for the forward and the reverse reads (output_fwd, output_rev) -->
98 <data name="output" format="pdf" from_work_dir="output.pdf">
99 <filter>batch_cond['paired_cond']['paired_select'] == "single"</filter>
100 </data>
101 <data name="output_fwd" format="pdf" from_work_dir="output.pdf" label="${tool.name} on ${on_string}: forward reads">
102 <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter>
103 </data>
104 <data name="output_rev" format="pdf" from_work_dir="output_rev.pdf" label="${tool.name} on ${on_string}: reverse reads">
105 <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter>
106 </data>
107 </outputs>
108 <tests>
109 <!-- all tests compare against the same expected file (complexity.pdf) by size only (sim_size); the delta should ensure that the pdf contains a plot -->
110 <!-- paired joint, no-aggregate -->
111 <test expect_num_outputs="2">
112 <param name="batch_cond|batch_select" value="joint"/>
113 <param name="batch_cond|paired_cond|paired_select" value="paired"/>
114 <param name="batch_cond|paired_cond|reads">
115 <collection type="list:paired">
116 <element name="F3D0_S188_L001">
117 <collection type="paired">
118 <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
119 <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
120 </collection>
121 </element>
122 </collection>
123 </param>
124 <param name="batch_cond|aggregate" value="FALSE"/>
125 <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
126 <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
127 </test>
128 <!-- paired-separate joint, no-aggregate (sim_size because element ids differ) -->
129 <test expect_num_outputs="2">
130 <param name="batch_cond|batch_select" value="joint"/>
131 <param name="batch_cond|paired_cond|paired_select" value="separate"/>
132 <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
133 <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
134 <param name="batch_cond|aggregate" value="FALSE"/>
135 <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
136 <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
137 </test>
138 <!-- single, joint, aggregate, small sample -->
139 <test expect_num_outputs="1">
140 <param name="batch_cond|batch_select" value="joint"/>
141 <param name="batch_cond|paired_cond|paired_select" value="single"/>
142 <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
143 <param name="n" value="10000"/>
144 <param name="batch_cond|aggregate" value="TRUE"/>
145 <output name="output" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
146 </test>
147
148 <!-- paired, batch -->
149 <test expect_num_outputs="2">
150 <param name="batch_cond|batch_select" value="batch"/>
151 <param name="batch_cond|paired_cond|paired_select" value="paired"/>
152 <param name="batch_cond|paired_cond|reads">
153 <collection type="paired">
154 <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
155 <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
156 </collection>
157 </param>
158 <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
159 <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
160 </test>
161 <!-- paired-separate batch (sim_size because element ids differ) -->
162 <test expect_num_outputs="2">
163 <param name="batch_cond|batch_select" value="batch"/>
164 <param name="batch_cond|paired_cond|paired_select" value="separate"/>
165 <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
166 <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
167 <output name="output_fwd" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
168 <output name="output_rev" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
169 </test>
170 <!-- single, batch -->
171 <test expect_num_outputs="1">
172 <param name="batch_cond|batch_select" value="batch"/>
173 <param name="batch_cond|paired_cond|paired_select" value="single"/>
174 <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
175 <param name="n" value="10000"/>
176 <output name="output" value="complexity.pdf" ftype="pdf" compare="sim_size" delta="200"/>
177 </test>
178 </tests>
179 <help><![CDATA[
180 Summary
181 .......
182
183 This function plots a histogram of the distribution of sequence complexities in the form of effective numbers of kmers as determined by seqComplexity. By default, kmers of size 2 are used, in which case a perfectly random sequence will approach an effective kmer number of 16 = 4 (nucleotides) ^ 2 (kmer size).
184
185 Details
186 .......
187
188 This function calculates the kmer complexity of input sequences. Complexity is quantified as the Shannon richness of kmers, which can be thought of as the effective number of kmers if they were all at equal frequencies. If a window size is provided, the minimum Shannon richness observed over a sliding window along the sequence is returned.
189
190
191 @HELP_OVERVIEW@
192 ]]></help>
193 <expand macro="citations"/>
194 </tool>