comparison purge_dups.xml @ 0:8ec117da1796 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit ed3bf33e007841e359d164b2aa9e2ecf7fa5fa96"
author iuc
date Fri, 05 Feb 2021 17:52:51 +0000
parents
children 29151e779524
comparison
equal deleted inserted replaced
-1:000000000000 0:8ec117da1796
1 <tool id="purge_dups" name="Purge haplotigs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>and overlaps in an assembly based on read depth</description>
3 <macros>
4 <token name="@TOOL_VERSION@">1.2.5</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <requirements>
8 <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement>
9 </requirements>
10 <command detect_errors="exit_code"><![CDATA[
11 #if $function_select.functions == "purge_dups":
12 purge_dups
13 #if $function_select.coverage:
14 -c '$function_select.coverage'
15 #end if
16 #if $function_select.cutoffs:
17 -T '$function_select.cutoffs'
18 #end if
19 #if str($function_select.min_bad) != '':
20 -f $function_select.min_bad
21 #end if
22 #if str($function_select.min_align) != '':
23 -a $function_select.min_align
24 #end if
25 #if str($function_select.min_match) != '':
26 -b $function_select.min_match
27 #end if
28 #if str($function_select.min_chain) != '':
29 -m $function_select.min_chain
30 #end if
31 #if str($function_select.max_gap) != '':
32 -M $function_select.max_gap
33 #end if
34 #if $function_select.double_chain.chaining_rounds == "two":
35 -2
36 #if str($function_select.double_chain.max_gap_2) != '':
37 -G $function_select.double_chain.max_gap_2
38 #end if
39 #end if
40 #if str($function_select.min_chain_score) != '':
41 -l $function_select.min_chain_score
42 #end if
43 #if str($function_select.max_extend) != '':
44 -E $function_select.max_extend
45 #end if
46 '$function_select.input' > dups.bed 2> purge_dups.log
47 #else if $function_select.functions == "split_fa":
48 split_fa
49 ## The boolean param already renders as its truevalue ("-n") or "", so emit it once, unguarded.
50 $function_select.split
51 ## Numeric params in this template are tested with str(...) != '' so an explicit 0 is still passed.
52 '$function_select.input' > split.fasta
53 #else if $function_select.functions == "pbcstat":
54 pbcstat
55 #if str($function_select.max_cov) != '':
56 -M $function_select.max_cov
57 #end if
58 #if str($function_select.min_map_ratio) != '':
59 -f $function_select.min_map_ratio
60 #end if
61 #if str($function_select.min_map_qual) != '':
62 -q $function_select.min_map_qual
63 #end if
64 #if str($function_select.flank) != '':
65 -l $function_select.flank
66 #end if
67 $function_select.primary_alignments
68 '$function_select.input'
69 #else if $function_select.functions == "ngscstat":
70 ngscstat
71 #if str($function_select.min_align_qual) != '':
72 -q $function_select.min_align_qual
73 #end if
74 ## #if $function_select.max_depth:
75 ## -M $function_select.max_depth
76 ## #end if
77 #if str($function_select.max_insert) != '':
78 -L $function_select.max_insert
79 #end if
80 '$function_select.input'
81 #else if $function_select.functions == "calcuts":
82 calcuts
83 #if str($function_select.min_depth) != '':
84 -f $function_select.min_depth
85 #end if
86 #if str($function_select.low_depth) != '':
87 -l $function_select.low_depth
88 #end if
89 #if str($function_select.transition) != '':
90 -m $function_select.transition
91 #end if
92 #if str($function_select.upper_depth) != '':
93 -u $function_select.upper_depth
94 #end if
95 $function_select.ploidy
96 '$function_select.input' > cutoffs.tsv 2>calcuts.log
97 #else if $function_select.functions == "get_seqs":
98 get_seqs
99 $function_select.coverage
100 $function_select.haplotigs
101 $function_select.end_trim
102 $function_select.split
103 #if str($function_select.length) != '':
104 -l $function_select.length
105 #end if
106 #if str($function_select.min_ratio) != '':
107 -m $function_select.min_ratio
108 #end if
109 #if str($function_select.min_gap) != '':
110 -g $function_select.min_gap
111 #end if
112 '$function_select.bed_input' '$function_select.fasta_input'
113 #end if
114 ]]></command>
115 <inputs>
116 <conditional name="function_select">
117 <param name="functions" type="select" label="Select the purge_dups function"> <!-- each option value selects the matching when-branch below and the matching branch of the command template -->
118 <option value="purge_dups">purge haplotigs and overlaps for an assembly</option>
119 <option value="split_fa">split FASTA file by 'N's</option>
120 <option value="pbcstat">create read depth histogram and base-level read depth for PacBio data</option>
121 <option value="ngscstat">create read depth histogram and base-level read depth for Illumina data</option>
122 <option value="calcuts">calculate coverage cutoffs</option>
123 <option value="get_seqs">obtain sequences after purging</option>
124 </param>
125 <when value="purge_dups"> <!-- inputs and tuning options for the purge_dups executable; every param except the PAF input is optional -->
126 <param name="input" type="data" format="paf" label="PAF input file"/>
127 <param name="coverage" argument="-c" type="data" format="tabular" optional="true" label="Base-level coverage file"/>
128 <param name="cutoffs" argument="-T" type="data" format="tabular" optional="true" label="Cutoffs file"/>
129 <param name="min_bad" argument="-f" type="float" min="0" max="1" optional="true" label="Minimum fraction of haploid/diploid/bad/repetitive bases in a sequence" help="Default = 0.8"/>
130 <param name="min_align" argument="-a" type="integer" optional="true" label="Minimum alignment score"/>
131 <param name="min_match" argument="-b" type="integer" optional="true" label="Minimum max match score"/>
132 <param name="min_chain" argument="-m" type="integer" optional="true" label="Minimum matching bases for chaining"/>
133 <param name="max_gap" argument="-M" type="integer" optional="true" label="Maximum gap size for chaining"/>
134 <conditional name="double_chain"> <!-- the second-round gap size is only offered when two rounds are selected -->
135 <param name="chaining_rounds" type="select" label="Rounds of chaining">
136 <option value="one">1 round</option>
137 <option value="two">2 rounds</option>
138 </param>
139 <when value="two">
140 <param name="max_gap_2" argument="-G" type="integer" optional="true" label="Maximum gap size for second round of chaining"/>
141 </when>
142 <when value="one"/>
143 </conditional>
144 <param name="min_chain_score" argument="-l" type="integer" optional="true" label="Minimum chaining score for a match"/>
145 <param name="max_extend" argument="-E" type="integer" optional="true" label="Maximum extension for contig ends"/>
146 </when>
147 <when value="split_fa"> <!-- inputs for the split_fa executable -->
148 <param name="input" type="data" format="fasta" label="FASTA input file"/>
149 <param name="split" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Base-level coverage file" /> <!-- NOTE(review): this label does not describe a flag; confirm what split_fa -n does and reword -->
150 </when>
151 <when value="pbcstat"> <!-- inputs and filters for the pbcstat executable -->
152 <param name="input" type="data" format="paf" label="PAF input file"/>
153 <param name="max_cov" argument="-M" type="integer" optional="true" label="Maximum coverage"/>
154 <param name="min_map_ratio" argument="-f" type="float" min="0" max="1" value="0" label="Minimum mapping length ratio"/>
155 <param name="min_map_qual" argument="-q" type="integer" optional="true" label="Minimum mapping quality"/>
156 <param name="flank" argument="-l" type="integer" optional="true" label="Flanking space"/>
157 <param name="primary_alignments" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments"/>
158 </when>
159 <when value="ngscstat"> <!-- inputs and filters for the ngscstat executable -->
160 <param name="input" type="data" format="bam" label="BAM input file"/>
161 <param name="min_align_qual" argument="-q" type="integer" optional="true" label="Minimum alignment quality"/>
162 <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? -->
163 <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> -->
164 <param name="max_insert" argument="-L" type="integer" optional="true" label="Maximum insert size"/>
165 </when>
166 <when value="calcuts"> <!-- inputs and thresholds for the calcuts executable -->
167 <param name="input" type="data" format="tabular" label="STAT input file"/>
168 <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth count" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/>
169 <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/>
170 <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/>
171 <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/>
172 <param name="ploidy" argument="-d" type="select" label="Ploidy"> <!-- option values carry the complete "-d N" flag and are inserted verbatim into the command -->
173 <option value="-d 0" selected="true">Diploid [0]</option>
174 <option value="-d 1">Haploid [1]</option>
175 </param>
176 </when>
177 <when value="get_seqs"> <!-- inputs and flags for the get_seqs executable; boolean truevalues are the literal command-line flags -->
178 <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/>
179 <param name="bed_input" type="data" format="bed" label="Bed input file"/>
180 <param name="coverage" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Keep high coverage contigs in the primary contig set"/>
181 <param name="haplotigs" type="boolean" argument="-a" truevalue="-a" falsevalue="" checked="false" label="Do not add prefix to haplotigs"/>
182 <param name="length" type="integer" argument="-l" optional="true" label="Minimum primary contig length" help="Default: 1000"/>
183 <param name="min_ratio" type="float" min="0" max="1" argument="-m" optional="true" label="Minimum ratio of remaining primary contig length to the original contig length"/>
184 <param name="end_trim" type="boolean" argument="-e" truevalue="-e" falsevalue="" checked="true" label="Trim end sequences" help="Only remove sequences at the end of haplotigs. If you also want to remove the duplications in the middle, set to false; however, that may delete false positive duplications."/>
185 <param name="split" type="boolean" argument="-s" truevalue="-s" falsevalue="" checked="false" label="Split contigs"/>
186 <param name="min_gap" type="integer" argument="-g" optional="true" help="default=10k" label="Minimum gap size between duplications" />
187 </when>
188 </conditional>
189 </inputs>
190 <outputs> <!-- each dataset is filtered on the selected function, so only that function's outputs are created -->
191 <!-- get_seqs: collected from hap.fa and purged.fa in the working directory -->
192 <data name="get_seqs_hap" format="fasta" from_work_dir="hap.fa" label="${tool.name} on ${on_string}: get seqs haplotype fasta" >
193 <filter>function_select['functions'] == 'get_seqs'</filter>
194 </data>
195 <data name="get_seqs_purged" format="fasta" from_work_dir="purged.fa" label="${tool.name} on ${on_string}: get seqs purged fasta">
196 <filter>function_select['functions'] == 'get_seqs'</filter>
197 </data>
198 <!-- split_fa: stdout is redirected to split.fasta by the command template -->
199 <data name="split_fasta" format="fasta" from_work_dir="split.fasta" label="${tool.name} on ${on_string}: split fasta">
200 <filter>function_select['functions'] == 'split_fa'</filter>
201 </data>
202 <!-- ngscstat: collected from TX.base.cov and TX.stat in the working directory -->
203 <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file">
204 <filter>function_select['functions'] == 'ngscstat'</filter>
205 </data>
206 <data name="ngscstat_stat" format="tabular" from_work_dir="TX.stat" label="${tool.name} on ${on_string}: ngscstat stat file">
207 <filter>function_select['functions'] == 'ngscstat'</filter>
208 </data>
209 <!-- pbcstat: collected from PB.base.cov, PB.cov.wig and PB.stat in the working directory -->
210 <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: pbcstat base coverage file">
211 <filter>function_select['functions'] == 'pbcstat'</filter>
212 </data>
213 <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file">
214 <filter>function_select['functions'] == 'pbcstat'</filter>
215 </data>
216 <data name="pbcstat_stat" format="tabular" from_work_dir="PB.stat" label="${tool.name} on ${on_string}: pbcstat stat file">
217 <filter>function_select['functions'] == 'pbcstat'</filter>
218 </data>
219 <!-- calcuts: stdout goes to cutoffs.tsv and stderr to calcuts.log (see command template) -->
220 <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file">
221 <filter>function_select['functions'] == 'calcuts'</filter>
222 </data>
223 <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file">
224 <filter>function_select['functions'] == 'calcuts'</filter>
225 </data>
226 <!-- purge_dups: stdout goes to dups.bed and stderr to purge_dups.log (see command template) -->
227 <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file">
228 <filter>function_select['functions'] == 'purge_dups'</filter>
229 </data>
230 <data name="purge_dups_bed" format="bed" from_work_dir="dups.bed" label="${tool.name} on ${on_string}: purge_dups bed file">
231 <filter>function_select['functions'] == 'purge_dups'</filter>
232 </data>
233 </outputs>
234 <tests>
235 <!-- purge_dups: all optional parameters set, two rounds of chaining; only the BED output is compared -->
236 <test expect_num_outputs="2">
237 <conditional name="function_select">
238 <param name="functions" value="purge_dups"/>
239 <param name="input" value="test.paf"/>
240 <param name="coverage" value="test.cov" ftype="tabular"/>
241 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/>
242 <param name="min_bad" value="0.01"/>
243 <param name="min_align" value="10"/>
244 <param name="min_match" value="100"/>
245 <param name="min_chain" value="1"/>
246 <param name="max_gap" value="1000"/>
247 <conditional name="double_chain">
248 <param name="chaining_rounds" value="two"/>
249 <param name="max_gap_2" value="1001"/>
250 </conditional>
251 <param name="min_chain_score" value="1"/>
252 <param name="max_extend" value="100"/>
253 </conditional>
254 <output name="purge_dups_bed" value="purge_dups_out.bed"/>
255 </test>
256 <!-- split_fa: run with the -n flag enabled; the single FASTA output is compared -->
257 <test expect_num_outputs="1">
258 <conditional name="function_select">
259 <param name="functions" value="split_fa"/>
260 <param name="input" value="test.fasta"/>
261 <param name="split" value="-n"/>
262 </conditional>
263 <output name="split_fasta" value="split_out.fasta"/>
264 </test>
265 <!-- pbcstat: three outputs are expected, but only the coverage and wig files are compared -->
266 <test expect_num_outputs="3">
267 <conditional name="function_select">
268 <param name="functions" value="pbcstat"/>
269 <param name="input" value="test.paf"/>
270 <param name="max_cov" value="1000"/>
271 <param name="min_map_ratio" value="0.01"/>
272 <param name="min_map_qual" value="1"/>
273 <param name="flank" value="1"/>
274 <param name="primary_alignments" value="-p"/>
275 </conditional>
276 <output name="pbcstat_cov" value="out.cov"/>
277 <output name="pbcstat_wig" value="out.wig"/>
278 </test>
279 <!-- ngscstat: two outputs are expected, but only the base coverage file is compared -->
280 <test expect_num_outputs="2">
281 <conditional name="function_select">
282 <param name="functions" value="ngscstat"/>
283 <param name="input" value="test.bam"/>
284 <param name="min_align_qual" value="10"/>
285 <param name="max_insert" value="100"/>
286 </conditional>
287 <output name="ngscstat_cov" value="ngsc_out.cov"/>
288 </test>
289 <!-- calcuts: ploidy option "-d 0" selected; only the cutoffs table is compared, not the log -->
290 <test expect_num_outputs="2">
291 <conditional name="function_select">
292 <param name="functions" value="calcuts"/>
293 <param name="input" value="test.stat"/>
294 <param name="min_depth" value="0.01"/>
295 <param name="low_depth" value="1"/>
296 <param name="transition" value="1"/>
297 <param name="upper_depth" value="100"/>
298 <param name="ploidy" value="-d 0"/>
299 </conditional>
300 <output name="calcuts_tab" value="calcuts_out.tsv"/>
301 </test>
302 <!-- get_seqs: all boolean flags enabled; only the purged FASTA is compared, not the haplotype FASTA -->
303 <test expect_num_outputs="2">
304 <conditional name="function_select">
305 <param name="functions" value="get_seqs"/>
306 <param name="fasta_input" value="split_out.fasta"/>
307 <param name="bed_input" value="dups.bed"/>
308 <param name="coverage" value="-c"/>
309 <param name="length" value="10"/>
310 <param name="haplotigs" value="-a"/>
311 <param name="min_ratio" value=".01"/>
312 <param name="end_trim" value="-e"/>
313 <param name="split" value="-s"/>
314 <param name="min_gap" value="100000"/>
315 </conditional>
316 <output name="get_seqs_purged" value="purged_out.fa"/>
317 </test>
318 </tests>
319 <help><![CDATA[
320 .. class:: infomark
321
322 **What it does**
323
324 The purge_dups tools are designed to remove haplotigs and contig overlaps in a de novo assembly based on read depth.
325
326 ]]></help>
327 <citations>
328 <citation type="doi">10.1093/bioinformatics/btaa025</citation>
329 </citations>
330 </tool>