Mercurial > repos > iuc > purge_dups
comparison purge_dups.xml @ 0:8ec117da1796 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit ed3bf33e007841e359d164b2aa9e2ecf7fa5fa96"
author | iuc |
---|---|
date | Fri, 05 Feb 2021 17:52:51 +0000 |
parents | |
children | 29151e779524 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8ec117da1796 |
---|---|
1 <tool id="purge_dups" name="Purge haplotigs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | |
2 <description>and overlaps in an assembly based on read depth</description> | |
3 <macros> | |
4 <token name="@TOOL_VERSION@">1.2.5</token> | |
5 <token name="@VERSION_SUFFIX@">0</token> | |
6 </macros> | |
7 <requirements> | |
8 <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement> | |
9 </requirements> | |
10 <!-- Builds the command line for the selected function. Optional numeric options are tested with str(...) != "" so that an explicit 0 is still passed to the program; boolean options already render as their flag or as an empty string and are emitted as-is. --> <command detect_errors="exit_code"><![CDATA[ | |
11 #if $function_select.functions == "purge_dups": | |
12 purge_dups | |
13 #if $function_select.coverage: | |
14 -c '$function_select.coverage' | |
15 #end if | |
16 #if $function_select.cutoffs: | |
17 -T '$function_select.cutoffs' | |
18 #end if | |
19 #if str($function_select.min_bad) != "": | |
20 -f $function_select.min_bad | |
21 #end if | |
22 #if str($function_select.min_align) != "": | |
23 -a $function_select.min_align | |
24 #end if | |
25 #if str($function_select.min_match) != "": | |
26 -b $function_select.min_match | |
27 #end if | |
28 #if str($function_select.min_chain) != "": | |
29 -m $function_select.min_chain | |
30 #end if | |
31 #if str($function_select.max_gap) != "": | |
32 -M $function_select.max_gap | |
33 #end if | |
34 #if $function_select.double_chain.chaining_rounds == "two": | |
35 -2 | |
36 #if str($function_select.double_chain.max_gap_2) != "": | |
37 -G $function_select.double_chain.max_gap_2 | |
38 #end if | |
39 #end if | |
40 #if str($function_select.min_chain_score) != "": | |
41 -l $function_select.min_chain_score | |
42 #end if | |
43 #if str($function_select.max_extend) != "": | |
44 -E $function_select.max_extend | |
45 #end if | |
46 '$function_select.input' > dups.bed 2> purge_dups.log | |
47 #else if $function_select.functions == "split_fa": | |
48 split_fa | |
49 #if $function_select.split: | |
50 $function_select.split | |
51 #end if | |
52 '$function_select.input' > split.fasta | |
53 #else if $function_select.functions == "pbcstat": | |
54 pbcstat | |
55 #if str($function_select.max_cov) != "": | |
56 -M $function_select.max_cov | |
57 #end if | |
58 #if str($function_select.min_map_ratio) != "": | |
59 -f $function_select.min_map_ratio | |
60 #end if | |
61 #if str($function_select.min_map_qual) != "": | |
62 -q $function_select.min_map_qual | |
63 #end if | |
64 #if str($function_select.flank) != "": | |
65 -l $function_select.flank | |
66 #end if | |
67 $function_select.primary_alignments | |
68 '$function_select.input' | |
69 #else if $function_select.functions == "ngscstat": | |
70 ngscstat | |
71 #if str($function_select.min_align_qual) != "": | |
72 -q $function_select.min_align_qual | |
73 #end if | |
74 ## #if $function_select.max_depth: | |
75 ## -M $function_select.max_depth | |
76 ## #end if | |
77 #if str($function_select.max_insert) != "": | |
78 -L $function_select.max_insert | |
79 #end if | |
80 '$function_select.input' | |
81 #else if $function_select.functions == "calcuts": | |
82 calcuts | |
83 #if str($function_select.min_depth) != "": | |
84 -f $function_select.min_depth | |
85 #end if | |
86 #if str($function_select.low_depth) != "": | |
87 -l $function_select.low_depth | |
88 #end if | |
89 #if str($function_select.transition) != "": | |
90 -m $function_select.transition | |
91 #end if | |
92 #if str($function_select.upper_depth) != "": | |
93 -u $function_select.upper_depth | |
94 #end if | |
95 $function_select.ploidy | |
96 '$function_select.input' > cutoffs.tsv 2>calcuts.log | |
97 #else if $function_select.functions == "get_seqs": | |
98 get_seqs | |
99 $function_select.coverage | |
100 $function_select.haplotigs | |
101 $function_select.end_trim | |
102 $function_select.split | |
103 #if str($function_select.length) != "": | |
104 -l $function_select.length | |
105 #end if | |
106 #if str($function_select.min_ratio) != "": | |
107 -m $function_select.min_ratio | |
108 #end if | |
109 #if str($function_select.min_gap) != "": | |
110 -g $function_select.min_gap | |
111 #end if | |
112 '$function_select.bed_input' '$function_select.fasta_input' | |
113 #end if | |
114 ]]></command> | |
115 <inputs> | |
116 <conditional name="function_select"> | |
117 <param name="functions" type="select" label="Select the purge_dups function"> <!-- each option value selects the matching when-block below and the program run in the command section --> | |
118 <option value="purge_dups">purge haplotigs and overlaps for an assembly</option> | |
119 <option value="split_fa">split FASTA file by 'N's</option> | |
120 <option value="pbcstat">create read depth histogram and base-level read depth for PacBio data</option> | |
121 <option value="ngscstat">create read depth histogram and base-level read depth for Illumina data</option> | |
122 <option value="calcuts">calculate coverage cutoffs</option> | |
123 <option value="get_seqs">obtain sequences after purging</option> | |
124 </param> | |
125 <when value="purge_dups"> <!-- input files and tuning options passed to the purge_dups program --> | |
126 <param name="input" type="data" format="paf" label="PAF input file"/> | |
127 <param name="coverage" type="data" format="tabular" argument="-c" optional="true" label="Base-level coverage file"/> | |
128 <param name="cutoffs" type="data" format="tabular" argument="-T" optional="true" label="Cutoffs file"/> | |
129 <param name="min_bad" type="float" argument="-f" optional="true" min="0" max="1" label="Minimum fraction of haploid/diploid/bad/repetitive bases in a sequence" help="Default = 0.8"/> | |
130 <param name="min_align" type="integer" argument="-a" optional="true" label="Minimum alignment score"/> | |
131 <param name="min_match" type="integer" argument="-b" optional="true" label="Minimum max match score"/> | |
132 <param name="min_chain" type="integer" argument="-m" optional="true" label="Minimum matching bases for chaining"/> | |
133 <param name="max_gap" type="integer" argument="-M" optional="true" label="Maximum gap size for chaining"/> | |
134 <conditional name="double_chain"> <!-- the second-round gap option is only offered when two rounds are selected --> | |
135 <param name="chaining_rounds" type="select" label="Rounds of chaining"> | |
136 <option value="one">1 round</option> | |
137 <option value="two">2 rounds</option> | |
138 </param> | |
139 <when value="two"> | |
140 <param name="max_gap_2" type="integer" argument="-G" optional="true" label="Maximum gap size for second round of chaining"/> | |
141 </when> | |
142 <when value="one"/> | |
143 </conditional> | |
144 <param name="min_chain_score" type="integer" argument="-l" optional="true" label="Minimum chaining score for a match"/> | |
145 <param name="max_extend" type="integer" argument="-E" optional="true" label="Maximum extension for contig ends"/> | |
146 </when> | |
147 <when value="split_fa"> <!-- split_fa takes one FASTA file and a single optional flag; NOTE(review): confirm the exact meaning of -n against the split_fa usage message and refine the label --> | |
148 <param name="input" type="data" format="fasta" label="FASTA input file"/> | |
149 <param name="split" type="boolean" argument="-n" truevalue="-n" falsevalue="" checked="false" label="Pass the -n flag to split_fa"/> | |
150 </when> | |
151 <when value="pbcstat"> <!-- PAF input and filtering options passed to the pbcstat program --> | |
152 <param name="input" type="data" format="paf" label="PAF input file"/> | |
153 <param name="max_cov" type="integer" argument="-M" optional="true" label="Maximum coverage"/> | |
154 <param name="min_map_ratio" type="float" argument="-f" min="0" max="1" value="0" label="Minimum mapping length ratio"/> | |
155 <param name="min_map_qual" type="integer" argument="-q" optional="true" label="Minimum mapping quality"/> | |
156 <param name="flank" type="integer" argument="-l" optional="true" label="Flanking space"/> | |
157 <param name="primary_alignments" type="boolean" argument="-p" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments"/> | |
158 </when> | |
159 <when value="ngscstat"> <!-- BAM input and filtering options passed to the ngscstat program --> | |
160 <param name="input" type="data" format="bam" label="BAM input file"/> | |
161 <param name="min_align_qual" type="integer" argument="-q" optional="true" label="Minimum alignment quality"/> | |
162 <!-- The -M option is listed in the ngscstat help text but is not part of the program code, so the parameter below stays disabled; it may arrive in a later release. --> | |
163 <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> --> | |
164 <param name="max_insert" type="integer" argument="-L" optional="true" label="Maximum insert size"/> | |
165 </when> | |
166 <when value="calcuts"> <!-- STAT input and cutoff options passed to the calcuts program --> | |
167 <param name="input" type="data" format="tabular" label="STAT input file"/> | |
168 <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth count" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/> | |
169 <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/> | |
170 <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/> | |
171 <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/> | |
172 <param name="ploidy" argument="-d" type="select" label="Ploidy"> <!-- the option values already contain the -d flag and are inserted into the command verbatim --> | |
173 <option value="-d 0" selected="true">Diploid [0]</option> | |
174 <option value="-d 1">Haploid [1]</option> | |
175 </param> | |
176 </when> | |
177 <when value="get_seqs"> <!-- FASTA and BED inputs plus options passed to the get_seqs program --> | |
178 <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/> | |
179 <param name="bed_input" type="data" format="bed" label="Bed input file"/> | |
180 <param name="coverage" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Keep high coverage contigs in the primary contig set"/> | |
181 <param name="haplotigs" type="boolean" argument="-a" truevalue="-a" falsevalue="" checked="false" label="Do not add prefix to haplotigs"/> | |
182 <param name="length" type="integer" argument="-l" optional="true" label="Minimum primary contig length" help="Default: 1000"/> | |
183 <param name="min_ratio" type="float" min="0" max="1" argument="-m" optional="true" label="Minimum ratio of remaining primary contig length to the original contig length"/> | |
184 <param name="end_trim" type="boolean" argument="-e" truevalue="-e" falsevalue="" checked="true" label="Trim end sequences" help="Only remove sequences at the ends of haplotigs. If you also want to remove the duplications in the middle, set this to false; note that this may delete false positive duplications."/> | |
185 <param name="split" type="boolean" argument="-s" truevalue="-s" falsevalue="" checked="false" label="Split contigs"/> | |
186 <param name="min_gap" type="integer" argument="-g" optional="true" help="default=10k" label="Minimum gap size between duplications" /> | |
187 </when> | |
188 </conditional> | |
189 </inputs> | |
190 <outputs> <!-- every output is filtered on the selected function, so only the files named for that function are collected from the working directory --> | |
191 <!-- Get Seqs --> | |
192 <data name="get_seqs_hap" format="fasta" from_work_dir="hap.fa" label="${tool.name} on ${on_string}: get seqs haplotype fasta"> | |
193 <filter>function_select['functions'] == 'get_seqs'</filter> | |
194 </data> | |
195 <data name="get_seqs_purged" format="fasta" from_work_dir="purged.fa" label="${tool.name} on ${on_string}: get seqs purged fasta"> | |
196 <filter>function_select['functions'] == 'get_seqs'</filter> | |
197 </data> | |
198 <!-- Split FA --> | |
199 <data name="split_fasta" format="fasta" from_work_dir="split.fasta" label="${tool.name} on ${on_string}: split fasta"> | |
200 <filter>function_select['functions'] == 'split_fa'</filter> | |
201 </data> | |
202 <!-- Ngscstat --> | |
203 <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file"> | |
204 <filter>function_select['functions'] == 'ngscstat'</filter> | |
205 </data> | |
206 <data name="ngscstat_stat" format="tabular" from_work_dir="TX.stat" label="${tool.name} on ${on_string}: ngscstat stat file"> | |
207 <filter>function_select['functions'] == 'ngscstat'</filter> | |
208 </data> | |
209 <!-- Pbcstat --> | |
210 <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: pbcstat base coverage file"> | |
211 <filter>function_select['functions'] == 'pbcstat'</filter> | |
212 </data> | |
213 <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file"> | |
214 <filter>function_select['functions'] == 'pbcstat'</filter> | |
215 </data> | |
216 <data name="pbcstat_stat" format="tabular" from_work_dir="PB.stat" label="${tool.name} on ${on_string}: pbcstat stat file"> | |
217 <filter>function_select['functions'] == 'pbcstat'</filter> | |
218 </data> | |
219 <!-- Calcuts: the cutoff table is captured from stdout and the log from stderr --> | |
220 <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file"> | |
221 <filter>function_select['functions'] == 'calcuts'</filter> | |
222 </data> | |
223 <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file"> | |
224 <filter>function_select['functions'] == 'calcuts'</filter> | |
225 </data> | |
226 <!-- Purge dups: the BED file is captured from stdout and the log from stderr --> | |
227 <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file"> | |
228 <filter>function_select['functions'] == 'purge_dups'</filter> | |
229 </data> | |
230 <data name="purge_dups_bed" format="bed" from_work_dir="dups.bed" label="${tool.name} on ${on_string}: purge_dups bed file"> | |
231 <filter>function_select['functions'] == 'purge_dups'</filter> | |
232 </data> | |
233 </outputs> | |
234 <tests> <!-- one test per function; boolean parameters are set with true/false rather than with the command-line flag they render to --> | |
235 <!-- Purge dups: every optional option set, two rounds of chaining --> | |
236 <test expect_num_outputs="2"> | |
237 <conditional name="function_select"> | |
238 <param name="functions" value="purge_dups"/> | |
239 <param name="input" value="test.paf"/> | |
240 <param name="coverage" value="test.cov" ftype="tabular"/> | |
241 <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/> | |
242 <param name="min_bad" value="0.01"/> | |
243 <param name="min_align" value="10"/> | |
244 <param name="min_match" value="100"/> | |
245 <param name="min_chain" value="1"/> | |
246 <param name="max_gap" value="1000"/> | |
247 <conditional name="double_chain"> | |
248 <param name="chaining_rounds" value="two"/> | |
249 <param name="max_gap_2" value="1001"/> | |
250 </conditional> | |
251 <param name="min_chain_score" value="1"/> | |
252 <param name="max_extend" value="100"/> | |
253 </conditional> | |
254 <output name="purge_dups_bed" value="purge_dups_out.bed"/> | |
255 </test> | |
256 <!-- Split fa: -n flag enabled --> | |
257 <test expect_num_outputs="1"> | |
258 <conditional name="function_select"> | |
259 <param name="functions" value="split_fa"/> | |
260 <param name="input" value="test.fasta"/> | |
261 <param name="split" value="true"/> | |
262 </conditional> | |
263 <output name="split_fasta" value="split_out.fasta"/> | |
264 </test> | |
265 <!-- pbcstat: three outputs are produced, the coverage and wig files are compared --> | |
266 <test expect_num_outputs="3"> | |
267 <conditional name="function_select"> | |
268 <param name="functions" value="pbcstat"/> | |
269 <param name="input" value="test.paf"/> | |
270 <param name="max_cov" value="1000"/> | |
271 <param name="min_map_ratio" value="0.01"/> | |
272 <param name="min_map_qual" value="1"/> | |
273 <param name="flank" value="1"/> | |
274 <param name="primary_alignments" value="true"/> | |
275 </conditional> | |
276 <output name="pbcstat_cov" value="out.cov"/> | |
277 <output name="pbcstat_wig" value="out.wig"/> | |
278 </test> | |
279 <!-- ngscstat: two outputs are produced, the coverage file is compared --> | |
280 <test expect_num_outputs="2"> | |
281 <conditional name="function_select"> | |
282 <param name="functions" value="ngscstat"/> | |
283 <param name="input" value="test.bam"/> | |
284 <param name="min_align_qual" value="10"/> | |
285 <param name="max_insert" value="100"/> | |
286 </conditional> | |
287 <output name="ngscstat_cov" value="ngsc_out.cov"/> | |
288 </test> | |
289 <!-- Calcuts: two outputs are produced, the cutoff table is compared --> | |
290 <test expect_num_outputs="2"> | |
291 <conditional name="function_select"> | |
292 <param name="functions" value="calcuts"/> | |
293 <param name="input" value="test.stat"/> | |
294 <param name="min_depth" value="0.01"/> | |
295 <param name="low_depth" value="1"/> | |
296 <param name="transition" value="1"/> | |
297 <param name="upper_depth" value="100"/> | |
298 <param name="ploidy" value="-d 0"/> | |
299 </conditional> | |
300 <output name="calcuts_tab" value="calcuts_out.tsv"/> | |
301 </test> | |
302 <!-- Get seqs: all boolean flags enabled, the purged FASTA is compared --> | |
303 <test expect_num_outputs="2"> | |
304 <conditional name="function_select"> | |
305 <param name="functions" value="get_seqs"/> | |
306 <param name="fasta_input" value="split_out.fasta"/> | |
307 <param name="bed_input" value="dups.bed"/> | |
308 <param name="coverage" value="true"/> | |
309 <param name="length" value="10"/> | |
310 <param name="haplotigs" value="true"/> | |
311 <param name="min_ratio" value=".01"/> | |
312 <param name="end_trim" value="true"/> | |
313 <param name="split" value="true"/> | |
314 <param name="min_gap" value="100000"/> | |
315 </conditional> | |
316 <output name="get_seqs_purged" value="purged_out.fa"/> | |
317 </test> | |
318 </tests> | |
319 <help><![CDATA[ | |
320 .. class:: infomark | |
321 | |
322 **What it does** | |
323 | |
324 The purge_dups tools are designed to remove haplotigs and contig overlaps in a de novo assembly based on read depth. | |
325 | |
326 ]]></help> | |
327 <citations> | |
328 <citation type="doi">10.1093/bioinformatics/btaa025</citation> | |
329 </citations> | |
330 </tool> |