comparison polypolish.xml @ 0:aaa868913641 draft

planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/polypolish commit 95f351736787f04c65e830cd9daf9c9c8521893a
author iuc
date Thu, 22 Sep 2022 07:51:48 +0000
parents
children bd2a15dbcea1
comparison
equal deleted inserted replaced
-1:000000000000 0:aaa868913641
1 <tool id="polypolish" name="Polypolish" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 Short-read polishing of long-read bacterial genome assemblies
4 </description>
5 <macros>
6 <import>macro.xml</import>
7 </macros>
8 <expand macro="xrefs"/>
9 <expand macro="requirements"/>
10 <expand macro="version_command"/>
11 <command detect_errors="aggressive"><![CDATA[
12 ln -s '$input.fasta_file' input_data &&
13 #*======================================
14 Overall flow: link the assembly, convert (BAM) or link (SAM) every alignment to a SAM file, optionally filter pairs by insert size, then run polypolish once. Each branch ends on that polypolish call so the flags at the bottom are appended to it. First branch: single SAM
15 ======================================*#
16 #if $input.sam_data_type.sam_selector == 'single'
17 #if $input.sam_data_type.single_sam.ext == 'unsorted.bam'
18 samtools view -h '$input.sam_data_type.single_sam' > input_sam &&
19 #elif $input.sam_data_type.single_sam.ext == 'sam'
20 ln -s '$input.sam_data_type.single_sam' input_sam &&
21 #end if
22 polypolish input_data input_sam > '$polished_fasta'
23 #*======================================
24 For paired SAM
25 ======================================*#
26 #elif $input.sam_data_type.sam_selector == 'paired'
27 #if $input.sam_data_type.R1_sam.ext == 'unsorted.bam'
28 samtools view -h '$input.sam_data_type.R1_sam' > sample_R1.sam &&
29 #elif $input.sam_data_type.R1_sam.ext == 'sam'
30 ln -s '$input.sam_data_type.R1_sam' sample_R1.sam &&
31 #end if
32 #if $input.sam_data_type.R2_sam.ext == 'unsorted.bam'
33 samtools view -h '$input.sam_data_type.R2_sam' > sample_R2.sam &&
34 #elif $input.sam_data_type.R2_sam.ext == 'sam'
35 ln -s '$input.sam_data_type.R2_sam' sample_R2.sam &&
36 #end if
37 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
38 polypolish_insert_filter.py --low '$input.sam_data_type.insert_filter.low'
39 --high '$input.sam_data_type.insert_filter.high'
40 --in1 sample_R1.sam
41 --in2 sample_R2.sam
42 --out1 'filtered_1.sam'
43 --out2 'filtered_2.sam' &&
44 polypolish input_data 'filtered_1.sam' 'filtered_2.sam' > '$polished_fasta'
45 #else
46 polypolish input_data sample_R1.sam sample_R2.sam > '$polished_fasta'
47 #end if
48 #*======================================
49 For multiple single-end SAM
50 ======================================*#
51 #elif $input.sam_data_type.sam_selector == 'multiple_single'
52 mkdir single_collection &&
53 #for $value, $single_sam in enumerate($input.sam_data_type.single_collection):
54 #if $single_sam.ext == 'unsorted.bam'
55 samtools view -h '$single_sam' > 'single_collection/$(single_sam.element_identifier).sam' &&
56 #elif $single_sam.ext == 'sam'
57 ln -s '$single_sam' 'single_collection/$(single_sam.element_identifier).$(single_sam.ext)' &&
58 #end if
59 #end for
60 polypolish input_data single_collection/*.sam > '$polished_fasta'
61 #*======================================
62 For multiple paired-end SAM
63 ======================================*#
64 #elif $input.sam_data_type.sam_selector == "multiple_paired"
65 mkdir paired_collection &&
66 #for $value, $paired_sam in enumerate($input.sam_data_type.paired_collection):
67 #if $paired_sam.forward.ext == 'unsorted.bam'
68 samtools view -h '$paired_sam.forward' > 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' &&
69 #else
70 ln -s '$paired_sam.forward' 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' &&
71 #end if
72 #if $paired_sam.reverse.ext == 'unsorted.bam'
73 samtools view -h '$paired_sam.reverse' > 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' &&
74 #else
75 ln -s '$paired_sam.reverse' 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' &&
76 #end if
77 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
78 polypolish_insert_filter.py --low '$input.sam_data_type.insert_filter.low'
79 --high '$input.sam_data_type.insert_filter.high'
80 --in1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam'
81 --in2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam'
82 --out1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier)_filtered.sam'
83 --out2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier)_filtered.sam' &&
84 #end if
85 #end for
86 #*======================================
87 Filtering option: polish with only the *_filtered.sam files when the insert filter ran, otherwise with every SAM in the directory
88 ======================================*#
89 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
90 polypolish input_data paired_collection/*_filtered.sam > '$polished_fasta'
91 #else
92 polypolish input_data paired_collection/*.sam > '$polished_fasta'
93 #end if
94 #end if
95 #* Polishing thresholds from the Options section, then the optional per-base debug file; all are appended to the final polypolish call above *#
96 --min_depth $options.min_depth --fraction_invalid $options.fraction_invalid
97 --max_errors $options.max_errors --fraction_valid $options.fraction_valid
98 #if $options.debug == 'true'
99 --debug '$debug_file'
100 #end if
101 #*======================================
102 For LOGFILE OUTPUT. NOTE(review): stdout of polypolish is already redirected to the polished FASTA, so this pipe only receives what is still written to stdout; confirm the log messages really end up in the log dataset
103 ======================================*#
104 #if $options.keep_logfile == 'true'
105 | tee '$logfile'
106 #end if
107 ]]>
108 </command>
109 <inputs> <!-- Two sections: "input" holds the assembly and the aligned reads, "options" holds the polishing thresholds and the optional extra outputs -->
110 <section name="input" title="Input sequences" expanded="True">
111 <param name="fasta_file" type="data" format="fasta" label="Select a draft genome for polishing"
112 help="Fasta sequence to be cleaned using short-reads data"/>
113 <conditional name="sam_data_type"> <!-- sam_selector decides which alignment params below are shown; only the paired layouts expand the filter_option macro -->
114 <param name="sam_selector" type="select" label="Select aligned data to polish" help="Choose number of aligned sam/bam files. Need aligned file with all possible locations in aligner option">
115 <option value="single">Single SAM/BAM file</option>
116 <option value="paired">Paired SAM/BAM files</option>
117 <option value="multiple_single">Multiple single SAM/BAM files</option>
118 <option value="multiple_paired">Multiple paired SAM/BAM files</option>
119 </param>
120 <when value="single">
121 <param name="single_sam" type="data" format="sam,unsorted.bam" label="Select a SAM/BAM file" help="Specify dataset with only one SAM/BAM file"/>
122 </when>
123 <when value="paired">
124 <param name="R1_sam" type="data" format="sam,unsorted.bam" label="Select forward SAM/BAM file" help="Specify the forward SAM/BAM file"/>
125 <param name="R2_sam" type="data" format="sam,unsorted.bam" label="Select reverse SAM/BAM file" help="Specify the reverse SAM/BAM file"/>
126 <expand macro="filter_option"/>
127 </when>
128 <when value="multiple_single">
129 <param name="single_collection" format="sam,unsorted.bam" type="data_collection" collection_type="list" label="Single-end collection" help="Specify a list of single-end datasets"/>
130 </when>
131 <when value="multiple_paired">
132 <param name="paired_collection" format="sam,unsorted.bam" type="data_collection" collection_type="list:paired" label="Paired-end collection" help="Specify a list of paired-end datasets"/>
133 <expand macro="filter_option"/>
134 </when>
135 </conditional>
136 </section>
137 <section name="options" title="Options" expanded="False">
138 <param name="min_depth" argument="--min_depth" type="integer" min="0" value="5" label="Minimal depth"
139 help="A base must occur at least this many times in the pileup to be considered valid [default: 5]"/>
140 <param name="fraction_invalid" argument="--fraction_invalid" type="float" min="0" value="0.2" max="1" label="Minimal invalid fraction"
141 help="A base must make up less than this fraction of the read depth to be considered invalid [default: 0.2]"/>
142 <param name="max_errors" argument="--max_errors" type="integer" min="0" value="10" label="Number of mismatch/indels to ignore alignments"
143 help="Ignore alignments with more than this many mismatches and indels [default: 10]"/>
144 <param name="fraction_valid" argument="--fraction_valid" type="float" min="0" value="0.5" max="1" label="Minimal valid fraction"
145 help="A base must make up at least this fraction of the read depth to be considered valid [default: 0.5]"/>
146 <param name="keep_logfile" type="boolean" truevalue="true" falsevalue="false" label="Keep log file"/>
147 <param name="debug" argument="--debug" type="boolean" truevalue="true" falsevalue="false" label="Keep per base information file"/>
148 </section>
149 </inputs>
150 <outputs> <!-- polished_fasta is always declared; debug_file and logfile are kept only when the matching boolean in the "options" section is true -->
151 <data name="polished_fasta" format="fasta" label="${tool.name} on ${on_string}: polished fasta"/>
152 <data name="debug_file" format="tabular" label="${tool.name} on ${on_string}: Per base informations">
153 <filter> options['debug'] == True </filter>
154 </data>
155 <data name="logfile" format="txt" from_work_dir="output" label="${tool.name} on ${on_string}: log report"> <!-- NOTE(review): from_work_dir points at a working-directory file named "output"; confirm the command really creates it -->
156 <filter> options['keep_logfile'] == True </filter>
157 </data>
158 </outputs>
159 <tests> <!-- Eleven cases: the four input layouts with SAM (1-4) and with BAM (8-11), plus insert-filter and custom-threshold variants on paired SAM (5-7) -->
160 <!-- Test_1 with default values and single SAM -->
161 <test expect_num_outputs="1">
162 <section name="input">
163 <param name="fasta_file" value="contigs.fa"/>
164 <conditional name="sam_data_type">
165 <param name="sam_selector" value="single"/>
166 <param name="single_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
167 </conditional>
168 </section>
169 <output name="polished_fasta" value="polished.fasta"/>
170 </test>
171 <!-- Test_2 with default values and paired SAM -->
172 <test expect_num_outputs="2">
173 <section name="input">
174 <param name="fasta_file" value="contigs.fa"/>
175 <conditional name="sam_data_type">
176 <param name="sam_selector" value="paired"/>
177 <param name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
178 <param name="R2_sam" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
179 </conditional>
180 </section>
181 <section name="options">
182 <param name="debug" value="true"/>
183 <param name="keep_logfile" value="false"/>
184 </section>
185 <output name="polished_fasta" value="polished.fasta"/>
186 <output name="debug_file" value="debug_file_test_2.tsv"/>
187 </test>
188 <!-- Test_3 with default values and single-end multiple SAM -->
189 <test expect_num_outputs="2">
190 <section name="input">
191 <param name="fasta_file" value="contigs.fa"/>
192 <conditional name="sam_data_type">
193 <param name="sam_selector" value="multiple_single"/>
194 <param name="single_collection">
195 <collection type="list">
196 <element name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
197 <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.sam" ftype="sam"/>
198 <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.sam" ftype="sam"/>
199 </collection>
200 </param>
201 </conditional>
202 </section>
203 <section name="options">
204 <param name="debug" value="false"/>
205 <param name="keep_logfile" value="true"/>
206 </section>
207 <output name="polished_fasta" value="polished.fasta"/>
208 <output name="logfile" value="logfile_test_3.log" lines_diff="15"/>
209 </test>
210 <!-- Test_4 with default values and paired collection SAM -->
211 <test expect_num_outputs="1">
212 <section name="input">
213 <param name="fasta_file" value="contigs.fa"/>
214 <conditional name="sam_data_type">
215 <param name="sam_selector" value="multiple_paired"/>
216 <param name="paired_collection">
217 <collection type="list:paired">
218 <element name="paired_1">
219 <collection type="paired">
220 <element name="forward" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
221 <element name="reverse" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
222 </collection>
223 </element>
224 <element name="paired_2">
225 <collection type="paired">
226 <element name="forward" value="aligned_test_file/alignement_R1_bis.sam" ftype="sam"/>
227 <element name="reverse" value="aligned_test_file/alignement_R2_bis.sam" ftype="sam"/>
228 </collection>
229 </element>
230 <element name="paired_3">
231 <collection type="paired">
232 <element name="forward" value="aligned_test_file/alignement_R1_ter.sam" ftype="sam"/>
233 <element name="reverse" value="aligned_test_file/alignement_R2_ter.sam" ftype="sam"/>
234 </collection>
235 </element>
236 </collection>
237 </param>
238 </conditional>
239 </section>
240 <output name="polished_fasta" value="polished.fasta"/>
241 </test>
242 <!-- Test_5 paired-end without filtering and without log file -->
243 <test expect_num_outputs="1">
244 <section name="input">
245 <param name="fasta_file" value="contigs.fa"/>
246 <conditional name="sam_data_type">
247 <param name="sam_selector" value="paired"/>
248 <param name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
249 <param name="R2_sam" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
250 <conditional name="insert_filter"> <!-- nested here because the tool declares the filter inside sam_data_type, not in the options section; NOTE(review): confirm "non_filter" is a value the filter_option macro defines -->
251 <param name="filter_select" value="non_filter"/>
252 </conditional>
253 </conditional>
254 </section>
255 <section name="options">
256 <param name="debug" value="false"/>
257 <param name="keep_logfile" value="false"/>
258 </section>
259 <output name="polished_fasta" value="polished.fasta"/>
260 </test>
261 <!-- Test_6 paired-end with filter, user defined values and without log file -->
262 <test expect_num_outputs="1">
263 <section name="input">
264 <param name="fasta_file" value="contigs.fa"/>
265 <conditional name="sam_data_type">
266 <param name="sam_selector" value="paired"/>
267 <param name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
268 <param name="R2_sam" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
269 <conditional name="insert_filter">
270 <param name="filter_select" value="filter"/>
271 <param name="low" value="1"/>
272 <param name="high" value="98.7"/>
273 </conditional>
274 </conditional>
275 </section>
276 <section name="options">
277 <param name="debug" value="false"/>
278 <param name="keep_logfile" value="false"/>
279 </section>
280 <output name="polished_fasta" value="polished.fasta"/>
281 </test>
282 <!-- Test_7 paired-end with all customized filters -->
283 <test expect_num_outputs="1">
284 <section name="input">
285 <param name="fasta_file" value="contigs.fa"/>
286 <conditional name="sam_data_type">
287 <param name="sam_selector" value="paired"/>
288 <param name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
289 <param name="R2_sam" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
290 <conditional name="insert_filter">
291 <param name="filter_select" value="filter"/>
292 <param name="low" value="1.4"/>
293 <param name="high" value="96.6"/>
294 </conditional>
295 </conditional>
296 </section>
297 <section name="options">
298 <param name="min_depth" value="10"/>
299 <param name="fraction_invalid" value="0.5"/>
300 <param name="max_errors" value="8"/>
301 <param name="fraction_valid" value="0.6"/>
302 <param name="debug" value="false"/>
303 <param name="keep_logfile" value="false"/>
304 </section>
305 <output name="polished_fasta" value="polished.fasta"/> <!-- NOTE(review): same expected file as the default-threshold tests; confirm the custom thresholds above are meant to leave the polished sequence unchanged -->
306 </test>
307 <!-- Test_8 single with bam input -->
308 <test expect_num_outputs="1">
309 <section name="input">
310 <param name="fasta_file" value="contigs.fa"/>
311 <conditional name="sam_data_type">
312 <param name="sam_selector" value="single"/>
313 <param name="single_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
314 </conditional>
315 </section>
316 <section name="options">
317 <param name="debug" value="false"/>
318 <param name="keep_logfile" value="false"/>
319 </section>
320 <output name="polished_fasta" value="polished.fasta"/>
321 </test>
322 <!-- Test_9 paired-end with bam input -->
323 <test expect_num_outputs="1">
324 <section name="input">
325 <param name="fasta_file" value="contigs.fa"/>
326 <conditional name="sam_data_type">
327 <param name="sam_selector" value="paired"/>
328 <param name="R1_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
329 <param name="R2_sam" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/>
330 </conditional>
331 </section>
332 <section name="options">
333 <param name="debug" value="false"/>
334 <param name="keep_logfile" value="false"/>
335 </section>
336 <output name="polished_fasta" value="polished.fasta"/>
337 </test>
338 <!-- Test_10 single collection with bam input -->
339 <test expect_num_outputs="1">
340 <section name="input">
341 <param name="fasta_file" value="contigs.fa"/>
342 <conditional name="sam_data_type">
343 <param name="sam_selector" value="multiple_single"/>
344 <param name="single_collection">
345 <collection type="list">
346 <element name="R1_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
347 <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/>
348 <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/>
349 </collection>
350 </param>
351 </conditional>
352 </section>
353 <section name="options">
354 <param name="debug" value="false"/>
355 <param name="keep_logfile" value="false"/>
356 </section>
357 <output name="polished_fasta" value="polished.fasta"/>
358 </test>
359 <!-- Test_11 paired collection with bam input -->
360 <test expect_num_outputs="1">
361 <section name="input">
362 <param name="fasta_file" value="contigs.fa"/>
363 <conditional name="sam_data_type">
364 <param name="sam_selector" value="multiple_paired"/>
365 <param name="paired_collection">
366 <collection type="list:paired">
367 <element name="paired_1">
368 <collection type="paired">
369 <element name="forward" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
370 <element name="reverse" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/>
371 </collection>
372 </element>
373 <element name="paired_2">
374 <collection type="paired">
375 <element name="forward" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/>
376 <element name="reverse" value="aligned_test_file/alignement_R2_bis.bam" ftype="unsorted.bam"/>
377 </collection>
378 </element>
379 <element name="paired_3">
380 <collection type="paired">
381 <element name="forward" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/>
382 <element name="reverse" value="aligned_test_file/alignement_R2_ter.bam" ftype="unsorted.bam"/>
383 </collection>
384 </element>
385 </collection>
386 </param>
387 </conditional>
388 </section>
389 <section name="options">
390 <param name="debug" value="false"/>
391 <param name="keep_logfile" value="false"/>
392 </section>
393 <output name="polished_fasta" value="polished.fasta"/>
394 </test>
395 </tests>
396 <help><![CDATA[
397 **What it does**
398 Polypolish is a tool for polishing genome assemblies with short reads.
399 Polypolish uses SAM/BAM files where each read has been aligned to all possible locations (not just a single best location).
400 This allows it to repair errors in repeat regions that other alignment-based polishers cannot fix.
401
402 **Polypolish pipeline steps**
403 1. [Optional] Filter aligned reads
404 - Exclude some alignments based on their insert size
405 - This should reduce the number of excessive alignments, particularly near the edges of repeat sequences, improving Polypolish's ability to fix errors in those regions.
406 2. Clean assembly with filtered reads
407
408 **Inputs**
409 Polypolish needs SAM/BAM input produced by an aligner run with the option to keep all possible alignment locations.
410 Polypolish takes one or more assemblies as input FASTA.
411 It also needs the raw reads aligned to the assembly, in single or paired-end SAM/BAM format.
412 You can use multiple alignment files to polish the same assembly.
413 **WARNING: It can only work if multiple location information is available in the SAM/BAM files**
414 For example, when using bwa mem to align the raw reads beforehand, you need to:
415 1. Align each read file independently (also for paired data)
416 2. Set the option "Output all alignments for single-ends or unpaired paired-ends" in Select analysis mode>Set input/output options
417 - This produces the multiple-alignment output that Polypolish needs
418
419
420 ]]></help>
421 <expand macro="citations"/>
422 </tool>