Mercurial > repos > iuc > necat
comparison necat.xml @ 0:6ee7eb5821f0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6
author | iuc |
---|---|
date | Fri, 25 Nov 2022 14:24:27 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6ee7eb5821f0 |
---|---|
1 <tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | |
2 <description>Error correction and de-novo assembly for ONT Nanopore reads</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <xrefs> | |
7 <xref type="bio.tools">necat</xref> | |
8 </xrefs> | |
9 <requirements> | |
10 <requirement type="package" version="@TOOL_VERSION@">necat</requirement> | |
11 </requirements> | |
12 <command detect_errors="exit_code"><![CDATA[ | |
13 ## helper function | |
14 #def make_filename($i, $input_param) | |
15 #set ext = $input_param.extension | |
16 #if $ext == "fastqsanger" | |
17 #set $ext = "fastq" | |
18 #end if | |
19 #set filename = "reads_" + str($i) + "." + $ext | |
20 #return $filename | |
21 #end def | |
22 | |
23 ## symlink each input file (and everything in input collections) under a name built by make_filename, then append that name to read_list.txt; linking avoids duplicating large read files in the job directory | |
24 #set i = 1 | |
25 #for input in $input_fastqs | |
26 #set filename = $make_filename($i, $input) | |
27 ln -s '$input' $filename | |
28 && echo $filename >> read_list.txt && | |
29 #set i = $i + 1 | |
30 #end for | |
31 | |
32 ## #for $i, $input in enumerate($input_fastqs): | |
33 ## #set filename = 'reads_${i}.$input.ext' | |
34 ## ln -s '$input' $filename && | |
35 ## echo $filename >> read_list.txt && | |
36 ## #end for | |
37 | |
38 ## necat commands | |
39 necat correct '${job_configfile}' | |
40 #if $assembly.should_assemble == "yes": | |
41 && necat assemble '${job_configfile}' | |
42 && necat bridge '${job_configfile}' | |
43 #end if | |
44 ]]></command> | |
45 <configfiles> | |
46 <expand macro="job_conf" /> | |
47 </configfiles> | |
48 <inputs> | |
49 <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). To select more than one file or collection from your history, use the 'ctrl' key" /> | |
50 | |
51 <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" /> | |
52 <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" /> | |
53 <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq. " /> | |
54 <conditional name="assembly"> | |
55 <param name="should_assemble" type="select" label="Assembly"> | |
56 <option value="no" selected="true">Don't perform assembly</option> | |
57 <option value="yes">Perform assembly on corrected reads</option> | |
58 </param> | |
59 <when value="no" /> | |
60 <when value="yes"> | |
61 <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" /> | |
62 <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after bridging" /> | |
63 </when> | |
64 </conditional> | |
65 | |
66 <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing"> | |
67 <expand macro="overlap_sensitive_options" /> | |
68 <expand macro="consensus_sensitive_options" /> | |
69 <expand macro="overlap_fast_options" /> | |
70 <expand macro="consensus_fast_options" /> | |
71 <expand macro="trimming_overlap_options" /> | |
72 <expand macro="assembly_overlap_options" /> | |
73 <expand macro="assembly_overlap_filtering" /> | |
74 <expand macro="contig_assembly" /> | |
75 <expand macro="contig_bridging" /> | |
76 </section> | |
77 </inputs> | |
78 <outputs> <!-- When assembly is enabled exactly one assembly dataset is collected: bridged_contigs.fasta if polishing is off, polished_contigs.fasta if polishing is on. --> | |
79 <data name="out_reads" format="fasta.gz" from_work_dir="project/1-consensus/cns_final.fasta.gz" label="${tool.name} on ${on_string}: corrected reads" /> | |
80 <data name="out_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/bridged_contigs.fasta" label="${tool.name} on ${on_string}: bridged assembly"> | |
81 <filter>assembly['should_assemble'] == 'yes' and not assembly['polish_contigs']</filter> | |
82 </data> | |
83 <data name="out_polished_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/polished_contigs.fasta" label="${tool.name} on ${on_string}: polished assembly"> | |
84 <filter>assembly['should_assemble'] == 'yes' and assembly['polish_contigs']</filter> | |
85 </data> | |
86 </outputs> | |
87 <tests> | |
88 <!-- single input fastq --> | |
89 <test expect_num_outputs="2"> | |
90 <param name="input_fastqs" value="test1.fa" /> | |
91 <param name="genome_size" value="13000" /> | |
92 <param name="min_read_length" value="1000" /> | |
93 <param name="correction_coverage" value="40" /> | |
94 <conditional name="assembly"> | |
95 <param name="should_assemble" value="yes" /> | |
96 <param name="assembly_coverage" value="30"/> | |
97 <param name="polish_contigs" value="true"/> | |
98 </conditional> | |
99 <output name="out_reads" ftype="fasta.gz"> | |
100 <assert_contents> | |
101 <has_size value="75000" delta="2000" /> | |
102 </assert_contents> | |
103 </output> | |
104 <output name="out_polished_assembly" ftype="fasta"> | |
105 <assert_contents> | |
106 <has_line line=">bctg00000000 000000F" /> | |
107 <has_size value="13000" delta="1000" /> | |
108 </assert_contents> | |
109 </output> | |
110 </test> | |
111 <!-- multiple input files of different format --> | |
112 <test expect_num_outputs="2"> | |
113 <param name="input_fastqs" value="test1_head.fastq,test1_tail.fasta" /> | |
114 <param name="genome_size" value="13000" /> | |
115 <param name="min_read_length" value="1000" /> | |
116 <param name="correction_coverage" value="40" /> | |
117 <conditional name="assembly"> | |
118 <param name="should_assemble" value="yes" /> | |
119 <param name="assembly_coverage" value="30"/> | |
120 <param name="polish_contigs" value="true"/> | |
121 </conditional> | |
122 <output name="out_reads" ftype="fasta.gz"> | |
123 <assert_contents> | |
124 <has_size value="29000" delta="2000" /> | |
125 </assert_contents> | |
126 </output> | |
127 <output name="out_polished_assembly" ftype="fasta"> | |
128 <assert_contents> | |
129 <has_line line=">bctg00000000 000000F" /> | |
130 <has_size value="13000" delta="1000" /> | |
131 </assert_contents> | |
132 </output> | |
133 </test> | |
134 <!-- advanced params 1 --> | |
135 <test expect_num_outputs="2"> | |
136 <param name="input_fastqs" value="test1.fa" /> | |
137 <param name="genome_size" value="13000" /> | |
138 <param name="min_read_length" value="1000" /> | |
139 <param name="correction_coverage" value="40" /> | |
140 <conditional name="assembly"> | |
141 <param name="should_assemble" value="yes" /> | |
142 <param name="assembly_coverage" value="30"/> | |
143 <param name="polish_contigs" value="true"/> | |
144 </conditional> | |
145 <section name="adv"> | |
146 <section name="ovs"> | |
147 <param name="n" value="600" /> | |
148 <param name="k" value="14" /> | |
149 <param name="q" value="600" /> | |
150 <param name="z" value="15" /> | |
151 <param name="b" value="2500" /> | |
152 <param name="a" value="800" /> | |
153 <param name="d" value="0.25" /> | |
154 <param name="e" value="0.4" /> | |
155 <param name="m" value="600" /> | |
156 </section> | |
157 </section> | |
158 <output name="out_reads" ftype="fasta.gz"> | |
159 <assert_contents> | |
160 <has_size value="75000" delta="2000" /> | |
161 </assert_contents> | |
162 </output> | |
163 <output name="out_polished_assembly" ftype="fasta"> | |
164 <assert_contents> | |
165 <has_line line=">bctg00000000 000000F" /> | |
166 <has_size value="13000" delta="1000" /> | |
167 </assert_contents> | |
168 </output> | |
169 </test> | |
170 <!-- advanced params 2 --> | |
171 <test expect_num_outputs="2"> | |
172 <param name="input_fastqs" value="test1.fa" /> | |
173 <param name="genome_size" value="13000" /> | |
174 <param name="min_read_length" value="1000" /> | |
175 <param name="correction_coverage" value="40" /> | |
176 <conditional name="assembly"> | |
177 <param name="should_assemble" value="yes" /> | |
178 <param name="assembly_coverage" value="30"/> | |
179 <param name="polish_contigs" value="true"/> | |
180 </conditional> | |
181 <section name="adv"> | |
182 <section name="fol"> | |
183 <param name="min_length" value="2000" /> | |
184 <param name="max_length" value="200000" /> | |
185 <param name="min_aligned_length" value="2000" /> | |
186 <param name="max_overhang" value="20000" /> | |
187 <param name="min_coverage" value="5" /> | |
188 <param name="bestn" value="5" /> | |
189 <param name="overhang_local_deviation1" value="5" /> | |
190 </section> | |
191 </section> | |
192 <output name="out_reads" ftype="fasta.gz"> | |
193 <assert_contents> | |
194 <has_size value="75000" delta="2000" /> | |
195 </assert_contents> | |
196 </output> | |
197 <output name="out_polished_assembly" ftype="fasta"> | |
198 <assert_contents> | |
199 <has_line line=">bctg00000000 000000F" /> | |
200 <has_size value="13000" delta="1000" /> | |
201 </assert_contents> | |
202 </output> | |
203 </test> | |
204 <!-- advanced params 3 --> | |
205 <test expect_num_outputs="2"> | |
206 <param name="input_fastqs" value="test1.fa" /> | |
207 <param name="genome_size" value="13000" /> | |
208 <param name="min_read_length" value="1000" /> | |
209 <param name="correction_coverage" value="40" /> | |
210 <conditional name="assembly"> | |
211 <param name="should_assemble" value="yes" /> | |
212 <param name="assembly_coverage" value="30"/> | |
213 <param name="polish_contigs" value="true"/> | |
214 </conditional> | |
215 <section name="adv"> | |
216 <section name="fa"> | |
217 <param name="min_length" value="1000" /> | |
218 <param name="min_identity" value="40" /> | |
219 <param name="min_contig_length" value="600" /> | |
220 <param name="select_branch" value="true" /> | |
221 </section> | |
222 </section> | |
223 <output name="out_reads" ftype="fasta.gz"> | |
224 <assert_contents> | |
225 <has_size value="75000" delta="2000" /> | |
226 </assert_contents> | |
227 </output> | |
228 <output name="out_polished_assembly" ftype="fasta"> | |
229 <assert_contents> | |
230 <has_line line=">bctg00000000 000000F" /> | |
231 <has_size value="13000" delta="1000" /> | |
232 </assert_contents> | |
233 </output> | |
234 </test> | |
235 <!-- advanced params 4 --> | |
236 <test expect_num_outputs="2"> | |
237 <param name="input_fastqs" value="test1.fa" /> | |
238 <param name="genome_size" value="13000" /> | |
239 <param name="min_read_length" value="1000" /> | |
240 <param name="correction_coverage" value="40" /> | |
241 <conditional name="assembly"> | |
242 <param name="should_assemble" value="yes" /> | |
243 <param name="assembly_coverage" value="30"/> | |
244 <param name="polish_contigs" value="true"/> | |
245 </conditional> | |
246 <section name="adv"> | |
247 <section name="fcb"> | |
248 <param name="read_min_length" value="4000" /> | |
249 <param name="ctg_min_length" value="1000" /> | |
250 <param name="ctg2ctg_min_identity" value="90" /> | |
251 <param name="read2ctg_min_identity" value="60" /> | |
252 <param name="min_contig_length" value="1000" /> | |
253 </section> | |
254 </section> | |
255 <output name="out_reads" ftype="fasta.gz"> | |
256 <assert_contents> | |
257 <has_size value="75000" delta="2000" /> | |
258 </assert_contents> | |
259 </output> | |
260 <output name="out_polished_assembly" ftype="fasta"> | |
261 <assert_contents> | |
262 <has_line line=">bctg00000000 000000F" /> | |
263 <has_size value="13000" delta="1000" /> | |
264 </assert_contents> | |
265 </output> | |
266 </test> | |
267 </tests> | |
268 | |
269 <help><![CDATA[ | |
270 | |
271 NECAT | |
272 ..... | |
273 | |
274 **What it does** | |
275 | |
276 | NECAT performs error correction to remove complex errors in nanopore reads. It can also optionally perform de novo assembly. | |
277 | After assembly it is recommended to use MEDAKA for long-read polishing, then NextPolish for short-read polishing. | |
278 | | |
279 | Github: https://github.com/xiaochuanle/NECAT | |
280 | | |
281 | |
282 **Input** | |
283 | |
284 - One or more files or collections containing sequence reads (fastq / fasta) | |
285 | |
286 **Output** | |
287 | |
288 - Corrected reads (gzipped fasta) | |
289 - Genome assembly (fasta) (Optional) | |
290 | |
291 | | |
292 | |
293 **Advanced Settings** | |
294 | |
295 | Necat runs multiple subprograms in an assembly pipeline to create its final output. | |
296 | Each subprogram does a specific task, then hands its output to the next. | |
297 | The subprograms are listed in order below, alongside the settings which can be configured: | |
298 | | |
299 | |
300 *oc2pmov* | |
301 | |
302 | Finds overlaps between raw-reads | |
303 | *Overlap Sensitive Options & Overlap Fast Options* | |
304 | | |
305 | |
306 -k <Integer> kmer size | |
307 -z <Integer> scan window size | |
308 -q <Integer> kmer occurs > q times will be ignored | |
309 -b <Integer> block size | |
310 -n <Integer> number of candidates | |
311 -a <Integer> min align length | |
312 -d <Real> ddf score cutoff | |
313 -e <Real> sequencing error | |
314 -m <Integer> number of output | |
315 | |
316 | | |
317 | |
318 | DEFAULT OPTIONS: | |
319 | -k 15 -z 10 -q 500 -b 2000 -s 3 -n 500 -a 500 -d 0.250000 -e 0.500000 -m 500 -t 1 | |
320 | |
321 | | |
322 | | |
323 | |
324 *oc2cns* | |
325 | |
326 | Creates consensus reads from raw-read overlaps | |
327 | *Consensus Sensitive Options & Consensus Fast Options* | |
328 | | |
329 | |
330 -a <Integer> align length cutoff | |
331 -x <Integer> minimal coverage | |
332 -y <Integer> maximal coverage | |
333 -l <Integer> minimal length of corrected reads. | |
334 -f <0 or 1> full consensus or not: 1 = yes, 0 = no | |
335 -e <Real> sequencing error | |
336 -p <Real> minimal mapping ratio | |
337 -r <0 or 1> rescue long indels or not: 1 = yes, 0 = no | |
338 -u <0 or 1> use dynamic or fixed ident cutoff: 1 = fixed, 0 = dynamic | |
339 | |
340 | | |
341 | |
342 | DEFAULT OPTIONS: | |
343 | -a 400 -x 4 -y 12 -l 500 -f 0 -e 0.500000 -p 0.800000 -t 1 -r 0 -u 0 -s 0 | |
344 | |
345 | | |
346 | | |
347 | |
348 *oc2asmpm* | |
349 | |
350 | Identifies corrected-read overlaps for assembly | |
351 | *Trimming Overlap Options & Assembly Overlap Options* | |
352 | | |
353 | |
354 | |
355 -k <Integer> kmer size | |
356 -z <Integer> scan window size | |
357 -q <Integer> kmer occurs > q times will be ignored | |
358 -b <Integer> block size | |
359 -n <Integer> number of candidates | |
360 -a <Integer> min align length | |
361 -d <Real> ddf score cutoff | |
362 -e <Real> sequencing error | |
363 -m <Integer> number of output | |
364 | |
365 | | |
366 | | |
367 | |
368 *fsa_ol_filter* | |
369 | |
370 | Filters out low-quality corrected-read overlaps for assembly | |
371 | *Assembly Overlap Filtering Options* | |
372 | | |
373 | |
374 --min_length=INT minimum length of reads. default: 2500 | |
375 --max_length=INT maximum length of reads. default: 2147483647 | |
376 --min_identity=DOUBLE minimum identity of overlaps default: -1 | |
377 --min_aligned_length=INT minimum aligned length of overlaps default: 2500 | |
378 --max_overhang=INT maximum overhang of overlaps, negative number = determined by the program. default: -1 | |
379 --min_coverage=INT minimum base coverage, negative number = determined by the program. default: -1 | |
380 --max_coverage=INT maximum base coverage, negative number = determined by the program default: -1 | |
381 --max_diff_coverage=INT maximum difference of base coverage, negative number = determined by the program default: -1 | |
382 --coverage_discard=DOUBLE discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile. default: 0.01 | |
383 --bestn=INT output best n overlaps on 5' or 3' end for each read. default: 10 | |
384 --genome_size=INT genome size. It determines the maximum length of reads with coverage together default: 0 | |
385 --coverage=INT coverage. It determines the maximum length of reads with genome_size together default: 40 | |
386 --identity_global_deviation1=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 98 | |
387 --identity_global_deviation2=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 | |
388 --overhang_global_deviation1=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 30 | |
389 --overhang_global_deviation2=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 6 | |
390 --identity_local_deviation1=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 99 | |
391 --identity_local_deviation2=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 | |
392 --overhang_local_deviation1=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 10 | |
393 --overhang_local_deviation2=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 6 | |
394 --identity_local_condition=INT Local filtering conditions. 0 = overlap identity < threshold, 1 = overlap identity < threshold and query identity >= target identity default: 0 | |
395 --local_low_coverage=INT If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25 | |
396 | |
397 | | |
398 | | |
399 | |
400 *fsa_assemble* | |
401 | |
402 | Constructs contigs from filtered overlaps | |
403 | *Contig Assembly Options* | |
404 | | |
405 | |
406 --min_length=INT minimum length of reads default: 0 | |
407 --min_identity=DOUBLE minimum identity of overlaps default: 0 | |
408 --min_aligned_length=INT minimum aligned length of overlaps default: 0 | |
409 --min_contig_length=INT minimum length of contigs default: 500 | |
410 --select_branch=BOOL select the most probable branch default: "no" | |
411 --max_spur_length=INT branches shorter than the threshold are treated as spurs default: 50000 | |
412 | |
413 | | |
414 | | |
415 | |
416 *fsa_ctg_bridge* | |
417 | |
418 | Bridges contigs using input long raw-reads | |
419 | *Contig Bridging Options* | |
420 | | |
421 | |
422 --read_min_length=INT minimum rawread length default: 5000 | |
423 --ctg_min_length=INT minimum contig length default: 500 | |
424 --ctg2ctg_min_identity=DOUBLE minimum identity of overlaps between contigs default: 95 | |
425 --ctg2ctg_max_overhang=INT maximum overhang of overlaps between contigs default: 100 | |
426 --ctg2ctg_min_aligned_length=INT minimum aligned length of overlaps between contigs default: 2000 | |
427 --read2ctg_min_identity=DOUBLE minimum identity of overlaps between rawreads and contigs default: 80 | |
428 --read2ctg_max_overhang=INT maximum overhang of overlaps between rawreads and contigs default: 500 | |
429 --read2ctg_min_aligned_length=INT minimum aligned length of overlaps between rawreads and contigs default: 5000 | |
430 --read2ctg_min_coverage=INT minimum coverage of links between rawreads and contigs default: 3 | |
431 --min_contig_length=INT minimum length of bridged contig default: 500 | |
432 --select_branch=BOOL select the most probable branch default: "no" | |
433 --window_size=INT threshold is used to group rawreads that bridge contigs default: 1000 | |
434 | |
435 | | |
436 | |
437 | |
438 ]]></help> | |
439 <expand macro="citations" /> | |
440 </tool> |