comparison canu.xml @ 3:5732f959936a draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit 7000c7eb839b77a0e7e91874048219bd3a3f5d47"
author bgruening
date Mon, 15 Feb 2021 12:31:26 +0000
parents c5b7390290b1
children 86f150c8019d
comparison
equal deleted inserted replaced
2:c5b7390290b1 3:5732f959936a
1 <tool id="canu" name="Canu assembler" version="1.8"> 1 <tool id="canu" name="Canu assembler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore </description> 2 <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore</description>
3 <xrefs>
4 <xref type="bio.tools">canu</xref>
5 </xrefs>
6 <macros>
7 <token name="@TOOL_VERSION@">2.1.1</token>
8 <token name="@VERSION_SUFFIX@">0</token>
9 </macros>
3 <requirements> 10 <requirements>
4 <requirement type="package" version="1.8">canu</requirement> 11 <requirement type="package" version="@TOOL_VERSION@">canu</requirement>
5 </requirements> 12 </requirements>
6 <version_command>canu --version</version_command> 13 <version_command>canu --version</version_command>
7 <command detect_errors="exit_code"> 14 <command detect_errors="exit_code">
8 <![CDATA[ 15 <![CDATA[
9 16
17 canu 24 canu
18 #if $stage != 'all': 25 #if $stage != 'all':
19 $stage 26 $stage
20 #end if 27 #end if
21 -p canu 28 -p canu
22 -d out_dir 29 -d ./out_dir
23 #if $s: 30 #if $s:
24 -s '$s' 31 -s '$s'
25 #end if 32 #end if
26 genomeSize=$genomeSize
27 #if $rawErrorRate: 33 #if $rawErrorRate:
28 rawErrorRate=$rawErrorRate 34 rawErrorRate=$rawErrorRate
29 #end if 35 #end if
30 #if $correctedErrorRate: 36 #if $correctedErrorRate:
31 correctedErrorRate=$correctedErrorRate 37 correctedErrorRate=$correctedErrorRate
32 #end if 38 #end if
33 minReadLength=$minReadLength 39 minReadLength=$minReadLength
34 minOverlapLength=$minOverlapLength 40 minOverlapLength=$minOverlapLength
35 corOutCoverage=$corOutCoverage 41 corOutCoverage=$corOutCoverage
42 #if $stopOnLowCoverage
43 stopOnLowCoverage=$stopOnLowCoverage
44 #end if
45 #if $minInputCoverage
46 minInputCoverage=$minInputCoverage
47 #end if
36 contigFilter=' 48 contigFilter='
37 ${contigFilter.minReads} 49 ${contigFilter.minReads}
38 ${contigFilter.minLength} 50 ${contigFilter.minLength}
39 ${contigFilter.singleReadSpan} 51 ${contigFilter.singleReadSpan}
40 ${contigFilter.lowCovSpan} 52 ${contigFilter.lowCovSpan}
41 ${contigFilter.lowCovDepth} 53 ${contigFilter.lowCovDepth}
42 ' 54 '
43 genomeSize=$genomeSize 55
56 genomeSize='$genomeSize'
44 minThreads=\${GALAXY_SLOTS:-4} 57 minThreads=\${GALAXY_SLOTS:-4}
45 maxThreads=\${GALAXY_SLOTS:-4} 58 maxThreads=\${GALAXY_SLOTS:-4}
59 redMemory=\${GALAXY_MEMORY_MB:-4096}M
60 redThreads=\${GALAXY_SLOTS:-4}
46 obtovlThreads=\${GALAXY_SLOTS:-4} 61 obtovlThreads=\${GALAXY_SLOTS:-4}
47 utgovlThreads=\${GALAXY_SLOTS:-4} 62 utgovlThreads=\${GALAXY_SLOTS:-4}
48 batThreads=\${GALAXY_SLOTS:-4} 63 batThreads=\${GALAXY_SLOTS:-4}
49 batMemory=\${GALAXY_MEMORY_MB:-4096}M 64 batMemory=\${GALAXY_MEMORY_MB:-4096}M
50 cormhapMemory=\${GALAXY_MEMORY_MB:-4096}M 65 cormhapMemory=\${GALAXY_MEMORY_MB:-4096}M
51 obtovlMemory=\${GALAXY_MEMORY_MB:-4096}M 66 obtovlMemory=\${GALAXY_MEMORY_MB:-4096}M
52 utgovlMemory=\${GALAXY_MEMORY_MB:-4096}M 67 utgovlMemory=\${GALAXY_MEMORY_MB:-4096}M
53 gfaThreads=\${GALAXY_SLOTS:-4}
54 corThreads=\${GALAXY_SLOTS:-4} 68 corThreads=\${GALAXY_SLOTS:-4}
69 corMemory=\${GALAXY_MEMORY_MB:-4096}M
55 cnsThreads=\${GALAXY_SLOTS:-4} 70 cnsThreads=\${GALAXY_SLOTS:-4}
71 cnsMemory=\${GALAXY_MEMORY_MB:-4096}M
72 oeaMemory=\${GALAXY_MEMORY_MB:-4096}M
73 oeaThreads=\${GALAXY_SLOTS:-4}
56 useGrid=false 74 useGrid=false
57 $mode 75
76 #for $haplotype in $haplotypes:
77 -haplotype${haplotype.haplotype_name} '${haplotype.haplotype_input}'
78 #end for
79
80 $technology
81 #if $processing:
82 $processing
83 #end if
84
58 #for $counter, $input in enumerate($inputs): 85 #for $counter, $input in enumerate($inputs):
59 #if $input.ext in ['fastq.gz', 'fasta.gz'] 86 #if $input.ext in ['fastq.gz', 'fasta.gz']
60 ./input_${counter}.gz 87 ./input_${counter}.gz
61 #else: 88 #else:
62 '$input' 89 '$input'
63 #end if 90 #end if
64 #end for 91 #end for
65 2>&1 92 2>&1
66 && 93
67 echo "Check echo"
68 ]]> 94 ]]>
69 </command> 95 </command>
70 <inputs> 96 <inputs>
71 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" /> 97 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads"/>
72 <param name="mode" type="select" label="Mode"> 98 <repeat name="haplotypes" min="0" max="2" title="Haplotypes for Trio Binning Assembly" help="Canu has support for using parental short-read sequencing to classify and bin the F1 reads">
73 <option value="-nanopore-raw" selected="true">Nanopore raw</option> 99 <param name="haplotype_input" type="data" format="fasta,fastq" multiple="true" label="Haplotype input reads"/>
74 <option value="-nanopore-corrected">Nanopore corrected</option> 100 <param name="haplotype_name" type="text" label="Short name to identify your haplotype"/>
75 <option value="-pacbio-raw">PacBio raw</option> 101 </repeat>
76 <option value="-pacbio-corrected">PacBio corrected</option> 102 <param name="technology" type="select" label="Technology">
103 <option value="-nanopore" selected="true">Nanopore</option>
104 <option value="-pacbio">PacBio</option>
105 <option value="-pacbio-hifi">PacBio HiFi</option>
106 </param>
107 <param name="processing" type="select" optional="true" label="Processing">
108 <option value="-corrected">Corrected</option>
109 <option value="-trimmed">Trimmed</option>
77 </param> 110 </param>
78 <param name="stage" type="select" label="To restrict canu to only a specific stage, use"> 111 <param name="stage" type="select" label="To restrict canu to only a specific stage, use">
79 <option value="all" selected="true">all</option> 112 <option value="all" selected="true">all</option>
113 <option value="-haplotype">generate haplotype-specific reads</option>
80 <option value="-correct">generate corrected reads</option> 114 <option value="-correct">generate corrected reads</option>
81 <option value="-trim">generate trimmed reads</option> 115 <option value="-trim">generate trimmed reads</option>
82 <option value="-assemble">generate an assembly</option> 116 <option value="-assemble">generate an assembly</option>
83 <option value="-trim-assemble">generate trimmed reads and then assemble them</option> 117 <option value="-trim-assemble">generate trimmed reads and then assemble them</option>
84 </param> 118 </param>
85 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)"> 119 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 8.0m, 15k, 2g)">
86 <validator type="empty_field" /> 120 <validator type="empty_field"/>
87 </param> 121 <validator type="expression" message="Only values similar to 8.0m, 15k or 2g are allowed.">value.replace('.', '').isalnum() and value[-1] in ['m', 'k', 'g'] and float(value[:-1])</validator>
88 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" 122 </param>
89 label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." /> 123 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads."/>
90 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" 124 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of low coverage or data with biological differences will benefit from a slight increase in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads."/>
91 label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of 125 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length"/>
92 low coverage or data with biological differences will benefit from a slight increase 126 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap"/>
93 in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." /> 127 <param argument="minInputCoverage" type="integer" value="" min="1" optional="true" label="Minimum Input Coverage"/>
94 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" /> 128 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads"/>
95 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" /> 129 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file."/>
96 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" /> 130 <param argument="stopOnLowCoverage" type="integer" value="10" min="1" label="Stop the assembly if read coverage is too low to be useful" help="Coverage is checked when input sequences are initially loaded into the sequence store, when corrected reads are generated, and when read ends are trimmed off."/>
97 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file." />
98
99 <section name="contigFilter" title="Contig Filters"> 131 <section name="contigFilter" title="Contig Filters">
100 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" /> 132 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads"/>
101 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" /> 133 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length"/>
102 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" /> 134 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)"/>
103 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" /> 135 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)"/>
104 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" /> 136 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth"/>
105 </section> 137 </section>
106 </inputs> 138 </inputs>
107 <outputs> 139 <outputs>
140 <data name="report" format="txt" from_work_dir="out_dir/canu.report" label="${tool.name} on ${on_string} (report)"/>
108 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)"> 141 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)">
109 <filter>stage == 'all'</filter> 142 <filter>stage == 'all'</filter>
110 </data> 143 </data>
111 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)"> 144 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)">
112 <filter>stage == 'all'</filter> 145 <filter>stage == 'all'</filter>
113 </data> 146 </data>
114 <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)">
115 <filter>stage == 'all'</filter>
116 </data>
117 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)"> 147 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)">
118 <filter>'-correct' in stage or stage == 'all'</filter> 148 <filter>'-correct' in stage or stage == 'all'</filter>
119 </data> 149 </data>
120 <data name="trimmed_reads" format="fasta.gz" from_work_dir="out_dir/canu.trimmedReads.fasta.gz" label="${tool.name} on ${on_string} (trimmed reads)"> 150 <data name="trimmed_reads" format="fasta.gz" from_work_dir="out_dir/canu.trimmedReads.fasta.gz" label="${tool.name} on ${on_string} (trimmed reads)">
121 <filter>'-trim' in stage or stage == 'all'</filter> 151 <filter>'-trim' in stage or stage == 'all'</filter>
122 </data> 152 </data>
123 </outputs> 153 </outputs>
124 <tests> 154 <tests>
125 <test expect_num_outputs="5"> 155 <test expect_num_outputs="5">
126 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> 156 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
127 <param name="genomeSize" value="4.6m" /> 157 <param name="technology" value="-nanopore"/>
128 <param name="minReadLength" value="2000" /> 158 <param name="genomeSize" value="20k"/>
159 <param name="stopOnLowCoverage" value="1"/>
160 <param name="minInputCoverage" value="1"/>
161 <param name="minReadLength" value="2000"/>
129 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result1.fa"/> 162 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result1.fa"/>
130 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs_result1.fa"/>
131 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result1.fa"/> 163 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result1.fa"/>
132 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result1.fa.gz"/> 164 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result1.fa.gz"/>
133 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result1.fa.gz"/> 165 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result1.fa.gz"/>
166 <output name="report">
167 <assert_contents>
168 <has_n_lines n="488"/>
169 <has_text_matching expression="[UNITIGGING/CONTIGS]"/>
170 <has_text_matching expression="-- Contig sizes based on genome size 20kbp:"/>
171 </assert_contents>
172 </output>
134 </test> 173 </test>
135 <test expect_num_outputs="5"> 174 <test expect_num_outputs="5">
136 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> 175 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
137 <param name="genomeSize" value="4.6m" /> 176 <param name="technology" value="-nanopore"/>
138 <param name="minReadLength" value="2000" /> 177 <param name="genomeSize" value="20k"/>
139 <param name="minOverlapLength" value="800" /> 178 <param name="stopOnLowCoverage" value="1"/>
140 <param name="rawErrorRate" value="0.2" /> 179 <param name="minInputCoverage" value="1"/>
141 <param name="correctedErrorRate" value="0.05" /> 180 <param name="minReadLength" value="2000"/>
142 <param name="corOutCoverage" value="2" /> 181 <param name="minOverlapLength" value="800"/>
182 <param name="rawErrorRate" value="0.2"/>
183 <param name="correctedErrorRate" value="0.05"/>
184 <param name="corOutCoverage" value="2"/>
143 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result2.fa"/> 185 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result2.fa"/>
144 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs_result2.fa"/>
145 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result2.fa"/> 186 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result2.fa"/>
146 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result2.fa.gz"/> 187 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result2.fa.gz"/>
147 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result2.fa.gz"/> 188 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result2.fa.gz"/>
148 </test> 189 <output name="report">
149 <test expect_num_outputs="1"> 190 <assert_contents>
150 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> 191 <has_n_lines n="464"/>
192 <has_text_matching expression="[UNITIGGING/CONTIGS]"/>
193 <has_text_matching expression="-- Contig sizes based on genome size 20kbp:"/>
194 </assert_contents>
195 </output>
196 </test>
197 <test expect_num_outputs="2">
198 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
199 <param name="technology" value="-nanopore"/>
200 <param name="genomeSize" value="20k"/>
151 <param name="stage" value="-correct"/> 201 <param name="stage" value="-correct"/>
152 <param name="minReadLength" value="2500" /> 202 <param name="stopOnLowCoverage" value="1"/>
153 <param name="genomeSize" value="4.6m" /> 203 <param name="minReadLength" value="2500"/>
154 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result3.fa.gz"/> 204 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result3.fa.gz"/>
155 </test> 205 <output name="report">
156 <test expect_num_outputs="1"> 206 <assert_contents>
157 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> 207 <has_n_lines n="187"/>
208 <has_text_matching expression="[TRIMMING/READS]"/>
209 <has_text_matching expression="-- Found 89 reads."/>
210 </assert_contents>
211 </output>
212 </test>
213 <!-- trimming test: it currently does not trim anything because of the input data -->
214 <test expect_num_outputs="2">
215 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
216 <param name="technology" value="-nanopore"/>
217 <param name="genomeSize" value="3.4m"/>
158 <param name="stage" value="-trim"/> 218 <param name="stage" value="-trim"/>
159 <param name="minReadLength" value="2500" /> 219 <param name="minReadLength" value="500"/>
160 <param name="genomeSize" value="4.6m" /> 220 <output name="report">
161 <output name="trimmed_reads" ftype="fasta.gz" compare="sim_size" delta="12000" file="ecoli_canu_trimmed_reads_result4.fa.gz"/> 221 <assert_contents>
162 </test> 222 <has_text_matching expression="[TRIMMING/READS]"/>
223 <has_n_lines n="6"/>
224 <has_text_matching expression="Found 0 reads."/>
225 </assert_contents>
226 </output>
227 </test>
228 <!--test expect_num_outputs="5">
229 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
230 <param name="technology" value="-pacbio"/>
231 <repeat name="haplotypes">
232 <param name="haplotype_name" value="K12"/>
233 <param name="haplotype_input" ftype="fasta" value="ecoli-reads.fasta"/>
234 </repeat>
235 <repeat name="haplotypes">
236 <param name="haplotype_name" value="K13"/>
237 <param name="haplotype_input" ftype="fasta" value="ecoli-reads.fasta"/>
238 </repeat>
239 <param name="genomeSize" value="20k"/>
240 <param name="stopOnLowCoverage" value="1"/>
241 <param name="minInputCoverage" value="1"/>
242 <param name="minReadLength" value="2000"/>
243 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs_result5.fa"/>
244 <output name="unassembled" ftype="fasta" file="ecoli_canu_unassembled_result5.fa"/>
245 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_corrected_reads_result5.fa.gz"/>
246 <output name="trimmed_reads" ftype="fasta.gz" decompress="True" file="ecoli_canu_trimmed_reads_result5.fa.gz"/>
247 </test-->
163 </tests> 248 </tests>
164 <help> 249 <help>
165 <![CDATA[ 250 <![CDATA[
166 251
167 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly. 252 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly.