comparison canu.xml @ 0:4c8f32256fa8 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit b7904bf39167833b3d3648e250726615f75f7525
author bgruening
date Fri, 08 Jun 2018 04:43:41 -0400
parents
children 58346ef3116b
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8f32256fa8
1 <tool id="canu" name="Canu assembler" version="1.7">
2 <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore </description>
3 <requirements> <!-- package dependency; the pinned version equals the tool's own version attribute (1.7) -->
4 <requirement type="package" version="1.7">canu</requirement>
5 </requirements>
6 <version_command>canu --version</version_command> <!-- asks the canu executable itself for its version string -->
7 <command detect_errors="exit_code">
8 <![CDATA[
9 ## Gzipped inputs are copied into the working directory under a .gz file name before canu runs
10 #for $counter, $input in enumerate($inputs):
11 #if $input.ext in ['fastq.gz', 'fasta.gz']
12 ## linking does not work
13 cp '$input' ./input_${counter}.gz &&
14 #end if
15 #end for
16 ## Fixed flags first, then key=value options, then the read-type flag and the read files
17 canu
18 $stage
19 -p canu
20 -d out_dir
21 #if $s:
22 -s '$s'
23 #end if
24 genomeSize='$genomeSize'
25 #if $rawErrorRate:
26 rawErrorRate=$rawErrorRate
27 #end if
28 #if $correctedErrorRate:
29 correctedErrorRate=$correctedErrorRate
30 #end if
31 minReadLength=$minReadLength
32 minOverlapLength=$minOverlapLength
33 corOutCoverage=$corOutCoverage
34 contigFilter='
35 ${contigFilter.minReads}
36 ${contigFilter.minLength}
37 ${contigFilter.singleReadSpan}
38 ${contigFilter.lowCovSpan}
39 ${contigFilter.lowCovDepth}
40 '
41 stopOnReadQuality=false
42 minThreads=\${GALAXY_SLOTS:-4}
43 maxThreads=\${GALAXY_SLOTS:-4}
44 obtovlThreads=\${GALAXY_SLOTS:-4}
45 utgovlThreads=\${GALAXY_SLOTS:-4}
46 batThreads=\${GALAXY_SLOTS:-4}
47 ## NOTE(review): GALAXY_MEMORY_MB is a megabyte count, yet the fallback of 7 only makes sense as gigabytes; confirm the unit canu expects for the Memory options
48 batMemory=\${GALAXY_MEMORY_MB:-7}
49 cormhapMemory=\${GALAXY_MEMORY_MB:-7}
50 obtovlMemory=\${GALAXY_MEMORY_MB:-7}
51 utgovlMemory=\${GALAXY_MEMORY_MB:-7}
52 gfaThreads=\${GALAXY_SLOTS:-4}
53 corThreads=\${GALAXY_SLOTS:-4}
54 cnsThreads=\${GALAXY_SLOTS:-4}
55 gnuplotTested=true
56 useGrid=false
57 $mode
58 #for $counter, $input in enumerate($inputs):
59 #if $input.ext in ['fastq.gz', 'fasta.gz']
60 ./input_${counter}.gz
61 #else:
62 '$input'
63 #end if
64 #end for
65 2>&1
66 ]]>
67 </command>
68 <inputs>
69 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" />
70 <param name="mode" type="select" label="Mode">
71 <option value="-nanopore-raw" selected="true">Nanopore raw</option>
72 <option value="-nanopore-corrected">Nanopore corrected</option>
73 <option value="-pacbio-raw">PacBio raw</option>
74 <option value="-pacbio-corrected">PacBio corrected</option>
75 </param>
76 <param name="stage" type="select" label="To restrict canu to only a specific stage, use">
77 <option value="" selected="true">all</option>
78 <option value="-correct">generate corrected reads</option>
79 <option value="-trim">generate trimmed reads</option>
80 <option value="-assemble">generate an assembly</option>
81 <option value="-trim-assemble">generate trimmed reads and then assemble them</option>
82 </param>
83 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)"><validator type="regex" message="Enter a number with an optional k, m or g suffix, e.g. 4.6m">^[0-9.]+[kKmMgG]?$</validator></param>
84 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1"
85 label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." />
86 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1"
87 label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of
88 low coverage or data with biological differences will benefit from a slight increase
89 in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." />
90 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" />
91 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" />
92 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" />
93 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file." />
94 <!-- The five values of this section are joined, in this order, into the single contigFilter='...' option of the command -->
95 <section name="contigFilter" title="Contig Filters">
96 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" />
97 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" />
98 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" />
99 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" />
100 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" />
101 </section>
102 </inputs>
103 <outputs> <!-- each dataset is collected from out_dir/ using the "canu" file prefix that the command sets with -d and -p -->
104 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)">
105 </data>
106 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)">
107 </data>
108 <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)">
109 </data>
110 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)">
111 </data>
112 </outputs>
113 <tests>
114 <test>
115 <!-- single FASTA input, defaults apart from minReadLength; compares contigs, unitigs and unassembled outputs -->
116 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
117 <param name="genomeSize" value="4.6m" />
118 <param name="minReadLength" value="2000" />
119 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs.fa"/>
120 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs.fa"/>
121 <output name="unassembled" ftype="fasta" file="ecoli_unassembled.fa"/>
122 </test>
123
124 <test >
125 <!-- same input with explicit overlap length, raw and corrected error rates, and corrected-read coverage -->
126 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
127 <param name="genomeSize" value="4.6m" />
128 <param name="minReadLength" value="2000" />
129 <param name="minOverlapLength" value="800" />
130 <param name="rawErrorRate" value="0.2" />
131 <param name="correctedErrorRate" value="0.05" />
132 <param name="corOutCoverage" value="2" />
133 <output name="contigs" ftype="fasta" file="canu_contigs_result1.fa"/>
134 <output name="unitigs" ftype="fasta" file="canu_unitigs_result1.fa"/>
135 <output name="unassembled" ftype="fasta" file="canu_unassembled_result1.fa"/>
136 </test>
137 <test>
138 <!-- correction stage only (-correct) with a contigFilter override; compares the gzipped corrected reads after decompression -->
139 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
140 <param name="minReadLength" value="2000" />
141 <param name="stage" value="-correct"/>
142 <param name="genomeSize" value="4.6m" />
143 <section name="contigFilter">
144 <param name="minReads" value="10" />
145 </section>
146 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="canu_corrected_reads.fa.gz"/>
147 </test>
148 </tests>
149 <help>
150 <![CDATA[
151
152 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly.
153 The correction phase will improve the accuracy of bases in reads. The trimming phase will trim reads to the portion that appears to
154 be high-quality sequence, removing suspicious regions such as remaining SMRTbell adapter. The assembly phase will order the reads
155 into contigs, generate consensus sequences and create graphs of alternate paths.
156
157 For eukaryotic genomes, coverage of more than 20x is enough to outperform current hybrid methods; however, between 30x and 60x
158 coverage is the recommended minimum. More coverage will let Canu use longer reads for assembly, which will result in better assemblies.
159
160 http://canu.readthedocs.io
161
162 ]]>
163 </help>
164 <citations> <!-- publications to cite for this tool, each given by DOI -->
165 <citation type="doi">10.1101/gr.215087.116</citation>
166 <citation type="doi">10.1093/bioinformatics/btw753</citation>
167 <citation type="doi">10.1038/nbt.3238</citation>
168 <citation type="doi">10.1126/science.287.5461.2196</citation>
169 <citation type="doi">10.1038/nmeth.4035</citation>
170 <citation type="doi">10.1038/nmeth.2474</citation>
171 </citations>
172 </tool>