Mercurial > repos > bgruening > canu
comparison canu.xml @ 0:4c8f32256fa8 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit b7904bf39167833b3d3648e250726615f75f7525
author | bgruening |
---|---|
date | Fri, 08 Jun 2018 04:43:41 -0400 |
parents | |
children | 58346ef3116b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8f32256fa8 |
---|---|
1 <tool id="canu" name="Canu assembler" version="1.7"> | |
2 <description>Assembler optimized for long error-prone reads such as PacBio and Oxford Nanopore</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.7">canu</requirement> | |
5 </requirements> | |
6 <version_command>canu --version</version_command> | |
7 <command detect_errors="exit_code"> | |
8 <![CDATA[ | |
9 ## Stage gzip-compressed inputs in the working directory under a .gz file name | |
10 #for $counter, $input in enumerate($inputs): | |
11 #if $input.ext in ['fastq.gz', 'fasta.gz']: | |
12 ## linking does not work, so the dataset is copied | |
13 cp '$input' ./input_${counter}.gz && | |
14 #end if | |
15 #end for | |
16 ## -p and -d define the out_dir/canu.* paths that the outputs section collects via from_work_dir | |
17 canu | |
18 $stage | |
19 -p canu | |
20 -d out_dir | |
21 #if $s: | |
22 -s '$s' | |
23 #end if | |
24 genomeSize='$genomeSize' | |
25 #if $rawErrorRate: | |
26 rawErrorRate=$rawErrorRate | |
27 #end if | |
28 #if $correctedErrorRate: | |
29 correctedErrorRate=$correctedErrorRate | |
30 #end if | |
31 minReadLength=$minReadLength | |
32 minOverlapLength=$minOverlapLength | |
33 corOutCoverage=$corOutCoverage | |
34 contigFilter=' | |
35 ${contigFilter.minReads} | |
36 ${contigFilter.minLength} | |
37 ${contigFilter.singleReadSpan} | |
38 ${contigFilter.lowCovSpan} | |
39 ${contigFilter.lowCovDepth} | |
40 ' | |
41 ## Threads and memory come from the Galaxy job environment (fallbacks 4 and 7). NOTE(review): GALAXY_MEMORY_MB looks like megabytes - confirm the unit canu expects for the Memory options | |
42 stopOnReadQuality=false | |
43 minThreads=\${GALAXY_SLOTS:-4} | |
44 maxThreads=\${GALAXY_SLOTS:-4} | |
45 obtovlThreads=\${GALAXY_SLOTS:-4} | |
46 utgovlThreads=\${GALAXY_SLOTS:-4} | |
47 batThreads=\${GALAXY_SLOTS:-4} | |
48 batMemory=\${GALAXY_MEMORY_MB:-7} | |
49 cormhapMemory=\${GALAXY_MEMORY_MB:-7} | |
50 obtovlMemory=\${GALAXY_MEMORY_MB:-7} | |
51 utgovlMemory=\${GALAXY_MEMORY_MB:-7} | |
52 gfaThreads=\${GALAXY_SLOTS:-4} | |
53 corThreads=\${GALAXY_SLOTS:-4} | |
54 cnsThreads=\${GALAXY_SLOTS:-4} | |
55 gnuplotTested=true | |
56 useGrid=false | |
57 $mode | |
58 #for $counter, $input in enumerate($inputs): | |
59 #if $input.ext in ['fastq.gz', 'fasta.gz']: | |
60 ./input_${counter}.gz | |
61 #else: | |
62 '$input' | |
63 #end if | |
64 #end for | |
65 2>&1 | |
66 ]]> | |
67 </command> | |
68 <inputs> <!-- every parameter below is referenced by name in the command template --> | |
69 <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" /> | |
70 <param name="mode" type="select" label="Mode"> | |
71 <option value="-nanopore-raw" selected="true">Nanopore raw</option> | |
72 <option value="-nanopore-corrected">Nanopore corrected</option> | |
73 <option value="-pacbio-raw">PacBio raw</option> | |
74 <option value="-pacbio-corrected">PacBio corrected</option> | |
75 </param> | |
76 <param name="stage" type="select" label="To restrict canu to only a specific stage, use"> | |
77 <option value="" selected="true">all</option> | |
78 <option value="-correct">generate corrected reads</option> | |
79 <option value="-trim">generate trimmed reads</option> | |
80 <option value="-assemble">generate an assembly</option> | |
81 <option value="-trim-assemble">generate trimmed reads and then assemble them</option> | |
82 </param> | |
83 <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)"><validator type="regex" message="Genome size must be a number, optionally followed by k, m or g (e.g. 4.6m)">^[0-9]*\.?[0-9]+[kKmMgG]?$</validator></param> | |
84 <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" | |
85 label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." /> | |
86 <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" | |
87 label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of | |
88 low coverage or data with biological differences will benefit from a slight increase | |
89 in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." /> | |
90 <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" /> | |
91 <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" /> | |
92 <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" /> | |
93 <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file." /> | |
94 | |
95 <section name="contigFilter" title="Contig Filters"> | |
96 <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" /> | |
97 <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" /> | |
98 <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" /> | |
99 <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" /> | |
100 <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" /> | |
101 </section> | |
102 </inputs> | |
103 <outputs> <!-- each dataset is collected from out_dir; the label suffix names what the dataset holds --> | |
104 <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)"> | |
105 </data> | |
106 <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)"> | |
107 </data> | |
108 <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)"> | |
109 </data> | |
110 <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)"> | |
111 </data> | |
112 </outputs> | |
113 <tests> | |
114 <test> | |
115 <!-- single FASTA input; only genomeSize and minReadLength are set, everything else uses defaults --> | |
116 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
117 <param name="genomeSize" value="4.6m" /> | |
118 <param name="minReadLength" value="2000" /> | |
119 <output name="contigs" ftype="fasta" file="ecoli_canu_contigs.fa"/> | |
120 <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs.fa"/> | |
121 <output name="unassembled" ftype="fasta" file="ecoli_unassembled.fa"/> | |
122 </test> | |
123 | |
124 <test > | |
125 <!-- single FASTA input with explicit overlap length, error rates and corrected-read coverage --> | |
126 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
127 <param name="genomeSize" value="4.6m" /> | |
128 <param name="minReadLength" value="2000" /> | |
129 <param name="minOverlapLength" value="800" /> | |
130 <param name="rawErrorRate" value="0.2" /> | |
131 <param name="correctedErrorRate" value="0.05" /> | |
132 <param name="corOutCoverage" value="2" /> | |
133 <output name="contigs" ftype="fasta" file="canu_contigs_result1.fa"/> | |
134 <output name="unitigs" ftype="fasta" file="canu_unitigs_result1.fa"/> | |
135 <output name="unassembled" ftype="fasta" file="canu_unassembled_result1.fa"/> | |
136 </test> | |
137 <test> | |
138 <!-- correction stage only (-correct) with a contig filter override; checks the corrected reads output --> | |
139 <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> | |
140 <param name="minReadLength" value="2000" /> | |
141 <param name="stage" value="-correct"/> | |
142 <param name="genomeSize" value="4.6m" /> | |
143 <section name="contigFilter"> | |
144 <param name="minReads" value="10" /> | |
145 </section> | |
146 <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="canu_corrected_reads.fa.gz"/> | |
147 </test> | |
148 </tests> | |
149 <help> | |
150 <![CDATA[ | |
151 | |
152 Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly. | |
153 The correction phase will improve the accuracy of bases in reads. The trimming phase will trim reads to the portion that appears to | |
154 be high-quality sequence, removing suspicious regions such as remaining SMRTbell adapter. The assembly phase will order the reads | |
155 into contigs, generate consensus sequences and create graphs of alternate paths. | |
156 | |
157 For eukaryotic genomes, coverage of more than 20x is enough to outperform current hybrid methods; however, between 30x and 60x | |
158 coverage is the recommended minimum. More coverage will let Canu use longer reads for assembly, which will result in better assemblies. | |
159 | |
160 http://canu.readthedocs.io | |
161 | |
162 ]]> | |
163 </help> | |
164 <citations> <!-- publications to cite for this tool, each identified by its DOI --> | |
165 <citation type="doi">10.1101/gr.215087.116</citation> | |
166 <citation type="doi">10.1093/bioinformatics/btw753</citation> | |
167 <citation type="doi">10.1038/nbt.3238</citation> | |
168 <citation type="doi">10.1126/science.287.5461.2196</citation> | |
169 <citation type="doi">10.1038/nmeth.4035</citation> | |
170 <citation type="doi">10.1038/nmeth.2474</citation> | |
171 </citations> | |
172 </tool> |