Mercurial > repos > bgruening > canu
diff canu.xml @ 0:4c8f32256fa8 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit b7904bf39167833b3d3648e250726615f75f7525
author | bgruening |
---|---|
date | Fri, 08 Jun 2018 04:43:41 -0400 |
parents | |
children | 58346ef3116b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/canu.xml Fri Jun 08 04:43:41 2018 -0400 @@ -0,0 +1,172 @@ +<tool id="canu" name="Canu assembler" version="1.7"> + <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore </description> + <requirements> + <requirement type="package" version="1.7">canu</requirement> + </requirements> + <version_command>canu --version</version_command> + <command detect_errors="exit_code"> + <![CDATA[ + + #for $counter, $input in enumerate($inputs): + #if $input.ext in ['fastq.gz', 'fasta.gz'] + ## linking does not work + cp '$input' ./input_${counter}.gz && + #end if + #end for + + canu + $stage + -p canu + -d out_dir + #if $s: + -s '$s' + #end if + genomeSize=$genomeSize + #if $rawErrorRate: + rawErrorRate=$rawErrorRate + #end if + #if $correctedErrorRate: + correctedErrorRate=$correctedErrorRate + #end if + minReadLength=$minReadLength + minOverlapLength=$minOverlapLength + corOutCoverage=$corOutCoverage + contigFilter=' + ${contigFilter.minReads} + ${contigFilter.minLength} + ${contigFilter.singleReadSpan} + ${contigFilter.lowCovSpan} + ${contigFilter.lowCovDepth} + ' + genomeSize=$genomeSize + stopOnReadQuality=false + minThreads=\${GALAXY_SLOTS:-4} + maxThreads=\${GALAXY_SLOTS:-4} + obtovlThreads=\${GALAXY_SLOTS:-4} + utgovlThreads=\${GALAXY_SLOTS:-4} + batThreads=\${GALAXY_SLOTS:-4} + batMemory=\${GALAXY_MEMORY_MB:-7} + cormhapMemory=\${GALAXY_MEMORY_MB:-7} + obtovlMemory=\${GALAXY_MEMORY_MB:-7} + utgovlMemory=\${GALAXY_MEMORY_MB:-7} + gfaThreads=\${GALAXY_SLOTS:-4} + corThreads=\${GALAXY_SLOTS:-4} + cnsThreads=\${GALAXY_SLOTS:-4} + gnuplotTested=true + useGrid=false + $mode + #for $counter, $input in enumerate($inputs): + #if $input.ext in ['fastq.gz', 'fasta.gz'] + ./input_${counter}.gz + #else: + '$input' + #end if + #end for + 2>&1 + ]]> + </command> + <inputs> + <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" /> + <param name="mode" type="select" label="Mode"> + <option value="-nanopore-raw" selected="true">Nanopore raw</option> + <option value="-nanopore-corrected">Nanopore corrected</option> + <option value="-pacbio-raw">PacBio raw</option> + <option value="-pacbio-corrected">PacBio corrected</option> + </param> + <param name="stage" type="select" label="To restrict canu to only a specific stage, use"> + <option value="" selected="true">all</option> + <option value="-correct">generate corrected reads</option> + <option value="-trim">generate trimmed reads</option> + <option value="-assemble">generate an assembly</option> + <option value="-trim-assemble">generate trimmed reads and then assemble them</option> + </param> + <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)" /> + <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1" + label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." /> + <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1" + label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads. Assemblies of + low coverage or data with biological differences will benefit from a slight increase + in this. Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." /> + <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" /> + <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" /> + <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" /> + <param argument="-s" type="data" format="txt" optional="true" label="Additonal options" help="Additional specifications provided in a canu spec file." /> + + <section name="contigFilter" title="Contig Filters"> + <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" /> + <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" /> + <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" /> + <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" /> + <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" /> + </section> + </inputs> + <outputs> + <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)"> + </data> + <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (contigs)"> + </data> + <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)"> + </data> + <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (unitigs)"> + </data> + </outputs> + <tests> + <test> + <!-- test multiple input --> + <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> + <param name="genomeSize" value="4.6m" /> + <param name="minReadLength" value="2000" /> + <output name="contigs" ftype="fasta" file="ecoli_canu_contigs.fa"/> + <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs.fa"/> + <output name="unassembled" ftype="fasta" file="ecoli_unassembled.fa"/> + </test> + + <test > + <!-- test multiple input --> + <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> + <param name="genomeSize" value="4.6m" /> + <param name="minReadLength" value="2000" /> + <param name="minOverlapLength" value="800" /> + <param name="rawErrorRate" value="0.2" /> + <param name="correctedErrorRate" value="0.05" /> + <param name="corOutCoverage" value="2" /> + <output name="contigs" ftype="fasta" file="canu_contigs_result1.fa"/> + <output name="unitigs" ftype="fasta" file="canu_unitigs_result1.fa"/> + <output name="unassembled" ftype="fasta" file="canu_unassembled_result1.fa"/> + </test> + <test> + <!-- test multiple input --> + <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/> + <param name="minReadLength" value="2000" /> + <param name="stage" value="-correct"/> + <param name="genomeSize" value="4.6m" /> + <section name="contigFilter"> + <param name="minReads" value="10" /> + </section> + <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="canu_corrected_reads.fa.gz"/> + </test> + </tests> + <help> + <![CDATA[ + + Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly. + The correction phase will improve the accuracy of bases in reads. The trimming phase will trim reads to the portion that appears to + be high-quality sequence, removing suspicious regions such as remaining SMRTbell adapter. The assembly phase will order the reads + into contigs, generate consensus sequences and create graphs of alternate paths. + + For eukaryotic genomes, coverage more than 20x is enough to outperform current hybrid methods, however, between 30x and 60x + coverage is the recommended minimum. More coverage will let Canu use longer reads for assembly, which will result in better assemblies. + + http://canu.readthedocs.io + + ]]> + </help> + <citations> + <citation type="doi">10.1101/gr.215087.116</citation> + <citation type="doi">10.1093/bioinformatics/btw753</citation> + <citation type="doi">10.1038/nbt.3238</citation> + <citation type="doi">10.1126/science.287.5461.2196</citation> + <citation type="doi">10.1038/nmeth.4035</citation> + <citation type="doi">10.1038/nmeth.2474</citation> + </citations> +</tool>