diff canu.xml @ 0:4c8f32256fa8 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/canu commit b7904bf39167833b3d3648e250726615f75f7525
author bgruening
date Fri, 08 Jun 2018 04:43:41 -0400
parents
children 58346ef3116b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/canu.xml	Fri Jun 08 04:43:41 2018 -0400
@@ -0,0 +1,172 @@
+<tool id="canu" name="Canu assembler" version="1.7">
+    <description>Assembler optimized for long error-prone reads such as PacBio, Oxford Nanopore </description>
+    <!-- The Canu package is pinned to 1.7, the same value as the tool's version attribute. -->
+    <requirements>
+        <requirement type="package" version="1.7">canu</requirement>
+    </requirements>
+    <!-- Asks the installed canu binary to report its own version. -->
+    <version_command>canu --version</version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+    ## Builds the canu command line from the tool parameters.
+    ## Gzipped inputs are first copied into the working directory under a name
+    ## ending in .gz (linking does not work); other inputs are passed by path.
+    #for $counter, $input in enumerate($inputs):
+        #if $input.ext in ['fastq.gz', 'fasta.gz']
+            cp '$input' ./input_${counter}.gz &&
+        #end if
+    #end for
+
+    ## Canu memory options are given in gigabytes while GALAXY_MEMORY_MB is in
+    ## megabytes, so convert once (default 7 GB, never below 1 GB).
+    CANU_MEMORY_GB=\$(( \${GALAXY_MEMORY_MB:-7168} / 1024 )) &&
+    if [ "\$CANU_MEMORY_GB" -lt 1 ]; then CANU_MEMORY_GB=1; fi &&
+
+    canu
+        $stage
+        -p canu
+        -d out_dir
+        #if $s:
+            -s '$s'
+        #end if
+        ## genomeSize is free text, so keep it quoted on the shell command line.
+        genomeSize='$genomeSize'
+        ## Compare against the empty string so that an explicit 0 is still passed on.
+        #if str($rawErrorRate) != '':
+            rawErrorRate=$rawErrorRate
+        #end if
+        #if str($correctedErrorRate) != '':
+            correctedErrorRate=$correctedErrorRate
+        #end if
+        minReadLength=$minReadLength
+        minOverlapLength=$minOverlapLength
+        corOutCoverage=$corOutCoverage
+        contigFilter='
+            ${contigFilter.minReads}
+            ${contigFilter.minLength}
+            ${contigFilter.singleReadSpan}
+            ${contigFilter.lowCovSpan}
+            ${contigFilter.lowCovDepth}
+        '
+        stopOnReadQuality=false
+        ## Thread counts follow the slots Galaxy allocated to the job (4 if unset).
+        minThreads=\${GALAXY_SLOTS:-4}
+        maxThreads=\${GALAXY_SLOTS:-4}
+        obtovlThreads=\${GALAXY_SLOTS:-4}
+        utgovlThreads=\${GALAXY_SLOTS:-4}
+        batThreads=\${GALAXY_SLOTS:-4}
+        batMemory=\${CANU_MEMORY_GB}
+        cormhapMemory=\${CANU_MEMORY_GB}
+        obtovlMemory=\${CANU_MEMORY_GB}
+        utgovlMemory=\${CANU_MEMORY_GB}
+        gfaThreads=\${GALAXY_SLOTS:-4}
+        corThreads=\${GALAXY_SLOTS:-4}
+        cnsThreads=\${GALAXY_SLOTS:-4}
+        gnuplotTested=true
+        useGrid=false
+        $mode
+        ## Read files follow the technology flag; gzipped ones use the local copies made above.
+        #for $counter, $input in enumerate($inputs):
+            #if $input.ext in ['fastq.gz', 'fasta.gz']
+                ./input_${counter}.gz
+            #else:
+                '$input'
+            #end if
+        #end for
+        2>&1
+    ]]>
+    </command>
+    <inputs>
+        <!-- Read files; several datasets may be selected at once. -->
+        <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" />
+        <!-- Option values are inserted verbatim into the canu command line as technology flags. -->
+        <param name="mode" type="select" label="Mode">
+            <option value="-nanopore-raw" selected="true">Nanopore raw</option>
+            <option value="-nanopore-corrected">Nanopore corrected</option>
+            <option value="-pacbio-raw">PacBio raw</option>
+            <option value="-pacbio-corrected">PacBio corrected</option>
+        </param>
+        <!-- An empty value means no stage flag is passed. -->
+        <param name="stage" type="select" label="To restrict canu to only a specific stage, use">
+            <option value="" selected="true">all</option>
+            <option value="-correct">generate corrected reads</option>
+            <option value="-trim">generate trimmed reads</option>
+            <option value="-assemble">generate an assembly</option>
+            <option value="-trim-assemble">generate trimmed reads and then assemble them</option>
+        </param>
+        <!-- genomeSize is always written to the command line, so reject an empty value up front. -->
+        <param argument="genomeSize" type="text" label="Estimated genome size (e.g. 80m, 15k, 2g)">
+            <validator type="empty_field" message="An estimated genome size is required." />
+        </param>
+        <!-- Both error rates are optional; when left empty they are omitted from the command line. -->
+        <param argument="rawErrorRate" type="float" value="" optional="true" min="0" max="1"
+            label="Maximum raw overlap mismatch" help="The defaults are 0.300 for PacBio reads and 0.500 for Nanopore reads." />
+        <param argument="correctedErrorRate" type="float" value="" optional="true" min="0" max="1"
+            label="Maximum corrected overlap mismatch" help="The allowed difference in an overlap between two corrected reads.  Assemblies of
+                low coverage or data with biological differences will benefit from a slight increase
+                in this.  Defaults are 0.045 for PacBio reads and 0.144 for Nanopore reads." />
+        <param argument="minReadLength" type="integer" value="1000" min="1" label="Minimum read length" />
+        <param argument="minOverlapLength" type="integer" value="500" min="1" label="Minimum overlap" />
+        <param argument="corOutCoverage" type="integer" value="40" min="1" label="Target coverage for corrected reads" />
+        <param argument="-s" type="data" format="txt" optional="true" label="Additional options" help="Additional specifications provided in a canu spec file." />
+
+        <!-- The five values below are joined, in this order, into the single contigFilter option. -->
+        <section name="contigFilter" title="Contig Filters">
+            <param argument="minReads" type="integer" value="2" min="0" label="Minimum reads" />
+            <param argument="minLength" type="integer" value="0" min="0" label="Minimum length" />
+            <param argument="singleReadSpan" type="float" value="1.0" min="0.0" max="1.0" label="Maximum single read span (fraction)" />
+            <param argument="lowCovSpan" type="float" value="0.5" min="0.0" max="1.0" label="Low coverage span (fraction)" />
+            <param argument="lowCovDepth" type="integer" value="5" min="0" label="Low coverage depth" />
+        </section>
+    </inputs>
+    <!-- Each output is collected from canu's out_dir; the label suffix names the file type. -->
+    <outputs>
+        <data name="contigs" format="fasta" from_work_dir="out_dir/canu.contigs.fasta" label="${tool.name} on ${on_string} (contigs)">
+        </data>
+        <data name="unassembled" format="fasta" from_work_dir="out_dir/canu.unassembled.fasta" label="${tool.name} on ${on_string} (unassembled)">
+        </data>
+        <data name="unitigs" format="fasta" from_work_dir="out_dir/canu.unitigs.fasta" label="${tool.name} on ${on_string} (unitigs)">
+        </data>
+        <data name="corrected_reads" format="fasta.gz" from_work_dir="out_dir/canu.correctedReads.fasta.gz" label="${tool.name} on ${on_string} (corrected reads)">
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- full pipeline on a single FASTA input, raising only the minimum read length -->
+            <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
+            <param name="genomeSize" value="4.6m" />
+            <param name="minReadLength" value="2000" />
+            <output name="contigs" ftype="fasta" file="ecoli_canu_contigs.fa"/>
+            <output name="unitigs" ftype="fasta" file="ecoli_canu_unitigs.fa"/>
+            <output name="unassembled" ftype="fasta" file="ecoli_unassembled.fa"/>
+        </test>
+
+        <test>
+            <!-- full pipeline with explicit overlap length, error rates and corrected-read coverage -->
+            <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
+            <param name="genomeSize" value="4.6m" />
+            <param name="minReadLength" value="2000" />
+            <param name="minOverlapLength" value="800" />
+            <param name="rawErrorRate" value="0.2" />
+            <param name="correctedErrorRate" value="0.05" />
+            <param name="corOutCoverage" value="2" />
+            <output name="contigs" ftype="fasta" file="canu_contigs_result1.fa"/>
+            <output name="unitigs" ftype="fasta" file="canu_unitigs_result1.fa"/>
+            <output name="unassembled" ftype="fasta" file="canu_unassembled_result1.fa"/>
+        </test>
+        <test>
+            <!-- correction stage only, with a contig filter override; checks the corrected reads -->
+            <param name="inputs" ftype="fasta" value="ecoli-reads.fasta"/>
+            <param name="minReadLength" value="2000" />
+            <param name="stage" value="-correct"/>
+            <param name="genomeSize" value="4.6m" />
+            <section name="contigFilter">
+                <param name="minReads" value="10" />
+            </section>
+            <output name="corrected_reads" ftype="fasta.gz" decompress="True" file="canu_corrected_reads.fa.gz"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+
+        Canu specializes in assembling PacBio or Oxford Nanopore sequences. Canu operates in three phases: correction, trimming and assembly.
+        The correction phase will improve the accuracy of bases in reads. The trimming phase will trim reads to the portion that appears to
+        be high-quality sequence, removing suspicious regions such as remaining SMRTbell adapter. The assembly phase will order the reads
+        into contigs, generate consensus sequences and create graphs of alternate paths.
+
+        For eukaryotic genomes, coverage of more than 20x is enough to outperform current hybrid methods; however, between 30x and 60x
+        coverage is the recommended minimum. More coverage will let Canu use longer reads for assembly, which will result in better assemblies.
+
+        http://canu.readthedocs.io
+
+    ]]>
+    </help>
+    <!-- Publications associated with this tool, each referenced by DOI. -->
+    <citations>
+        <citation type="doi">10.1101/gr.215087.116</citation>
+        <citation type="doi">10.1093/bioinformatics/btw753</citation>
+        <citation type="doi">10.1038/nbt.3238</citation>
+        <citation type="doi">10.1126/science.287.5461.2196</citation>
+        <citation type="doi">10.1038/nmeth.4035</citation>
+        <citation type="doi">10.1038/nmeth.2474</citation>
+    </citations>
+</tool>