diff flye.xml @ 0:d9f4c141d88a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flye commit 2f6d48e1d2161d03411d9fbb4fc3d16f0fa3d2e1
author bgruening
date Tue, 25 Sep 2018 05:24:27 -0400
parents
children cd256484eb1a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flye.xml	Tue Sep 25 05:24:27 2018 -0400
@@ -0,0 +1,99 @@
+<tool id="flye" name="Assembly" version="2.3.5">
+    <description>of long and error-prone reads</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>flye --version</version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+
+    ## Each input dataset is symlinked into the working directory under a name
+    ## that carries a plain fasta/fastq(.gz) suffix. The Galaxy extension is not
+    ## used verbatim because the accepted "fastq" format also matches subtypes
+    ## (for example fastqsanger) whose extension would end up in the link name.
+    ## NOTE(review): assumes flye infers the read format from the file suffix;
+    ## confirm against the flye version pinned in macros.xml.
+    #set $read_links = []
+    #for $counter, $input in enumerate($inputs):
+        #if $input.is_of_type('fastq.gz'):
+            #set $read_ext = 'fastq.gz'
+        #elif $input.is_of_type('fastq'):
+            #set $read_ext = 'fastq'
+        #elif $input.is_of_type('fasta.gz'):
+            #set $read_ext = 'fasta.gz'
+        #else
+            #set $read_ext = 'fasta'
+        #end if
+        #set $read_link = 'input_%d.%s' % ($counter, $read_ext)
+        #silent $read_links.append($read_link)
+        ln -s '$input' ./${read_link} &&
+    #end for
+
+    flye
+    $mode
+    ## Positional read files: exactly the links created above, in the same order.
+    #for $read_link in $read_links:
+        ./${read_link}
+    #end for
+
+    -o out_dir
+    -g '$g'
+    ## The dollar is escaped so that the shell, not Cheetah, expands GALAXY_SLOTS.
+    -t \${GALAXY_SLOTS:-4}
+    -i $i
+    ## -m is only passed when the optional parameter has a value.
+    #if $m:
+        -m '$m'
+    #end if
+    ## stderr is folded into stdout; failure is detected from the exit code.
+    2>&1
+    ]]></command>
+    <inputs>
+        <!-- One or more read datasets; each one becomes a positional argument of flye. -->
+        <param name="inputs" type="data" format="fasta,fasta.gz,fastq,fastq.gz" multiple="true" label="Input reads" />
+        <!-- Each option value is the literal flye flag that is inserted into the command line. -->
+        <param name="mode" type="select" label="Mode">
+            <option value="--nano-raw">Nanopore raw</option>
+            <option value="--nano-corr">Nanopore corrected</option>
+            <option value="--pacbio-raw">PacBio raw</option>
+            <option value="--pacbio-corr">PacBio corrected</option>
+            <option value="--subassemblies">high-quality contig-like input</option>
+        </param>
+        <!-- Free text checked by the regex: digits with an optional decimal part and an optional k, m or g suffix. -->
+        <param argument="-g" type="text" label="estimated genome size (for example, 5m or 2.6g)">
+            <validator type="regex" message="Genome size must be a float or integer, optionally followed by a unit prefix (k, m or g)">^([0-9]*[.])?[0-9]+[kmg]?$</validator>
+        </param>
+        <!-- A negative iteration count is never meaningful, so the form rejects it up front. -->
+        <param argument="-i" type="integer" min="0" value="1" label="number of polishing iterations" />
+        <!-- Optional: the flag is omitted from the command line when no value is given. -->
+        <param argument="-m" type="integer" optional="true" label="minimum overlap between reads (default: auto)" />
+    </inputs>
+    <outputs>
+        <!-- Every dataset is collected from out_dir, the directory passed to flye with -o. -->
+        <data name="contigs"
+              format="fasta"
+              from_work_dir="out_dir/contigs.fasta"
+              label="${tool.name} on ${on_string} (contigs)" />
+        <data name="scaffolds"
+              format="fasta"
+              from_work_dir="out_dir/scaffolds.fasta"
+              label="${tool.name} on ${on_string} (scaffolds)" />
+        <data name="assembly_info"
+              format="tabular"
+              from_work_dir="out_dir/assembly_info.txt"
+              label="${tool.name} on ${on_string} (assembly_info)" />
+        <data name="assembly_graph"
+              format="graph_dot"
+              from_work_dir="out_dir/assembly_graph.dot"
+              label="${tool.name} on ${on_string} (assembly_graph)" />
+        <data name="flye_log"
+              format="txt"
+              from_work_dir="out_dir/flye.log"
+              label="${tool.name} on ${on_string} (log)" />
+    </outputs>
+    <tests>
+        <!-- Test 1: PacBio raw reads, genome size given with a unit suffix, default iterations. -->
+        <test>
+            <param name="inputs" value="E.coli_PacBio_40x_first_200_reads.fasta" ftype="fasta" />
+            <param name="mode" value="--pacbio-raw" />
+            <param name="g" value="1m" />
+            <output name="contigs" ftype="fasta" file="result1_contigs.fasta" />
+            <output name="scaffolds" ftype="fasta" file="result1_scaffolds.fasta" />
+            <output name="assembly_info" ftype="tabular" file="result1_assembly_info.txt" />
+            <output name="assembly_graph" ftype="graph_dot" file="result1_assembly_graph.dot" compare="sim_size" />
+        </test>
+        <!-- Test 2: Nanopore raw reads, genome size given as a plain integer. -->
+        <test>
+            <param name="inputs" value="Loman_E.coli_MAP006-1_2D_50x_first_500_reads.fasta" ftype="fasta" />
+            <param name="mode" value="--nano-raw" />
+            <param name="g" value="100000" />
+            <output name="contigs" ftype="fasta" file="result2_contigs.fasta" />
+            <output name="scaffolds" ftype="fasta" file="result2_scaffolds.fasta" />
+            <output name="assembly_info" ftype="tabular" file="result2_assembly_info.txt" />
+            <output name="assembly_graph" ftype="graph_dot" file="result2_assembly_graph.dot" compare="sim_size" />
+        </test>
+        <!-- Test 3: PacBio raw reads, fractional genome size and two polishing iterations. -->
+        <test>
+            <param name="inputs" value="E.coli_PacBio_40x_first_200_reads.fasta" ftype="fasta" />
+            <param name="mode" value="--pacbio-raw" />
+            <param name="g" value="1.1m" />
+            <param name="i" value="2" />
+            <output name="contigs" ftype="fasta" file="result3_contigs.fasta" />
+            <output name="scaffolds" ftype="fasta" file="result3_scaffolds.fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Input reads can be in FASTA or FASTQ format, uncompressed
+or compressed with gz. Currently, raw and corrected reads
+from PacBio and ONT are supported. The expected error rates are
+<30% for raw and <2% for corrected reads. Additionally, the
+--subassemblies option performs a consensus assembly of multiple
+sets of high-quality contigs. You may specify multiple
+files with reads (separated by spaces). Mixing different read
+types is not yet supported.
+
+You must provide an estimate of the genome size as input,
+which is used for solid k-mer selection. The estimate can
+be rough (e.g. within a 0.5x-2x range) and does not affect
+the other assembly stages. Standard size modifiers are
+supported (e.g. 5m or 2.6g).
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>