diff megahit_wrapper.xml @ 4:de387b2b2803 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megahit commit d44d890566efb82b4ce8a9ed2903b0902f3884ca
author iuc
date Mon, 18 Feb 2019 12:14:46 -0500
parents 98242353faa4
children 7518ee87b53d
line wrap: on
line diff
--- a/megahit_wrapper.xml	Mon May 14 06:11:34 2018 -0400
+++ b/megahit_wrapper.xml	Mon Feb 18 12:14:46 2019 -0500
@@ -1,8 +1,8 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool id="megahit" name="MEGAHIT" version="@VERSION@.3">
+<tool id="megahit" name="MEGAHIT" version="@VERSION@.4">
     <description>for metagenomics assembly</description>
     <macros>
-        <token name="@VERSION@">1.1.2</token>
+        <token name="@VERSION@">1.1.3</token>
     </macros>
     <requirements>
         <requirement type="package" version="@VERSION@">megahit</requirement>
@@ -22,15 +22,20 @@
             -1 '${input_option.batchmode.pair_input.forward}'
             -2 '${input_option.batchmode.pair_input.reverse}'
         #end if
+    #else if $input_option.choice == 'interleaved'
+        --12 '${input_option.interleaved_file}'
     #else
         -r '${input_option.single_files}'
     #end if
     ##basic assembly
     --min-count '${basic_section.min_count}'
-    --k-min '${basic_section.k_min}'
-    --k-max '${basic_section.k_max}'
-    --k-step '${basic_section.k_step}'
-    --min-contig-len '${basic_section.min_contig_len}'
+    #if $basic_section.k_mer.k_mer_method == 'klist_method'
+        --k-list '${basic_section.k_mer.k_list}'
+    #else
+        --k-step '${basic_section.k_mer.k_step}'
+        --k-min '${basic_section.k_mer.k_min}'
+        --k-max '${basic_section.k_mer.k_max}'
+    #end if
     ##advanced assembly
     ${advanced_section.nomercy}
     --bubble-level '${advanced_section.bubble_level}'
@@ -40,17 +45,22 @@
     --low-local-ratio '${advanced_section.low_local_ratio}'
     ${advanced_section.nolocal}
     ${advanced_section.kmin1pass}
+    --min-contig-len '${output_section.min_contig_len}'
 && cat megahit_out/log
     ]]></command>
     <inputs>
         <conditional name="input_option">
             <param name="choice" type="select" label="Select your input option">
                 <option value="single" selected="true">Single</option>
+                <option value="interleaved">Interleaved-paired-end</option>
                 <option value="paired">Paired-end</option>
                 <option value="paired_collection">Paired-end collection</option>
             </param>
             <when value="single">
-                <param name="single_files" argument="-r" type="data" format="fastq,fastqsanger,fasta,fastq.gz,fastqsanger.gz" multiple="true" label="Single-end file(s)" help="FASTQ/FASTA/FASTQ.GZ files accepted" />
+                <param name="single_files" argument="-r" type="data" format="fastq,fastqsanger,fasta,fastq.gz,fastqsanger.gz,fastq.bz2" multiple="true" label="Single-end file(s)" help="Accepts fasta (.fasta, .fa) and fastq (.fastq) formats as well as gzip (.gz) and bzip2 (.bz2) files" />
+            </when>
+            <when value="interleaved">
+                <param name="interleaved_file" argument="--12" type="data" format="fastq,fastqsanger,fasta,fastq.gz,fastqsanger.gz,fastq.bz2" multiple="true" label="Interleaved-paired-end file(s)" help="Accepts fasta (.fasta, .fa) and fastq (.fastq) formats as well as gzip (.gz) and bzip2 (.bz2) files" />
             </when>
             <when value="paired">
                 <param name="fastq_input1" argument="-1" type="data" format="fastq,fastqsanger,fasta,fastq.gz,fastqsanger.gz" multiple="true" label="Mate 1 input reads"/>
@@ -72,14 +82,24 @@
             </when>
         </conditional>
         <section name="basic_section" title="Basic assembly options" expanded="True">
-            <param name="min_contig_len" argument="--min-contig-len" type="integer" value="200" label="minimum length of contigs to output" />
-            <param name="min_count" argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers"/>
-            <param name="k_min" argument="--k-min" type="integer" value="21" label="minimum kmer size" max="255" help="must be odd number"/>
-            <param name="k_max" argument="--k-max" type="integer" value="141" label="maximum kmer size" max="255" help="must be odd number"/>
-            <param name="k_step" argument="--k-step" type="integer" value="12" label="increment of kmer size of each iteration" max="28" help="must be even number"/>
+            <param name="min_count" argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers" help="(kmin+1)-mer with multiplicity lower than d (default 2, specified by --min-count option) will be discarded. You should be cautious to set d less than 2, which will lead to a much larger and noisy graph. We recommend using the default value 2 for metagenomics assembly. If you want to use MEGAHIT to do generic assemblies, please change this value according to the sequencing depth. (recommend --min-count 3 for >40x)."/>
+            <conditional name="k_mer"> <!-- k-mer sizes are given either as an explicit list or as min/max/step -->
+                <param name="k_mer_method" type="select" label="K-mer specification method">
+                    <option value="klist_method">Specify list</option>
+                    <option value="klim_method">Specify min, max, and step values</option>
+                </param>
+                <when value="klist_method">
+                    <param name="k_list" argument="--k-list" type="text" value="21,29,39,59,79,99,119,141" label="comma-separated list of kmer size" help="all must be odd, in the range 15-255, and with increments &lt;= 28"/>
+                </when>
+                <when value="klim_method"> <!-- lower bound of 15 mirrors the 15-255 range stated in the k_list help -->
+                    <param name="k_min" argument="--k-min" type="integer" value="21" label="minimum kmer size." min="15" max="255" help="Must be odd number. For ultra complex metagenomics data such as soil, a larger kmin, say 27, is recommended to reduce the complexity of the de Bruijn graph. Quality trimming is also recommended. For high-depth generic data, large --k-min (25 to 31) is recommended. Smaller --k-step, say 10, is more friendly to low-coverage datasets."/>
+                    <param name="k_max" argument="--k-max" type="integer" value="141" label="maximum kmer size" min="15" max="255" help="must be odd number"/>
+                    <param name="k_step" argument="--k-step" type="integer" value="12" label="increment of kmer size of each iteration" max="28" help="must be even number"/>
+                </when>
+            </conditional>
         </section>
         <section name="advanced_section" title="Advanced assembly options" expanded="False">
-            <param name="nomercy" type="boolean" checked="false" truevalue="--no-mercy" falsevalue=""  label="do not add mercy kmers" />
+            <param name="nomercy" type="boolean" checked="false" truevalue="--no-mercy" falsevalue=""  label="do not add mercy kmers" help="Mercy kmers are specially designed for metagenomics assembly to recover low coverage sequences. For generic dataset >= 30x, MEGAHIT may generate better results with no mercy kmers." />
             <param name="bubble_level" argument="--bubble-level" type="integer" value="2" min="0" max="2" label="intensity of bubble merging (0-2), 0 to disable" />
             <param name="merge_level" argument="--merge-level" type="text" label="merge complex bubbles of length " value="20,0.95" />
             <param name="prune_level" argument="--prune-level" type="integer" value="2" min="0" max="3" label="strength of low depth pruning" />
@@ -88,9 +108,17 @@
             <param name="nolocal" type="boolean" checked="false" truevalue="--no-local" falsevalue="" label="disable local assembly" />
             <param name="kmin1pass" type="boolean" checked="false" truevalue="--kmin-1pass" falsevalue="" label="use 1pass mode to build SdBG of k_min" />
         </section>
+        <section name="output_section" title="Output options" expanded="True"> <!-- Options controlling what the tool returns -->
+            <param name="min_contig_len" argument="--min-contig-len" type="integer" value="200" label="minimum length of contigs to output" />
+            <param name="show_intermediate_contigs" type="boolean" checked="false" label="Return intermediate contigs?"/> <!-- Read by the filter on the intermediate_contigs output collection -->
+        </section>
     </inputs>
     <outputs>
         <data format="fasta" name="output" from_work_dir="megahit_out/final.contigs.fa" label="Assembly with ${tool.name} on ${on_string}" />
+        <collection name="intermediate_contigs" type="list" label="Intermediate contigs from ${tool.name} on ${on_string}"> <!-- Optional list output, gated by the filter below -->
+            <filter>output_section['show_intermediate_contigs']</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*\d)\.contigs\.fa$" ext="fasta" directory="megahit_out/intermediate_contigs" /> <!-- Matches files named STEM.contigs.fa where STEM ends in a digit; STEM is captured as the designation -->
+        </collection>
     </outputs>
     <tests>
         <test>
@@ -105,6 +133,13 @@
             </output>
         </test>
         <test>
+            <conditional name="input_option">
+                <param name="choice" value="interleaved"/>
+                <param name="interleaved_file" value="interleaved-fq.fa"/>
+            </conditional>
+            <output name="output" file="interleaved_result.fa"/>
+        </test>
+        <test>
             <conditional name="input_option">
                 <param name="choice" value="paired"/>
                 <param name="fastq_input1" value="paired-fq1.fa"/>
@@ -113,23 +148,23 @@
             <output name="output" file="paired_result.fa"/>
         </test>
         <test>
-          <conditional name="input_option">
-            <param name="choice" value="paired_collection"/>
-            <conditional name="batchmode">
-              <param name="processmode" value="merge"/>
-              <param name="pair_input_list">
-                <collection type="list:paired">
-                <element name="Pair1">
-                  <collection type="paired">
-                    <element name="forward" value="paired-fq1.fa" ftype="fasta"/>
-                    <element name="reverse" value="paired-fq2.fa" ftype="fasta"/>
-                  </collection>
-                </element>
-                </collection>
-              </param>
+            <conditional name="input_option">
+                <param name="choice" value="paired_collection"/>
+                <conditional name="batchmode">
+                    <param name="processmode" value="merge"/>
+                    <param name="pair_input_list">
+                        <collection type="list:paired">
+                            <element name="Pair1">
+                                <collection type="paired">
+                                    <element name="forward" value="paired-fq1.fa" ftype="fasta"/>
+                                    <element name="reverse" value="paired-fq2.fa" ftype="fasta"/>
+                                </collection>
+                            </element>
+                        </collection>
+                    </param>
+                </conditional>
             </conditional>
-          </conditional>
-          <output name="output" file="paired_result.fa"/>
+            <output name="output" file="paired_result.fa"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -149,4 +184,4 @@
     <citations>
         <citation type="doi">10.1093/bioinformatics/btv033</citation>
     </citations>
-</tool>
+</tool>
\ No newline at end of file