diff rnaspades.xml @ 6:b66de1e9abfb draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit 8734db131db6f76697b500b30f18ee7723d61813"
author iuc
date Sun, 23 Jan 2022 21:32:25 +0000
parents 1035adb112c0
children 675ee1aa5952
line wrap: on
line diff
--- a/rnaspades.xml	Tue Oct 19 15:59:02 2021 +0000
+++ b/rnaspades.xml	Sun Jan 23 21:32:25 2022 +0000
@@ -1,147 +1,246 @@
-<tool id="rnaspades" name="rnaSPAdes" version="3.9.0.3">
-    <description>assembler for RNA-Seq data</description>
-    <xrefs>
-        <xref type="bio.tools">rnaspades</xref>
-    </xrefs>
-    <requirements>
-        <requirement type="package" version="3.9.0">spades</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
+<tool id="rnaspades" name="rnaSPAdes" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+    <description>de novo transcriptome assembler</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
 
-        if [ -n "\$GALAXY_MEMORY_MB" ]; then
-            GALAXY_MEMORY_GB=\$(( GALAXY_MEMORY_MB / 1024 ));
-        fi &&
+#set $library = 1
+
+@PREPROCESS_INPUT_FILES_MAIN@
+#if $additional_reads.selector == 'true'
+    @PREPROCESS_INPUT_FILES_ADDITIONAL@
+#end if
+@PREPROCESS_NANOPORE_PACBIO_FILES@
+@PREPROCESS_CONTIGS_FILES@
+@PREPROCESS_FL_RNA_FILES@
+
 
-        rnaspades.py -o .
-        ## Forces unzipped output, faster
-        --disable-gzip-output
-        $draft $onlyassembler -t \${GALAXY_SLOTS:-4} -m \${GALAXY_MEMORY_GB:-250} $iontorrent -k $kmer
-        ## Sequence files, libraries
-        #for $i, $library in enumerate( $libraries, start=1 ):
-            #if str( $library.lib_type ) == "paired_end":
-                #set prefix = 'pe'
-            #elif str( $library.lib_type ) == "mate_paired":
-                #set prefix = 'mp'
-            #elif str( $library.lib_type ) == "nxmate_paired":
-                #set prefix = 'nxmate'
-            #else:
-                #set prefix = 'hqmp'
-            #end if
-            --$prefix$i-$library.orientation
-            #for $file in $library.files
-                #if $file.file_type.type == "separate":
-                    --$prefix$i-1 fastq:$file.file_type.fwd_reads
-                    --$prefix$i-2 fastq:$file.file_type.rev_reads
-                #elif $file.file_type.type == "interleaved":
-                    --$prefix$i-12 fastq:$file.file_type.interleaved_reads
-                #elif $file.file_type.type == "unpaired":
-                    --$prefix$i-s fastq:$file.file_type.unpaired_reads
-                #elif $file.file_type.type == "paired-collection":
-                    --$prefix$i-1 fastq:$file.file_type.fastq_collection.forward
-                    --$prefix$i-2 fastq:$file.file_type.fastq_collection.reverse
-                #end if
-            #end for
-        #end for
-        #for $contig in $trusted_contigs:
-            #if $contig:
-                --trusted-contigs $contig.extension:$contig
-            #end if
-        #end for
-        #for $contig in $untrusted_contigs:
-            #if $contig:
-                --untrusted-contigs $contig.extension:$contig
-            #end if
-        #end for
-        ]]>
-    </command>
+## run
+rnaspades.py
+    -o 'output'    
+    @RESOURCES@
+    @INPUT_READS_MAIN@
+    #if $additional_reads.selector == 'true'
+        @INPUT_READS_ADDITIONAL@
+    #end if
+    ## additional reads
+    @FL_RNA@
+    @NANOPORE_PACBIO@
+    @CONTIGS@
+    ## parameter
+    @KMER@
+    @PIPELINE_OPTIONS@
+    @PHREDOFFSET@
+    #if $ss != 'no'
+        --ss '$ss'
+    #end if
+    ## postprocessing
+    @CORRECTED@
+    ]]></command>
     <inputs>
-        <param argument="--draft-assembly" checked="False" falsevalue="" label="Draft assembly. Faster, but more error-prone" name="draft" truevalue="--draft-assembly" type="boolean" />
-        <param argument="--only-assembler" checked="False" falsevalue="" label="Run only assembly? (without read error correction)" name="onlyassembler" truevalue="--only-assembler" type="boolean" />
-        <param argument="--iontorrent" checked="False" falsevalue="" label="Libraries are IonTorrent reads?" name="iontorrent" truevalue="--iontorrent" type="boolean" />
-        <param argument="-k" max="127" label="k-mer size (must be odd and less than 128)" name="kmer" type="integer" value="55" />
-        <repeat help="It is not possible to specify only mate-pair libraries. Scaffolds are not produced if neither a paired-end nor a mate-pair library is provided." min="1" name="libraries" title="Libraries">
-            <param label="Library type" name="lib_type" type="select">
-                <option value="paired_end">Paired-end / Single reads</option>
-                <option value="mate_paired">Mate pairs</option>
-                <option value="high_mate_paired">High Quality Mate pairs</option>
-                <option value="nxmate_paired">Lucigen NxMate pairs</option>
-            </param>
-            <param label="Orientation" name="orientation" type="select">
-                <option selected="true" value="fr">-&gt; &lt;- (fr)</option>
-                <option value="rf"><![CDATA[<- -> (rf)]]></option>
-                <option value="ff"><![CDATA[-> -> (ff)]]></option>
-            </param>
-            <repeat min="1" name="files" title="Files">
-                <conditional name="file_type">
-                    <param label="Select file format" name="type" type="select">
-                        <option value="separate">Separate input files</option>
-                        <option value="interleaved">Interleaved files</option>
-                        <option value="unpaired">Unpaired/Single reads</option>
-                        <option value="paired-collection">Paired List Collection</option>
-                    </param>
-                    <when value="separate">
-                        <param format="fastq,fastq.gz" help="FASTQ format" label="Forward reads" name="fwd_reads" type="data" />
-                        <param format="fastq,fastq.gz" help="FASTQ format" label="Reverse reads" name="rev_reads" type="data" />
-                    </when>
-                    <when value="interleaved">
-                        <param format="fastq,fastq.gz" help="FASTQ format" label="Interleaved paired reads" name="interleaved_reads" type="data" />
-                    </when>
-                    <when value="unpaired">
-                        <param format="fastq,fastq.gz" help="FASTQ format" label="Unpaired reads" name="unpaired_reads" type="data" />
-                    </when>
-                    <when value="paired-collection">
-                        <param name="fastq_collection" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Paired-end reads collection" help="FASTQ format" />
-                    </when>
-                </conditional>
-            </repeat>
-        </repeat>
-        <param optional="true" format="fasta,fastq,fastq.gz" label="Trusted contigs" multiple="true" name="trusted_contigs" type="data" />
-        <param optional="true" format="fasta,fastq,fastq.gz" label="Untrusted contigs" multiple="true" name="untrusted_contigs" type="data" />
+        <expand macro="input_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTQ RNA-seq file(s)"/>
+        <expand macro="input_additional_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTQ RNA-seq file(s)"/>
+        <section name="arf" title="Additional read files">
+            <expand macro="flrna"/>
+            <expand macro="nanopore_pacbio"/>
+            <expand macro="contigs"/>
+        </section>
+        <expand macro="kmer" help="By default rnaSPAdes uses 2 k-mer sizes, which are automatically detected using read length (approximately one third and half of the maximal read length). We recommend not to change this parameter because smaller k-mer sizes typically result in multiple chimeric (misassembled) transcripts."/>
+        <expand macro="phred"/>
+        <param argument="--ss" type="select" label="Set strand specificity" help="rnaSPAdes supports strand-specific RNA-Seq datasets. Use 'RF' when first read in pair corresponds to reverse gene strand (antisense data, e.g. obtained via dUTP protocol) and 'FR' otherwise.  If the dataset is single-end use 'FR' option in case when reads correspond to gene strand and 'RF' otherwise. Note: strand-specificity is not related and should not be confused with FR and RF orientation of paired reads. RNA-Seq paired-end reads typically have forward-reverse orientation, which is assumed by default and no additional options are needed">
+            <option value="no" selected="true">Disabled</option>
+            <option value="fr">FR (normal)</option>
+            <option value="rf">RF (antisense)</option>
+        </param>
+        <expand macro="pipeline_options">
+            <option value="--iontorrent">Iontorrent: although rnaSPAdes supports IonTorrent reads, it was not sufficiently tested on such kind of data (--iontorrent)</option>
+        </expand>
+        <param name="optional_output" type="select" multiple="true" optional="false" label="Select optional output file(s)" help="Only shown in history if selected here and generated by the specific run.">
+            <option value="hft">Hard filtered transcripts</option>
+            <option value="l">Log</option>
+            <option value="sft">Soft filtered transcripts</option>
+            <option value="tr" selected="true">Transcripts</option>
+            <option value="tp">Transcripts paths</option>
+        </param>
     </inputs>
     <outputs>
-        <data format="fasta" label="rnaSPAdes transcripts" name="output_transcripts" from_work_dir="transcripts.fasta" />
+        <expand macro="out_cr"/>
+        <data name="out_hft" format="fasta" from_work_dir="output/hard_filtered_transcripts.fasta" label="${tool.name} on ${on_string}: Hard filtered transcripts">
+            <filter>'hft' in optional_output</filter>
+        </data>
+        <expand macro="out_l"/>
+        <data name="out_sft" format="fasta" from_work_dir="output/soft_filtered_transcripts.fasta" label="${tool.name} on ${on_string}: Soft filtered transcripts">
+            <filter>'sft' in optional_output</filter>
+        </data>
+        <data name="out_tr" format="fasta" from_work_dir="output/transcripts.fasta" label="${tool.name} on ${on_string}: Transcripts">
+            <filter>'tr' in optional_output</filter>
+        </data>
+        <data name="out_tp" format="txt" from_work_dir="output/transcripts.paths" label="${tool.name} on ${on_string}: Transcripts paths">
+            <filter>'tp' in optional_output</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
-            <param name="lib_type" value="paired_end" />
-            <param name="type" value="separate" />
-            <param name="fwd_reads" value="rnaspades-in1-1.fq" ftype="fastq" />
-            <param name="rev_reads" value="rnaspades-in1-2.fq" ftype="fastq" />
-            <output name="output_transcripts" file="rnaspades-out1.fa" ftype="fasta" compare="re_match" lines_diff="1" />
+        <!--
+        used in a test:
+            single library: 12, 1, 2
+            k, phred-offset, disablerr, iontorrent, only-assembler, ss, pacbio, nanopore, trusted-contigs, fl-rna
+
+        not used in a test:
+            single library: merged, s
+            untrusted-contigs
+        -->
+
+        <!-- #1 -->
+        <test expect_num_outputs="1">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="paired_interlaced"/>
+                <param name="input1" value="ecoli_1K.fastq.gz"/>
+            </conditional>
+            <output name="out_tr">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 single, separate, fastq, all outputs, custom parameters -->
+        <test expect_num_outputs="5">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="paired"/>
+                <param name="input1" value="ecoli_1K_1.fastq.gz"/>
+                <param name="input2" value="ecoli_1K_2.fastq.gz"/>
+            </conditional>
+            <param name="phred_offset" value="33"/>
+            <param name="ss" value="fr"/>
+            <param name="optional_output" value="hft,l,sft,tr,tp"/>
+            <output name="out_hft">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                </assert_contents>
+            </output>
+            <output name="out_sft">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                </assert_contents>
+            </output>
+            <output name="out_tr">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                </assert_contents>
+            </output>
+            <output name="out_tp">
+                <assert_contents>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="out_l">
+                <assert_contents>
+                    <has_text_matching expression="Thank you for using SPAdes!"/>
+                </assert_contents>
+            </output>
         </test>
-        <test>
-            <param name="lib_type" value="paired_end" />
-            <param name="type" value="separate" />
-            <param name="fwd_reads" value="rnaspades-in1-1.fq.gz" ftype="fastq.gz" />
-            <param name="rev_reads" value="rnaspades-in1-2.fq.gz" ftype="fastq.gz" />
-            <output name="output_transcripts" file="rnaspades-out1.fa" ftype="fasta" compare="re_match" lines_diff="1" />
+        <!-- #3 single, separate, fasta, default parameters -->
+        <test expect_num_outputs="1">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="paired"/>
+                <param name="input1" value="ecoli_1K_1.fasta.gz"/>
+                <param name="input2" value="ecoli_1K_2.fasta.gz"/>
+            </conditional>
+            <output name="out_tr">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #4 Collection, default parameters -->
+        <test expect_num_outputs="1">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="paired_collection"/>
+                <param name="input">
+                    <collection type="list:paired">
+                        <element name="ecoli.fastq">
+                            <collection type="paired">
+                                <element name="forward" value="ecoli_1K_1.fastq.gz" ftype="fastqsanger.gz"/>
+                                <element name="reverse" value="ecoli_1K_2.fastq.gz" ftype="fastqsanger.gz"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <output name="out_tr">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #5 Hybrid assembly -->
+        <test expect_num_outputs="1">
+            <conditional name="singlePaired">
+                <param name="sPaired" value="paired"/>
+                <param name="input1" value="ecoli_1K_1.fasta.gz"/>
+                <param name="input2" value="ecoli_1K_2.fasta.gz"/>
+            </conditional>
+            <section name="arf">
+                <param name="nanopore" value="ecoli_1K.fastq.gz"/>
+                <param name="pacbio" value="ecoli_1K.fastq.gz"/>
+                <param name="trusted_contigs" value="ecoli_1K.fasta.gz"/>
+                <param name="flrna" value="ecoli_1K.fasta.gz"/>
+            </section>
+            <assert_command>
+                <has_text text="--nanopore"/>
+                <has_text text="--pacbio"/>
+                <has_text text="--trusted-contigs"/>
+                <has_text text="--fl-rna"/>
+            </assert_command>
+            <output name="out_tr">
+                <assert_contents>
+                    <has_n_lines n="18"/>
+                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
-    <help>
+    <help><![CDATA[
+.. class:: infomark
+
 **What it does**
 
-SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes.
+@HELP_WID@
 
-This wrapper runs SPAdes 3.9.0, collects the output, and throws away all the temporary files.
+rnaSPAdes is a subtool for de novo transcriptome assembly from RNA-Seq data and is suitable for all kinds of organisms.
 
-**License**
+**Input**
 
-SPAdes is developed by and copyrighted to Saint-Petersburg Academic University, and is released under GPLv2.
+rnaSPAdes takes as input at least one paired-end or single-end library. For hybrid assembly you can use PacBio or Oxford Nanopore reads.
 
-The original wrapper was written by Lionel Guy, Philip Mabon and was released under the GNU General Public License as published by the Free Software Foundation. The rnaSPAdes extension was developed by the Galaxy team.
+In case you have sequenced several RNA-Seq libraries using the same protocol from different tissues / conditions, and the goal is to assemble a total transcriptome,
+we suggest providing all files as a single library. Note that sequencing using the same protocol implies that the resulting reads have the same length, insert size
+and strand-specificity. Transcript quantification for each sample can be done afterwards by separately mapping reads from each library to the assembled transcripts.
 
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+**Output**
 
-You should have received a copy of the GNU General Public License along with this program.  If not, see http://www.gnu.org/licenses/.
-
-** Acknowledgments **
-
-Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes.
+@HELP_OUT_AG@
+@HELP_OUT_AGS@
+@HELP_OUT_CR@
+- Hard filtered transcripts include only long and reliable transcripts with rather high expression
+@HELP_OUT_L@
+- Soft filtered transcripts include short and low-expressed transcripts, likely to contain junk sequences
+- Transcripts
+- Transcripts paths
 
-Nicola Soranzo fixed various bugs.
-    </help>
-    <citations>
-        <citation type="doi">10.1089/cmb.2012.0021</citation>
-    </citations>
+.. class:: infomark
+
+**References**
+
+More information can be found on `GitHub <https://github.com/ablab/spades>`_ and on the `project website <http://cab.spbu.ru/software/rnaspades>`_.
+    ]]></help>
+    <expand macro="citations">
+        <citation type="doi">10.1101/420208</citation>
+    </expand>
 </tool>