view rnaspades.xml @ 6:b66de1e9abfb draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit 8734db131db6f76697b500b30f18ee7723d61813"
author iuc
date Sun, 23 Jan 2022 21:32:25 +0000
parents 1035adb112c0
children 675ee1aa5952
line wrap: on
line source

<tool id="rnaspades" name="rnaSPAdes" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <!-- One-line summary of the tool. -->
    <description>de novo transcriptome assembler</description>
    <!-- Shared definitions are imported from macros.xml. Every expand element and every
         token used in this file resolves against that import; its content is not part
         of this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The three sections below are taken from the shared macros. Judging by the macro
         names they provide the requirements, stdio and version_command sections;
         confirm in macros.xml. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[

## Library counter initialised to 1. It is not referenced again in this file;
## presumably the input macros from macros.xml read it - TODO confirm.
#set $library = 1

## Input staging. Each token below is replaced by code defined in macros.xml
## (not visible here), so the order of these lines is kept exactly as is.
## The additional read libraries are staged only when the user enabled them.
@PREPROCESS_INPUT_FILES_MAIN@
#if $additional_reads.selector == 'true'
    @PREPROCESS_INPUT_FILES_ADDITIONAL@
#end if
@PREPROCESS_NANOPORE_PACBIO_FILES@
@PREPROCESS_CONTIGS_FILES@
@PREPROCESS_FL_RNA_FILES@


## Assembler call. Results are written to the directory 'output', which is the
## directory the outputs section collects files from (from_work_dir).
rnaspades.py
    -o 'output'    
    @RESOURCES@
    ## read libraries: main library first, additional library only if enabled
    @INPUT_READS_MAIN@
    #if $additional_reads.selector == 'true'
        @INPUT_READS_ADDITIONAL@
    #end if
    ## supporting data from the 'arf' section of the inputs
    @FL_RNA@
    @NANOPORE_PACBIO@
    @CONTIGS@
    ## assembly parameters
    @KMER@
    @PIPELINE_OPTIONS@
    @PHREDOFFSET@
    ## strand specificity is passed only for the select values fr and rf;
    ## the value 'no' (Disabled) leaves the option off the command line
    #if $ss != 'no'
        --ss '$ss'
    #end if
    ## postprocessing (macro-defined; presumably handles the corrected reads - confirm in macros.xml)
    @CORRECTED@
    ]]></command>
    <inputs>
        <!-- Main and additional read libraries. The format lists accept FASTQ and FASTA
             datasets, so the label names both. -->
        <expand macro="input_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTQ/FASTA RNA-seq file(s)"/>
        <expand macro="input_additional_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTQ/FASTA RNA-seq file(s)"/>
        <!-- Optional supporting data, grouped in one section; the parameters themselves
             are defined by the flrna, nanopore_pacbio and contigs macros. -->
        <section name="arf" title="Additional read files">
            <expand macro="flrna"/>
            <expand macro="nanopore_pacbio"/>
            <expand macro="contigs"/>
        </section>
        <expand macro="kmer" help="By default rnaSPAdes uses 2 k-mer sizes, which are automatically detected using read length (approximately one third and half of the maximal read length). We recommend not to change this parameter because smaller k-mer sizes typically result in multiple chimeric (misassembled) transcripts."/>
        <expand macro="phred"/>
        <!-- Strand specificity. The command template omits the option for the value 'no'
             and passes 'fr' or 'rf' through unchanged. -->
        <param argument="--ss" type="select" label="Set strand specificity" help="rnaSPAdes supports strand-specific RNA-Seq datasets. Use 'RF' when first read in pair corresponds to reverse gene strand (antisense data, e.g. obtained via dUTP protocol) and 'FR' otherwise. If the dataset is single-end use 'FR' option in case when reads correspond to gene strand and 'RF' otherwise. Note: strand-specificity is not related to, and should not be confused with, the FR and RF orientation of paired reads. RNA-Seq paired-end reads typically have forward-reverse orientation, which is assumed by default and no additional options are needed.">
            <option value="no" selected="true">Disabled</option>
            <option value="fr">FR (normal)</option>
            <option value="rf">RF (antisense)</option>
        </param>
        <!-- Shared pipeline options plus one option that is specific to this tool. -->
        <expand macro="pipeline_options">
            <option value="--iontorrent">Iontorrent: although rnaSPAdes supports IonTorrent reads, it was not sufficiently tested on such kind of data (--iontorrent)</option>
        </expand>
        <!-- Each value selected here enables the output whose filter tests for it in the
             outputs section; 'tr' (Transcripts) is pre-selected. -->
        <param name="optional_output" type="select" multiple="true" optional="false" label="Select optional output file(s)" help="Only shown in history if selected here and generated by the specific run.">
            <option value="hft">Hard filtered transcripts</option>
            <option value="l">Log</option>
            <option value="sft">Soft filtered transcripts</option>
            <option value="tr" selected="true">Transcripts</option>
            <option value="tp">Transcripts paths</option>
        </param>
    </inputs>
    <outputs>
        <!-- Output defined by the shared macros. -->
        <expand macro="out_cr"/>

        <!-- Created only when 'hft' is selected in optional_output. -->
        <data name="out_hft"
              format="fasta"
              from_work_dir="output/hard_filtered_transcripts.fasta"
              label="${tool.name} on ${on_string}: Hard filtered transcripts">
            <filter>'hft' in optional_output</filter>
        </data>

        <!-- Output defined by the shared macros. -->
        <expand macro="out_l"/>

        <!-- Created only when 'sft' is selected in optional_output. -->
        <data name="out_sft"
              format="fasta"
              from_work_dir="output/soft_filtered_transcripts.fasta"
              label="${tool.name} on ${on_string}: Soft filtered transcripts">
            <filter>'sft' in optional_output</filter>
        </data>

        <!-- Created only when 'tr' is selected in optional_output (the default selection). -->
        <data name="out_tr"
              format="fasta"
              from_work_dir="output/transcripts.fasta"
              label="${tool.name} on ${on_string}: Transcripts">
            <filter>'tr' in optional_output</filter>
        </data>

        <!-- Created only when 'tp' is selected in optional_output. -->
        <data name="out_tp"
              format="txt"
              from_work_dir="output/transcripts.paths"
              label="${tool.name} on ${on_string}: Transcripts paths">
            <filter>'tp' in optional_output</filter>
        </data>
    </outputs>
    <tests>
        <!--
        used in a test:
            single library: 12, 1, 2, paired collection
            phred-offset, ss
            pacbio, nanopore, trusted-contigs, fl-rna (command line assertion only)

        not explicitly set in any test:
            single library: merged, s
            untrusted-contigs
            k, disablerr, iontorrent, only-assembler
        -->

        <!-- #1 single library, interlaced paired-end reads (FASTQ), default parameters;
             only the default Transcripts output is expected -->
        <test expect_num_outputs="1">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired_interlaced"/>
                <param name="input1" value="ecoli_1K.fastq.gz"/>
            </conditional>
            <output name="out_tr">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2 single library, separate forward/reverse FASTQ files, custom parameters
             (phred_offset, ss), all five optional outputs selected -->
        <test expect_num_outputs="5">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="ecoli_1K_1.fastq.gz"/>
                <param name="input2" value="ecoli_1K_2.fastq.gz"/>
            </conditional>
            <param name="phred_offset" value="33"/>
            <param name="ss" value="fr"/>
            <param name="optional_output" value="hft,l,sft,tr,tp"/>
            <output name="out_hft">
                <assert_contents>
                    <has_n_lines n="18"/>
                </assert_contents>
            </output>
            <output name="out_sft">
                <assert_contents>
                    <has_n_lines n="18"/>
                </assert_contents>
            </output>
            <output name="out_tr">
                <assert_contents>
                    <has_n_lines n="18"/>
                </assert_contents>
            </output>
            <output name="out_tp">
                <assert_contents>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="out_l">
                <assert_contents>
                    <has_text_matching expression="Thank you for using SPAdes!"/>
                </assert_contents>
            </output>
        </test>
        <!-- #3 single library, separate forward/reverse FASTA files, default parameters -->
        <test expect_num_outputs="1">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="ecoli_1K_1.fasta.gz"/>
                <param name="input2" value="ecoli_1K_2.fasta.gz"/>
            </conditional>
            <output name="out_tr">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #4 paired collection (list:paired with one forward/reverse pair), default parameters -->
        <test expect_num_outputs="1">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired_collection"/>
                <param name="input">
                    <collection type="list:paired">
                        <element name="ecoli.fastq">
                            <collection type="paired">
                                <element name="forward" value="ecoli_1K_1.fastq.gz" ftype="fastqsanger.gz"/>
                                <element name="reverse" value="ecoli_1K_2.fastq.gz" ftype="fastqsanger.gz"/>
                            </collection>
                        </element>
                    </collection>
                </param>
            </conditional>
            <output name="out_tr">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #5 Hybrid assembly: paired FASTA library plus nanopore, pacbio, trusted contigs
             and fl-rna data from the arf section. assert_command only checks that the four
             options appear on the command line; the transcript assertions are the same as
             in the default-parameter tests. -->
        <test expect_num_outputs="1">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="ecoli_1K_1.fasta.gz"/>
                <param name="input2" value="ecoli_1K_2.fasta.gz"/>
            </conditional>
            <section name="arf">
                <param name="nanopore" value="ecoli_1K.fastq.gz"/>
                <param name="pacbio" value="ecoli_1K.fastq.gz"/>
                <param name="trusted_contigs" value="ecoli_1K.fasta.gz"/>
                <param name="flrna" value="ecoli_1K.fasta.gz"/>
            </section>
            <assert_command>
                <has_text text="--nanopore"/>
                <has_text text="--pacbio"/>
                <has_text text="--trusted-contigs"/>
                <has_text text="--fl-rna"/>
            </assert_command>
            <output name="out_tr">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@HELP_WID@

rnaSPAdes is a subtool for de novo transcriptome assembly from RNA-Seq data and is suitable for all kinds of organisms.

**Input**

rnaSPAdes takes as an input at least one paired-end or single-end library. For hybrid assembly you can use PacBio or Oxford Nanopore reads.

In case you have sequenced several RNA-Seq libraries using the same protocol from different tissues / conditions, and the goal is to assemble a total transcriptome,
we suggest to provide all files as a single library. Note, that sequencing using the same protocol implies that the resulting reads have the same length, insert size 
and strand-specificity. Transcript quantification for each sample can be done afterwards by separately mapping reads from each library to the assembled transcripts.

**Output**

@HELP_OUT_AG@
@HELP_OUT_AGS@
@HELP_OUT_CR@
- Hard filtered transcripts includes only long and reliable transcripts with rather high expression
@HELP_OUT_L@
- Soft filtered transcripts includes short and low-expressed transcripts, likely to contain junk sequences
- Transcripts
- Transcripts paths

.. class:: infomark

**References**

More information can be found on `github <https://github.com/ablab/spades>`_ and on the `project website <http://cab.spbu.ru/software/rnaspades>`_.
    ]]></help>
    <!-- Shared citations macro with one DOI citation nested inside it for this tool.
         Presumably the macro inserts the nested citation next to its own entries;
         confirm in macros.xml. -->
    <expand macro="citations">
        <citation type="doi">10.1101/420208</citation>
    </expand>
</tool>