Mercurial > repos > iuc > spades_coronaspades

<tool id="spades_coronaspades" name="coronaSPAdes" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>SARS-CoV-2 de novo genome assembler</description>
    <!-- Shared definitions are pulled in from macros.xml. No macro is defined locally in this
         file, so every <expand macro="..."/> below resolves against that import. The @NAME@
         placeholders (e.g. @TOOL_VERSION@, @VERSION_SUFFIX@) are presumably tokens defined
         there as well; macros.xml is not part of this file, so confirm there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Requirements, stdio handling and the version command are not spelled out here;
         their content comes from the imported macros. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <!-- Command template. It uses Cheetah directives (#set / #if / #end if); lines starting
         with ## are template comments. Flow as written below:
           1. $library is initialised to 1 (presumably a counter consumed by the expanded
              tokens; confirm in macros.xml).
           2. The @PREPROCESS_*@ tokens are emitted before the assembler call. The
              additional-reads variant is only included when
              $additional_reads.selector == 'true'.
           3. coronaspades.py is invoked with the output directory fixed to 'output',
              followed by the read, parameter and post-processing tokens in that order.
         None of the @TOKEN@ placeholders are defined in this file.
         A non-zero exit code is treated as a failure (detect_errors="exit_code"). -->
    <command detect_errors="exit_code"><![CDATA[

#set $library = 1

@PREPROCESS_INPUT_FILES_MAIN@
#if $additional_reads.selector == 'true'
    @PREPROCESS_INPUT_FILES_ADDITIONAL@
#end if
@PREPROCESS_NANOPORE_PACBIO_FILES@
@PREPROCESS_CONTIGS_FILES@
@PREPROCESS_SANGER_FILES@


## run
coronaspades.py
    -o 'output'
    @RESOURCES@
    @INPUT_READS_MAIN@
    #if $additional_reads.selector == 'true'
        @INPUT_READS_ADDITIONAL@
    #end if
    ## additional reads
    @NANOPORE_PACBIO@
    @SANGER@
    @CONTIGS@
    ## parameter
    @KMER@
    @PHREDOFFSET@
    @PIPELINE_OPTIONS@
    ## postprocessing
    @STATS@
    @CORRECTED@
    ]]></command>
    <inputs>
        <!-- Main read set and an additional read set. Both macros receive the same accepted
             formats and the same label. The command template only uses the additional set
             when additional_reads.selector is 'true'. -->
        <expand macro="input_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTA/FASTQ RNA-seq file(s)"/>
        <expand macro="input_additional_files_all" format="fastq,fastq.gz,fastqsanger.gz,fasta,fasta.gz" label="FASTA/FASTQ RNA-seq file(s)"/>
        <!-- Nanopore/PacBio, Sanger and contig inputs are grouped in one section named "arf";
             the tests address these parameters through <section name="arf">. -->
        <section name="arf" title="Additional read files">
            <expand macro="nanopore_pacbio"/>
            <expand macro="sanger"/>
            <expand macro="contigs"/>
        </section>
        <expand macro="kmer"/>
        <expand macro="phred"/>
        <!-- The pipeline_options macro is expanded with one extra option (the IonTorrent flag)
             supplied as a child element; presumably it is inserted where the macro yields.
             Confirm in macros.xml. -->
        <expand macro="pipeline_options">
            <option value="--iontorrent">Iontorrent: although coronaSPAdes supports IonTorrent reads, it was not sufficiently tested on such kind of data (--iontorrent)</option>
        </expand>
        <!-- Selects which result files are offered. At least one value must be chosen
             (optional="false"); sc, rs, b and dg are pre-selected, the log (l) is opt-in.
             The values presumably pair with the out_sc / out_rs / out_b / out_dg / out_l
             output macros; the filters themselves are not visible in this file. -->
        <param name="optional_output" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific run.">
            <option value="sc" selected="true">Scaffolds</option>
            <option value="rs" selected="true">Raw scaffolds</option>
            <option value="b" selected="true">HMM statistics</option>
            <option value="dg" selected="true">Domain graph</option>
            <option value="l">Log</option>
        </param>
    </inputs>
    <!-- One output macro per entry of the optional_output select (sc, rs, b, dg, l).
         The dataset definitions and any filters live in the macros, not in this file;
         the tests refer to the resulting outputs by the names out_b, out_sc, out_rs and
         out_dg. -->
    <outputs>
        <expand macro="out_sc"/>
        <expand macro="out_rs"/>
        <expand macro="out_b"/>
        <expand macro="out_dg"/>
        <expand macro="out_l"/>
    </outputs>
    <tests>
        <!-- #1 single, unpaired, fastq.gz, default parameters.
             Four outputs are expected because optional_output pre-selects sc, rs, b and dg;
             the log is not selected by default. -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="covid.fastq.gz"/>
            </conditional>
            <output name="out_b">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression="bCoV_NSP1 - Betacoronavirus replicase NSP1"/>
                </assert_contents>
            </output>
            <output name="out_sc">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE_1_length_1009_cluster_1_candidate_1_domains_2"/>
                </assert_contents>
            </output>
            <output name="out_rs">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <!-- The expression is a regular expression: the dot of the coverage value
                         is escaped so that it only matches a literal "." and not any character. -->
                    <has_text_matching expression=">NODE_1_length_1009_cov_429\.12"/>
                </assert_contents>
            </output>
            <output name="out_dg">
                <assert_contents>
                    <has_text_matching expression="bCoV_NSP1 13_2_1 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2 hybrid assembly fastq.gz: nanopore, pacbio, sanger, contigs.
             Same single-end input as test #1, plus every parameter of the "arf" section.
             The assertions are the same as in test #1; four outputs are expected because
             optional_output is left at its default selection (sc, rs, b, dg). -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="covid.fastq.gz"/>
            </conditional>
            <section name="arf">
                <param name="nanopore" value="covid.fastq.gz"/>
                <param name="pacbio" value="covid.fastq.gz"/>
                <param name="sanger" value="covid.fastq.gz"/>
                <param name="trusted_contig" value="covid_scaffold.fasta"/>
            </section>
            <output name="out_b">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression="bCoV_NSP1 - Betacoronavirus replicase NSP1"/>
                </assert_contents>
            </output>
            <output name="out_sc">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE_1_length_1009_cluster_1_candidate_1_domains_2"/>
                </assert_contents>
            </output>
            <output name="out_rs">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <!-- The expression is a regular expression: the dot of the coverage value
                         is escaped so that it only matches a literal "." and not any character. -->
                    <has_text_matching expression=">NODE_1_length_1009_cov_429\.12"/>
                </assert_contents>
            </output>
            <output name="out_dg">
                <assert_contents>
                    <has_text_matching expression="bCoV_NSP1 13_2_1 1"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@HELP_WID@

Due to increased interest in coronavirus research, a coronavirus assembly mode for the SPAdes assembler has been developed (a.k.a. coronaSPAdes).
It enables assembly of full-length Coronaviridae genomes from transcriptomic and metatranscriptomic data. Algorithmically, coronaSPAdes is an HMM-guided assembler that builds upon the biosyntheticSPAdes idea of using gene- or organism-specific profile HMMs to enhance assembly.

**Input**

coronaSPAdes works with Illumina or IonTorrent reads in a single paired-end library and is capable of providing hybrid assemblies using PacBio, Oxford Nanopore and TSLR reads.

Input data can be provided as interlaced, forward and reverse, merged and unpaired files.

**Output**

- Scaffolds: set of putative virus sequences derived from HMM matches.
- Raw scaffolds: full set of scaffolds from input data (derived without HMMs).
- Domain statistics: various information about HMM alignments, including the coordinates of the matches, their order, etc.
- HMM statistics: contains statistics about BGC composition in the sample. First, it outputs number of domain hits in the sample. Then, for each BGC candidate we output domain order with positions on the corresponding DNA sequence from scaffolds.fasta.

.. class:: infomark

**References**

More information can be found on `github <https://github.com/ablab/spades>`_  and on the `project website <https://cab.spbu.ru/software/coronaspades/>`_.
    ]]></help>
    <!-- Expands the shared citations macro and passes one tool-specific DOI as a child
         element; presumably it is added alongside the citations the macro already provides.
         Confirm in macros.xml. -->
    <expand macro="citations">
        <citation type="doi">10.1093/bioinformatics/btab597</citation>
    </expand>
</tool>
author	iuc
date	Mon, 09 May 2022 22:52:29 +0000
parents	7004279ff8b7
children	ff41a00a6745