view biosyntheticspades.xml @ 6:d89ced9439f3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit 35f71aa486d8754ee6a8387659032fc7c93d1be3
author iuc
date Wed, 10 Aug 2022 13:17:51 +0000
parents 42a39792aaae
children 604782a8a53a
line wrap: on
line source

<tool id="spades_biosyntheticspades" name="biosyntheticSPAdes" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>biosynthetic gene cluster assembly</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <!--
        Cheetah template for the job command line.
        Order of operations as written below:
          1. the Cheetah variable "library" is set to 1 before any token is expanded
             (presumably consumed by the input-handling tokens; confirm in macros.xml);
          2. input files are prepared for the main library, for the additional
             library (only when additional_reads.selector is 'true') and for the
             Nanopore/PacBio files;
          3. spades.py is run with its bio flag, writing into the directory 'output';
          4. the STATS and CORRECTED tokens follow as post-processing.
        Every @...@ token is defined outside this file (presumably in the imported
        macros.xml), so its exact expansion cannot be checked from here.
    -->
    <command detect_errors="exit_code"><![CDATA[

#set $library = 1

@PREPROCESS_INPUT_FILES_MAIN@
#if $additional_reads.selector == 'true'
    @PREPROCESS_INPUT_FILES_ADDITIONAL@
#end if
@PREPROCESS_NANOPORE_PACBIO_FILES@

@OMP_THREADS@
## run
spades.py --bio
    -o 'output'
    @RESOURCES@
    @INPUT_READS_MAIN@
    #if $additional_reads.selector == 'true'
        @INPUT_READS_ADDITIONAL@
    #end if
    ## reads
    @NANOPORE_PACBIO@
    ## parameter
    @KMER@
    @PHREDOFFSET@
    @PIPELINE_OPTIONS@
    ## postprocessing
    @STATS@
    @CORRECTED@
    ]]></command>
    <inputs>
        <!-- Main short-read library. -->
        <expand macro="input_files_paired"
                format="fastq,fastq.gz,fastqsanger.gz"
                label="FASTQ file(s)"/>
        <!-- Optional additional short-read library; the command template only uses it when additional_reads.selector is 'true'. -->
        <expand macro="input_additional_files_paired"
                format="fastq,fastq.gz,fastqsanger.gz"
                label="FASTQ file(s)"/>
        <!-- Long-read inputs for hybrid assembly. -->
        <section name="arf" title="Additional read files">
            <expand macro="nanopore_pacbio"/>
        </section>
        <!-- Assembly parameters shared with the other SPAdes wrappers. -->
        <expand macro="kmer"/>
        <expand macro="phred"/>
        <!-- The nested option is handed to the pipeline_options macro (presumably appended to its option list; confirm in macros.xml). -->
        <expand macro="pipeline_options">
            <option value="--iontorrent">Iontorrent: required when assembling IonTorrent data (--iontorrent)</option>
        </expand>
        <!-- Output selection: everything except the log is selected by default. -->
        <param name="optional_output"
               type="select"
               multiple="true"
               optional="false"
               label="Select output file(s)"
               help="Only shown in history if selected here and generated by the specific run.">
            <option value="sc" selected="true">Scaffolds</option>
            <option value="rs" selected="true">Raw scaffolds</option>
            <option value="b" selected="true">HMM statistics</option>
            <option value="dg" selected="true">Domain graph</option>
            <option value="l">Log</option>
        </param>
    </inputs>
    <outputs>
        <!--
            One macro per selectable output. The suffixes (sc, rs, b, dg, l) mirror the
            option values of the optional_output parameter; presumably each macro
            filters its dataset on that value (confirm in macros.xml).
            The tests refer to these outputs by the names out_sc, out_rs, out_b and out_dg.
        -->
        <expand macro="out_sc"/>
        <expand macro="out_rs"/>
        <expand macro="out_b"/>
        <expand macro="out_dg"/>
        <expand macro="out_l"/>
    </outputs>
    <tests>
        <!-- #1 one interlaced paired-end library (fastq.gz), default parameters -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="paired_interlaced"/>
                <param name="input1" value="ecoli_1K.fastq.gz"/>
            </conditional>
            <!-- Assertions follow the order in which the outputs are declared. -->
            <!-- Scaffolds: expected to be empty for this data set. -->
            <output name="out_sc">
                <assert_contents>
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
            <!-- Raw scaffolds: 18 lines including a NODE_1 header. -->
            <output name="out_rs">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
            <!-- HMM statistics: a single line. -->
            <output name="out_b">
                <assert_contents>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <!-- Domain graph: two lines containing a digraph declaration. -->
            <output name="out_dg">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_text_matching expression="digraph.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2 one library as separate forward/reverse files (fastq.gz), hybrid assembly with Nanopore reads, IonTorrent mode, manual k-mer -->
        <test expect_num_outputs="2">
            <!-- inputs -->
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="ecoli_1K_1.fastq.gz"/>
                <param name="input2" value="ecoli_1K_2.fastq.gz"/>
            </conditional>
            <section name="arf">
                <param name="nanopore" value="ecoli_1K.fastq.gz"/>
            </section>
            <!-- parameters -->
            <conditional name="kmer_cond">
                <param name="kmer_sel" value="manual"/>
                <param name="manual" value="33"/>
            </conditional>
            <param name="phred_offset" value="33"/>
            <param name="mode_sel" value="--iontorrent"/>
            <param name="optional_output" value="rs,l"/>
            <!-- both optional flags must appear on the generated command line -->
            <assert_command>
                <has_text text="--nanopore"/>
                <has_text text="--iontorrent"/>
            </assert_command>
            <output name="out_rs">
                <assert_contents>
                    <has_n_lines n="18"/>
                </assert_contents>
            </output>
        </test>
        <!-- #3 multiple inputs: main library as separate files plus an additional interlaced library -->
        <test expect_num_outputs="2">
            <!-- main library -->
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="ecoli_1K_1.fastq.gz"/>
                <param name="input2" value="ecoli_1K_2.fastq.gz"/>
            </conditional>
            <!--
                Additional library. The command template only adds additional reads when
                additional_reads.selector is 'true', so the second library has to be set
                inside the additional_reads conditional with the selector enabled.
                Declaring a second top-level singlePaired conditional only repeats the
                main library's parameter names and never reaches the additional-reads path.
                NOTE(review): the nested conditional name (singlePaired) is assumed from the
                input_additional_files_paired macro; confirm against macros.xml, then re-run
                the test to confirm the expected line counts below.
            -->
            <conditional name="additional_reads">
                <param name="selector" value="true"/>
                <conditional name="singlePaired">
                    <param name="sPaired" value="paired_interlaced"/>
                    <param name="input1" value="ecoli_1K.fastq.gz"/>
                </conditional>
            </conditional>
            <param name="optional_output" value="rs,b"/>
            <output name="out_b">
                <assert_contents>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="out_rs">
                <assert_contents>
                    <has_n_lines n="18"/>
                    <has_text_matching expression=">NODE\_1\_length\_1000.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@HELP_WID@

biosyntheticSPAdes is a subtool for biosynthetic gene cluster assembly with paired-end reads.

**Input**

biosyntheticSPAdes works with Illumina or IonTorrent reads in a single paired-end library and is capable of providing hybrid assemblies using PacBio, Oxford Nanopore and TSLR reads.

Input data can be provided as interlaced, forward and reverse, merged and unpaired files.

A detailed description can be found in the `input section <https://github.com/ablab/spades/#sec3.1>`_ of the manual.

**Output**


biosyntheticSPAdes outputs four files of interest:

- Scaffolds: contains DNA sequences from putative biosynthetic gene clusters (BGC). Since each sample may contain multiple BGCs and biosyntheticSPAdes can output several putative DNA sequences for each cluster, for each contig name we append the suffix _cluster_X_candidate_Y, where X is the id of the BGC and Y is the id of the candidate from the BGC.
- Raw scaffolds: SPAdes scaffolds generated without domain-graph related algorithms. Very close to the regular scaffolds.fasta file.
- HMM statistics: contains statistics about BGC composition in the sample. First, it outputs the number of domain hits in the sample. Then, for each BGC candidate, it outputs the domain order with positions on the corresponding DNA sequence from scaffolds.fasta.
- Domain graph: contains the domain graph structure, which can be used to assess the complexity of the sample and the structure of BGCs.

A detailed description can be found in the `output section <https://github.com/ablab/spades/#bgc>`_ of the manual.

.. class:: infomark

**References**

More information can be found on `github <https://github.com/ablab/spades>`_.
    ]]></help>
    <!-- Citations supplied by the citations macro plus one tool-specific DOI (presumably the biosyntheticSPAdes publication; confirm). -->
    <expand macro="citations">
        <citation type="doi">10.1101/gr.243477.118</citation>
    </expand>
</tool>