Mercurial > repos > iuc > shovill

<tool id="shovill" name="Shovill" version="@TOOL_VERSION@+galaxy1">
    <!-- One-line summary of what the tool does. -->
    <description>Faster SPAdes assembly of Illumina reads</description>
    <macros>
        <!-- Wrapped Shovill version; the @TOOL_VERSION@ token is substituted wherever it is referenced, e.g. in the package requirement below. -->
        <token name="@TOOL_VERSION@">1.0.4</token>
    </macros>
    <requirements>
        <!-- The shovill package is requested at exactly the version held in @TOOL_VERSION@. -->
        <requirement type="package" version="@TOOL_VERSION@">shovill</requirement>
    </requirements>
    <!-- Command used to report the version of the shovill executable. -->
    <version_command>shovill --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Link the forward/reverse reads into the working directory under fixed
        ## names, keeping the dataset extension as the file suffix.
        #if str($library.lib_type) == "paired"
            #set r1_ext = $library.R1.extension
            #set r2_ext = $library.R2.extension
            ln -s '$library.R1' fastq_r1.'$r1_ext' &&
            ln -s '$library.R2' fastq_r2.'$r2_ext' &&
        #else if str($library.lib_type) == "collection"
            #set r1_ext = $library.input1.forward.extension
            #set r2_ext = $library.input1.reverse.extension
            ln -s '$library.input1.forward' fastq_r1.'$r1_ext' &&
            ln -s '$library.input1.reverse' fastq_r2.'$r2_ext' &&
        #end if

        ## Sets the memory used by Shovill according to the following conditions
        ## (1) If SHOVILL_RAM is already set, use this value
        ## (2) Otherwise, set based on GALAXY_MEMORY_MB
        ## (3) Or default to 4 GB if GALAXY_MEMORY_MB is unset
        ## The MB to GB conversion is an integer division, so an allocation
        ## below 1024 MB would otherwise become 0; clamp it to at least 1 GB.
        GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-4096}/1024)) &&
        GALAXY_MEMORY_GB=\$((GALAXY_MEMORY_GB < 1 ? 1 : GALAXY_MEMORY_GB)) &&
        SHOVILL_RAM=\${SHOVILL_RAM:-\${GALAXY_MEMORY_GB}} &&

        ## Run the assembly; all results are written below the 'out' directory.
        shovill
            --outdir 'out'
            --cpus \${GALAXY_SLOTS:-1}
            --ram \${SHOVILL_RAM:-4}
            --R1 fastq_r1.'$r1_ext'
            --R2 fastq_r2.'$r2_ext'
            $trim
            --namefmt '$adv.namefmt'
            --depth '$adv.depth'
            ## Optional text parameters are only passed when they are non-empty.
            #if $adv.gsize
                --gsize '$adv.gsize'
            #end if
            #if $adv.kmers
                --kmers '$adv.kmers'
            #end if
            #if $adv.opts
                --opts '$adv.opts'
            #end if
            $adv.nocorr
            --minlen $adv.minlen
            --mincov $adv.mincov
            --assembler $assembler

    ]]></command>
    <inputs>
        <!-- Read source: either two individual datasets or one paired collection. -->
        <conditional name="library">
            <param name="lib_type" type="select" label="Input reads type, collection or single library" help="Select 'paired end' for a single library or 'collection' for a paired end collection">
                <option value="paired" selected="true">Paired End</option>
                <option value="collection">Paired Collection</option>
            </param>
            <when value="paired">
                <param name="R1" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
                <param name="R2" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
            </when>
            <when value="collection">
                <param name="input1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="Paired collection" help="See help section for an explanation of dataset collections"/>
            </when>
        </conditional>
        <param name="trim" argument="--trim" type="boolean" truevalue="--trim" falsevalue="" label="Trim reads" help="Use Trimmomatic to remove common adaptors first (default: OFF)" />
        <!-- Not a command-line flag: only used by the output filter of the log dataset. -->
        <param name="log" type="boolean" label="Output log file?" checked="true" help="Return the Shovill log file as part of the output. Default is on" />
        <param name="assembler" argument="--assembler" type="select" label="Assembler to use" help="Which assembler would you like shovill to use, default is Spades">
            <option value="skesa">skesa</option>
            <option value="megahit">megahit</option>
            <option value="velvet">velvet</option>
            <option value="spades" selected="true">Spades</option>
        </param>
        <section name="adv" title="Advanced options" expanded="false">
            <param name="namefmt" argument="--namefmt" type="text" value="contig%05d" label="Contig name format" help="Format of contig FASTA IDs in 'printf' style (default: 'contig%05d')" >
                <!-- The value is single-quoted on the command line, so a literal single quote is rewritten to the shell-safe sequence '"'"'. -->
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="&apos;" />
                    </valid>
                    <mapping initial="none">
                        <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;" />
                    </mapping>
                </sanitizer>
            </param>
            <!-- Numeric options below use 0 as their special value (disable / AUTO); negative values have no meaning, hence min="0". -->
            <param name="depth" argument="--depth" type="integer" min="0" value="100" label="Depth" help="Sub-sample --R1/--R2 to this depth. Disable with --depth 0 (default: 100)" />
            <param name="gsize" argument="--gsize" type="text" value="" label="Estimated genome size" help="An estimate of the final genome size, it will autodetect if this is blank. (default: '')" />
            <param name="kmers" argument="--kmers" type="text" value="" label="List of kmer sizes to use" help="List of K-mer sizes to use in SPAdes. Blank is AUTO. default: ''" />
            <param name="opts" argument="--opts" type="text" value="" label="Extra SPAdes options" help="eg. --plasmid --sc ... (default: '')" />
            <!-- Checked by default, so --nocorr is passed unless the user switches this option off. -->
            <param name="nocorr" argument="--nocorr" type="boolean" truevalue="--nocorr" falsevalue="" checked="true" label="Disable post-assembly correction" help="Skip post-assembly correction with pilon. This option is enabled by default, so pilon only runs when it is switched off (default: ON)" />
            <param name="minlen" argument="--minlen" type="integer" min="0" value="0" label="Minimum contig length" help="Minimum length of contig to be output. 0 is AUTO (default: 0)" />
            <param name="mincov" argument="--mincov" type="integer" min="0" value="2" label="Minimum contig coverage" help="Minimum coverage to call part of a contig. 0 is AUTO (default: 2)" />
        </section>
    </inputs>

    <outputs>
        <!-- Shovill's log file; only produced when the "log" input is switched on. -->
        <data name="shovill_std_log"
              format="txt"
              from_work_dir="out/shovill.log"
              label="${tool.name} on ${on_string} Log file">
            <filter>log</filter>
        </data>
        <!-- Assembled contigs, collected from the shovill output directory. -->
        <data name="contigs"
              format="fasta"
              from_work_dir="out/contigs.fa"
              label="${tool.name} on ${on_string}: Contigs"/>
        <!-- Assembly graph (contigs.gfa), collected from the shovill output directory. -->
        <data name="contigs_graph"
              format="txt"
              from_work_dir="out/contigs.gfa"
              label="${tool.name} on ${on_string}: Contig Graph"/>
    </outputs>

    <tests>
        <!-- All tests assemble the same paired "mutant" read set and check both the contig names and key lines of the Shovill log. -->
        <test> <!-- Test 1: Basic test -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger" />
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger" />
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash]"/>
                    <has_text text="[lighter] Processed 1714 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                </assert_contents>
            </output>
        </test>
        <test> <!-- Test 2: Auto everything + trim + different name format test -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger" />
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger" />
            <param name="trim" value="true" />
            <section name="adv">
                <param name="depth" value="0"/>
                <param name="minlen" value="0"/>
                <param name="mincov" value="0"/>
                <param name="nocorr" value="false"/>
                <param name="namefmt" value="contig%03d"/>
            </section>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash] Writing to"/>
                    <has_text text="[trimmomatic] TrimmomaticPE: Completed successfully"/>
                    <has_text text="[lighter] Processed 2000 reads:"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                    <has_text text="[bwa+samtools-sort] [samclip] Done."/>
                    <has_text text="[pilon] Mean total coverage:"/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>
        <test> <!-- Test 3: Alternate assembler #1: Megahit -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger" />
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger" />
            <param name="assembler" value="megahit" />
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash]"/>
                    <has_text text="[lighter] Processed 1714 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text_matching expression="\[megahit\] .* ALL DONE"/>
                </assert_contents>
            </output>
        </test>
        <test> <!-- Test 4: Alternate assembler #2: Skesa -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger" />
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger" />
            <param name="assembler" value="skesa" />
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash]"/>
                    <has_text text="[lighter] Processed 1714 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[skesa] DONE"/>
                </assert_contents>
            </output>
        </test>
        <test> <!-- Test 5: Alternate assembler #3: Velvet -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger" />
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger" />
            <param name="assembler" value="velvet" />
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash]"/>
                    <has_text text="[lighter] Processed 1714 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[velvetg] Final graph has"/>
                </assert_contents>
            </output>
        </test>
        <test> <!-- Test 6: Gzipped input with trimming -->
            <param name="lib_type" value="paired" />
            <!-- The inputs are declared as fastqsanger.gz so the datasets reach the tool with the compressed datatype. -->
            <param name="R1" value="mutant_R1.fastq.gz" ftype="fastqsanger.gz" />
            <param name="R2" value="mutant_R2.fastq.gz" ftype="fastqsanger.gz" />
            <param name="trim" value="true" />
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt" >
                <assert_contents>
                    <has_text text="[mash]"/>
                    <has_text text="[lighter] Processed 1714 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Synopsis:
  Faster de novo assembly pipeline for Illumina paired end reads based around Spades

Details and options:
    - Takes paired end Illumina fastq reads
    - Trim reads:   Use Trimmomatic to remove common adaptors first (default: OFF)
    - Output log file:  If set to "Yes", tool will return Shovill's log file as part of the output
    - Assembler:    Which assembler should shovill use from: Skesa, Megahit, Velvet or Spades. Spades is the default.

Advanced options:
    - Name format:          Format of output contig FASTA IDs in 'printf' style (default: 'contig%05d')
    - Depth:                Sub-sample the reads to this depth. Disable with *Depth: 0* (default: 100)
    - Estimated genomesize: An estimate of the final genome size, it will autodetect if this is blank. (default: '')
    - List of kmers to use: List of K-mer sizes to use in SPAdes. Blank is AUTO. (default: '')
    - Extra SPAdes options: Extra SPAdes options eg. --plasmid --sc ... (default: '')
    - Disable post-assembly correction: Skip post-assembly correction with pilon. This option is enabled by default, so pilon only runs when it is switched off (default: ON)
    - Minimum contig length:    Minimum length of contig to be output. 0 is AUTO (default: 0)
    - Minimum contig coverage:  Minimum coverage to call part of a contig. 0 is AUTO (default: 2)
    - Spades result to correct: Spades result to correct: before_rr, contigs or scaffolds (default: 'contigs')

Documentation can be found on Torsten Seemann's `GitHub site <https://github.com/tseemann/shovill>`_.
    ]]></help>
    <citations>
        <!-- Shovill is cited as an unpublished work via a BibTeX entry that points at its GitHub repository. -->
        <citation type="bibtex">
@UNPUBLISHED{Seemann2017,
    author = {Seemann, Torsten},
    title = {Shovill: Faster SPAdes assembly of Illumina reads},
    year = {2017},
    url = {https://github.com/tseemann/shovill},
}
        </citation>
    </citations>
</tool>
author	iuc
date	Sun, 28 Jun 2020 04:16:16 -0400
parents	d9f6a00b6db7
children	83ead2be47b2