view shovill.xml @ 9:ee17a294d3a3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/shovill commit 0c2ed111d1186acce23eead97cf255c9af4efad1
author iuc
date Thu, 12 Oct 2023 06:59:42 +0000
parents ad80238462c1
children
line wrap: on
line source

<tool id="shovill" name="Shovill" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <!-- Short summary of the tool -->
    <description>Faster SPAdes assembly of Illumina reads</description>
    <!-- Version tokens: the upstream version and the wrapper revision suffix
         are combined in the version attribute of the tool element -->
    <macros>
        <token name="@TOOL_VERSION@">1.1.0</token>
        <token name="@VERSION_SUFFIX@">2</token>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry -->
    <xrefs>
        <xref type="bio.tools">shovill</xref>
    </xrefs>
    <!-- The shovill package is pinned to the same version token used for the tool version -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">shovill</requirement>
    </requirements>
    <version_command>shovill --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the two read files in the working directory as
        ## fastq_r1.<ext> and fastq_r2.<ext>, where <ext> is the Galaxy
        ## datatype extension of each dataset.
        ##
        ## NOTE: compressed inputs are copied (cp) instead of symlinked (ln -s)
        ## because of this problem with shovill 1.1.0:
        ## https://github.com/tseemann/shovill/issues/157
        ## The copy/link decision is taken separately for each file and covers
        ## every compressed datatype accepted by the inputs.

        #set compressed_exts = ('fastq.gz', 'fastqsanger.gz', 'fastqsanger.bz2')

        ## Pick the forward/reverse datasets from whichever input mode is active
        #if str($library.lib_type) == "paired"
            #set r1 = $library.R1
            #set r2 = $library.R2
        #else if str($library.lib_type) == "collection"
            #set r1 = $library.input1.forward
            #set r2 = $library.input1.reverse
        #end if
        #set r1_ext = $r1.extension
        #set r2_ext = $r2.extension

        #if $r1_ext in $compressed_exts
            cp '$r1' fastq_r1.'$r1_ext' &&
        #else
            ln -s '$r1' fastq_r1.'$r1_ext' &&
        #end if
        #if $r2_ext in $compressed_exts
            cp '$r2' fastq_r2.'$r2_ext' &&
        #else
            ln -s '$r2' fastq_r2.'$r2_ext' &&
        #end if

        ## Sets the memory used by Shovill according to the following conditions
        ## (1) If SHOVILL_RAM is already set, use this value
        ## (2) Otherwise, set based on GALAXY_MEMORY_MB
        ## (3) Or default to 4 GB if GALAXY_MEMORY_MB is unset
        GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-4096}/1024)) &&
        SHOVILL_RAM=\${SHOVILL_RAM:-\${GALAXY_MEMORY_GB}} &&

        shovill
            --outdir 'out'
            --cpus \${GALAXY_SLOTS:-1}
            --ram \${SHOVILL_RAM:-4}
            --R1 'fastq_r1.${r1_ext}'
            --R2 'fastq_r2.${r2_ext}'
            $trim
            --namefmt '$adv.namefmt'
            --depth '$adv.depth'
            ## Optional text parameters are only passed when they are non-empty
            #if $adv.gsize
                --gsize '$adv.gsize'
            #end if
            #if $adv.kmers
                --kmers '$adv.kmers'
            #end if
            #if $adv.opts
                --opts '$adv.opts'
            #end if
            --minlen $adv.minlen
            --mincov $adv.mincov
            --assembler $assembler
            ## The keepfiles flag is only offered when post-assembly correction is on
            #if $adv.keep_files.nocorr == 'no_correction'
                --nocorr
            #else
                $adv.keep_files.keepfiles
            #end if
    ]]></command>
    <inputs>
        <!-- Read source: two individual datasets or one paired collection -->
        <conditional name="library">
            <param name="lib_type"
                   type="select"
                   label="Input reads type, collection or single library"
                   help="Select 'paired end' for a single library or 'collection' for a paired end collection">
                <option value="paired" selected="true">Paired End</option>
                <option value="collection">Paired Collection</option>
            </param>
            <when value="paired">
                <param name="R1"
                       type="data"
                       format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2"
                       label="Forward reads (R1)"
                       help="The file of forward reads in FASTQ format"/>
                <param name="R2"
                       type="data"
                       format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2"
                       label="Reverse reads (R2)"
                       help="The file of reverse reads in FASTQ format"/>
            </when>
            <when value="collection">
                <param name="input1"
                       type="data_collection"
                       collection_type="paired"
                       format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2"
                       label="Paired collection"
                       help="See help section for an explanation of dataset collections"/>
            </when>
        </conditional>

        <!-- Main switches: adaptor trimming and choice of assembler -->
        <param argument="--trim"
               type="boolean"
               truevalue="--trim"
               falsevalue=""
               label="Trim reads"
               help="Use Trimmomatic to remove common adaptors first (default: OFF)"/>
        <param argument="--assembler"
               type="select"
               label="Assembler to use"
               help="Which assembler would you like shovill to use, default is Spades">
            <option value="skesa">skesa</option>
            <option value="megahit">megahit</option>
            <option value="velvet">velvet</option>
            <option value="spades" selected="true">Spades</option>
        </param>

        <!-- Advanced tuning options, collapsed by default -->
        <section name="adv" title="Advanced options" expanded="False">
            <!-- The sanitizer lets every printable character through and rewrites a
                 single quote so the value stays safe inside the single-quoted
                 shell argument used by the command template -->
            <param argument="--namefmt"
                   type="text"
                   value="contig%05d"
                   label="Contig name format"
                   help="Format of contig FASTA IDs in 'printf' style (default: 'contig%05d')">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="&apos;"/>
                    </valid>
                    <mapping initial="none">
                        <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
                    </mapping>
                </sanitizer>
            </param>
            <param argument="--depth"
                   type="integer"
                   value="100"
                   label="Depth"
                   help="Sub-sample --R1/--R2 to this depth. Disable with --depth 0 (default: 100)"/>
            <param argument="--gsize"
                   type="text"
                   value=""
                   label="Estimated genome size"
                   help="An estimate of the final genome size, it will autodetect if this is blank. (default: '')"/>
            <param argument="--kmers"
                   type="text"
                   value=""
                   label="List of kmer sizes to use"
                   help="List of K-mer sizes to use in SPAdes. Blank is AUTO. default: ''"/>
            <param argument="--opts"
                   type="text"
                   value=""
                   label="Extra SPAdes options"
                   help="eg. --plasmid --sc ... (default: '')"/>
            <!-- Keeping the bam files is only offered when correction is enabled -->
            <conditional name="keep_files">
                <param argument="--nocorr"
                       type="select"
                       label="Disable post-assembly correction"
                       help="Disable post assembly correction with pilon (default: ON)">
                    <option value="no_correction" selected="true">No corrections</option>
                    <option value="yes_correction">Post assembly corrections</option>
                </param>
                <when value="no_correction"/>
                <when value="yes_correction">
                    <param argument="--keepfiles"
                           type="boolean"
                           truevalue="--keepfiles"
                           falsevalue=""
                           checked="False"
                           label="Keep bam files"
                           help="Keep bam files only if post-assembly correction is enable"/>
                </when>
            </conditional>
            <param argument="--minlen"
                   type="integer"
                   value="0"
                   label="Minimum contig length"
                   help="Minimum length of contig to be output. 0 is AUTO (default: 0)"/>
            <param argument="--mincov"
                   type="integer"
                   value="2"
                   label="Minimum contig coverage"
                   help="Minimum coverage to call part of a contig. 0 is AUTO (default: 2)"/>
        </section>

        <!-- Controls whether the log dataset is created; not passed to shovill -->
        <param name="log"
               type="boolean"
               checked="true"
               label="Output log file?"
               help="Return the Shovill log file as part of the output. Default is on"/>
    </inputs>
    <outputs>
        <!-- Shovill log; only created when the "log" input is switched on -->
        <data name="shovill_std_log"
              format="txt"
              from_work_dir="out/shovill.log"
              label="${tool.name} on ${on_string} Log file">
            <filter>log == True</filter>
        </data>
        <!-- Assembled contigs and the accompanying graph file -->
        <data name="contigs"
              format="fasta"
              from_work_dir="out/contigs.fa"
              label="${tool.name} on ${on_string}: Contigs"/>
        <data name="contigs_graph"
              format="txt"
              from_work_dir="out/contigs.gfa"
              label="${tool.name} on ${on_string}: Contig Graph"/>
        <!-- Bam file; only created when correction is enabled and keepfiles is checked -->
        <data name="bamfiles"
              format="unsorted.bam"
              from_work_dir="out/shovill.bam"
              label="Bam file for ${tool.name} on ${on_string}">
            <filter> adv['keep_files']['nocorr'] == 'yes_correction' and adv['keep_files']['keepfiles'] == True </filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: Basic test -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger"/>
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger"/>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="Read stats: total_bp = 300000"/>
                    <has_text text="[lighter] Processed 820 reads"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 2: Auto everything + trim + different name format test -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger"/>
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger"/>
            <param name="trim" value="true"/>
            <section name="adv">
                <param name="depth" value="0"/>
                <param name="minlen" value="0"/>
                <param name="mincov" value="0"/>
                <conditional name="keep_files">
                    <param name="nocorr" value="yes_correction"/>
                </conditional>
                <param name="namefmt" value="contig%03d"/>
            </section>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="[trimmomatic] TrimmomaticPE: Completed successfully"/>
                    <has_text text="[lighter] Processed"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                    <has_text text="[bwa+samtools-sort] [samclip] Done."/>
                    <has_text text="[pilon] Mean total coverage:"/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 3: Alternate assembler #1: Megahit -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger"/>
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger"/>
            <param name="assembler" value="megahit"/>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="[lighter] Processed"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text_matching expression="\[megahit\] .* ALL DONE"/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 4: Alternate assembler #2: Skesa -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger"/>
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger"/>
            <param name="assembler" value="skesa"/>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="[lighter] Processed"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[skesa] DONE"/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 5: Alternate assembler #3: Velvet -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq" ftype="fastqsanger"/>
            <param name="R2" value="mutant_R2.fastq" ftype="fastqsanger"/>
            <param name="assembler" value="velvet"/>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="[lighter] Processed"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[velvetg] Final graph has"/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 6: Gzipped input with trimming -->
        <test expect_num_outputs="3">
            <param name="lib_type" value="paired"/>
            <param name="R1" value="mutant_R1.fastq.gz" ftype="fastqsanger.gz"/>
            <param name="R2" value="mutant_R2.fastq.gz" ftype="fastqsanger.gz"/>
            <param name="trim" value="true"/>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <output name="shovill_std_log" ftype="txt">
                <assert_contents>
                    <has_text text="[lighter] Processed"/>
                    <has_text text="[FLASH] FLASH v1.2.11 complete!"/>
                    <has_text text="[spades] ======= SPAdes pipeline finished."/>
                    <has_text text="[shovill] Done."/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4"> <!-- Test 7: Add keepfiles option -->
            <param name="lib_type" value="paired" />
            <param name="R1" value="mutant_R1.fastq.gz" ftype="fastqsanger.gz" />
            <param name="R2" value="mutant_R2.fastq.gz" ftype="fastqsanger.gz" />
            <param name="trim" value="true" />
            <!-- "log" is a top-level input, so it is set outside the "adv" section -->
            <param name="log" value="true"/>
            <section name="adv">
                <!-- Correction must be enabled for the keepfiles option to exist -->
                <conditional name="keep_files">
                    <param name="nocorr" value="yes_correction" />
                    <param name="keepfiles" value="True" />
                </conditional>
            </section>
            <output name="contigs" ftype="fasta">
                <assert_contents>
                    <has_text text="&gt;contig00001"/>
                </assert_contents>
            </output>
            <!-- Fourth output: the bam file, checked by approximate size only -->
            <output name="bamfiles" ftype="unsorted.bam">
                <assert_contents>
                    <has_size value="36359" delta="1000" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
      Synopsis:
      Faster de novo assembly pipeline for Illumina paired end reads based around Spades

      Details and options:
      - Takes paired end Illumina fastq reads
      - Trim reads:   Use Trimmomatic to remove common adaptors first (default: OFF)
      - Output log file:  If set to "Yes", tool will return Shovill's log file as part of the output
      - Assembler:    Which assembler should shovill use from: Skesa, Megahit, Velvet or Spades. Spades is the default.

      Advanced options:
      - Name format:          Format of output contig FASTA IDs in 'printf' style (default: 'contig%05d')
      - Depth:                Sub-sample the reads to this depth. Disable with *Depth: 0* (default: 100)
      - Estimated genome size: An estimate of the final genome size; it will be autodetected if this is blank. (default: '')
      - List of kmers to use: List of K-mer sizes to use in SPAdes. Blank is AUTO. (default: '')
      - Extra SPAdes options: Extra SPAdes options eg. --plasmid --sc ... (default: '')
      - Disable post-assembly correction: Disable post assembly correction with pilon (default: ON)
      - Keep the bam files : Enable to keep mapped files from bwa in the post assembly correction (default: OFF)
      - Minimum contig length:    Minimum length of contig to be output. 0 is AUTO (default: 0)
      - Minimum contig coverage:  Minimum coverage to call part of a contig. 0 is AUTO (default: 2)
      - Spades result to correct: Spades result to correct: before_rr, contigs or scaffolds (default: 'contigs')

    Documentation can be found on Torsten Seemann's `site <https://github.com/tseemann/shovill>`_.
    ]]></help>
    <citations>
        <!-- BibTeX entry of type UNPUBLISHED pointing at the shovill GitHub repository -->
        <citation type="bibtex">
@UNPUBLISHED{Seemann2017,
    author = {Seemann, Torsten},
    title = {Shovill: Faster SPAdes assembly of Illumina reads},
    year = {2017},
    url = {https://github.com/tseemann/shovill},
}
        </citation>
    </citations>
</tool>