view sequel_wrapper.xml @ 2:208ce57f9221 draft default tip

Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
author crs4
date Fri, 18 Jul 2014 09:19:43 -0400
parents ccadfae70b02
children
line wrap: on
line source

<tool id="sequel_wrapper" name="SEQuel" version="0.2">
  <description></description>
  <!-- Packages that must be available for this tool: BWA, BLAT and SEQuel itself,
       each pinned to the exact version listed below. -->
  <requirements>
    <requirement type="package" version="0.7.7">bwa</requirement>
    <requirement type="package" version="35x1">blat</requirement>
    <requirement type="package" version="1.0.2">sequel</requirement>
  </requirements>
  <!--
    Command line for sequel_wrapper.py, written as a Cheetah template.

    * \${GALAXY_SLOTS:-8} and \$SEQUEL_JAR_PATH are backslash-escaped so that the
      shell, not Cheetah, expands them. The slot count is used for both the -t and
      the -p option and falls back to 8 when GALAXY_SLOTS is unset or empty.
      NOTE(review): SEQUEL_JAR_PATH is presumably exported by the "sequel" package;
      confirm against the package definition.
    * Optional numeric parameters are forwarded only when their string form is
      non-empty, so a blank field leaves the wrapper's own default in effect.
    * Boolean parameters are forwarded as bare flags when checked.
    * The reference genome option is forwarded only when a dataset was selected.
    * Every dataset path (inputs and outputs) is single-quoted, and the jar path
      expansion is double-quoted, so that paths containing whitespace or shell
      metacharacters reach the wrapper as a single argument.
  -->
  <command interpreter="python">
    sequel_wrapper.py -t \${GALAXY_SLOTS:-8} -p \${GALAXY_SLOTS:-8} -u 1
    --sequel_jar_path="\$SEQUEL_JAR_PATH" --read1='$read1' --read2='$read2' --contigs='$contigs'
    #if str($bases_length)
      --bases_length=$bases_length
    #end if
    #if str($kmer_size)
      --kmer_size=$kmer_size
    #end if
    #if str($max_positional_error)
      --max_positional_error=$max_positional_error
    #end if
    #if str($min_fraction)
      --min_fraction=$min_fraction
    #end if
    #if str($min_aln_length)
      --min_aln_length=$min_aln_length
    #end if
    #if str($min_avg_coverage)
      --min_avg_coverage=$min_avg_coverage
    #end if
    #if str($discard_kmers)
      --discard_kmers=$discard_kmers
    #end if
    #if str($discard_positional)
      --discard_positional=$discard_positional
    #end if
    #if str($min_aln_score)
      --min_aln_score=$min_aln_score
    #end if
    #if $single_cell_mode
      --single_cell_mode
    #end if
    #if $report_changes
      --report_changes
    #end if
    #if $extend_contig
      --extend_contig
    #end if
    #if $reference_genome
      --reference_genome='$reference_genome'
    #end if
    --contigs_refined='$contigs_refined'
    --logprep='$logprep'
    --logseq='$logseq'
    --logfile_prep='$logfile_prep'
    --logfile_seq='$logfile_seq'
  </command>

  <!-- User-facing parameters. Each parameter name matches the same-named
       long option that the command template passes to sequel_wrapper.py. -->
  <inputs>
    <!-- Required datasets: the two paired-end read files and the assembled contigs. -->
    <param name="read1" type="data" format="fasta,fastq" label="Paired-end reads 1 from sequencing (-r1)" help="FASTA or FASTQ format" />
    <param name="read2" type="data" format="fasta,fastq" label="Paired-end reads 2 from sequencing (-r2)" help="FASTA or FASTQ format" />
    <param name="contigs" type="data" format="fasta,fastq" label="Contigs from assembly (-c)" help="FASTA or FASTQ format" />

    <!-- Optional numeric settings. The command template forwards each one only
         when the field is non-empty. -->
    <param name="bases_length" type="integer" value="0" optional="true" label="Preprocessing: do not refine contigs shorter than n bases (-l)" help="Contigs shorter than n bases will appear unchanged in the final output file" />

    <param name="kmer_size" type="integer" value="50" optional="true" label="K-mer size (-k)" help="" />

    <param name="max_positional_error" type="integer" value="25" optional="true" label="Max positional error Delta (-d)" help="" />

    <param name="min_fraction" type="float" value="0.9" optional="true" label="Min fraction of matches in alignment (-f)" help="" />

    <!-- Empty by default, so this option is omitted unless the user fills it in.
         NOTE(review): the help text allows "fraction of contig", but the type is
         integer, so a fractional value cannot be entered; confirm which is intended. -->
    <param name="min_aln_length" type="integer" value="" optional="true" label="Min alignment length (-l)" help="bp or fraction of contig. Optional." />

    <param name="min_avg_coverage" type="float" value="20.0" optional="true" label="Min average coverage to incorporate changes (-v)" help="" />

    <param name="discard_kmers" type="integer" value="1" optional="true" label="Discard k-mers observed less than m times (-m)" help="" />

    <param name="discard_positional" type="integer" value="1" optional="true" label="Discard positional k-mers observed less than n times (-n)" help="" />

    <param name="min_aln_score" type="integer" value="1" optional="true" label="Min alignment score (MAPQ) of reads to consider (-q)" help="" />

    <!-- Boolean switches, all off by default; forwarded as bare flags when checked. -->
    <param name="single_cell_mode" type="boolean" checked="false" label="Single cell mode, sort partial-contigs by coverage (-s)" />

    <param name="report_changes" type="boolean" checked="false" label="Report changes (slow) for all input-contigs (-r)" />

    <param name="extend_contig" type="boolean" checked="false" label="Extend contig with flanking regions of alignment (-e)" />

    <!-- Optional dataset; forwarded only when one is selected. -->
    <param name="reference_genome" type="data" format="fasta,twobit" optional="true" label="Evaluate refinement using reference genome (-g)" help="FASTA or 2bit format" />
  </inputs>

  <!-- Output datasets: four plain-text logs and the refined contigs in FASTA format.
       Each output's path is handed to sequel_wrapper.py through the same-named
       long option in the command template. What distinguishes the plain logs from
       the "official" ones is decided by sequel_wrapper.py, which is not shown here. -->
  <outputs>
    <data name="logfile_prep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing)" />
    <data name="logfile_seq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel)" />
    <data name="logprep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing, official)" />
    <data name="logseq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel, official)" />
    <data name="contigs_refined" format="fasta" label="${tool.name} on ${on_string}: refined contigs" />
  </outputs>

  <!-- No functional tests are defined for this tool. -->
  <tests>

  </tests>
  <help>
**What it does**

SEQuel is a tool for correcting errors (i.e., insertions, deletions, and substitutions) in contigs output from assembly. While assemblies of next generation sequencing (NGS) data are accurate, they still contain a substantial number of errors that need to be corrected after the assembly process. The algorithm behind SEQuel makes use of a graph structure called the positional de Bruijn graph, which models k-mers within reads while incorporating their approximate positions into the model.

SEQuel substantially reduces the number of small insertion, deletion and substitution errors in assemblies of both standard (multi-cell) and single-cell sequencing data. SEQuel was tested mainly on Illumina sequence data, in combination with multiple NGS assemblers, such as Euler-SR, Velvet, SoapDeNovo, ALLPATHS and SPAdes.

**Known issues**

.. class:: warningmark

During the pre-processing stage, a SAM file per contig is created. Due to runtime considerations, these files are kept open simultaneously. The program will crash when the number of contigs in the assembly is too high.

**License and citation**

This Galaxy tool is Copyright © 2013-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

You can use this tool only if you agree to the license terms of: `SEQuel`_.

.. _SEQuel: http://bix.ucsd.edu/SEQuel/

If you use this tool, please cite:

- |Cuccuru2014|_
- |Ronen2012|_

.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
.. |Ronen2012| replace:: Ronen R., *et al.* (2012) SEQuel: improving the accuracy of genome assemblies. *Bioinformatics* 28 (12), i188-i196
.. _Ronen2012: http://bioinformatics.oxfordjournals.org/content/28/12/i188
  </help>
</tool>