<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for SSAKE, a short DNA sequence assembler.

  The command runs the ssake.py wrapper script with the Python interpreter.
  Inputs are either one unpaired FASTA file or two paired FASTA files; the
  paired modes additionally expose the insert size and the scaffolding
  options (-k, -e, -a, -x). Four datasets are produced: contigs, a log,
  the unacceptable reads and the unassembled reads (singlets).
-->
<tool id="ssake" name="SSAKE" version="0.0.10">
  <description>short DNA sequences assembler</description>
  <requirements>
    <requirement type="package" version="3.8">ssake</requirement>
  </requirements>
  <!--
    Command template (Cheetah).
    * Every parameter declared inside the "kind_of_reads" conditional is
      referenced with its fully qualified name (kind_of_reads.NAME), so the
      template does not depend on Galaxy resolving bare nested names.
    * kind_of_reads.minoverlap (-x, contig merging) and the top-level
      minoverlap (-m, overhang consensus) are two distinct parameters.
    * The seeds option (-s) is emitted only when a seeds dataset is supplied.
  -->
  <command interpreter="python">
    ssake.py
    #if $kind_of_reads.kind_of_reads_select == '0'
      --if_unpaired ${kind_of_reads.infile}
    #else
      --if_paired_r1 ${kind_of_reads.infile_r1}
      --if_paired_r2 ${kind_of_reads.infile_r2}
      --iz ${kind_of_reads.insert_size}
      -k ${kind_of_reads.minnumlinks}
      -e ${kind_of_reads.error}
      -a ${kind_of_reads.maxlinkratio}
      -x ${kind_of_reads.minoverlap}
    #end if
    #if $seeds
      -s $seeds
    #end if
    -w $mindepthofcoverage
    -m $minoverlap
    -o $mincall
    -r $baseratio
    --ignore_header 1
    --kind_of_reads ${kind_of_reads.kind_of_reads_select}
    --out1 $contig
    --out2 $short
    --out3 $singlets
    --logfile $log
  </command>
  <inputs>
    <!-- Read layout selector: 0 = unpaired, 1 = paired/equal, 2 = paired/unequal. -->
    <conditional name="kind_of_reads">
      <param name="kind_of_reads_select" type="select" label="Kind of reads (-p)">
        <option value="0">Unpaired </option>
        <option value="1">Paired and equal (both files must have the same number of sequences, arranged in the same order)</option>
        <option value="2">Paired and unequal (files can have different number of sequences in any order)</option>
      </param>
      <when value="0">
        <param name="infile" type="data" format="fasta" label="Input FASTA file" />
      </when>
      <!-- Both paired modes take the same set of parameters. -->
      <when value="1">
        <param name="infile_r1" type="data" format="fasta" label="Input FASTA file (read 1)" />
        <param name="infile_r2" type="data" format="fasta" label="Input FASTA file (read 2)" />
        <param name="insert_size" type="integer" value="200" label="Library insert size" />
        <param name="minnumlinks" type="integer" value="4" label="Minimum number of links (read pairs) to compute scaffold (-k)" />
        <param name="error" type="float" value="0.75" min="0" max="1" label="Error (%) allowed on mean distance (-e)" />
        <param name="maxlinkratio" type="float" value="0.5" label="Maximum link ratio between two best contig pairs (-a)" />
        <param name="minoverlap" type="integer" value="20" label="Minimum overlap required between contigs to merge adjacent contigs in a scaffold (-x)" />
      </when>
      <when value="2">
        <param name="infile_r1" type="data" format="fasta" label="Input FASTA file (read 1)" />
        <param name="infile_r2" type="data" format="fasta" label="Input FASTA file (read 2)" />
        <param name="insert_size" type="integer" value="200" label="Library insert size" />
        <param name="minnumlinks" type="integer" value="4" label="Minimum number of links (read pairs) to compute scaffold (-k)" />
        <param name="error" type="float" value="0.75" min="0" max="1" label="Error (%) allowed on mean distance (-e)" />
        <param name="maxlinkratio" type="float" value="0.5" label="Maximum link ratio between two best contig pairs (-a)" />
        <param name="minoverlap" type="integer" value="20" label="Minimum overlap required between contigs to merge adjacent contigs in a scaffold (-x)" />
      </when>
    </conditional>
    <!-- Options that apply to every read layout. -->
    <param name="seeds" type="data" format="fasta" optional="true" label="FASTA file containing sequences to use as seeds exclusively (-s)" help="Optional, specify only if different from read set" />
    <param name="mindepthofcoverage" type="integer" value="1" label="Minimum depth of coverage allowed for contigs (-w)" />
    <param name="minoverlap" type="integer" value="20" label="Minimum number of overlapping bases with the seed/contig during overhang consensus build up (-m)" />
    <param name="mincall" type="integer" value="2" label="Minimum number of reads needed to call a base during an extension (-o)" />
    <param name="baseratio" type="float" value="0.7" label="Minimum base ratio used to accept a overhang consensus base (-r)" />
  </inputs>

  <!-- Output datasets; each name matches a variable used in the command template. -->
  <outputs>
    <data name="contig" format="fasta" label="${tool.name} on ${on_string}: contigs" />
    <data name="log" format="txt" label="${tool.name} on ${on_string}: log" />
    <data name="short" format="txt" label="${tool.name} on ${on_string}: unacceptable reads" />
    <data name="singlets" format="fasta" label="${tool.name} on ${on_string}: unassembled reads" />
  </outputs>
  <!-- The help body is reStructuredText: keep it at column 0, its indentation is significant. -->
  <help>
**What it does**

SSAKE is a genomics application for de novo assembly of millions of very short DNA sequences.
It is an easy-to-use, robust, reliable and tractable clustering algorithm for very short sequence reads, such as those generated by Illumina Ltd.

**License and citation**

This Galaxy tool is Copyright © 2012-2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `SSAKE`_, which is licensed separately. Please cite |Warren2007|_.

.. _SSAKE: http://www.bcgsc.ca/platform/bioinfo/software/ssake/
.. |Warren2007| replace:: Warren RL, Sutton GG, Jones SJM, Holt RA. 2007. Assembling millions of short DNA sequences using SSAKE. Bioinformatics. 23(4):500-501
.. _Warren2007: http://bioinformatics.oxfordjournals.org/content/23/4/500
  </help>
</tool>