comparison snap_caller.xml @ 0:6231ae8f87b8

Uploaded
author wolma
date Wed, 11 Feb 2015 08:29:02 -0500
parents
children a548b3c6ed00
comparison
equal deleted inserted replaced
-1:000000000000 0:6231ae8f87b8
1 <tool id="read_alignment" name="SNAP Read Alignment">
2 <description>Map sequence reads to a reference genome using SNAP</description>
3 <version_command>mimodd version -q</version_command>
<!-- Cheetah template for one "mimodd snap-batch -s" invocation. The loop emits one double-quoted "snap ..." call per entry of the "datasets" repeat. Every call names the same $outputfile and $oformat and shares the $set.* aligner settings. Paired-end fastq/gz input contributes two read files (ifile1, ifile2); every other case contributes a single ifile. The header, spacing, selectivity, filter-output, sort and -M arguments are only emitted when the corresponding #if condition holds. -->
4 <command>
5 mimodd snap-batch -s
6 ## SNAP calls (considering different cases)
7
8 #for $i in $datasets
9 "snap ${i.mode_choose.mode} '$ref_genome'
10 #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
11 '${i.mode_choose.input.ifile1}' '${i.mode_choose.input.ifile2}'
12 #else:
13 '${i.mode_choose.input.ifile}'
14 #end if
15 --ofile '$outputfile' --iformat ${i.mode_choose.input.iformat} --oformat $oformat
16 --idx-seedsize '$set.seedsize'
17 --idx-slack '$set.slack' --maxseeds '$set.maxseeds' --maxhits '$set.maxhits' --clipping=$set.clipping --maxdist '$set.maxdist' --confdiff '$set.confdiff' --confadapt '$set.confadpt'
18 #if $i.mode_choose.input.header:
19 --header '${i.mode_choose.input.header}'
20 #end if
21 #if $str($i.mode_choose.mode) == "paired":
22 --spacing '$set.sp_min' '$set.sp_max'
23 #end if
24 #if $str($set.selectivity) != "off":
25 --selectivity '$set.selectivity'
26 #end if
27 #if $str($set.filter_output) != "off":
28 --filter-output $set.filter_output
29 #end if
30 #if $str($set.sort) != "off":
31 --sort $set.sort
32 #end if
33 #if $str($set.mmatch_notation) == "general":
34 -M
35 #end if
36 --max-mate-overlap '$set.max_mate_overlap'
37 --verbose
38 "
39 #end for
40 </command>
41
42 <inputs>
43 <!-- mandatory arguments (and mode-conditionals) -->
44
45 <param name="ref_genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that SNAP should align reads against."/>
46
<!-- One repeat entry per input dataset (at least one). The outer conditional "mode_choose" selects single-end or paired-end mode; the nested conditional "input" selects the input format and thereby which file parameters exist. All branches expose a single "ifile" except paired-end fastq/gz, which expose "ifile1" and "ifile2". The "header" parameter is marked optional for SAM/BAM input and carries no optional flag for fastq/gz input. -->
47 <repeat name="datasets" title="datasets" default="1" min="1">
48 <conditional name="mode_choose">
49 <param name="mode" type="select" label="choose mode" help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
50 <option value="single">single-end</option>
51 <option value="paired">paired-end</option>
52 </param>
53
54 <when value="single">
55 <conditional name="input">
56 <param name="iformat" type="select" label="input file format">
57 <option value="bam">BAM</option>
58 <option value="sam">SAM</option>
59 <option value="gz">gz</option>
60 <option value="fastq">fastq</option>
61 </param>
62 <when value="bam">
63 <param name="ifile" type="data" format="bam" label="input file"/>
64 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
65 </when>
66 <when value="sam">
67 <param name="ifile" type="data" format="sam" label="input file"/>
68 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
69 </when>
70 <when value="gz">
71 <param name="ifile" type="data" label="input file"/>
72 <param name="header" type="data" format="sam" label="header file" />
73 </when>
74 <when value="fastq">
75 <param name="ifile" type="data" format="fastq" label="input file"/>
76 <param name="header" type="data" format="sam" label="header file" />
77 </when>
78 </conditional>
79 </when>
80 <when value="paired">
81 <conditional name="input">
82 <param name="iformat" type="select" label="input file format">
83 <option value="bam">BAM</option>
84 <option value="sam">SAM</option>
85 <option value="gz">gz</option>
86 <option value="fastq">fastq</option>
87 </param>
88 <when value="bam">
89 <param name="ifile" type="data" format="bam" label="input file"/>
90 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
91 </when>
92 <when value="sam">
93 <param name="ifile" type="data" format="sam" label="input file"/>
94 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
95 </when>
96 <when value="fastq">
97 <param name="ifile1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
98 <param name="ifile2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
99 <param name="header" type="data" format="sam" label="header file" help="required" />
100 </when>
101 <when value="gz">
102 <param name="ifile1" type="data" label="inputfile with the first set of reads of paired-end data"/>
103 <param name="ifile2" type="data" label="inputfile with the second set of reads of paired-end data"/>
104 <param name="header" type="data" format="sam" label="header file" help="required" />
105 </when>
106 </conditional>
107 </when>
108 </conditional>
109 </repeat>
110
<!-- Output format selector. Its value is inserted as the oformat argument of every snap call in the command template, and the "sam" choice switches the format of the "outputfile" dataset via change_format in the outputs section. -->
111 <param name="oformat" type="select" label="output file format">
112 <option value="bam">BAM</option>
113 <option value="sam">SAM</option>
114 </param>
115
116 <!-- optional arguments -->
117
118 <conditional name="set">
119 <param name="settings_mode" type="select" label="further parameter settings" help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the documentation first.">
120 <option value="default">default settings</option>
121 <option value="change">change settings</option>
122 </param>
123
124 <!-- default settings -->
125
<!-- Default branch: hidden parameters using the same names as the visible parameters of the "change" branch, so the command template can reference $set.* in either mode. Here "selectivity" and "filter_output" are "off", which makes the template omit those arguments, while "sort" is "0" and "mmatch_notation" is "general", so sorting and -M are emitted. -->
126 <when value="default">
127 <param name="seedsize" type="hidden" value="20"/>
128 <param name="slack" type="hidden" value="0.3"/>
129 <param name="sp_min" type="hidden" value="100"/>
130 <param name="sp_max" type="hidden" value="10000"/>
131 <param name="maxdist" type="hidden" value="8"/>
132 <param name="confdiff" type="hidden" value="2"/>
133 <param name="confadpt" type="hidden" value="7"/>
134
135 <param name="maxseeds" type="hidden" value="25"/>
136 <param name="maxhits" type="hidden" value="250"/>
137 <param name="clipping" type="hidden" value="++"/>
138
139 <param name="selectivity" type="hidden" value="off"/>
140 <param name="filter_output" type="hidden" value="off"/>
141 <param name="sort" type="hidden" value="0"/>
142 <param name="mmatch_notation" type="hidden" value="general"/>
143 <param name="max_mate_overlap" type="hidden" value="0" />
144 </when>
145
146 <!-- change settings -->
147
148 <when value="change">
149 <param name="seedsize" type="integer" value="20" label="seed size (default: 20)" help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
150 <param name="slack" type="float" value="0.3" label="hash table slack size (default: 0.3)" help="Corresponds to the -h option of SNAP index."/>
151
152 <!-- paired-end specific options -->
153 <param name="sp_min" type="integer" value="100" label="minimum spacing to allow between paired ends (default: 100)" help="Corresponds to the first value of the SNAP option -s."/>
154 <param name="sp_max" type="integer" value="10000" label="maximum spacing to allow between paired ends (default: 10000)" help="Corresponds to the second value of the SNAP option -s."/>
155 <param name="max_mate_overlap" type="float" value="0" label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)" help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out" />
156
157 <param name="maxdist" type="integer" value="8" label="edit distance (default: 8)" help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
158 <param name="maxhits" type="integer" value="250" label="maximum hits per seed (default: 250)" help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
159 <param name="confdiff" type="integer" value="2" label="confidence threshold (default: 2)" help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
160 <param name="confadpt" type="integer" value="7" label="adaptive confdiff behaviour (default: 7)" help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read; helps fine-tuning alignment accuracy in repetitive regions of the genome."/>
161 <param name="maxseeds" type="integer" value="25" label="maximum seeds per read (default: 25)" help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
162 <param name="clipping" type="select" label="read clipping (default: from back and front)" help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
163 <option value="++">from back and front</option>
164 <option value="-+">from back only</option>
165 <option value="+-">from front only</option>
166 <option value="--">no clipping</option>
167 </param>
168 <param name="selectivity" type="integer" value="1" label="selectivity (default: 1)" help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with." />
<!-- Output filter selector. The command template passes the chosen value as the filter-output argument of each snap call and omits that argument when the value is "off". -->
169 <param name="filter_output" type="select" label="filter output (default: no filtering)" help="filter output (SNAP option -F) for certain classes of reads.">
170 <option value="off">no filtering</option>
171 <option value="a">aligned only</option>
172 <option value="s">single-aligned only</option>
173 <option value="u">unaligned only</option>
174 </param>
175 <param name="sort" type="select" label="output sorting (default: sort by read coordinates)" help="Sort the output file by alignment location (SNAP option --so).">
176 <option value="0">sort by read coordinates</option>
177 <option value="off">no sorting</option>
178 </param>
179 <param name="mmatch_notation" type="select" label="CIGAR symbols for alignment matches/mismatches (default: M notation)" help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!" >
180 <option value="general">use M for both matches and mismatches</option>
181 <option value="differentiate">use = for matches, X for mismatches</option>
182 </param>
183 </when>
184 </conditional>
185 </inputs>
186
<!-- Single output dataset "outputfile", referenced by the ofile argument in the command template. Its format is "bam" unless the "oformat" input is "sam", in which case change_format switches it to "sam". -->
187 <outputs>
188 <data name="outputfile" format="bam" label="Aligned reads from MiModd ${tool.name} on ${on_string}">
189 <change_format>
190 <when input="oformat" value="sam" format="sam"/>
191 </change_format>
192 </data>
193 </outputs>
194
195 <help>
196 .. class:: infomark
197
198 **What it does**
199
200 The tool aligns the sequenced reads in an arbitrary number of input datasets against a common reference genome and stores the results in a single, possibly multi-sample output file. It supports a variety of different sequenced reads input formats, i.e., SAM, BAM, fastq and gzipped fastq, and both single-end and paired-end data.
201
202 Internally, the tool uses the ultrafast, hashtable-based aligner SNAP (http://snap.cs.berkeley.edu), hence its name.
203
204 **Notes:**
205
206 1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to align gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.
207
208 2) To use paired-end fastq data with the tool the read mate information needs to be split over two fastq files in corresponding order.
209
210 **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
211
212 3) The tool supports the alignment of reads from the same sequencing run, but distributed across several input files.
213
214 Generally, it expects the reads from each input dataset to belong to one read-group and will abort with an error message if any input dataset declares more than one read-group or sample name in its header. Different datasets, however, are allowed to contain reads from the same read-group (as indicated by matching read-group IDs and sample names in their headers), in which case the reads will be combined into one group in the output.
215
216 4) Read-group information is required for every input dataset!
217
218 We generally recommend storing NGS datasets in SAM/BAM format with run metadata stored in the file header. You can use the *NGS Run Annotation* and *Convert* tools to convert data in fastq format to SAM/BAM with added run information.
219
220 While it is not our recommended approach, you can, if you prefer it, align reads from fastq files or SAM/BAM files without header read-group information. To do so, you **must** specify a SAM file that provides the missing information in its header along with the input dataset. You can generate a SAM header file with the *NGS Run Annotation* tool.
221
222 Optionally, a SAM header file can also be used to replace existing read-group information in a headered SAM/BAM input file. This can be used to resolve read-group ID conflicts between multiple input files at tool runtime.
223
224 5) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap-batch``.
225
226 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
227 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
228 .. _MiModD user guide: http://mimodd.readthedocs.org/en/latest
229
230 </help>
231 </tool>
232