comparison tools/metag_tools/shrimp_wrapper.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="shrimp_wrapper" name="SHRiMP for Letter-space" version="1.0.0">
2 <description>reads mapping against reference sequence </description>
3 <command interpreter="python">
4 #if ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query
5 #elif ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size
6 #elif ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
7 #elif ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
8 #end if#
9 </command> <!-- Four branches: single/paired reads x default/full settings. Arguments are reference, output1, output2, reads (paired: one comma-joined "end1,end2,insert_size" argument), then the 15 tuning values in "full" mode. Read inputs are referenced through their enclosing type_of_reads conditional. -->
10 <inputs> <!-- Tool form: read layout, reference sequence, and optional SHRiMP tuning parameters. -->
11 <page>
12 <conditional name="type_of_reads"> <!-- Chooses single-end vs. paired-end input; the command template branches on type_of_reads.single_or_paired. -->
13 <param name="single_or_paired" type="select" label="Single- or Paired-ends">
14 <option value="single">Single-end</option>
15 <option value="paired">Paired-end</option>
16 </param>
17 <when value="single">
18 <param name="input_query" type="data" format="fastqsolexa" label="Align sequencing reads" help="No dataset? Read tip below"/>
19 </when>
20 <when value="paired"> <!-- input1, input2 and insertion_size reach the wrapper as a single comma-separated argument. -->
21 <param name="insertion_size" type="integer" size="5" value="600" label="Insertion length between two ends" help="bp" />
22 <param name="input1" type="data" format="fastqsolexa" label="Align sequencing reads, one end" />
23 <param name="input2" type="data" format="fastqsolexa" label="and the other end" />
24 </when>
25 </conditional>
26 <param name="input_target" type="data" format="fasta" label="against reference" /> <!-- Reference FASTA; first argument passed to shrimp_wrapper.py. -->
27 <conditional name="param"> <!-- "skip" adds no tuning arguments; "full" appends the 15 params below to the command line in the order listed here. -->
28 <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
29 <option value="skip">Commonly used</option>
30 <option value="full">Full Parameter List</option>
31 </param>
32 <when value="skip" />
33 <when value="full"> <!-- Defaults here match the values listed in the help section's "SHRiMP parameter list". -->
34 <param name="spaced_seed" type="text" size="30" value="111111011111" label="Spaced Seed" />
35 <param name="seed_matches_per_window" type="integer" size="5" value="2" label="Seed Matches per Window" />
36 <param name="seed_hit_taboo_length" type="integer" size="5" value="4" label="Seed Hit Taboo Length" />
37 <param name="seed_generation_taboo_length" type="integer" size="5" value="0" label="Seed Generation Taboo Length" />
38 <param name="seed_window_length" type="float" size="10" value="115.0" label="Seed Window Length" help="in percentage"/>
39 <param name="max_hits_per_read" type="integer" size="10" value="100" label="Maximum Hits per Read" />
40 <param name="max_read_length" type="integer" size="10" value="1000" label="Maximum Read Length" />
41 <param name="kmer" type="integer" size="10" value="-1" label="Kmer Std. Deviation Limit" help="-1 as None"/>
42 <param name="sw_match_value" type="integer" size="10" value="100" label="S-W Match Value" />
43 <param name="sw_mismatch_value" type="integer" size="10" value="-150" label="S-W Mismatch Value" />
44 <param name="sw_gap_open_ref" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Reference)" />
45 <param name="sw_gap_open_query" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Query)" />
46 <param name="sw_gap_ext_ref" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Reference)" />
47 <param name="sw_gap_ext_query" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Query)" />
48 <param name="sw_hit_threshold" type="float" size="10" value="68.0" label="S-W Hit Threshold" help="in percentage"/>
49 </when>
50 </conditional>
51 </page>
52 </inputs>
53 <outputs> <!-- Two tabular datasets, passed to shrimp_wrapper.py as its 2nd and 3rd arguments. NOTE(review): presumably output1 is the per-base table and output2 the raw SHRiMP listing described in the help; confirm against the wrapper script. -->
54 <data name="output1" format="tabular"/>
55 <data name="output2" format="tabular"/>
56 </outputs>
57 <requirements> <!-- Declares the external rmapper-ls executable (SHRiMP letter-space mapper, per the help text) as a binary requirement. -->
58 <requirement type="binary">rmapper-ls</requirement>
59 </requirements>
60 <tests>
61 <test> <!-- Single-end reads with the default ("skip") settings; only output1 is compared against a reference file. -->
62 <param name="single_or_paired" value="single" />
63 <param name="skip_or_full" value="skip" />
64 <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
65 <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
66 <output name="output1" file="shrimp_wrapper_test1.out1" />
67 </test>
68 <!--
69 <test>
70 <param name="single_or_paired" value="paired" />
71 <param name="skip_or_full" value="skip" />
72 <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
73 <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa" />
74 <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa" />
75 <param name="insertion_size" value="600" />
76 <output name="output1" file="shrimp_wrapper_test2.out1" />
77 </test>
78 <test>
79 <param name="single_or_paired" value="single" />
80 <param name="skip_or_full" value="full" />
81 <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
82 <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
83 <param name="spaced_seed" value="111111011111" />
84 <param name="seed_matches_per_window" value="2" />
85 <param name="seed_hit_taboo_length" value="4" />
86 <param name="seed_generation_taboo_length" value="0" />
87 <param name="seed_window_length" value="115.0" />
88 <param name="max_hits_per_read" value="100" />
89 <param name="max_read_length" value="1000" />
90 <param name="kmer" value="-1" />
91 <param name="sw_match_value" value="100" />
92 <param name="sw_mismatch_value" value="-150" />
93 <param name="sw_gap_open_ref" value="-400" />
94 <param name="sw_gap_open_query" value="-400" />
95 <param name="sw_gap_ext_ref" value="-70" />
96 <param name="sw_gap_ext_query" value="-70" />
97 <param name="sw_hit_threshold" value="68.0" />
98 <output name="output1" file="shrimp_wrapper_test1.out1" />
99 </test>
100 <test>
101 <param name="single_or_paired" value="paired" />
102 <param name="skip_or_full" value="full" />
103 <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
104 <param name="spaced_seed" value="111111011111" />
105 <param name="seed_matches_per_window" value="2" />
106 <param name="seed_hit_taboo_length" value="4" />
107 <param name="seed_generation_taboo_length" value="0" />
108 <param name="seed_window_length" value="115.0" />
109 <param name="max_hits_per_read" value="100" />
110 <param name="max_read_length" value="1000" />
111 <param name="kmer" value="-1" />
112 <param name="sw_match_value" value="100" />
113 <param name="sw_mismatch_value" value="-150" />
114 <param name="sw_gap_open_ref" value="-400" />
115 <param name="sw_gap_open_query" value="-400" />
116 <param name="sw_gap_ext_ref" value="-70" />
117 <param name="sw_gap_ext_query" value="-70" />
118 <param name="sw_hit_threshold" value="68.0" />
119 <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa"/>
120 <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa"/>
121 <param name="insertion_size" value="600" />
122 <output name="output1" file="shrimp_wrapper_test2.out1" />
123 </test>
124 -->
125 </tests>
126 <help>
127
128 .. class:: warningmark
129
130 IMPORTANT: This tool currently only supports data where the quality scores are integers or ASCII-encoded quality scores with an offset of 64. Click the pencil icon next to your dataset to set the datatype to *fastqsolexa*.
131
132
133 -----
134
135 **What it does**
136
137 SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.
138
139 This wrapper post-processes the default SHRiMP/rmapper-ls output and generates a table with all information from reads and reference for the mapping. The tool takes single- or paired-end reads. For single-end reads, only uniquely mapped alignments are considered. For paired-end reads, only pairs that meet the following criteria are used to generate the table: 1) the ends fall within the insertion size; 2) the ends are mapped in opposite directions. If there are still multiple mappings after applying these criteria, the paired-end read is discarded.
140
141
142 -----
143
144 **Input formats**
145
146 A multiple-fastq file, for example::
147
148 @seq1
149 TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
150 +seq1
151 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
152
153
154 -----
155
156 **Outputs**
157
158 The tool gives two outputs.
159
160 **Table output**
161
162 Table output contains 8 columns::
163
164 1 2 3 4 5 6 7 8
165 ----------------------------------------------------
166 chrM 14711 seq1 0 T A 40 1
167 chrM 14712 seq1 1 T T 40 1
168
169 where::
170
171 1. (chrM) - Reference sequence id
172 2. (14711) - Position of the mapping in the reference
173 3. (seq1) - Read id
174 4. (0) - Position of the mapping in the read
175 5. (T) - Nucleotide in the reference
176 6. (A) - Nucleotide in the read
177 7. (40) - Quality score for the nucleotide in the position of the read
178 8. (1) - The number of times this position is covered by reads
179
180
181 **SHRiMP output**
182
183 This is the default output from SHRiMP/rmapper-ls::
184
185 1 2 3 4 5 6 7 8 9 10
186 -------------------------------------------------------------------
187 seq1 chrM + 3644 3679 1 36 36 3600 36
188
189 where::
190
191 1. (seq1) - Read id
192 2. (chrM) - Reference sequence id
193 3. (+) - Strand of the read
194 4. (3644) - Start position of the alignment in the reference
195 5. (3679) - End position of the alignment in the reference
196 6. (1) - Start position of the alignment in the read
197 7. (36) - End position of the alignment in the read
198 8. (36) - Length of the read
199 9. (3600) - Score
200 10. (36) - Edit string
201
202
203 -----
204
205 **SHRiMP parameter list**
206
207 The commonly used parameters with default value setting::
208
209 -s Spaced Seed (default: 111111011111)
210 The spaced seed is a single contiguous string of 0's and 1's.
211 0's represent wildcards, or positions which will always be
212 considered as matching, whereas 1's dictate positions that
213 must match. A string of all 1's will result in a simple kmer scan.
214 -n Seed Matches per Window (default: 2)
215 The number of seed matches per window dictates how many seeds
216 must match within some window length of the genome before that
217 region is considered for Smith-Waterman alignment. A lower
218 value will increase sensitivity while drastically increasing
219 running time. Higher values will have the opposite effect.
220 -t Seed Hit Taboo Length (default: 4)
221 The seed taboo length specifies how many target genome bases
222 or colors must exist prior to a previous seed match in order
223 to count another seed match as a hit.
224 -9 Seed Generation Taboo Length (default: 0)
225
226 -w Seed Window Length (default: 115.00%)
227 This parameter specifies the genomic span in bases (or colours)
228 in which *seed_matches_per_window* must exist before the read
229 is given consideration by the Smith-Waterman alignment machinery.
230 -o Maximum Hits per Read (default: 100)
231 This parameter specifies how many hits to remember for each read.
232 If more hits are encountered, ones with lower scores are dropped
233 to make room.
234 -r Maximum Read Length (default: 1000)
235 This parameter specifies the maximum length of reads that will
236 be encountered in the dataset. If larger reads than the default
237 are used, an appropriate value must be passed to *rmapper*.
238 -d Kmer Std. Deviation Limit (default: -1 [None])
239 This option permits pruning read kmers, which occur with
240 frequencies greater than *kmer_std_dev_limit* standard
241 deviations above the average. This can shorten running
242 time at the cost of some sensitivity.
243 *Note*: A negative value disables this option.
244 -m S-W Match Value (default: 100)
245 The value applied to matches during the Smith-Waterman score calculation.
246 -i S-W Mismatch Value (default: -150)
247 The value applied to mismatches during the Smith-Waterman
248 score calculation.
249 -g S-W Gap Open Penalty (Reference) (default: -400)
250 The value applied to gap opens along the reference sequence
251 during the Smith-Waterman score calculation.
252 *Note*: For backward compatibility, if -g is set
253 and -q is not set, the gap open penalty for the query will
254 be set to the same value as specified for the reference.
255 -q S-W Gap Open Penalty (Query) (default: -400)
256 The value applied to gap opens along the query sequence during
257 the Smith-Waterman score calculation.
258 -e S-W Gap Extend Penalty (Reference) (default: -70)
259 The value applied to gap extends during the Smith-Waterman score calculation.
260 *Note*: For backward compatibility, if -e is set
261 and -f is not set, the gap extend penalty for the query will
262 be set to the same value as specified for the reference.
263 -f S-W Gap Extend Penalty (Query) (default: -70)
264 The value applied to gap extends during the Smith-Waterman score calculation.
265 -h S-W Hit Threshold (default: 68.00%)
266 In letter-space, this parameter determines the threshold
267 score for both vectored and full Smith-Waterman alignments.
268 Any values less than this quantity will be thrown away.
269 *Note*: This option differs slightly in meaning between letter-space and color-space.
270
271
272 -----
273
274 **Reference**
275
276 **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu.
277
278 </help>
279 </tool>