Mercurial > repos > galaxyp > blast_plus_remote_blastp
comparison tools/blast_plus_remote_blastp.xml @ 0:820c41bff462
Uploaded
author | galaxyp |
---|---|
date | Wed, 01 Oct 2014 20:47:55 -0400 |
parents | |
children | db990c5edc14 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:820c41bff462 |
---|---|
1 <tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="1.0"> | |
2 <description>Search protein database with protein query sequence(s)</description> | |
3 <!-- If job splitting is enabled, break up the query file into four parts and merge the per-part results. merge_outputs must name a declared output; only the tabular output is merged, so this only works if output is tabular. --> | |
4 <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output_tabular"></parallelism> | |
5 <version_command>blastp -version</version_command> | |
6 <requirements> | |
7 <requirement type="package" version="333">binaries_for_blast_plus</requirement> | |
8 </requirements> | |
9 <command> | |
10 ## The command is a Cheetah template which allows some Python based syntax. | |
11 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces | |
12 blastp | |
13 -query "$query" | |
14 #if $db_opts.db_opts_selector == "db": | |
15 -db "${db_opts.database.fields.path}" | |
16 #elif $db_opts.db_opts_selector == "remote": | |
17 -db $db_opts.database | |
18 -remote | |
19 #set $txids = [] | |
20 #set $ntxids = [] | |
21 #for $i, $org in enumerate($db_opts.taxid_repeat): | |
22 #if $org.exclude: | |
23 #set $ntxids = $ntxids + ["txid" + $org.taxid.__str__] | |
24 #else | |
25 #set $txids = $txids + ["txid" + $org.taxid.__str__] | |
26 #end if | |
27 #end for | |
28 #if (len($txids) + len($ntxids)) > 0: | |
29 #set $entrez_query = '' | |
30 #if len($txids) > 0: | |
31 #set $entrez_query = $entrez_query + '(' + ' OR '.join($txids) + ')' | |
32 #end if | |
33 #if len($ntxids) > 0: | |
34 #set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')' | |
35 #end if | |
36 -entrez_query '$entrez_query' | |
37 #end if | |
38 #else: | |
39 -subject "$db_opts.subject" | |
40 #end if | |
41 -task $blast_type | |
42 -evalue $evalue_cutoff | |
43 -out blast_output | |
44 ##Set the extended list here so if/when we add things, saved workflows are not affected | |
45 #if str($fmt_opt.out_format)=="text": | |
46 -outfmt "$fmt_opt.outfmt" $fmt_opt.html | |
47 #if $fmt_opt.num_descriptions.__str__.strip() != '': | |
48 -num_descriptions $fmt_opt.num_descriptions | |
49 #end if | |
50 #if $fmt_opt.num_alignments.__str__.strip() != '': | |
51 -num_alignments $fmt_opt.num_alignments | |
52 #end if | |
53 #else: | |
54 -outfmt "$fmt_opt.outfmt" | |
55 #if str($fmt_opt.max_target_seqs).strip() not in ('', '0'): | |
56 -max_target_seqs $fmt_opt.max_target_seqs ## skipped for empty or 0: the parameter help defines zero as "use default limits" | |
57 #end if | |
58 #end if | |
59 #if $db_opts.db_opts_selector != "remote": | |
60 -num_threads "\${GALAXY_SLOTS:-8}" ## use the slot count Galaxy allocated to the job; falls back to 8 when GALAXY_SLOTS is unset | |
61 #end if | |
62 #if $adv_opts.adv_opts_selector=="advanced": | |
63 $adv_opts.filter_query | |
64 -matrix $adv_opts.scoring.matrix | |
65 $adv_opts.scoring.gap_costs | |
66 | |
67 #if $adv_opts.word_size.__str__.strip() != '': | |
68 -word_size $adv_opts.word_size | |
69 #end if | |
70 | |
71 #if $adv_opts.window_size.__str__.strip() != '': | |
72 -window_size $adv_opts.window_size | |
73 #end if | |
74 | |
75 #if $adv_opts.threshold.__str__.strip() != '': | |
76 -threshold $adv_opts.threshold | |
77 #end if | |
78 | |
79 #if $adv_opts.comp_based_stats.__str__.strip() != '': | |
80 -comp_based_stats $adv_opts.comp_based_stats | |
81 #end if | |
82 | |
83 ##Ungapped disabled for now - see comments below | |
84 ##$adv_opts.ungapped | |
85 $adv_opts.use_sw_tback | |
86 $adv_opts.parse_deflines | |
87 ## End of advanced options: | |
88 #end if | |
89 </command> | |
90 <inputs> | |
91 <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> | |
92 <conditional name="db_opts"> | |
93 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
94 <option value="db" selected="True">Local BLAST Database</option> | |
95 <option value="file">Local FASTA file</option> | |
96 <option value="remote">NCBI Remote Database</option> | |
97 </param> | |
98 <when value="db"> | |
99 <param name="database" type="select" label="Protein BLAST database"> | |
100 <options from_file="blastdb_p.loc"> | |
101 <column name="value" index="0"/> | |
102 <column name="name" index="1"/> | |
103 <column name="path" index="2"/> | |
104 </options> | |
105 </param> | |
106 <param name="subject" type="hidden" value="" /> | |
107 </when> | |
108 <when value="file"> | |
109 <param name="database" type="hidden" value="" /> | |
110 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> | |
111 </when> | |
112 <when value="remote"> | |
113 <param name="database" type="select" label="Protein BLAST database"> | |
114 <option value="nr" selected="selected" >Non-redundant protein sequences (nr)</option> | |
115 <option value="refseq_protein" >Reference proteins (refseq_protein)</option> | |
116 <option value="swissprot" >UniProtKB/Swiss-Prot(swissprot)</option> | |
117 <option value="pat" >Patented protein sequences(pat)</option> | |
118 <option value="pdb" >Protein Data Bank proteins(pdb)</option> | |
119 <option value="env_nr" >Metagenomic proteins(env_nr)</option> | |
120 </param> | |
121 <repeat name="taxid_repeat" title="Search Organism Restriction" min="0"> | |
122 <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) "> | |
123 <validator type="in_range" min="0" /> <!-- reject negative taxon IDs; the tag was misspelled as dsvalidator --> | |
124 </param> | |
125 <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/> | |
126 </repeat> | |
127 </when> | |
128 </conditional> | |
129 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> | |
130 <option value="blastp">blastp</option> | |
131 <option value="blastp-short">blastp-short</option> | |
132 </param> | |
133 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | |
134 <conditional name="fmt_opt"> | |
135 <param name="out_format" type="select" label="Output format"> | |
136 <option value="tabular" selected="True">Tabular</option> | |
137 <option value="blastxml">BLAST XML</option> | |
138 <option value="text">Text Report</option> | |
139 </param> | |
140 <when value="tabular"> | |
141 <param name="outfmt" type="select" label="Tabular columns"> | |
142 <option value="6" selected="True">Tabular (standard 12 columns)</option> | |
143 <option value="7">Tabular (standard 12 columns) with comments</option> | |
144 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option> | |
145 </param> | |
146 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
147 <validator type="in_range" min="0" /> | |
148 </param> | |
149 </when> | |
150 <when value="blastxml"> | |
151 <param name="outfmt" type="hidden" value="5"/> | |
152 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
153 <validator type="in_range" min="0" /> | |
154 </param> | |
155 </when> | |
156 <when value="text"> | |
157 <param name="outfmt" type="select" label="Text format"> | |
158 <option value="0">Pairwise text</option> | |
159 <option value="1">Query-anchored text showing identities</option> | |
160 <option value="2">Query-anchored text</option> | |
161 <option value="3">Flat query-anchored text showing identities</option> | |
162 <option value="4">Flat query-anchored text</option> | |
163 </param> | |
164 <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" /> | |
165 <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences."> | |
166 <validator type="in_range" min="0" /> | |
167 </param> | |
168 <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences."> | |
169 <validator type="in_range" min="0" /> | |
170 </param> | |
171 </when> | |
172 </conditional> | |
173 <conditional name="adv_opts"> | |
174 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
175 <option value="basic" selected="True">Hide Advanced Options</option> | |
176 <option value="advanced">Show Advanced Options</option> | |
177 </param> | |
178 <when value="basic" /> | |
179 <when value="advanced"> | |
180 <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' --> | |
181 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" /> | |
182 <conditional name="scoring"> | |
183 <param name="matrix" type="select" label="Scoring matrix"> | |
184 <option value="BLOSUM90">BLOSUM90</option> | |
185 <option value="BLOSUM80">BLOSUM80</option> | |
186 <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> | |
187 <option value="BLOSUM50">BLOSUM50</option> | |
188 <option value="BLOSUM45">BLOSUM45</option> | |
189 <option value="PAM250">PAM250</option> | |
190 <option value="PAM70">PAM70</option> | |
191 <option value="PAM30">PAM30</option> | |
192 </param> | |
193 <when value="BLOSUM90"> | |
194 <param name="gap_costs" type="select" label="Gap Costs"> | |
195 <option value="">Use Defaults</option> | |
196 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
197 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
198 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
199 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
200 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
201 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
202 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
203 </param> | |
204 | |
205 </when> | |
206 <when value="BLOSUM80"> | |
207 <param name="gap_costs" type="select" label="Gap Costs"> | |
208 <option value="">Use Defaults</option> | |
209 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
210 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
211 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
212 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
213 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
214 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
215 </param> | |
216 </when> | |
217 <when value="BLOSUM62"> | |
218 <param name="gap_costs" type="select" label="Gap Costs"> | |
219 <option value="">Use Defaults</option> | |
220 <option value="-gapopen 11 -gapextend 2">Existence: 11 Extension: 2</option> | |
221 <option value="-gapopen 10 -gapextend 2">Existence: 10 Extension: 2</option> | |
222 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
223 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
224 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
225 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
226 <option value="-gapopen 13 -gapextend 1">Existence: 13 Extension: 1</option> | |
227 <option value="-gapopen 12 -gapextend 1">Existence: 12 Extension: 1</option> | |
228 <option value="-gapopen 11 -gapextend 1" selected="true">Existence: 11 Extension: 1 (default)</option> | |
229 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
230 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
231 </param> | |
232 | |
233 </when> | |
234 <when value="BLOSUM50"> | |
235 <param name="gap_costs" type="select" label="Gap Costs"> | |
236 <option value="">Use Defaults</option> | |
237 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
238 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
239 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
240 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
241 <option value="-gapopen 9 -gapextend 3">Existence: 9 Extension: 3</option> | |
242 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
243 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
244 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
245 <option value="-gapopen 13 -gapextend 2" selected="true">Existence: 13 Extension: 2 (default)</option> | |
246 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
247 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
248 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
249 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
250 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
251 <option value="-gapopen 15 -gapextend 1">Existence: 15 Extension: 1</option> | |
252 </param> | |
253 | |
254 </when> | |
255 <when value="BLOSUM45"> | |
256 <param name="gap_costs" type="select" label="Gap Costs"> | |
257 <option value="">Use Defaults</option> | |
258 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
259 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
260 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
261 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
262 <option value="-gapopen 15 -gapextend 2" selected="true">Existence: 15 Extension: 2 (default)</option> | |
263 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
264 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
265 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
266 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
267 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
268 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
269 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
270 </param> | |
271 </when> | |
272 <when value="PAM250"> | |
273 <param name="gap_costs" type="select" label="Gap Costs"> | |
274 <option value="">Use Defaults</option> | |
275 <option value="-gapopen 15 -gapextend 3">Existence: 15 Extension: 3</option> | |
276 <option value="-gapopen 14 -gapextend 3">Existence: 14 Extension: 3</option> | |
277 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
278 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
279 <option value="-gapopen 17 -gapextend 2">Existence: 17 Extension: 2</option> | |
280 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
281 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
282 <option value="-gapopen 14 -gapextend 2" selected="true">Existence: 14 Extension: 2 (default)</option> | |
283 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
284 <option value="-gapopen 21 -gapextend 1">Existence: 21 Extension: 1</option> | |
285 <option value="-gapopen 20 -gapextend 1">Existence: 20 Extension: 1</option> | |
286 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
287 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
288 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
289 </param> | |
290 | |
291 </when> | |
292 <when value="PAM70"> | |
293 <param name="gap_costs" type="select" label="Gap Costs"> | |
294 <option value="">Use Defaults</option> | |
295 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
296 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
297 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
298 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
299 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
300 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
301 </param> | |
302 | |
303 </when> | |
304 <when value="PAM30"> | |
305 <param name="gap_costs" type="select" label="Gap Costs"> | |
306 <option value="">Use Defaults</option> | |
307 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
308 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
309 <option value="-gapopen 5 -gapextend 2">Existence: 5 Extension: 2</option> | |
310 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
311 <option value="-gapopen 9 -gapextend 1" selected="true">Existence: 9 Extension: 1 (default)</option> | |
312 <option value="-gapopen 8 -gapextend 1">Existence: 8 Extension: 1</option> | |
313 </param> | |
314 </when> | |
315 <!-- | |
316 Can't use '-ungapped' on its own, error back is: | |
317 Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search | |
318 Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.' | |
319 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" /> | |
320 --> | |
321 </conditional> | |
322 | |
323 <!-- I'd like word_size to be optional, with minimum 2 for blastp --> | |
324 <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2"> | |
325 <validator type="in_range" min="2" /> | |
326 </param> | |
327 <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15"> | |
328 <validator type="in_range" min="0" /> | |
329 </param> | |
330 <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16"> | |
331 <validator type="in_range" min="1" /> | |
332 </param> | |
333 <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics" | |
334 help="Recommended: blastp: 2 blastp-short: 0"> | |
335 <option value="">Leave Unspecified</option> | |
336 <option value="0">0 or F (No composition-based statistics)</option> | |
337 <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option> | |
338 <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option> | |
339 <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option> | |
340 </param> | |
341 | |
342 <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" /> | |
343 | |
344 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
345 </when> | |
346 </conditional> | |
347 </inputs> | |
348 | |
349 <outputs> | |
350 <data name="output_tabular" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
351 <filter>fmt_opt['out_format'] == "tabular"</filter> | |
352 </data> | |
353 <data name="output_xml" format="blastxml" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
354 <filter>fmt_opt['out_format'] == "blastxml"</filter> | |
355 </data> | |
356 <data name="output_txt" format="txt" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
357 <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter> | |
358 </data> | |
359 <data name="output_html" format="html" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
360 <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter> | |
361 </data> | |
362 </outputs> | |
363 | |
364 <stdio> | |
365 <exit_code range="1" level="fatal" description="Bad input dataset or BLAST options" /> | |
366 <exit_code range="2" level="fatal" description="Error in BLAST database" /> | |
367 <exit_code range="3" level="fatal" description="Error in BLAST engine" /> | |
368 <exit_code range="4" level="fatal" description="Out of Memory" /> | |
369 <exit_code range="5:" level="fatal" description="Unknown Error" /> | |
370 </stdio> | |
371 | |
372 <tests> | |
373 <test> | |
374 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
375 <param name="db_opts_selector" value="file" /> | |
376 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
377 <param name="database" value="" /> | |
378 <param name="evalue_cutoff" value="1e-8" /> | |
379 <param name="blast_type" value="blastp" /> | |
380 <param name="out_format" value="blastxml" /> | |
381 <param name="outfmt" value="5" /> | |
382 <param name="adv_opts_selector" value="advanced" /> | |
383 <param name="filter_query" value="False" /> | |
384 <param name="matrix" value="BLOSUM62" /> | |
385 <param name="max_target_seqs" value="" /> | |
386 <param name="word_size" value="" /> | |
387 <param name="parse_deflines" value="True" /> | |
388 <output name="output_xml"> | |
389 <assert_contents> | |
390 <has_text text="sp|Q9BS26|ERP44_HUMAN"/> | |
391 </assert_contents> | |
392 </output> | |
393 </test> | |
394 <test> | |
395 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
396 <param name="db_opts_selector" value="file" /> | |
397 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
398 <param name="database" value="" /> | |
399 <param name="evalue_cutoff" value="1e-8" /> | |
400 <param name="blast_type" value="blastp" /> | |
401 <param name="out_format" value="tabular" /> | |
402 <param name="outfmt" value="6" /> | |
403 <param name="adv_opts_selector" value="advanced" /> | |
404 <param name="filter_query" value="False" /> | |
405 <param name="matrix" value="BLOSUM62" /> | |
406 <param name="max_target_seqs" value="" /> | |
407 <param name="word_size" value="" /> | |
408 <param name="parse_deflines" value="True" /> | |
409 <output name="output_tabular"> | |
410 <assert_contents> | |
411 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
412 <has_text text="BAB21486.1"/> | |
413 </assert_contents> | |
414 </output> | |
415 </test> | |
416 <test> | |
417 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
418 <param name="db_opts_selector" value="file" /> | |
419 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
420 <param name="database" value="" /> | |
421 <param name="evalue_cutoff" value="1e-8" /> | |
422 <param name="blast_type" value="blastp" /> | |
423 <param name="out_format" value="tabular" /> | |
424 <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" /> | |
425 <param name="adv_opts_selector" value="advanced" /> | |
426 <param name="filter_query" value="False" /> | |
427 <param name="matrix" value="BLOSUM62" /> | |
428 <param name="max_target_seqs" value="" /> | |
429 <param name="word_size" value="" /> | |
430 <param name="parse_deflines" value="True" /> | |
431 <output name="output_tabular"> | |
432 <assert_contents> | |
433 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" /> | |
434 </assert_contents> | |
435 </output> | |
436 </test> | |
437 <test> | |
438 <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
439 <param name="db_opts_selector" value="file" /> | |
440 <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> | |
441 <param name="database" value="" /> | |
442 <param name="evalue_cutoff" value="1e-8" /> | |
443 <param name="blast_type" value="blastp" /> | |
444 <param name="out_format" value="tabular" /> | |
445 <param name="outfmt" value="6" /> | |
446 <param name="adv_opts_selector" value="basic" /> | |
447 <output name="output_tabular"> | |
448 <assert_contents> | |
449 <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
450 <has_text text="BAB21486.1"/> | |
451 </assert_contents> | |
452 </output> | |
453 </test> | |
454 </tests> | |
455 <help> | |
456 | |
457 .. class:: warningmark | |
458 | |
459 **Note**. Database searches may take a substantial amount of time. | |
460 For large input datasets it is advisable to allow overnight processing. | |
461 | |
462 ----- | |
463 | |
464 **What it does** | |
465 | |
466 Search a *protein database* using a *protein query*, | |
467 using the NCBI BLAST+ blastp command line tool. | |
468 | |
469 The search can be performed using a local database, against a sequence supplied in a fasta file, | |
470 or the blast can be performed remotely at NCBI. | |
471 | |
472 The remote operation allows searches to be targeted at specific organisms. | |
473 | |
474 ----- | |
475 | |
476 **Output format** | |
477 | |
478 Because Galaxy focuses on processing tabular data, the default output of this | |
479 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
480 | |
481 ====== ========= ============================================ | |
482 Column NCBI name Description | |
483 ------ --------- -------------------------------------------- | |
484 1 qseqid Query Seq-id (ID of your sequence) | |
485 2 sseqid Subject Seq-id (ID of the database hit) | |
486 3 pident Percentage of identical matches | |
487 4 length Alignment length | |
488 5 mismatch Number of mismatches | |
489 6 gapopen Number of gap openings | |
490 7 qstart Start of alignment in query | |
491 8 qend End of alignment in query | |
492 9 sstart Start of alignment in subject (database hit) | |
493 10 send End of alignment in subject (database hit) | |
494 11 evalue Expectation value (E-value) | |
495 12 bitscore Bit score | |
496 ====== ========= ============================================ | |
497 | |
498 The BLAST+ tools can optionally output additional columns of information, | |
499 but this takes longer to calculate. Most (but not all) of these columns are | |
500 included by selecting the extended tabular output. The extra columns are | |
501 included *after* the standard 12 columns. This is so that you can write | |
502 workflow filtering steps that accept either the 12 or 24 column tabular | |
503 BLAST output. | |
504 | |
505 ====== ============= =========================================== | |
506 Column NCBI name Description | |
507 ------ ------------- ------------------------------------------- | |
508 13 sallseqid All subject Seq-id(s), separated by a ';' | |
509 14 score Raw score | |
510 15 nident Number of identical matches | |
511 16 positive Number of positive-scoring matches | |
512 17 gaps Total number of gaps | |
513 18 ppos Percentage of positive-scoring matches | |
514 19 qframe Query frame | |
515 20 sframe Subject frame | |
516 21 qseq Aligned part of query sequence | |
517 22 sseq Aligned part of subject sequence | |
518 23 qlen Query sequence length | |
519 24 slen Subject sequence length | |
520 ====== ============= =========================================== | |
521 | |
522 The third option is BLAST XML output, which is designed to be parsed by | |
523 another program, and is understood by some Galaxy tools. | |
524 | |
525 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
526 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
527 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
528 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
529 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
530 | |
531 ------- | |
532 | |
533 **References** | |
534 | |
535 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. | |
536 | |
537 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005. | |
538 | |
539 </help> | |
540 </tool> |