0
|
1 <tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="1.0">
|
|
2 <description>Search protein database with protein query sequence(s)</description>
|
|
<!-- If job splitting is enabled, break up the query file into four parts.
     Only the tabular dataset is listed in merge_outputs, so splitting is only
     supported when the output format is tabular. The name must match a
     <data> element in <outputs>. -->
<parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output_tabular"></parallelism>
|
|
<!-- Command used to report the blastp version. -->
<version_command>blastp -version</version_command>
<!-- Package dependency declared for this tool. -->
<requirements>
    <requirement version="333" type="package">binaries_for_blast_plus</requirement>
</requirements>
|
|
<command>
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
blastp
-query "$query"
## Subject selection: local BLAST database, remote NCBI database, or a FASTA file.
#if $db_opts.db_opts_selector == "db":
-db "${db_opts.database.fields.path}"
#elif $db_opts.db_opts_selector == "remote":
-db $db_opts.database
-remote
## Collect the taxon IDs to include and the ones to exclude as separate term lists.
#set $txids = []
#set $ntxids = []
#for $i, $org in enumerate($db_opts.taxid_repeat):
#if $org.exclude:
#set $ntxids = $ntxids + ["txid" + $org.taxid.__str__]
#else:
#set $txids = $txids + ["txid" + $org.taxid.__str__]
#end if
#end for
## Only pass an Entrez restriction when at least one taxon ID was supplied.
#if (len($txids) + len($ntxids)) > 0:
#set $entrez_query = ''
#if len($txids) > 0:
#set $entrez_query = $entrez_query + '(' + ' OR '.join($txids) + ')'
#end if
#if len($ntxids) > 0:
#set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')'
#end if
-entrez_query '$entrez_query'
#end if
#else:
-subject "$db_opts.subject"
#end if
-task $blast_type
-evalue $evalue_cutoff
-out blast_output
##Set the extended list here so if/when we add things, saved workflows are not affected
#if str($fmt_opt.out_format)=="text":
-outfmt "$fmt_opt.outfmt" $fmt_opt.html
#if $fmt_opt.num_descriptions.__str__.strip() != '':
-num_descriptions $fmt_opt.num_descriptions
#end if
#if $fmt_opt.num_alignments.__str__.strip() != '':
-num_alignments $fmt_opt.num_alignments
#end if
#else:
-outfmt "$fmt_opt.outfmt"
## The form help says zero means default limits, but blastp only accepts
## -max_target_seqs values of 1 or more, so blank and zero both omit the option.
#set $max_hits = str($fmt_opt.max_target_seqs).strip()
#if $max_hits.isdigit() and int($max_hits) > 0:
-max_target_seqs $max_hits
#end if
#end if
#if $db_opts.db_opts_selector != "remote":
## Use the core count Galaxy allocated to the job (GALAXY_SLOTS); the shell
## default keeps the previous fixed value of 8 when the variable is not set.
-num_threads "\${GALAXY_SLOTS:-8}"
#end if
#if $adv_opts.adv_opts_selector=="advanced":
$adv_opts.filter_query
-matrix $adv_opts.scoring.matrix
$adv_opts.scoring.gap_costs
## Each optional numeric setting is only passed when the field is not blank.
#if $adv_opts.word_size.__str__.strip() != '':
-word_size $adv_opts.word_size
#end if
#if $adv_opts.window_size.__str__.strip() != '':
-window_size $adv_opts.window_size
#end if
#if $adv_opts.threshold.__str__.strip() != '':
-threshold $adv_opts.threshold
#end if
#if $adv_opts.comp_based_stats.__str__.strip() != '':
-comp_based_stats $adv_opts.comp_based_stats
#end if
##Ungapped disabled for now - see comments below
##$adv_opts.ungapped
$adv_opts.use_sw_tback
$adv_opts.parse_deflines
## End of advanced options:
#end if
</command>
|
|
90 <inputs>
|
|
<param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
<!-- Subject selection. Every branch defines both "database" and "subject"
     (one of them hidden and empty where it does not apply). -->
<conditional name="db_opts">
    <param name="db_opts_selector" type="select" label="Subject database/sequences">
        <option value="db" selected="True">Local BLAST Database</option>
        <option value="file">Local FASTA file</option>
        <option value="remote">NCBI Remote Database</option>
    </param>
    <when value="db">
        <param name="database" type="select" label="Protein BLAST database">
            <options from_file="blastdb_p.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>
        <param name="subject" type="hidden" value="" />
    </when>
    <when value="file">
        <param name="database" type="hidden" value="" />
        <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
    </when>
    <when value="remote">
        <param name="database" type="select" label="Protein BLAST database">
            <option value="nr" selected="selected">Non-redundant protein sequences (nr)</option>
            <option value="refseq_protein">Reference proteins (refseq_protein)</option>
            <option value="swissprot">UniProtKB/Swiss-Prot(swissprot)</option>
            <option value="pat">Patented protein sequences(pat)</option>
            <option value="pdb">Protein Data Bank proteins(pdb)</option>
            <option value="env_nr">Metagenomic proteins(env_nr)</option>
        </param>
        <!-- Each entry becomes a "txid" term in the Entrez query built by the
             command template; "exclude" moves it into the NOT (...) group. -->
        <repeat name="taxid_repeat" title="Search Organism Restriction" min="0">
            <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) ">
                <!-- Was "dsvalidator", which is not a validator element, so the range check never applied. -->
                <validator type="in_range" min="0" />
            </param>
            <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/>
        </repeat>
    </when>
</conditional>
|
|
<param name="blast_type" type="select" display="radio" label="Type of BLAST">
    <option value="blastp">blastp</option>
    <option value="blastp-short">blastp-short</option>
</param>
<param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
<!-- Output format. Every branch defines "outfmt", which the command template
     passes to -outfmt; the hit-limit parameters differ between text and
     non-text formats. -->
<conditional name="fmt_opt">
    <param name="out_format" type="select" label="Output format">
        <option value="tabular" selected="True">Tabular</option>
        <option value="blastxml">BLAST XML</option>
        <option value="text">Text Report</option>
    </param>
    <when value="tabular">
        <param name="outfmt" type="select" label="Tabular columns">
            <option value="6" selected="True">Tabular (standard 12 columns)</option>
            <option value="7">Tabular (standard 12 columns) with comments</option>
            <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option>
        </param>
        <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
            <validator type="in_range" min="0" />
        </param>
    </when>
    <when value="blastxml">
        <param name="outfmt" type="hidden" value="5"/>
        <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
            <validator type="in_range" min="0" />
        </param>
    </when>
    <when value="text">
        <param name="outfmt" type="select" label="Text format">
            <option value="0">Pairwise text</option>
            <option value="1">Query-anchored text showing identities</option>
            <option value="2">Query-anchored text</option>
            <option value="3">Flat query-anchored text showing identities</option>
            <option value="4">Flat query-anchored text</option>
        </param>
        <!-- The true value is the literal blastp flag inserted into the command line. -->
        <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" />
        <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences.">
            <validator type="in_range" min="0" />
        </param>
        <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences.">
            <validator type="in_range" min="0" />
        </param>
    </when>
</conditional>
|
|
<!-- Advanced options. Boolean and gap-cost values are literal blastp
     command-line fragments that the command template inserts as-is. -->
<conditional name="adv_opts">
    <param name="adv_opts_selector" type="select" label="Advanced Options">
        <option value="basic" selected="True">Hide Advanced Options</option>
        <option value="advanced">Show Advanced Options</option>
    </param>
    <when value="basic" />
    <when value="advanced">
        <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
        <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
        <!-- The gap-cost choices offered depend on the selected scoring matrix. -->
        <conditional name="scoring">
            <param name="matrix" type="select" label="Scoring matrix">
                <option value="BLOSUM90">BLOSUM90</option>
                <option value="BLOSUM80">BLOSUM80</option>
                <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
                <option value="BLOSUM50">BLOSUM50</option>
                <option value="BLOSUM45">BLOSUM45</option>
                <option value="PAM250">PAM250</option>
                <option value="PAM70">PAM70</option>
                <option value="PAM30">PAM30</option>
            </param>
            <when value="BLOSUM90">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option>
                    <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                    <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                    <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                    <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                    <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                    <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                </param>
            </when>
            <when value="BLOSUM80">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                    <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                    <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                    <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                    <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                    <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                </param>
            </when>
            <when value="BLOSUM62">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 11 -gapextend 2">Existence: 11 Extension: 2</option>
                    <option value="-gapopen 10 -gapextend 2">Existence: 10 Extension: 2</option>
                    <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option>
                    <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                    <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                    <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                    <option value="-gapopen 13 -gapextend 1">Existence: 13 Extension: 1</option>
                    <option value="-gapopen 12 -gapextend 1">Existence: 12 Extension: 1</option>
                    <option value="-gapopen 11 -gapextend 1" selected="true">Existence: 11 Extension: 1 (default)</option>
                    <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option>
                    <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                </param>
            </when>
            <when value="BLOSUM50">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                    <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                    <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option>
                    <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option>
                    <option value="-gapopen 9 -gapextend 3">Existence: 9 Extension: 3</option>
                    <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option>
                    <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option>
                    <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option>
                    <option value="-gapopen 13 -gapextend 2" selected="true">Existence: 13 Extension: 2 (default)</option>
                    <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option>
                    <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                    <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                    <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                    <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option>
                    <option value="-gapopen 15 -gapextend 1">Existence: 15 Extension: 1</option>
                </param>
            </when>
            <when value="BLOSUM45">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                    <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                    <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option>
                    <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option>
                    <option value="-gapopen 15 -gapextend 2" selected="true">Existence: 15 Extension: 2 (default)</option>
                    <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option>
                    <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option>
                    <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option>
                    <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                    <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                    <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                    <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option>
                </param>
            </when>
            <when value="PAM250">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 15 -gapextend 3">Existence: 15 Extension: 3</option>
                    <option value="-gapopen 14 -gapextend 3">Existence: 14 Extension: 3</option>
                    <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option>
                    <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option>
                    <option value="-gapopen 17 -gapextend 2">Existence: 17 Extension: 2</option>
                    <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option>
                    <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option>
                    <option value="-gapopen 14 -gapextend 2" selected="true">Existence: 14 Extension: 2 (default)</option>
                    <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option>
                    <option value="-gapopen 21 -gapextend 1">Existence: 21 Extension: 1</option>
                    <option value="-gapopen 20 -gapextend 1">Existence: 20 Extension: 1</option>
                    <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option>
                    <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option>
                    <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option>
                </param>
            </when>
            <when value="PAM70">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option>
                    <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                    <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                    <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option>
                    <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option>
                    <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option>
                </param>
            </when>
            <when value="PAM30">
                <param name="gap_costs" type="select" label="Gap Costs">
                    <option value="">Use Defaults</option>
                    <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option>
                    <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option>
                    <option value="-gapopen 5 -gapextend 2">Existence: 5 Extension: 2</option>
                    <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option>
                    <option value="-gapopen 9 -gapextend 1" selected="true">Existence: 9 Extension: 1 (default)</option>
                    <option value="-gapopen 8 -gapextend 1">Existence: 8 Extension: 1</option>
                </param>
            </when>
            <!--
            Can't use '-ungapped' on its own, error back is:
            Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
            Tried using '-ungapped -comp_based_stats F' and blastp crashed with 'Attempt to access NULL pointer.'
            <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped -comp_based_stats F" falsevalue="" checked="false" />
            -->
        </conditional>

        <!-- I'd like word_size to be optional, with minimum 2 for blastp -->
        <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2">
            <validator type="in_range" min="2" />
        </param>
        <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15">
            <validator type="in_range" min="0" />
        </param>
        <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16">
            <validator type="in_range" min="1" />
        </param>
        <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics"
               help="Recommended: blastp: 2 blastp-short: 0">
            <option value="">Leave Unspecified</option>
            <option value="0">0 or F (No composition-based statistics)</option>
            <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option>
            <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option>
            <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option>
        </param>

        <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" />

        <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
    </when>
</conditional>
|
|
347 </inputs>
|
|
348
|
|
<outputs>
    <!-- The four filters are mutually exclusive, so one dataset is kept per job.
         Each is taken from the working-directory file "blast_output" that the
         command writes via -out. -->
    <data name="output_tabular"
          format="tabular"
          from_work_dir="blast_output"
          label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}">
        <filter>fmt_opt['out_format'] == "tabular"</filter>
    </data>
    <data name="output_xml"
          format="blastxml"
          from_work_dir="blast_output"
          label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}">
        <filter>fmt_opt['out_format'] == "blastxml"</filter>
    </data>
    <!-- Text reports are split by the "html" checkbox into plain text and HTML datasets. -->
    <data name="output_txt"
          format="txt"
          from_work_dir="blast_output"
          label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}">
        <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter>
    </data>
    <data name="output_html"
          format="html"
          from_work_dir="blast_output"
          label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}">
        <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter>
    </data>
</outputs>
|
|
363
|
|
<stdio>
    <!-- Every listed exit code is treated as fatal; codes 1 to 4 get a specific
         description and 5 or above is reported as unknown. -->
    <exit_code level="fatal" range="1" description="Bad input dataset or BLAST options" />
    <exit_code level="fatal" range="2" description="Error in BLAST database" />
    <exit_code level="fatal" range="3" description="Error in BLAST engine" />
    <exit_code level="fatal" range="4" description="Out of Memory" />
    <exit_code level="fatal" range="5:" description="Unknown Error" />
</stdio>
|
|
371
|
|
<tests>
    <!-- All tests use the "file" subject mode with the two bundled FASTA files. -->

    <!-- Test 1: BLAST XML output, advanced options shown. -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
        <param name="db_opts_selector" value="file" />
        <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
        <param name="database" value="" />
        <param name="evalue_cutoff" value="1e-8" />
        <param name="blast_type" value="blastp" />
        <param name="out_format" value="blastxml" />
        <param name="outfmt" value="5" />
        <param name="adv_opts_selector" value="advanced" />
        <param name="filter_query" value="False" />
        <param name="matrix" value="BLOSUM62" />
        <param name="max_target_seqs" value="" />
        <param name="word_size" value="" />
        <param name="parse_deflines" value="True" />
        <output name="output_xml">
            <assert_contents>
                <has_text text="sp|Q9BS26|ERP44_HUMAN"/>
            </assert_contents>
        </output>
    </test>

    <!-- Test 2: standard 12 column tabular output, advanced options shown. -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
        <param name="db_opts_selector" value="file" />
        <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
        <param name="database" value="" />
        <param name="evalue_cutoff" value="1e-8" />
        <param name="blast_type" value="blastp" />
        <param name="out_format" value="tabular" />
        <param name="outfmt" value="6" />
        <param name="adv_opts_selector" value="advanced" />
        <param name="filter_query" value="False" />
        <param name="matrix" value="BLOSUM62" />
        <param name="max_target_seqs" value="" />
        <param name="word_size" value="" />
        <param name="parse_deflines" value="True" />
        <output name="output_tabular">
            <assert_contents>
                <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
                <has_text text="BAB21486.1"/>
            </assert_contents>
        </output>
    </test>

    <!-- Test 3: extended 24 column tabular output, advanced options shown. -->
    <test>
        <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
        <param name="db_opts_selector" value="file" />
        <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
        <param name="database" value="" />
        <param name="evalue_cutoff" value="1e-8" />
        <param name="blast_type" value="blastp" />
        <param name="out_format" value="tabular" />
        <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" />
        <param name="adv_opts_selector" value="advanced" />
        <param name="filter_query" value="False" />
        <param name="matrix" value="BLOSUM62" />
        <param name="max_target_seqs" value="" />
        <param name="word_size" value="" />
        <param name="parse_deflines" value="True" />
        <output name="output_tabular">
            <assert_contents>
                <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" />
            </assert_contents>
        </output>
    </test>

    <!-- Test 4: query and subject files swapped, basic options only. -->
    <test>
        <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
        <param name="db_opts_selector" value="file" />
        <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
        <param name="database" value="" />
        <param name="evalue_cutoff" value="1e-8" />
        <param name="blast_type" value="blastp" />
        <param name="out_format" value="tabular" />
        <param name="outfmt" value="6" />
        <param name="adv_opts_selector" value="basic" />
        <output name="output_tabular">
            <assert_contents>
                <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
                <has_text text="BAB21486.1"/>
            </assert_contents>
        </output>
    </test>
</tests>
|
|
455 <help>
|
|
456
|
|
457 .. class:: warningmark
|
|
458
|
|
459 **Note**. Database searches may take a substantial amount of time.
|
|
460 For large input datasets it is advisable to allow overnight processing.
|
|
461
|
|
462 -----
|
|
463
|
|
464 **What it does**
|
|
465
|
|
466 Search a *protein database* using a *protein query*,
|
|
467 using the NCBI BLAST+ blastp command line tool.
|
|
468
|
|
469 The search can be performed using a local database, against a sequence supplied in a fasta file,
|
|
470 or the blast can be performed remotely at NCBI.
|
|
471
|
|
472 The remote operation allows searches to be targeted at specific organisms.
|
|
473
|
|
474 -----
|
|
475
|
|
476 **Output format**
|
|
477
|
|
478 Because Galaxy focuses on processing tabular data, the default output of this
|
|
479 tool is tabular. The standard BLAST+ tabular output contains 12 columns:
|
|
480
|
|
====== ========= ============================================
Column NCBI name Description
------ --------- --------------------------------------------
     1 qseqid    Query Seq-id (ID of your sequence)
     2 sseqid    Subject Seq-id (ID of the database hit)
     3 pident    Percentage of identical matches
     4 length    Alignment length
     5 mismatch  Number of mismatches
     6 gapopen   Number of gap openings
     7 qstart    Start of alignment in query
     8 qend      End of alignment in query
     9 sstart    Start of alignment in subject (database hit)
    10 send      End of alignment in subject (database hit)
    11 evalue    Expectation value (E-value)
    12 bitscore  Bit score
====== ========= ============================================
|
|
497
|
|
498 The BLAST+ tools can optionally output additional columns of information,
|
|
499 but this takes longer to calculate. Most (but not all) of these columns are
|
|
500 included by selecting the extended tabular output. The extra columns are
|
|
501 included *after* the standard 12 columns. This is so that you can write
|
|
502 workflow filtering steps that accept either the 12 or 24 column tabular
|
|
503 BLAST output.
|
|
504
|
|
====== ============= ===========================================
Column NCBI name     Description
------ ------------- -------------------------------------------
    13 sallseqid     All subject Seq-id(s), separated by a ';'
    14 score         Raw score
    15 nident        Number of identical matches
    16 positive      Number of positive-scoring matches
    17 gaps          Total number of gaps
    18 ppos          Percentage of positive-scoring matches
    19 qframe        Query frame
    20 sframe        Subject frame
    21 qseq          Aligned part of query sequence
    22 sseq          Aligned part of subject sequence
    23 qlen          Query sequence length
    24 slen          Subject sequence length
====== ============= ===========================================
|
|
521
|
|
522 The third option is BLAST XML output, which is designed to be parsed by
|
|
523 another program, and is understood by some Galaxy tools.
|
|
524
|
|
525 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
|
|
526 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
|
|
527 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
|
|
528 The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
|
|
529 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
|
|
530
|
|
531 -------
|
|
532
|
|
533 **References**
|
|
534
|
|
535 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
|
|
536
|
|
537 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
|
|
538
|
|
539 </help>
|
|
540 </tool>
|