0
|
1 <tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="1.0">
|
|
2 <description>Search protein database with protein query sequence(s)</description>
|
|
3 <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output_tabular,output_xml,output_txt,output_html"></parallelism>
|
|
4 <version_command>blastp -version</version_command>
|
|
5 <requirements>
|
|
6 <requirement type="package" version="333">binaries_for_blast_plus</requirement>
|
|
7 </requirements>
|
|
8 <command>
|
|
9 ## The command is a Cheetah template which allows some Python based syntax.
|
|
10 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
|
|
11 blastp
|
|
12 -query "$query"
|
|
13 #if $db_opts.db_opts_selector == "db":
|
|
14 -db "${db_opts.database.fields.path}"
|
|
15 #elif $db_opts.db_opts_selector == "remote":
|
|
16 -db $db_opts.database
|
|
17 -remote
|
|
18 #set $txids = []
|
|
19 #set $ntxids = []
|
|
20 #for $i, $org in enumerate($db_opts.taxid_repeat):
|
|
21 #if $org.exclude:
|
|
22 #set $ntxids = $ntxids + ["txid" + $org.taxid.__str__]
|
|
23 #else
|
|
24 #set $txids = $txids + ["txid" + $org.taxid.__str__]
|
|
25 #end if
|
|
26 #end for
|
|
27 #if (len($txids) + len($ntxids)) > 0:
|
|
28 #set $entrez_query = ''
|
|
29 #if len($txids) > 0:
|
|
30 #set $entrez_query = $entrez_query + '(' + ' OR '.join($txids) + ')'
|
|
31 #end if
|
|
32 #if len($ntxids) > 0:
|
|
33 #set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')'
|
|
34 #end if
|
|
35 -entrez_query '$entrez_query'
|
|
36 #end if
|
|
37 #else:
|
|
38 -subject "$db_opts.subject"
|
|
39 #end if
|
|
40 -task $blast_type
|
|
41 -evalue $evalue_cutoff
|
|
42 -out blast_output
|
|
43 ##Set the extended list here so if/when we add things, saved workflows are not affected
|
|
44 #if str($fmt_opt.out_format)=="text":
|
|
45 -outfmt "$fmt_opt.outfmt" $fmt_opt.html
|
|
46 #if $fmt_opt.num_descriptions.__str__.strip() != '':
|
|
47 -num_descriptions $fmt_opt.num_descriptions
|
|
48 #end if
|
|
49 #if $fmt_opt.num_alignments.__str__.strip() != '':
|
|
50 -num_alignments $fmt_opt.num_alignments
|
|
51 #end if
|
|
52 #else:
|
|
53 -outfmt "$fmt_opt.outfmt"
|
|
54 #if $fmt_opt.max_target_seqs.__str__.strip() not in ('', '0'):
|
|
55 -max_target_seqs $fmt_opt.max_target_seqs
|
|
56 #end if
|
|
57 #end if
|
|
58 #if $db_opts.db_opts_selector != "remote":
|
|
59 -num_threads "\${GALAXY_SLOTS:-8}"
|
|
60 #end if
|
|
61 #if $adv_opts.adv_opts_selector=="advanced":
|
|
62 $adv_opts.filter_query
|
|
63 -matrix $adv_opts.scoring.matrix
|
|
64 $adv_opts.scoring.gap_costs
|
|
65
|
|
66 #if $adv_opts.word_size.__str__.strip() != '':
|
|
67 -word_size $adv_opts.word_size
|
|
68 #end if
|
|
69
|
|
70 #if $adv_opts.window_size.__str__.strip() != '':
|
|
71 -window_size $adv_opts.window_size
|
|
72 #end if
|
|
73
|
|
74 #if $adv_opts.threshold.__str__.strip() != '':
|
|
75 -threshold $adv_opts.threshold
|
|
76 #end if
|
|
77
|
|
78 #if $adv_opts.comp_based_stats.__str__.strip() != '':
|
|
79 -comp_based_stats $adv_opts.comp_based_stats
|
|
80 #end if
|
|
81
|
|
82 ##Ungapped disabled for now - see comments below
|
|
83 ##$adv_opts.ungapped
|
|
84 $adv_opts.use_sw_tback
|
|
85 $adv_opts.parse_deflines
|
|
86 ## End of advanced options:
|
|
87 #end if
|
|
88 </command>
|
|
89 <inputs>
|
|
90 <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/>
|
|
91 <conditional name="db_opts">
|
|
92 <param name="db_opts_selector" type="select" label="Subject database/sequences">
|
|
93 <option value="db" selected="True">Local BLAST Database</option>
|
|
94 <option value="file">Local FASTA file</option>
|
|
95 <option value="remote">NCBI Remote Database</option>
|
|
96 </param>
|
|
97 <when value="db">
|
|
98 <param name="database" type="select" label="Protein BLAST database">
|
|
99 <options from_file="blastdb_p.loc">
|
|
100 <column name="value" index="0"/>
|
|
101 <column name="name" index="1"/>
|
|
102 <column name="path" index="2"/>
|
|
103 </options>
|
|
104 </param>
|
|
105 <param name="subject" type="hidden" value="" />
|
|
106 </when>
|
|
107 <when value="file">
|
|
108 <param name="database" type="hidden" value="" />
|
|
109 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
|
|
110 </when>
|
|
111 <when value="remote">
|
|
112 <param name="database" type="select" label="Protein BLAST database">
|
|
113 <option value="nr" selected="selected" >Non-redundant protein sequences (nr)</option>
|
|
114 <option value="refseq_protein" >Reference proteins (refseq_protein)</option>
|
|
115 <option value="swissprot" >UniProtKB/Swiss-Prot(swissprot)</option>
|
|
116 <option value="pat" >Patented protein sequences(pat)</option>
|
|
117 <option value="pdb" >Protein Data Bank proteins(pdb)</option>
|
|
118 <option value="env_nr" >Metagenomic proteins(env_nr)</option>
|
|
119 </param>
|
|
120 <repeat name="taxid_repeat" title="Search Organism Restriction" min="0">
|
|
121 <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) ">
|
|
122 <validator type="in_range" min="0" />
|
|
123 </param>
|
|
124 <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/>
|
|
125 </repeat>
|
|
126 </when>
|
|
127 </conditional>
|
|
128 <param name="blast_type" type="select" display="radio" label="Type of BLAST">
|
|
129 <option value="blastp">blastp</option>
|
|
130 <option value="blastp-short">blastp-short</option>
|
|
131 </param>
|
|
132 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
|
|
133 <conditional name="fmt_opt">
|
|
134 <param name="out_format" type="select" label="Output format">
|
|
135 <option value="tabular" selected="True">Tabular</option>
|
|
136 <option value="blastxml">BLAST XML</option>
|
|
137 <option value="text">Text Report</option>
|
|
138 </param>
|
|
139 <when value="tabular">
|
|
140 <param name="outfmt" type="select" label="Tabular columns">
|
|
141 <option value="6" selected="True">Tabular (standard 12 columns)</option>
|
|
142 <option value="7">Tabular (standard 12 columns) with comments</option>
|
|
143 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option>
|
|
144 </param>
|
|
145 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
|
|
146 <validator type="in_range" min="0" />
|
|
147 </param>
|
|
148 </when>
|
|
149 <when value="blastxml">
|
|
150 <param name="outfmt" type="hidden" value="5"/>
|
|
151 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits">
|
|
152 <validator type="in_range" min="0" />
|
|
153 </param>
|
|
154 </when>
|
|
155 <when value="text">
|
|
156 <param name="outfmt" type="select" label="Text format">
|
|
157 <option value="0">Pairwise text</option>
|
|
158 <option value="1">Query-anchored text showing identities</option>
|
|
159 <option value="2">Query-anchored text</option>
|
|
160 <option value="3">Flat query-anchored text showing identities</option>
|
|
161 <option value="4">Flat query-anchored text</option>
|
|
162 </param>
|
|
163 <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" />
|
|
164 <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences.">
|
|
165 <validator type="in_range" min="0" />
|
|
166 </param>
|
|
167 <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences.">
|
|
168 <validator type="in_range" min="0" />
|
|
169 </param>
|
|
170 </when>
|
|
171 </conditional>
|
|
172 <conditional name="adv_opts">
|
|
173 <param name="adv_opts_selector" type="select" label="Advanced Options">
|
|
174 <option value="basic" selected="True">Hide Advanced Options</option>
|
|
175 <option value="advanced">Show Advanced Options</option>
|
|
176 </param>
|
|
177 <when value="basic" />
|
|
178 <when value="advanced">
|
|
179 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
|
|
180 <conditional name="scoring">
|
|
181 <param name="matrix" type="select" label="Scoring matrix">
|
|
182 <option value="BLOSUM90">BLOSUM90</option>
|
|
183 <option value="BLOSUM80">BLOSUM80</option>
|
|
184 <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
|
|
185 <option value="BLOSUM50">BLOSUM50</option>
|
|
186 <option value="BLOSUM45">BLOSUM45</option>
|
|
187 <option value="PAM250">PAM250</option>
|
|
188 <option value="PAM70">PAM70</option>
|
|
189 <option value="PAM30">PAM30</option>
|
|
190 </param>
|
|
191 <when value="BLOSUM90">
|
|
192 <param name="gap_costs" type="select" label="Gap Costs">
|
|
193 <option value="">Use Defaults</option>
|
|
194 <option value="-gapopen 9 -gapextend 2">Existense: 9 Extension: 2</option>
|
|
195 <option value="-gapopen 8 -gapextend 2">Existense: 8 Extension: 2</option>
|
|
196 <option value="-gapopen 7 -gapextend 2">Existense: 7 Extension: 2</option>
|
|
197 <option value="-gapopen 6 -gapextend 2">Existense: 6 Extension: 2</option>
|
|
198 <option value="-gapopen 11 -gapextend 1">Existense: 11 Extension: 1</option>
|
|
199 <option value="-gapopen 10 -gapextend 1" selected="true">Existense: 10 Extension: 1 (default)</option>
|
|
200 <option value="-gapopen 9 -gapextend 1">Existense: 9 Extension: 1</option>
|
|
201 </param>
|
|
202
|
|
203 </when>
|
|
204 <when value="BLOSUM80">
|
|
205 <param name="gap_costs" type="select" label="Gap Costs">
|
|
206 <option value="">Use Defaults</option>
|
|
207 <option value="-gapopen 8 -gapextend 2">Existense: 8 Extension: 2</option>
|
|
208 <option value="-gapopen 7 -gapextend 2">Existense: 7 Extension: 2</option>
|
|
209 <option value="-gapopen 6 -gapextend 2">Existense: 6 Extension: 2</option>
|
|
210 <option value="-gapopen 11 -gapextend 1">Existense: 11 Extension: 1</option>
|
|
211 <option value="-gapopen 10 -gapextend 1" selected="true">Existense: 10 Extension: 1 (default)</option>
|
|
212 <option value="-gapopen 9 -gapextend 1">Existense: 9 Extension: 1</option>
|
|
213 </param>
|
|
214 </when>
|
|
215 <when value="BLOSUM62">
|
|
216 <param name="gap_costs" type="select" label="Gap Costs">
|
|
217 <option value="">Use Defaults</option>
|
|
218 <option value="-gapopen 11 -gapextend 2">Existense: 11 Extension: 2</option>
|
|
219 <option value="-gapopen 10 -gapextend 2">Existense: 10 Extension: 2</option>
|
|
220 <option value="-gapopen 9 -gapextend 2">Existense: 9 Extension: 2</option>
|
|
221 <option value="-gapopen 8 -gapextend 2">Existense: 8 Extension: 2</option>
|
|
222 <option value="-gapopen 7 -gapextend 2">Existense: 7 Extension: 2</option>
|
|
223 <option value="-gapopen 6 -gapextend 2">Existense: 6 Extension: 2</option>
|
|
224 <option value="-gapopen 13 -gapextend 1">Existense: 13 Extension: 1</option>
|
|
225 <option value="-gapopen 12 -gapextend 1">Existense: 12 Extension: 1</option>
|
|
226 <option value="-gapopen 11 -gapextend 1" selected="true">Existense: 11 Extension: 1 (default)</option>
|
|
227 <option value="-gapopen 10 -gapextend 1">Existense: 10 Extension: 1</option>
|
|
228 <option value="-gapopen 9 -gapextend 1">Existense: 9 Extension: 1</option>
|
|
229 </param>
|
|
230
|
|
231 </when>
|
|
232 <when value="BLOSUM50">
|
|
233 <param name="gap_costs" type="select" label="Gap Costs">
|
|
234 <option value="">Use Defaults</option>
|
|
235 <option value="-gapopen 13 -gapextend 3">Existense: 13 Extension: 3</option>
|
|
236 <option value="-gapopen 12 -gapextend 3">Existense: 12 Extension: 3</option>
|
|
237 <option value="-gapopen 11 -gapextend 3">Existense: 11 Extension: 3</option>
|
|
238 <option value="-gapopen 10 -gapextend 3">Existense: 10 Extension: 3</option>
|
|
239 <option value="-gapopen 9 -gapextend 3">Existense: 9 Extension: 3</option>
|
|
240 <option value="-gapopen 16 -gapextend 2">Existense: 16 Extension: 2</option>
|
|
241 <option value="-gapopen 15 -gapextend 2">Existense: 15 Extension: 2</option>
|
|
242 <option value="-gapopen 14 -gapextend 2">Existense: 14 Extension: 2</option>
|
|
243 <option value="-gapopen 13 -gapextend 2" selected="true">Existense: 13 Extension: 2 (default)</option>
|
|
244 <option value="-gapopen 12 -gapextend 2">Existense: 12 Extension: 2</option>
|
|
245 <option value="-gapopen 19 -gapextend 1">Existense: 19 Extension: 1</option>
|
|
246 <option value="-gapopen 18 -gapextend 1">Existense: 18 Extension: 1</option>
|
|
247 <option value="-gapopen 17 -gapextend 1">Existense: 17 Extension: 1</option>
|
|
248 <option value="-gapopen 16 -gapextend 1">Existense: 16 Extension: 1</option>
|
|
249 <option value="-gapopen 15 -gapextend 1">Existense: 15 Extension: 1</option>
|
|
250 </param>
|
|
251
|
|
252 </when>
|
|
253 <when value="BLOSUM45">
|
|
254 <param name="gap_costs" type="select" label="Gap Costs">
|
|
255 <option value="">Use Defaults</option>
|
|
256 <option value="-gapopen 13 -gapextend 3">Existense: 13 Extension: 3</option>
|
|
257 <option value="-gapopen 12 -gapextend 3">Existense: 12 Extension: 3</option>
|
|
258 <option value="-gapopen 11 -gapextend 3">Existense: 11 Extension: 3</option>
|
|
259 <option value="-gapopen 10 -gapextend 3">Existense: 10 Extension: 3</option>
|
|
260 <option value="-gapopen 15 -gapextend 2" selected="true">Existense: 15 Extension: 2 (default)</option>
|
|
261 <option value="-gapopen 14 -gapextend 2">Existense: 14 Extension: 2</option>
|
|
262 <option value="-gapopen 13 -gapextend 2">Existense: 13 Extension: 2</option>
|
|
263 <option value="-gapopen 12 -gapextend 2">Existense: 12 Extension: 2</option>
|
|
264 <option value="-gapopen 19 -gapextend 1">Existense: 19 Extension: 1</option>
|
|
265 <option value="-gapopen 18 -gapextend 1">Existense: 18 Extension: 1</option>
|
|
266 <option value="-gapopen 17 -gapextend 1">Existense: 17 Extension: 1</option>
|
|
267 <option value="-gapopen 16 -gapextend 1">Existense: 16 Extension: 1</option>
|
|
268 </param>
|
|
269 </when>
|
|
270 <when value="PAM250">
|
|
271 <param name="gap_costs" type="select" label="Gap Costs">
|
|
272 <option value="">Use Defaults</option>
|
|
273 <option value="-gapopen 15 -gapextend 3">Existense: 15 Extension: 3</option>
|
|
274 <option value="-gapopen 14 -gapextend 3">Existense: 14 Extension: 3</option>
|
|
275 <option value="-gapopen 13 -gapextend 3">Existense: 13 Extension: 3</option>
|
|
276 <option value="-gapopen 12 -gapextend 3">Existense: 12 Extension: 3</option>
|
|
277 <option value="-gapopen 17 -gapextend 2">Existense: 17 Extension: 2</option>
|
|
278 <option value="-gapopen 16 -gapextend 2">Existense: 16 Extension: 2</option>
|
|
279 <option value="-gapopen 15 -gapextend 2">Existense: 15 Extension: 2</option>
|
|
280 <option value="-gapopen 14 -gapextend 2" selected="true">Existense: 14 Extension: 2 (default)</option>
|
|
281 <option value="-gapopen 13 -gapextend 2">Existense: 13 Extension: 2</option>
|
|
282 <option value="-gapopen 21 -gapextend 1">Existense: 21 Extension: 1</option>
|
|
283 <option value="-gapopen 20 -gapextend 1">Existense: 20 Extension: 1</option>
|
|
284 <option value="-gapopen 19 -gapextend 1">Existense: 19 Extension: 1</option>
|
|
285 <option value="-gapopen 18 -gapextend 1">Existense: 18 Extension: 1</option>
|
|
286 <option value="-gapopen 17 -gapextend 1">Existense: 17 Extension: 1</option>
|
|
287 </param>
|
|
288
|
|
289 </when>
|
|
290 <when value="PAM70">
|
|
291 <param name="gap_costs" type="select" label="Gap Costs">
|
|
292 <option value="">Use Defaults</option>
|
|
293 <option value="-gapopen 8 -gapextend 2">Existense: 8 Extension: 2</option>
|
|
294 <option value="-gapopen 7 -gapextend 2">Existense: 7 Extension: 2</option>
|
|
295 <option value="-gapopen 6 -gapextend 2">Existense: 6 Extension: 2</option>
|
|
296 <option value="-gapopen 11 -gapextend 1">Existense: 11 Extension: 1</option>
|
|
297 <option value="-gapopen 10 -gapextend 1" selected="true">Existense: 10 Extension: 1 (default)</option>
|
|
298 <option value="-gapopen 9 -gapextend 1">Existense: 9 Extension: 1</option>
|
|
299 </param>
|
|
300
|
|
301 </when>
|
|
302 <when value="PAM30">
|
|
303 <param name="gap_costs" type="select" label="Gap Costs">
|
|
304 <option value="">Use Defaults</option>
|
|
305 <option value="-gapopen 7 -gapextend 2">Existense: 7 Extension: 2</option>
|
|
306 <option value="-gapopen 6 -gapextend 2">Existense: 6 Extension: 2</option>
|
|
307 <option value="-gapopen 5 -gapextend 2">Existense: 5 Extension: 2</option>
|
|
308 <option value="-gapopen 10 -gapextend 1">Existense: 10 Extension: 1</option>
|
|
309 <option value="-gapopen 9 -gapextend 1" selected="true">Existense: 9 Extension: 1 (default)</option>
|
|
310 <option value="-gapopen 8 -gapextend 1">Existense: 8 Extension: 1</option>
|
|
311 </param>
|
|
312 </when>
|
|
313 </conditional>
|
|
314
|
|
315 <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2">
|
|
316 <validator type="in_range" min="2" />
|
|
317 </param>
|
|
318 <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15">
|
|
319 <validator type="in_range" min="0" />
|
|
320 </param>
|
|
321 <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16">
|
|
322 <validator type="in_range" min="1" />
|
|
323 </param>
|
|
324 <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics"
|
|
325 help="Recommended: blastp: 2 blastp-short: 0">
|
|
326 <option value="">Leave Unspecified</option>
|
|
327 <option value="0">0 or F (No composition-based statistics)</option>
|
|
328 <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option>
|
|
329 <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option>
|
|
330 <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option>
|
|
331 </param>
|
|
332
|
|
333 <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" />
|
|
334
|
|
335 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
|
|
336 </when>
|
|
337 </conditional>
|
|
338 </inputs>
|
|
339
|
|
340 <outputs>
|
|
341 <data name="output_tabular" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
|
|
342 <filter>fmt_opt['out_format'] == "tabular"</filter>
|
|
343 </data>
|
|
344 <data name="output_xml" format="blastxml" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
|
|
345 <filter>fmt_opt['out_format'] == "blastxml"</filter>
|
|
346 </data>
|
|
347 <data name="output_txt" format="txt" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
|
|
348 <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter>
|
|
349 </data>
|
|
350 <data name="output_html" format="html" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output">
|
|
351 <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter>
|
|
352 </data>
|
|
353 </outputs>
|
|
354
|
|
355 <stdio>
|
|
356 <exit_code range="1" level="fatal" description="Bad input dataset or BLAST options" />
|
|
357 <exit_code range="2" level="fatal" description="Error in BLAST database" />
|
|
358 <exit_code range="3" level="fatal" description="Error in BLAST engine" />
|
|
359 <exit_code range="4" level="fatal" description="Out of Memory" />
|
|
360 <exit_code range="5:" level="fatal" description="Unknown Error" />
|
|
361 </stdio>
|
|
362
|
|
363 <tests>
|
|
364 <test>
|
|
365 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
|
|
366 <param name="db_opts_selector" value="file" />
|
|
367 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
|
|
368 <param name="database" value="" />
|
|
369 <param name="evalue_cutoff" value="1e-8" />
|
|
370 <param name="blast_type" value="blastp" />
|
|
371 <param name="out_format" value="blastxml" />
|
|
372 <param name="outfmt" value="5" />
|
|
373 <param name="adv_opts_selector" value="advanced" />
|
|
374 <param name="filter_query" value="False" />
|
|
375 <param name="matrix" value="BLOSUM62" />
|
|
376 <param name="max_target_seqs" value="" />
|
|
377 <param name="word_size" value="" />
|
|
378 <param name="parse_deflines" value="True" />
|
|
379 <output name="output_xml">
|
|
380 <assert_contents>
|
|
381 <has_text text="sp|Q9BS26|ERP44_HUMAN"/>
|
|
382 </assert_contents>
|
|
383 </output>
|
|
384 </test>
|
|
385 <test>
|
|
386 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
|
|
387 <param name="db_opts_selector" value="file" />
|
|
388 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
|
|
389 <param name="database" value="" />
|
|
390 <param name="evalue_cutoff" value="1e-8" />
|
|
391 <param name="blast_type" value="blastp" />
|
|
392 <param name="out_format" value="tabular" />
|
|
393 <param name="outfmt" value="6" />
|
|
394 <param name="adv_opts_selector" value="advanced" />
|
|
395 <param name="filter_query" value="False" />
|
|
396 <param name="matrix" value="BLOSUM62" />
|
|
397 <param name="max_target_seqs" value="" />
|
|
398 <param name="word_size" value="" />
|
|
399 <param name="parse_deflines" value="True" />
|
|
400 <output name="output_tabular">
|
|
401 <assert_contents>
|
|
402 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
|
|
403 <has_text text="BAB21486.1"/>
|
|
404 </assert_contents>
|
|
405 </output>
|
|
406 </test>
|
|
407 <test>
|
|
408 <param name="query" value="four_human_proteins.fasta" ftype="fasta" />
|
|
409 <param name="db_opts_selector" value="file" />
|
|
410 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" />
|
|
411 <param name="database" value="" />
|
|
412 <param name="evalue_cutoff" value="1e-8" />
|
|
413 <param name="blast_type" value="blastp" />
|
|
414 <param name="out_format" value="tabular" />
|
|
415 <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" />
|
|
416 <param name="adv_opts_selector" value="advanced" />
|
|
417 <param name="filter_query" value="False" />
|
|
418 <param name="matrix" value="BLOSUM62" />
|
|
419 <param name="max_target_seqs" value="" />
|
|
420 <param name="word_size" value="" />
|
|
421 <param name="parse_deflines" value="True" />
|
|
422 <output name="output_tabular">
|
|
423 <assert_contents>
|
|
424 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" />
|
|
425 </assert_contents>
|
|
426 </output>
|
|
427 </test>
|
|
428 <test>
|
|
429 <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
|
|
430 <param name="db_opts_selector" value="file" />
|
|
431 <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
|
|
432 <param name="database" value="" />
|
|
433 <param name="evalue_cutoff" value="1e-8" />
|
|
434 <param name="blast_type" value="blastp" />
|
|
435 <param name="out_format" value="tabular" />
|
|
436 <param name="outfmt" value="6" />
|
|
437 <param name="adv_opts_selector" value="basic" />
|
|
438 <output name="output_tabular">
|
|
439 <assert_contents>
|
|
440 <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" />
|
|
441 <has_text text="BAB21486.1"/>
|
|
442 </assert_contents>
|
|
443 </output>
|
|
444 </test>
|
|
445 </tests>
|
|
446 <help>
|
|
447
|
|
448 .. class:: warningmark
|
|
449
|
|
450 **Note**. Database searches may take a substantial amount of time.
|
|
451 For large input datasets it is advisable to allow overnight processing.
|
|
452
|
|
453 -----
|
|
454
|
|
455 **What it does**
|
|
456
|
|
457 Search a *protein database* using a *protein query*,
|
|
458 using the NCBI BLAST+ blastp command line tool.
|
|
459
|
|
460 The search can be performed using a local database, against sequences supplied in a FASTA file,
|
|
461 or the BLAST search can be performed remotely at NCBI.
|
|
462
|
|
463 The remote operation allows searches to be targeted at specific organisms.
|
|
464
|
|
465 -----
|
|
466
|
|
467 **Output format**
|
|
468
|
|
469 Because Galaxy focuses on processing tabular data, the default output of this
|
|
470 tool is tabular. The standard BLAST+ tabular output contains 12 columns:
|
|
471
|
|
472 ====== ========= ============================================
|
|
473 Column NCBI name Description
|
|
474 ------ --------- --------------------------------------------
|
|
475 1 qseqid Query Seq-id (ID of your sequence)
|
|
476 2 sseqid Subject Seq-id (ID of the database hit)
|
|
477 3 pident Percentage of identical matches
|
|
478 4 length Alignment length
|
|
479 5 mismatch Number of mismatches
|
|
480 6 gapopen Number of gap openings
|
|
481 7 qstart Start of alignment in query
|
|
482 8 qend End of alignment in query
|
|
483 9 sstart Start of alignment in subject (database hit)
|
|
484 10 send End of alignment in subject (database hit)
|
|
485 11 evalue Expectation value (E-value)
|
|
486 12 bitscore Bit score
|
|
487 ====== ========= ============================================
|
|
488
|
|
489 The BLAST+ tools can optionally output additional columns of information,
|
|
490 but this takes longer to calculate. Most (but not all) of these columns are
|
|
491 included by selecting the extended tabular output. The extra columns are
|
|
492 included *after* the standard 12 columns. This is so that you can write
|
|
493 workflow filtering steps that accept either the 12 or 24 column tabular
|
|
494 BLAST output.
|
|
495
|
|
496 ====== ============= ===========================================
|
|
497 Column NCBI name Description
|
|
498 ------ ------------- -------------------------------------------
|
|
499 13 sallseqid All subject Seq-id(s), separated by a ';'
|
|
500 14 score Raw score
|
|
501 15 nident Number of identical matches
|
|
502 16 positive Number of positive-scoring matches
|
|
503 17 gaps Total number of gaps
|
|
504 18 ppos Percentage of positive-scoring matches
|
|
505 19 qframe Query frame
|
|
506 20 sframe Subject frame
|
|
507 21 qseq Aligned part of query sequence
|
|
508 22 sseq Aligned part of subject sequence
|
|
509 23 qlen Query sequence length
|
|
510 24 slen Subject sequence length
|
|
511 ====== ============= ===========================================
|
|
512
|
|
513 The third option is BLAST XML output, which is designed to be parsed by
|
|
514 another program, and is understood by some Galaxy tools.
|
|
515
|
|
516 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
|
|
517 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
|
|
518 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
|
|
519 The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
|
|
520 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
|
|
521
|
|
522 -------
|
|
523
|
|
524 **References**
|
|
525
|
|
526 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402.
|
|
527
|
|
528 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005.
|
|
529
|
|
530 </help>
|
|
531 </tool>
|