Mercurial > repos > galaxyp > blast_plus_remote_blastp
annotate tools/blast_plus_remote_blastp.xml @ 4:a51980bc0ffe
Add the -show_gis option (NCBI-gis on the NCBI Blast website)
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 04 May 2015 09:58:57 -0500 |
parents | 9f369b905447 |
children |
rev | line source |
---|---|
4
a51980bc0ffe
Add the -show_gis option (NCBI-gis on the NCBI Blast website)
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
1 <tool id="blast_plus_remote_blastp" name="NCBI BLAST+ remote blastp" version="1.0.1"> |
0 | 2 <description>Search protein database with protein query sequence(s)</description> |
<!-- NOTE(review): merge_outputs names "output1", but the <data> elements in <outputs> are named output_tabular, output_xml, output_txt and output_html; confirm the intended output name(s). -->
3 <parallelism method="multi" split_inputs="query" split_mode="number_of_parts" split_size="4" shared_inputs="subject" merge_outputs="output1"></parallelism> | |
4 <version_command>blastp -version</version_command> | |
5 <requirements> | |
3 | 6 <requirement type="binary">blastp</requirement> |
7 <requirement type="package" version="2.2.29">blast+</requirement> | |
0 | 8 </requirements> |
9 <command> | |
10 ## The command is a Cheetah template which allows some Python based syntax. | |
11 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces | |
12 blastp | |
13 -query "$query" | |
14 #if $db_opts.db_opts_selector == "db": | |
15 -db "${db_opts.database.fields.path}" | |
16 #elif $db_opts.db_opts_selector == "remote": | |
17 -db $db_opts.database | |
18 -remote | |
19 #set $txids = [] | |
20 #set $ntxids = [] | |
21 #for $i, $org in enumerate($db_opts.taxid_repeat): | |
22 #if $org.exclude: | |
23 #set $ntxids = $ntxids + ["txid" + $org.taxid.__str__] | |
24 #else | |
25 #set $txids = $txids + ["txid" + $org.taxid.__str__] | |
26 #end if | |
27 #end for | |
28 #if (len($txids) + len($ntxids)) > 0: | |
29 #set $entrez_query = '' | |
30 #if len($txids) > 0: | |
31 #set $entrez_query = $entrez_query + '(' + ' OR '.join($txids) + ')' | |
32 #end if | |
33 #if len($ntxids) > 0: | |
34 #set $entrez_query = $entrez_query + ' NOT (' + ' OR '.join($ntxids) + ')' | |
35 #end if | |
36 -entrez_query '$entrez_query' | |
37 #end if | |
38 #else: | |
39 -subject "$db_opts.subject" | |
40 #end if | |
41 -task $blast_type | |
42 -evalue $evalue_cutoff | |
43 -out blast_output | |
44 ##Set the extended list here so if/when we add things, saved workflows are not affected | |
45 #if str($fmt_opt.out_format)=="text": | |
4
a51980bc0ffe
Add the -show_gis option (NCBI-gis on the NCBI Blast website)
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
46 -outfmt "$fmt_opt.outfmt" $fmt_opt.html $fmt_opt.show_gis |
0 | 47 #if $fmt_opt.num_descriptions.__str__.strip() != '': |
48 -num_descriptions $fmt_opt.num_descriptions | |
49 #end if | |
50 #if $fmt_opt.num_alignments.__str__.strip() != '': | |
51 -num_alignments $fmt_opt.num_alignments | |
52 #end if | |
53 #else: | |
54 -outfmt "$fmt_opt.outfmt" | |
55 #if $fmt_opt.max_target_seqs.__str__.strip() != '': | |
56 -max_target_seqs $fmt_opt.max_target_seqs | |
57 #end if | |
58 #end if | |
## Threads are only requested for non-remote searches; use the slot count Galaxy gives the job (GALAXY_SLOTS) and fall back to 8 when it is unset.
59 #if $db_opts.db_opts_selector != "remote": | |
60 -num_threads "\${GALAXY_SLOTS:-8}" | |
61 #end if | |
62 #if $adv_opts.adv_opts_selector=="advanced": | |
63 $adv_opts.filter_query | |
64 -matrix $adv_opts.scoring.matrix | |
65 $adv_opts.scoring.gap_costs | |
66 | |
67 #if $adv_opts.word_size.__str__.strip() != '': | |
68 -word_size $adv_opts.word_size | |
69 #end if | |
70 | |
71 #if $adv_opts.window_size.__str__.strip() != '': | |
72 -window_size $adv_opts.window_size | |
73 #end if | |
74 | |
75 #if $adv_opts.threshold.__str__.strip() != '': | |
76 -threshold $adv_opts.threshold | |
77 #end if | |
78 | |
79 #if $adv_opts.comp_based_stats.__str__.strip() != '': | |
80 -comp_based_stats $adv_opts.comp_based_stats | |
81 #end if | |
82 | |
83 ##Ungapped disabled for now - see comments below | |
84 ##$adv_opts.ungapped | |
85 $adv_opts.use_sw_tback | |
86 $adv_opts.parse_deflines | |
87 ## End of advanced options: | |
88 #end if | |
89 </command> | |
90 <inputs> | |
91 <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> | |
92 <conditional name="db_opts"> | |
93 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
94 <option value="db" selected="True">Local BLAST Database</option> | |
95 <option value="file">Local FASTA file</option> | |
96 <option value="remote">NCBI Remote Database</option> | |
97 </param> | |
98 <when value="db"> | |
99 <param name="database" type="select" label="Protein BLAST database"> | |
100 <options from_file="blastdb_p.loc"> | |
101 <column name="value" index="0"/> | |
102 <column name="name" index="1"/> | |
103 <column name="path" index="2"/> | |
104 </options> | |
105 </param> | |
106 <param name="subject" type="hidden" value="" /> | |
107 </when> | |
108 <when value="file"> | |
109 <param name="database" type="hidden" value="" /> | |
110 <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/> | |
111 </when> | |
112 <when value="remote"> | |
113 <param name="database" type="select" label="Protein BLAST database"> | |
114 <option value="nr" selected="selected" >Non-redundant protein sequences (nr)</option> | |
115 <option value="refseq_protein" >Reference proteins (refseq_protein)</option> | |
116 <option value="swissprot" >UniProtKB/Swiss-Prot(swissprot)</option> | |
117 <option value="pat" >Patented protein sequences(pat)</option> | |
118 <option value="pdb" >Protein Data Bank proteins(pdb)</option> | |
119 <option value="env_nr" >Metagenomic proteins(env_nr)</option> | |
120 </param> | |
<!-- Zero or more NCBI taxon IDs; the command template turns each entry into a "txid" term of -entrez_query, included or (when "exclude" is checked) negated. -->
121 <repeat name="taxid_repeat" title="Search Organism Restriction" min="0"> | |
122 <param name="taxid" type="integer" value="" label="NCBI Taxon ID" help="For example: Human is 9606 (see: http://www.ncbi.nlm.nih.gov/taxonomy ) "> | |
123 <validator type="in_range" min="0" /> | |
124 </param> | |
125 <param name="exclude" type="boolean" checked="false" label="Exclude this NCBI Taxon ID" help=""/> | |
126 </repeat> | |
127 </when> | |
128 </conditional> | |
129 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> | |
130 <option value="blastp">blastp</option> | |
131 <option value="blastp-short">blastp-short</option> | |
132 </param> | |
133 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | |
134 <conditional name="fmt_opt"> | |
135 <param name="out_format" type="select" label="Output format"> | |
136 <option value="tabular" selected="True">Tabular</option> | |
137 <option value="blastxml">BLAST XML</option> | |
138 <option value="text">Text Report</option> | |
139 </param> | |
140 <when value="tabular"> | |
141 <param name="outfmt" type="select" label="Tabular columns"> | |
142 <option value="6" selected="True">Tabular (standard 12 columns)</option> | |
143 <option value="7">Tabular (standard 12 columns) with comments</option> | |
144 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen">Tabular (extended 24 columns)</option> | |
145 </param> | |
146 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
147 <validator type="in_range" min="0" /> | |
148 </param> | |
149 </when> | |
150 <when value="blastxml"> | |
151 <param name="outfmt" type="hidden" value="5"/> | |
152 <param name="max_target_seqs" type="integer" value="500" optional="true" label="Maximum hits to show" help="Use zero for default limits"> | |
153 <validator type="in_range" min="0" /> | |
154 </param> | |
155 </when> | |
156 <when value="text"> | |
<!-- Report layouts for the text output; the selected option value is passed to blastp as -outfmt. -->
157 <param name="outfmt" type="select" label="Text format"> | |
158 <option value="0">Pairwise text</option> | |
159 <option value="1">Query-anchored text showing identities</option> | |
160 <option value="2">Query-anchored text</option> | |
161 <option value="3">Flat query-anchored text showing identities</option> | |
162 <option value="4">Flat query-anchored text</option> | |
163 </param> | |
164 <param name="html" type="boolean" label="Html" truevalue="-html" falsevalue="" checked="true" /> | |
4
a51980bc0ffe
Add the -show_gis option (NCBI-gis on the NCBI Blast website)
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
165 <param name="show_gis" type="boolean" label="NCBI-gis" truevalue="-show_gis" falsevalue="" checked="false" |
a51980bc0ffe
Add the -show_gis option (NCBI-gis on the NCBI Blast website)
Jim Johnson <jj@umn.edu>
parents:
3
diff
changeset
|
166 help="Show the NCBI gis in the Subject def lines"/> |
<!-- Optional cap passed as -num_descriptions; the command template omits the flag when the field is left blank. -->
0 | 167 <param name="num_descriptions" type="integer" value="500" optional="true" label="Maximum Descriptions to show" help="Show one-line descriptions for this number of database sequences."> |
168 <validator type="in_range" min="0" /> | |
169 </param> | |
170 <param name="num_alignments" type="integer" value="250" optional="true" label="Maximum alignments to show" help="Show alignments for this number of database sequences."> | |
171 <validator type="in_range" min="0" /> | |
172 </param> | |
173 </when> | |
174 </conditional> | |
175 <conditional name="adv_opts"> | |
176 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
177 <option value="basic" selected="True">Hide Advanced Options</option> | |
178 <option value="advanced">Show Advanced Options</option> | |
179 </param> | |
180 <when value="basic" /> | |
181 <when value="advanced"> | |
182 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" /> | |
<!-- Gap-cost choices offered per scoring matrix. Each option value is the literal -gapopen/-gapextend argument text the command template inserts; an empty value adds nothing. -->
183 <conditional name="scoring"> | |
184 <param name="matrix" type="select" label="Scoring matrix"> | |
185 <option value="BLOSUM90">BLOSUM90</option> | |
186 <option value="BLOSUM80">BLOSUM80</option> | |
187 <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option> | |
188 <option value="BLOSUM50">BLOSUM50</option> | |
189 <option value="BLOSUM45">BLOSUM45</option> | |
190 <option value="PAM250">PAM250</option> | |
191 <option value="PAM70">PAM70</option> | |
192 <option value="PAM30">PAM30</option> | |
193 </param> | |
194 <when value="BLOSUM90"> | |
195 <param name="gap_costs" type="select" label="Gap Costs"> | |
196 <option value="">Use Defaults</option> | |
197 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
198 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
199 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
200 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
201 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
202 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
203 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
204 </param> | |
205 | |
206 </when> | |
207 <when value="BLOSUM80"> | |
208 <param name="gap_costs" type="select" label="Gap Costs"> | |
209 <option value="">Use Defaults</option> | |
210 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
211 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
212 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
213 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
214 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
215 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
216 </param> | |
217 </when> | |
218 <when value="BLOSUM62"> | |
219 <param name="gap_costs" type="select" label="Gap Costs"> | |
220 <option value="">Use Defaults</option> | |
221 <option value="-gapopen 11 -gapextend 2">Existence: 11 Extension: 2</option> | |
222 <option value="-gapopen 10 -gapextend 2">Existence: 10 Extension: 2</option> | |
223 <option value="-gapopen 9 -gapextend 2">Existence: 9 Extension: 2</option> | |
224 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
225 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
226 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
227 <option value="-gapopen 13 -gapextend 1">Existence: 13 Extension: 1</option> | |
228 <option value="-gapopen 12 -gapextend 1">Existence: 12 Extension: 1</option> | |
229 <option value="-gapopen 11 -gapextend 1" selected="true">Existence: 11 Extension: 1 (default)</option> | |
230 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
231 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
232 </param> | |
233 | |
234 </when> | |
235 <when value="BLOSUM50"> | |
236 <param name="gap_costs" type="select" label="Gap Costs"> | |
237 <option value="">Use Defaults</option> | |
238 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
239 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
240 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
241 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
242 <option value="-gapopen 9 -gapextend 3">Existence: 9 Extension: 3</option> | |
243 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
244 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
245 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
246 <option value="-gapopen 13 -gapextend 2" selected="true">Existence: 13 Extension: 2 (default)</option> | |
247 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
248 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
249 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
250 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
251 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
252 <option value="-gapopen 15 -gapextend 1">Existence: 15 Extension: 1</option> | |
253 </param> | |
254 | |
255 </when> | |
256 <when value="BLOSUM45"> | |
257 <param name="gap_costs" type="select" label="Gap Costs"> | |
258 <option value="">Use Defaults</option> | |
259 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
260 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
261 <option value="-gapopen 11 -gapextend 3">Existence: 11 Extension: 3</option> | |
262 <option value="-gapopen 10 -gapextend 3">Existence: 10 Extension: 3</option> | |
263 <option value="-gapopen 15 -gapextend 2" selected="true">Existence: 15 Extension: 2 (default)</option> | |
264 <option value="-gapopen 14 -gapextend 2">Existence: 14 Extension: 2</option> | |
265 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
266 <option value="-gapopen 12 -gapextend 2">Existence: 12 Extension: 2</option> | |
267 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
268 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
269 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
270 <option value="-gapopen 16 -gapextend 1">Existence: 16 Extension: 1</option> | |
271 </param> | |
272 </when> | |
273 <when value="PAM250"> | |
274 <param name="gap_costs" type="select" label="Gap Costs"> | |
275 <option value="">Use Defaults</option> | |
276 <option value="-gapopen 15 -gapextend 3">Existence: 15 Extension: 3</option> | |
277 <option value="-gapopen 14 -gapextend 3">Existence: 14 Extension: 3</option> | |
278 <option value="-gapopen 13 -gapextend 3">Existence: 13 Extension: 3</option> | |
279 <option value="-gapopen 12 -gapextend 3">Existence: 12 Extension: 3</option> | |
280 <option value="-gapopen 17 -gapextend 2">Existence: 17 Extension: 2</option> | |
281 <option value="-gapopen 16 -gapextend 2">Existence: 16 Extension: 2</option> | |
282 <option value="-gapopen 15 -gapextend 2">Existence: 15 Extension: 2</option> | |
283 <option value="-gapopen 14 -gapextend 2" selected="true">Existence: 14 Extension: 2 (default)</option> | |
284 <option value="-gapopen 13 -gapextend 2">Existence: 13 Extension: 2</option> | |
285 <option value="-gapopen 21 -gapextend 1">Existence: 21 Extension: 1</option> | |
286 <option value="-gapopen 20 -gapextend 1">Existence: 20 Extension: 1</option> | |
287 <option value="-gapopen 19 -gapextend 1">Existence: 19 Extension: 1</option> | |
288 <option value="-gapopen 18 -gapextend 1">Existence: 18 Extension: 1</option> | |
289 <option value="-gapopen 17 -gapextend 1">Existence: 17 Extension: 1</option> | |
290 </param> | |
291 | |
292 </when> | |
293 <when value="PAM70"> | |
294 <param name="gap_costs" type="select" label="Gap Costs"> | |
295 <option value="">Use Defaults</option> | |
296 <option value="-gapopen 8 -gapextend 2">Existence: 8 Extension: 2</option> | |
297 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
298 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
299 <option value="-gapopen 11 -gapextend 1">Existence: 11 Extension: 1</option> | |
300 <option value="-gapopen 10 -gapextend 1" selected="true">Existence: 10 Extension: 1 (default)</option> | |
301 <option value="-gapopen 9 -gapextend 1">Existence: 9 Extension: 1</option> | |
302 </param> | |
303 | |
304 </when> | |
305 <when value="PAM30"> | |
306 <param name="gap_costs" type="select" label="Gap Costs"> | |
307 <option value="">Use Defaults</option> | |
308 <option value="-gapopen 7 -gapextend 2">Existence: 7 Extension: 2</option> | |
309 <option value="-gapopen 6 -gapextend 2">Existence: 6 Extension: 2</option> | |
310 <option value="-gapopen 5 -gapextend 2">Existence: 5 Extension: 2</option> | |
311 <option value="-gapopen 10 -gapextend 1">Existence: 10 Extension: 1</option> | |
312 <option value="-gapopen 9 -gapextend 1" selected="true">Existence: 9 Extension: 1 (default)</option> | |
313 <option value="-gapopen 8 -gapextend 1">Existence: 8 Extension: 1</option> | |
314 </param> | |
315 </when> | |
316 </conditional> | |
317 | |
318 <param name="word_size" type="integer" value="" optional="true" label="Word size for wordfinder algorithm" help="Recommended: blastp 3, blastp-short 2"> | |
319 <validator type="in_range" min="2" /> | |
320 </param> | |
321 <param name="window_size" type="integer" value="" optional="true" label="Multiple hits window size, use 0 to specify 1-hit algorithm." help="Recommended: blastp 40, blastp-short 15"> | |
322 <validator type="in_range" min="0" /> | |
323 </param> | |
324 <param name="threshold" type="integer" value="" optional="true" label="Threshold: Minimum score to add a word to the BLAST lookup table." help="Recommended: blastp 11, blastp-short 16"> | |
325 <validator type="in_range" min="1" /> | |
326 </param> | |
327 <param name="comp_based_stats" type="select" optional="true" label="Use composition-based statistics" | |
328 help="Recommended: blastp: 2 blastp-short: 0"> | |
329 <option value="">Leave Unspecified</option> | |
330 <option value="0">0 or F (No composition-based statistics)</option> | |
331 <option value="1">1 (Composition-based statistics as in NAR 29:2994-3005, 2001)</option> | |
332 <option value="2">2, T, or D (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties)</option> | |
333 <option value="3">3 (Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally)</option> | |
334 </param> | |
335 | |
336 <param name="use_sw_tback" type="boolean" label="Compute locally optimal Smith-Waterman alignments?" truevalue="-use_sw_tback" falsevalue="" checked="false" /> | |
337 | |
338 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
339 </when> | |
340 </conditional> | |
341 </inputs> | |
342 | |
<!-- One dataset per output format. Every dataset is taken from the work-dir file "blast_output" (the -out target in the command); the <filter> expressions pick the dataset matching the chosen out_format and, for text reports, the html flag. -->
343 <outputs> | |
344 <data name="output_tabular" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
345 <filter>fmt_opt['out_format'] == "tabular"</filter> | |
346 </data> | |
347 <data name="output_xml" format="blastxml" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
348 <filter>fmt_opt['out_format'] == "blastxml"</filter> | |
349 </data> | |
350 <data name="output_txt" format="txt" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
351 <filter>fmt_opt['out_format'] == "text" and not fmt_opt['html']</filter> | |
352 </data> | |
353 <data name="output_html" format="html" label="${blast_type.value_label} on ${db_opts.db_opts_selector} ${db_opts.database}" from_work_dir="blast_output"> | |
354 <filter>fmt_opt['out_format'] == "text" and fmt_opt['html']</filter> | |
355 </data> | |
356 </outputs> | |
357 | |
<!-- Exit-code handling: codes 1 to 4 are fatal with a specific description each, and the open-ended range "5:" is fatal with "Unknown Error". -->
358 <stdio> | |
359 <exit_code range="1" level="fatal" description="Bad input dataset or BLAST options" /> | |
360 <exit_code range="2" level="fatal" description="Error in BLAST database" /> | |
361 <exit_code range="3" level="fatal" description="Error in BLAST engine" /> | |
362 <exit_code range="4" level="fatal" description="Out of Memory" /> | |
363 <exit_code range="5:" level="fatal" description="Unknown Error" /> | |
364 </stdio> | |
365 | |
366 <tests> | |
367 <test> | |
368 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
369 <param name="db_opts_selector" value="file" /> | |
370 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
371 <param name="database" value="" /> | |
372 <param name="evalue_cutoff" value="1e-8" /> | |
373 <param name="blast_type" value="blastp" /> | |
374 <param name="out_format" value="blastxml" /> | |
375 <param name="outfmt" value="5" /> | |
376 <param name="adv_opts_selector" value="advanced" /> | |
377 <param name="filter_query" value="False" /> | |
378 <param name="matrix" value="BLOSUM62" /> | |
379 <param name="max_target_seqs" value="" /> | |
380 <param name="word_size" value="" /> | |
381 <param name="parse_deflines" value="True" /> | |
382 <output name="output_xml"> | |
383 <assert_contents> | |
384 <has_text text="sp|Q9BS26|ERP44_HUMAN"/> | |
385 </assert_contents> | |
386 </output> | |
387 </test> | |
388 <test> | |
389 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
390 <param name="db_opts_selector" value="file" /> | |
391 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
392 <param name="database" value="" /> | |
393 <param name="evalue_cutoff" value="1e-8" /> | |
394 <param name="blast_type" value="blastp" /> | |
395 <param name="out_format" value="tabular" /> | |
396 <param name="outfmt" value="6" /> | |
397 <param name="adv_opts_selector" value="advanced" /> | |
398 <param name="filter_query" value="False" /> | |
399 <param name="matrix" value="BLOSUM62" /> | |
400 <param name="max_target_seqs" value="" /> | |
401 <param name="word_size" value="" /> | |
402 <param name="parse_deflines" value="True" /> | |
403 <output name="output_tabular"> | |
404 <assert_contents> | |
405 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
406 <has_text text="BAB21486.1"/> | |
407 </assert_contents> | |
408 </output> | |
409 </test> | |
410 <test> | |
411 <param name="query" value="four_human_proteins.fasta" ftype="fasta" /> | |
412 <param name="db_opts_selector" value="file" /> | |
413 <param name="subject" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
414 <param name="database" value="" /> | |
415 <param name="evalue_cutoff" value="1e-8" /> | |
416 <param name="blast_type" value="blastp" /> | |
417 <param name="out_format" value="tabular" /> | |
418 <param name="outfmt" value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" /> | |
419 <param name="adv_opts_selector" value="advanced" /> | |
420 <param name="filter_query" value="False" /> | |
421 <param name="matrix" value="BLOSUM62" /> | |
422 <param name="max_target_seqs" value="" /> | |
423 <param name="word_size" value="" /> | |
424 <param name="parse_deflines" value="True" /> | |
425 <output name="output_tabular"> | |
426 <assert_contents> | |
427 <has_text_matching expression="sp.P08100.OPSD_HUMAN\tgi.283855846.gb.ADB45242.1.\t\d+.\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+\t\S+\t\d+\t\d+\t\d+\t\d+\t\S+\t\d+\t\d+\t\S+\t\S+\t\d+\t\d+" /> | |
428 </assert_contents> | |
429 </output> | |
430 </test> | |
431 <test> | |
432 <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" /> | |
433 <param name="db_opts_selector" value="file" /> | |
434 <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> | |
435 <param name="database" value="" /> | |
436 <param name="evalue_cutoff" value="1e-8" /> | |
437 <param name="blast_type" value="blastp" /> | |
438 <param name="out_format" value="tabular" /> | |
439 <param name="outfmt" value="6" /> | |
440 <param name="adv_opts_selector" value="basic" /> | |
441 <output name="output_tabular"> | |
442 <assert_contents> | |
443 <has_text_matching expression="gi.283855846.gb.ADB45242.1.\tsp.P08100.OPSD_HUMAN\t\d+.\d+\t\d+\t\d+\t0\t\d+\t\d+\t\d+\t\d+\t\S+\t\s*\d+" /> | |
444 <has_text text="BAB21486.1"/> | |
445 </assert_contents> | |
446 </output> | |
447 </test> | |
448 </tests> | |
449 <help> | |
450 | |
451 .. class:: warningmark | |
452 | |
453 **Note**. Database searches may take a substantial amount of time. | |
454 For large input datasets it is advisable to allow overnight processing. | |
455 | |
456 ----- | |
457 | |
458 **What it does** | |
459 | |
460 Search a *protein database* using a *protein query*, | |
461 using the NCBI BLAST+ blastp command line tool. | |
462 | |
463 The search can be performed using a local database, against sequences supplied in a FASTA file, | |
464 or the BLAST search can be performed remotely at NCBI. | |
465 | |
466 The remote operation allows searches to be targeted at specific organisms. | |
467 | |
468 ----- | |
469 | |
470 **Output format** | |
471 | |
472 Because Galaxy focuses on processing tabular data, the default output of this | |
473 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
474 | |
475 ====== ========= ============================================ | |
476 Column NCBI name Description | |
477 ------ --------- -------------------------------------------- | |
478 1 qseqid Query Seq-id (ID of your sequence) | |
479 2 sseqid Subject Seq-id (ID of the database hit) | |
480 3 pident Percentage of identical matches | |
481 4 length Alignment length | |
482 5 mismatch Number of mismatches | |
483 6 gapopen Number of gap openings | |
484 7 qstart Start of alignment in query | |
485 8 qend End of alignment in query | |
486 9 sstart Start of alignment in subject (database hit) | |
487 10 send End of alignment in subject (database hit) | |
488 11 evalue Expectation value (E-value) | |
489 12 bitscore Bit score | |
490 ====== ========= ============================================ | |
491 | |
492 The BLAST+ tools can optionally output additional columns of information, | |
493 but this takes longer to calculate. Most (but not all) of these columns are | |
494 included by selecting the extended tabular output. The extra columns are | |
495 included *after* the standard 12 columns. This is so that you can write | |
496 workflow filtering steps that accept either the 12 or 24 column tabular | |
497 BLAST output. | |
498 | |
499 ====== ============= =========================================== | |
500 Column NCBI name Description | |
501 ------ ------------- ------------------------------------------- | |
502 13 sallseqid All subject Seq-id(s), separated by a ';' | |
503 14 score Raw score | |
504 15 nident Number of identical matches | |
505 16 positive Number of positive-scoring matches | |
506 17 gaps Total number of gaps | |
507 18 ppos Percentage of positive-scoring matches | |
508 19 qframe Query frame | |
509 20 sframe Subject frame | |
510 21 qseq Aligned part of query sequence | |
511 22 sseq Aligned part of subject sequence | |
512 23 qlen Query sequence length | |
513 24 slen Subject sequence length | |
514 ====== ============= =========================================== | |
515 | |
516 The third option is BLAST XML output, which is designed to be parsed by | |
517 another program, and is understood by some Galaxy tools. | |
518 | |
519 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
520 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
521 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
522 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
523 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
524 | |
525 ------- | |
526 | |
527 **References** | |
528 | |
529 Altschul et al. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. 1997. Nucleic Acids Res. 25:3389-3402. | |
530 | |
531 Schaffer et al. Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements. 2001. Nucleic Acids Res. 29:2994-3005. | |
532 | |
533 </help> | |
534 </tool> |