Mercurial > repos > devteam > ncbi_blast_plus
comparison tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml @ 11:4c4a0da938ff draft
Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25.
Supports $GALAXY_SLOTS.
Includes more tests and heavy use of macros.
author | peterjc |
---|---|
date | Thu, 05 Dec 2013 06:55:59 -0500 |
parents | 70e7dcbf6573 |
children | 623f727cdff1 |
comparison
equal
deleted
inserted
replaced
10:70e7dcbf6573 | 11:4c4a0da938ff |
---|---|
1 <tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.20"> | 1 <tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22"> |
2 <description>Search nucleotide database with nucleotide query sequence(s)</description> | 2 <description>Search nucleotide database with nucleotide query sequence(s)</description> |
3 <!-- If job splitting is enabled, break up the query file into parts --> | 3 <!-- If job splitting is enabled, break up the query file into parts --> |
4 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism> | 4 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> |
5 <requirements> | 5 <macros> |
6 <requirement type="binary">blastn</requirement> | 6 <token name="@BINARY@">blastn</token> |
7 <requirement type="package" version="2.2.26+">blast+</requirement> | 7 <import>ncbi_macros.xml</import> |
8 </requirements> | 8 </macros> |
9 <version_command>blastn -version</version_command> | 9 <expand macro="requirements" /> |
10 <command> | 10 <command> |
11 ## The command is a Cheetah template which allows some Python based syntax. | 11 ## The command is a Cheetah template which allows some Python based syntax. |
12 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces | 12 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces |
13 blastn | 13 blastn |
14 -query "$query" | 14 -query "$query" |
15 #if $db_opts.db_opts_selector == "db": | 15 @BLAST_DB_SUBJECT@ |
16 -db "${db_opts.database.fields.path}" | |
17 #elif $db_opts.db_opts_selector == "histdb": | |
18 -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" | |
19 #else: | |
20 -subject "$db_opts.subject" | |
21 #end if | |
22 -task $blast_type | 16 -task $blast_type |
23 -evalue $evalue_cutoff | 17 -evalue $evalue_cutoff |
24 -out "$output1" | 18 @BLAST_OUTPUT@ |
25 ##Set the extended list here so if/when we add things, saved workflows are not affected | 19 @THREADS@ |
26 #if str($out_format)=="ext": | |
27 -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" | |
28 #else: | |
29 -outfmt $out_format | |
30 #end if | |
31 -num_threads 8 | |
32 #if $adv_opts.adv_opts_selector=="advanced": | 20 #if $adv_opts.adv_opts_selector=="advanced": |
33 $adv_opts.filter_query | |
34 $adv_opts.strand | 21 $adv_opts.strand |
35 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string | 22 @ADVANCED_OPTIONS@ |
36 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments | |
37 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): | |
38 -max_target_seqs $adv_opts.max_hits | |
39 #end if | |
40 #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ): | 23 #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ): |
41 -perc_identity $adv_opts.identity_cutoff | 24 -perc_identity $adv_opts.identity_cutoff |
42 #end if | 25 #end if |
43 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): | |
44 -word_size $adv_opts.word_size | |
45 #end if | |
46 $adv_opts.ungapped | 26 $adv_opts.ungapped |
47 $adv_opts.parse_deflines | |
48 ## End of advanced options: | 27 ## End of advanced options: |
49 #end if | 28 #end if |
50 </command> | 29 </command> |
51 <stdio> | 30 |
52 <!-- Anything other than zero is an error --> | 31 <expand macro="stdio" /> |
53 <exit_code range="1:" /> | 32 |
54 <exit_code range=":-1" /> | |
55 <!-- In case the return code has not been set properly check stderr too --> | |
56 <regex match="Error:" /> | |
57 <regex match="Exception:" /> | |
58 </stdio> | |
59 <inputs> | 33 <inputs> |
60 <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> | 34 <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> |
61 <conditional name="db_opts"> | 35 |
62 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | 36 <expand macro="input_conditional_nucleotide_db" /> |
63 <option value="db" selected="True">Locally installed BLAST database</option> | 37 |
64 <option value="histdb">BLAST database from your history</option> | |
65 <option value="file">FASTA file from your history (see warning note below)</option> | |
66 </param> | |
67 <when value="db"> | |
68 <param name="database" type="select" label="Nucleotide BLAST database"> | |
69 <options from_file="blastdb.loc"> | |
70 <column name="value" index="0"/> | |
71 <column name="name" index="1"/> | |
72 <column name="path" index="2"/> | |
73 </options> | |
74 </param> | |
75 <param name="histdb" type="hidden" value="" /> | |
76 <param name="subject" type="hidden" value="" /> | |
77 </when> | |
78 <when value="histdb"> | |
79 <param name="database" type="hidden" value="" /> | |
80 <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" /> | |
81 <param name="subject" type="hidden" value="" /> | |
82 </when> | |
83 <when value="file"> | |
84 <param name="database" type="hidden" value="" /> | |
85 <param name="histdb" type="hidden" value="" /> | |
86 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> | |
87 </when> | |
88 </conditional> | |
89 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> | 38 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> |
90 <option value="megablast">megablast</option> | 39 <option value="megablast">megablast</option> |
91 <option value="blastn">blastn</option> | 40 <option value="blastn">blastn</option> |
92 <option value="blastn-short">blastn-short</option> | 41 <option value="blastn-short">blastn-short</option> |
93 <option value="dc-megablast">dc-megablast</option> | 42 <option value="dc-megablast">dc-megablast</option> |
94 <!-- Using BLAST 2.2.24+ this gives an error: | 43 <!-- Using BLAST 2.2.24+ this gives an error: |
95 BLAST engine error: Program type 'vecscreen' not supported | 44 BLAST engine error: Program type 'vecscreen' not supported |
96 <option value="vecscreen">vecscreen</option> | 45 <option value="vecscreen">vecscreen</option> |
97 --> | 46 --> |
98 </param> | 47 </param> |
99 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | 48 <expand macro="input_evalue" /> |
100 <param name="out_format" type="select" label="Output format"> | 49 <expand macro="input_out_format" /> |
101 <option value="6">Tabular (standard 12 columns)</option> | 50 <expand macro="advanced_options"> |
102 <option value="ext" selected="True">Tabular (extended 24 columns)</option> | 51 <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' --> |
103 <option value="5">BLAST XML</option> | 52 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" /> |
104 <option value="0">Pairwise text</option> | 53 <expand macro="input_strand" /> |
105 <option value="0 -html">Pairwise HTML</option> | 54 <expand macro="input_max_hits" /> |
106 <option value="2">Query-anchored text</option> | 55 <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" /> |
107 <option value="2 -html">Query-anchored HTML</option> | 56 |
108 <option value="4">Flat query-anchored text</option> | 57 <!-- I'd like word_size to be optional, with minimum 4 for blastn --> |
109 <option value="4 -html">Flat query-anchored HTML</option> | 58 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4."> |
110 <!-- | 59 <validator type="in_range" min="0" /> |
111 <option value="-outfmt 11">BLAST archive format (ASN.1)</option> | |
112 --> | |
113 </param> | |
114 <conditional name="adv_opts"> | |
115 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
116 <option value="basic" selected="True">Hide Advanced Options</option> | |
117 <option value="advanced">Show Advanced Options</option> | |
118 </param> | 60 </param> |
119 <when value="basic" /> | 61 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" /> |
120 <when value="advanced"> | 62 <expand macro="input_parse_deflines" /> |
121 <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' --> | 63 </expand> |
122 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" /> | |
123 <param name="strand" type="select" label="Query strand(s) to search against database/subject"> | |
124 <option value="-strand both">Both</option> | |
125 <option value="-strand plus">Plus (forward)</option> | |
126 <option value="-strand minus">Minus (reverse complement)</option> | |
127 </param> | |
128 <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> | |
129 <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> | |
130 <validator type="in_range" min="0" /> | |
131 </param> | |
132 <param name="identity_cutoff" type="float" min="0" max="100" value="0" label="Percent identity cutoff (-perc_identity)" help="Use zero for no cutoff" /> | |
133 <!-- I'd like word_size to be optional, with minimum 4 for blastn --> | |
134 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4."> | |
135 <validator type="in_range" min="0" /> | |
136 </param> | |
137 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" /> | |
138 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
139 </when> | |
140 </conditional> | |
141 </inputs> | 64 </inputs> |
142 <outputs> | 65 <outputs> |
143 <data name="output1" format="tabular" label="${blast_type.value_label} on ${on_string}"> | 66 <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@"> |
144 <change_format> | 67 <expand macro="output_change_format" /> |
145 <when input="out_format" value="0" format="txt"/> | |
146 <when input="out_format" value="0 -html" format="html"/> | |
147 <when input="out_format" value="2" format="txt"/> | |
148 <when input="out_format" value="2 -html" format="html"/> | |
149 <when input="out_format" value="4" format="txt"/> | |
150 <when input="out_format" value="4 -html" format="html"/> | |
151 <when input="out_format" value="5" format="blastxml"/> | |
152 </change_format> | |
153 </data> | 68 </data> |
154 </outputs> | 69 </outputs> |
155 <tests> | 70 <tests> |
156 <test> | 71 <test> |
157 <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> | 72 <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> |
164 <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" /> | 79 <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" /> |
165 </test> | 80 </test> |
166 </tests> | 81 </tests> |
167 <help> | 82 <help> |
168 | 83 |
169 .. class:: warningmark | 84 @SEARCH_TIME_WARNING@ |
170 | |
171 **Note**. Database searches may take a substantial amount of time. | |
172 For large input datasets it is advisable to allow overnight processing. | |
173 | |
174 ----- | |
175 | 85 |
176 **What it does** | 86 **What it does** |
177 | 87 |
178 Search a *nucleotide database* using a *nucleotide query*, | 88 Search a *nucleotide database* using a *nucleotide query*, |
179 using the NCBI BLAST+ blastn command line tool. | 89 using the NCBI BLAST+ blastn command line tool. |
180 Algorithms include blastn, megablast, and discontiguous megablast. | 90 Algorithms include blastn, megablast, and discontiguous megablast. |
181 | 91 |
182 .. class:: warningmark | 92 @FASTA_WARNING@ |
183 | |
184 You can also search against a FASTA file of subject nucleotide | |
185 sequences. This is *not* advised because it is slower (only one | |
186 CPU is used), but more importantly gives e-values for pairwise | |
187 searches (very small e-values which will look overly significant). | |
188 In most cases you should instead turn the other FASTA file into a | |
189 database first using *makeblastdb* and search against that. | |
190 | 93 |
191 ----- | 94 ----- |
192 | 95 |
193 **Output format** | 96 @OUTPUT_FORMAT@ |
194 | |
195 Because Galaxy focuses on processing tabular data, the default output of this | |
196 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
197 | |
198 ====== ========= ============================================ | |
199 Column NCBI name Description | |
200 ------ --------- -------------------------------------------- | |
201 1 qseqid Query Seq-id (ID of your sequence) | |
202 2 sseqid Subject Seq-id (ID of the database hit) | |
203 3 pident Percentage of identical matches | |
204 4 length Alignment length | |
205 5 mismatch Number of mismatches | |
206 6 gapopen Number of gap openings | |
207 7 qstart Start of alignment in query | |
208 8 qend End of alignment in query | |
209 9 sstart Start of alignment in subject (database hit) | |
210 10 send End of alignment in subject (database hit) | |
211 11 evalue Expectation value (E-value) | |
212 12 bitscore Bit score | |
213 ====== ========= ============================================ | |
214 | |
215 The BLAST+ tools can optionally output additional columns of information, | |
216 but this takes longer to calculate. Most (but not all) of these columns are | |
217 included by selecting the extended tabular output. The extra columns are | |
218 included *after* the standard 12 columns. This is so that you can write | |
219 workflow filtering steps that accept either the 12 or 24 column tabular | |
220 BLAST output. Galaxy now uses this extended 24 column output by default. | |
221 | |
222 ====== ============= =========================================== | |
223 Column NCBI name Description | |
224 ------ ------------- ------------------------------------------- | |
225 13 sallseqid All subject Seq-id(s), separated by a ';' | |
226 14 score Raw score | |
227 15 nident Number of identical matches | |
228 16 positive Number of positive-scoring matches | |
229 17 gaps Total number of gaps | |
230 18 ppos Percentage of positive-scoring matches | |
231 19 qframe Query frame | |
232 20 sframe Subject frame | |
233 21 qseq Aligned part of query sequence | |
234 22 sseq Aligned part of subject sequence | |
235 23 qlen Query sequence length | |
236 24 slen Subject sequence length | |
237 ====== ============= =========================================== | |
238 | |
239 The third option is BLAST XML output, which is designed to be parsed by | |
240 another program, and is understood by some Galaxy tools. | |
241 | |
242 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
243 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
244 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
245 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
246 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
247 | 97 |
248 ------- | 98 ------- |
249 | 99 |
250 **References** | 100 **References** |
251 | 101 |
252 If you use this Galaxy tool in work leading to a scientific publication please | 102 If you use this Galaxy tool in work leading to a scientific publication please |
253 cite the following papers: | 103 cite the following papers: |
254 | 104 |
255 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). | 105 @REFERENCES@ |
256 Galaxy tools and workflows for sequence analysis with applications | |
257 in molecular plant pathology. PeerJ 1:e167 | |
258 http://dx.doi.org/10.7717/peerj.167 | |
259 | |
260 Christiam Camacho et al. (2009). | |
261 BLAST+: architecture and applications. | |
262 BMC Bioinformatics. 15;10:421. | |
263 http://dx.doi.org/10.1186/1471-2105-10-421 | |
264 | |
265 This wrapper is available to install into other Galaxy Instances via the Galaxy | |
266 Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus | |
267 </help> | 106 </help> |
268 </tool> | 107 </tool> |