Mercurial > repos > edward-kirton > blast
comparison blast/blastn.xml @ 0:f3ac34855f5e default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:30:11 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f3ac34855f5e |
---|---|
1 <tool id="blastn" name="blastn" version="0.0.1"> | |
2 <description>Search nucleotide database with nucleotide query sequence(s)</description> | |
3 <!-- Galaxy wrapper for the NCBI BLAST+ blastn command line tool. The command element below builds the command line from the parameters declared under inputs. --> | |
4 <command> | |
5 ## The command is a Cheetah template which allows some Python based syntax. | |
6 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces | |
7 blastn | |
8 -query "$query" | |
9 ## Search target: a precompiled database, a BLAST database from the history, or a FASTA subject file. | |
10 #if $db_opts.db_opts_selector == "db": | |
11 -db "$db_opts.database" | |
12 #elif $db_opts.db_opts_selector == "user_db": | |
13 ## Quoted like the other paths so that a path containing spaces stays one argument. | |
14 -db "${os.path.join($db_opts.db.extra_files_path,'blastdb')}" | |
15 #else: | |
16 -subject "$db_opts.subject" | |
17 #end if | |
18 -task $blast_type | |
19 -evalue $evalue_cutoff | |
20 -out "$output1" | |
21 ## The HTML output choices are stored as "FORMAT -html". Inside the quoted -outfmt value the | |
22 ## -html text is not seen by blastn as an option, so emit it as a separate argument. | |
23 #set $outfmt_spec = str($out_format) | |
24 #set $html_suffix = " -html" | |
25 #if $outfmt_spec.endswith($html_suffix): | |
26 #set $outfmt_spec = $outfmt_spec[:-len($html_suffix)] | |
27 -outfmt "$outfmt_spec" -html | |
28 #else: | |
29 -outfmt "$outfmt_spec" | |
30 #end if | |
31 #if $adv_opts.adv_opts_selector=="advanced": | |
32 $adv_opts.filter_query | |
33 $adv_opts.strand | |
34 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string | |
35 ## Note -max_target_seqs overrides -num_descriptions and -num_alignments | |
36 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): | |
37 -max_target_seqs $adv_opts.max_hits | |
38 #end if | |
39 #if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): | |
40 -word_size $adv_opts.word_size | |
41 #end if | |
42 $adv_opts.ungapped | |
43 $adv_opts.parse_deflines | |
44 ## End of advanced options: | |
45 #end if | |
46 </command> | |
47 <inputs> | |
48 <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> | |
49 <conditional name="db_opts"> | |
50 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
51 <option value="db" selected="True">Precompiled BLAST Database</option> | |
52 <option value="user_db">BLAST Database in your History</option> | |
53 <option value="fasta">FASTA file</option> | |
54 </param> | |
55 <when value="db"> | |
56 <param name="database" type="select" label="Precompiled Nucleotide BLAST database"> | |
57 <!-- The BLAST loc file has three columns: | |
58 column 0 is an identifier (not used here, see legacy megablast wrapper), | |
59 column 1 is the caption (show this to the user), | |
60 column 2 is the database path (given to BLAST+) --> | |
61 <options from_file="blastdb.loc"> | |
62 <column name="name" index="1"/> | |
63 <column name="value" index="2"/> | |
64 </options> | |
65 </param> | |
66 <param name="subject" type="hidden" value="" /> | |
67 </when> | |
68 <when value="user_db"> | |
69 <param name="database" type="hidden" value="" /> | |
70 <param name="db" type="data" format="blastdb" label="Blast DB" /> | |
71 </when> | |
72 <when value="fasta"> | |
73 <param name="database" type="hidden" value="" /> | |
74 <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/> | |
75 </when> | |
76 </conditional> | |
77 <param name="blast_type" type="select" display="radio" label="Type of BLAST"> | |
78 <option value="megablast">megablast</option> | |
79 <option value="blastn">blastn</option> | |
80 <option value="blastn-short">blastn-short</option> | |
81 <option value="dc-megablast">dc-megablast</option> | |
82 <!-- Using BLAST 2.2.24+ this gives an error: | |
83 BLAST engine error: Program type 'vecscreen' not supported | |
84 <option value="vecscreen">vecscreen</option> | |
85 --> | |
86 </param> | |
87 <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> | |
88 <param name="out_format" type="select" label="Output format"> | |
89 <!-- Values ending in " -html" are split by the command template into the -outfmt value and a separate -html flag --> | |
90 <option value="6" selected="True">Tabular (standard 12 columns)</option> | |
91 <option value="6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq">Tabular (extended 22 columns)</option> | |
92 <option value="5">BLAST XML</option> | |
93 <option value="0">Pairwise text</option> | |
94 <option value="0 -html">Pairwise HTML</option> | |
95 <option value="2">Query-anchored text</option> | |
96 <option value="2 -html">Query-anchored HTML</option> | |
97 <option value="4">Flat query-anchored text</option> | |
98 <option value="4 -html">Flat query-anchored HTML</option> | |
99 <!-- | |
100 <option value="-outfmt 11">BLAST archive format (ASN.1)</option> | |
101 --> | |
102 </param> | |
103 <conditional name="adv_opts"> | |
104 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
105 <option value="basic" selected="True">Hide Advanced Options</option> | |
106 <option value="advanced">Show Advanced Options</option> | |
107 </param> | |
108 <when value="basic" /> | |
109 <when value="advanced"> | |
110 <!-- Could use a select (yes, no, other) where other allows setting 'level window linker' --> | |
111 <param name="filter_query" type="boolean" label="Filter out low complexity regions (with DUST)" truevalue="-dust yes" falsevalue="-dust no" checked="true" /> | |
112 <param name="strand" type="select" label="Query strand(s) to search against database/subject"> | |
113 <option value="-strand both">Both</option> | |
114 <option value="-strand plus">Plus (forward)</option> | |
115 <option value="-strand minus">Minus (reverse complement)</option> | |
116 </param> | |
117 <!-- Why doesn't optional override a validator? I want to accept an empty string OR a non-negative integer --> | |
118 <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits"> | |
119 <validator type="in_range" min="0" /> | |
120 </param> | |
121 <!-- I'd like word_size to be optional, with minimum 4 for blastn --> | |
122 <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4."> | |
123 <validator type="in_range" min="0" /> | |
124 <!-- in_range alone cannot express "zero or at least 4", so reject 1, 2 and 3 here instead of letting blastn fail --> | |
125 <validator type="expression" message="Word size must be zero (use the default) or at least 4">int(value) == 0 or int(value) &gt;= 4</validator> | |
126 </param> | |
127 <param name="ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" /> | |
128 <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/> | |
129 </when> | |
130 </conditional> | |
131 </inputs> | |
132 <outputs> | |
133 <!-- Tabular is the default datatype; change_format switches it for the text, HTML and BLAST XML output choices --> | |
134 <data name="output1" format="tabular" label="${blast_type.value_label} on ${db_opts.db_opts_selector}"> | |
135 <change_format> | |
136 <when input="out_format" value="0" format="txt"/> | |
137 <when input="out_format" value="0 -html" format="html"/> | |
138 <when input="out_format" value="2" format="txt"/> | |
139 <when input="out_format" value="2 -html" format="html"/> | |
140 <when input="out_format" value="4" format="txt"/> | |
141 <when input="out_format" value="4 -html" format="html"/> | |
142 <when input="out_format" value="5" format="blastxml"/> | |
143 </change_format> | |
144 </data> | |
145 </outputs> | |
146 <requirements> | |
147 <requirement type="binary">blastn</requirement> | |
148 </requirements> | |
149 <tests> | |
150 <!-- No functional tests are defined for this tool yet --> | |
151 </tests> | |
152 <help> | |
153 | |
154 .. class:: warningmark | |
155 | |
156 **Note**. Database searches may take a substantial amount of time. | |
157 For large input datasets it is advisable to allow overnight processing. | |
158 | |
159 ----- | |
160 | |
161 **What it does** | |
162 | |
163 Search a *nucleotide database* using a *nucleotide query*, | |
164 using the NCBI BLAST+ blastn command line tool. | |
165 Algorithms include blastn, blastn-short, megablast, and discontiguous megablast. | |
166 | |
167 ----- | |
168 | |
169 **Output format** | |
170 | |
171 Because Galaxy focuses on processing tabular data, the default output of this | |
172 tool is tabular. The standard BLAST+ tabular output contains 12 columns: | |
173 | |
174 ====== ========= ============================================ | |
175 Column NCBI name Description | |
176 ------ --------- -------------------------------------------- | |
177 1      qseqid    Query Seq-id (ID of your sequence) | |
178 2      sseqid    Subject Seq-id (ID of the database hit) | |
179 3      pident    Percentage of identical matches | |
180 4      length    Alignment length | |
181 5      mismatch  Number of mismatches | |
182 6      gapopen   Number of gap openings | |
183 7      qstart    Start of alignment in query | |
184 8      qend      End of alignment in query | |
185 9      sstart    Start of alignment in subject (database hit) | |
186 10     send      End of alignment in subject (database hit) | |
187 11     evalue    Expectation value (E-value) | |
188 12     bitscore  Bit score | |
189 ====== ========= ============================================ | |
190 | |
191 The BLAST+ tools can optionally output additional columns of information, | |
192 but this takes longer to calculate. Most (but not all) of these columns are | |
193 included by selecting the extended tabular output. The extra columns are | |
194 included *after* the standard 12 columns. This is so that you can write | |
195 workflow filtering steps that accept either the 12 or 22 column tabular | |
196 BLAST output. | |
197 | |
198 ====== ============= =========================================== | |
199 Column NCBI name     Description | |
200 ------ ------------- ------------------------------------------- | |
201 13     sallseqid     All subject Seq-id(s), separated by a ';' | |
202 14     score         Raw score | |
203 15     nident        Number of identical matches | |
204 16     positive      Number of positive-scoring matches | |
205 17     gaps          Total number of gaps | |
206 18     ppos          Percentage of positive-scoring matches | |
207 19     qframe        Query frame | |
208 20     sframe        Subject frame | |
209 21     qseq          Aligned part of query sequence | |
210 22     sseq          Aligned part of subject sequence | |
211 ====== ============= =========================================== | |
212 | |
213 The third option is BLAST XML output, which is designed to be parsed by | |
214 another program, and is understood by some Galaxy tools. | |
215 | |
216 You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). | |
217 The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. | |
218 The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. | |
219 The two query anchored outputs show a multiple sequence alignment between the query and all the matches, | |
220 and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). | |
221 | |
222 ------- | |
223 | |
224 **References** | |
225 | |
226 Zhang et al. A Greedy Algorithm for Aligning DNA Sequences. 2000. JCB: 203-214. | |
227 | |
228 </help> | |
229 </tool> | |