comparison diamond.xml @ 1:df7738595640 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/diamond commit cc80b878817d052398db16574917900ebe15292e
author bgruening
date Mon, 06 Feb 2017 07:08:25 -0500
parents 98037ef3d2a9
children 830516f9521b
comparison
equal deleted inserted replaced
0:98037ef3d2a9 1:df7738595640
1 <tool id="bg_diamond" name="Diamond" version="0.1.6.0"> 1 <tool id="bg_diamond" name="Diamond" version="@VERSION@">
2 <description>alignment tool for short sequences against a protein database</description> 2 <description>alignment tool for short sequences against a protein database</description>
3 <requirements> 3 <macros>
4 <requirement type="package" version="0.6.13">diamond</requirement> 4 <import>macros.xml</import>
5 </requirements> 5 </macros>
6 <expand macro="requirements" />
7 <expand macro="stdio" />
8 <expand macro="version_command" />
6 <command> 9 <command>
7 <![CDATA[ 10 <![CDATA[
8 11
9 #if $ref_db_source.db_source == "history": 12 #if $ref_db_source.db_source == "history":
10 ln -s $ref_db_source.reference_database ./database.dmnd 13 ln -s $ref_db_source.reference_database ./database.dmnd
13 #end if 16 #end if
14 17
15 && 18 &&
16 19
17 diamond 20 diamond
18 $method.method_select 21 $method_select
19 --threads "\${GALAXY_SLOTS:-12}" 22 --threads "\${GALAXY_SLOTS:-12}"
20 --db ./database 23 --db ./database
21 --query $query 24 --query '$query'
22 --out $blast_output 25 --query-gencode '$query_gencode'
23 ##--sam $sam_output 26
24 --compress 0 27 #if $output.outfmt == "5"
25 --tmpdir ./ 28 --outfmt '5'
29 --out '$blast_xml'
30 $output.salltitles
31 #else if $output.outfmt == "6"
32 --outfmt '6' #echo ' '.join(str($output.fields).split(','))
33 --out '$blast_tabular'
34 #else if $output.outfmt == "101"
35 --outfmt '101'
36 --out '$sam_output'
37 $output.salltitles
38 #end if
39
40 --compress '0'
41 $sensitive
42 $more_sensitive
43 --gapopen '$gapopen'
44 --gapextend '$gapextend'
45 --matrix '$matrix'
46 --seg '$seg'
26 47
27 #if str($hit_filter.hit_filter_select) == 'max': 48 #if str($hit_filter.hit_filter_select) == 'max':
28 --max-target-seqs $hit_filter.max 49 --max-target-seqs '$hit_filter.max_target_seqs'
29 #else: 50 #else:
30 --top $hit_filter.percentage 51 --top '$hit_filter.top'
31 #end if 52 #end if
32 53
33 #if str($filter_score.filter_score_select) == 'evalue': 54 #if str($filter_score.filter_score_select) == 'evalue':
34 --evalue $filter_score.evalue 55 --evalue '$filter_score.evalue'
35 #else: 56 #else:
36 --evalue $filter_score.bitscore 57 --min-score '$filter_score.min_score'
37 #end if 58 #end if
38 59
39 --id $identity 60 --id '$id'
40 $sensitive 61 --query-cover '$query_cover'
41 --gapopen $method.gapopen 62 --block-size '$block_size'
42 --gapextend $method.gapextend
43 --matrix $matrix
44 $seg
45 $salltitles
46
47 ]]> 63 ]]>
48 </command> 64 </command>
65
49 <inputs> 66 <inputs>
50 67 <param name="method_select" type="select" label="What do you want to align?" help="(--blastp/--blastx)">
51 <param name="query" type="data" format="fasta" label="Input query file in FASTA format" /> 68 <option value="blastp">Align amino acid query sequences (blastp)</option>
52 69 <option value="blastx">Align DNA query sequences (blastx)</option>
70 </param>
71 <param argument="--query" type="data" format="fasta,fastq" label="Input query file in FASTA or FASTQ format" />
53 <conditional name="ref_db_source"> 72 <conditional name="ref_db_source">
54 <param name="db_source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 73 <param name="db_source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
55 <option value="indexed">Use a built-in index</option> 74 <option value="indexed">Use a built-in index</option>
56 <option value="history">Use one from the history</option> 75 <option value="history">Use one from the history</option>
57 </param> 76 </param>
60 <options from_data_table="diamond_database"> 79 <options from_data_table="diamond_database">
61 <filter type="sort_by" column="2"/> 80 <filter type="sort_by" column="2"/>
62 <validator type="no_options" message="No indexes are available for the selected input dataset"/> 81 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
63 </options> 82 </options>
64 </param> 83 </param>
65 </when> <!-- build-in --> 84 </when>
66 <when value="history"> 85 <when value="history">
67 <param name="reference_database" type="data" format="diamond_database" label="Select the reference database" /> 86 <param name="reference_database" type="data" format="dmnd" label="Select the reference database" />
68 </when> <!-- history --> 87 </when>
69 </conditional> 88 </conditional>
70 89 <param name="query_gencode" argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help="">
71 <conditional name="method"> 90 <option value="1">The Standard Code</option>
72 <param name="method_select" type="select" label="What do you want to align" help="(--blastp/--blastx)"> 91 <option value="2">The Vertebrate Mitochondrial Code</option>
73 <option value="blastp">Align amino acid query sequences (blastp)</option> 92 <option value="3">The Yeast Mitochondrial Code</option>
74 <option value="blastx">Align DNA query sequences (blastx)</option> 93 <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
75 </param> 94 <option value="5">The Invertebrate Mitochondrial Code</option>
76 <when value="blastp"> 95 <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
77 <param name="gapopen" type="integer" value="11" label="Gap open panalty" help="(--gapopen)" /> 96 <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
78 <param name="gapextend" type="integer" value="1" label="Gap extend panalty" help="(--gapextend)" /> 97 <option value="10">The Euplotid Nuclear Code</option>
79 </when> 98 <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
80 <when value="blastx"> 99 <option value="12">The Alternative Yeast Nuclear Code</option>
81 <param name="gapopen" type="integer" value="-1" label="Gap open panalty" help="(--gapopen)" /> 100 <option value="13">The Ascidian Mitochondrial Code</option>
82 <param name="gapextend" type="integer" value="-1" label="Gap extend panalty" help="(--gapextend)" /> 101 <option value="14">The Alternative Flatworm Mitochondrial Code</option>
83 </when> 102 <option value="16">Chlorophycean Mitochondrial Code</option>
84 </conditional> 103 <option value="21">Trematode Mitochondrial Code</option>
85 104 <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
86 <param name="matrix" type="select" label="Select scoring matrix" help="(--matrix)"> 105 <option value="23">Thraustochytrium Mitochondrial Code</option>
87 <option value="BLOSUM45">BLOSUM45</option> 106 <option value="24">Pterobranchia Mitochondrial Code</option>
88 <option value="BLOSUM50">BLOSUM50</option> 107 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
89 <option value="BLOSUM62" selected="True">BLOSUM62</option> 108 <option value="26">Pachysolen tannophilus Nuclear Code</option>
90 <option value="BLOSUM80">BLOSUM80</option>
91 <option value="BLOSUM90">BLOSUM90</option>
92 <option value="PAM250">PAM250</option>
93 <option value="PAM70">PAM70</option>
94 <option value="PAM30">PAM30</option>
95 </param> 109 </param>
96 110 <conditional name="output">
111 <param argument="--outfmt" type="select" label="Format of output file " help="">
112 <option value="5">BLAST XML</option>
113 <option value="6">BLAST tabular</option>
114 <option value="101">SAM</option>
115 </param>
116 <when value="5">
117 <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full length subject titles in output?" help=""/>
118 </when>
119 <when value="6">
120 <param name="fields" type="select" label="Tabular fields" help="" multiple="true">
121 <option value="qseqid" selected="true">Query Seq - id</option>
122 <option value="sseqid" selected="true">Subject Seq - id</option>
123 <option value="sallseqid">All subject Seq - id(s)</option>
124 <option value="qlen">Query sequence length</option>
125 <option value="slen">Subject sequence length</option>
126 <option value="pident" selected="true">Percentage of identical matches</option>
127 <option value="length" selected="true">Alignment length</option>
128 <option value="nident">Number of identical matches</option>
129 <option value="mismatch" selected="true">Number of mismatches</option>
130 <option value="positive">Number of positive - scoring matches</option>
131 <option value="gapopen" selected="true">Number of gap openings</option>
132 <option value="gaps">Total number of gaps</option>
133 <option value="ppos">Percentage of positive - scoring matches</option>
134 <option value="qstart" selected="true">Start of alignment in query</option>
135 <option value="qend" selected="true">End of alignment in query</option>
136 <option value="sstart" selected="true">Start of alignment in subject</option>
137 <option value="send" selected="true">End of alignment in subject</option>
138 <option value="qseq">Aligned part of query sequence</option>
139 <option value="sseq">Aligned part of subject sequence</option>
140 <option value="evalue" selected="true">Expect value</option>
141 <option value="bitscore" selected="true">Bit score</option>
142 <option value="score">Raw score</option>
143 <option value="qframe">Query frame</option>
144 <option value="stitle">Subject Title</option>
145 <option value="salltitles">All Subject Title(s)</option>
146 <option value="qcovhsp">Query Coverage Per HSP</option>
147 </param>
148 </when>
149 <when value="101">
150 <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full length subject titles in output?" help=""/>
151 </when>
152 </conditional>
153 <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="false" label="Trigger the sensitive alignment mode with a 16x9 seed shape configuration?" help=""/>
154 <param name="more_sensitive" argument="--more-sensitive" type="boolean" truevalue="--more-sensitive" falsevalue="" checked="false" label="Trigger the more sensitive mode?" help="This mode provides some additional sensitivity compared to the sensitive mode."/>
155 <param argument="--gapopen" type="integer" value="11" label="Gap open penalty" help="" />
156 <param argument="--gapextend" type="integer" value="1" label="Gap extension penalty" help="" />
157 <param argument="--matrix" type="select" label="Scoring matrix" help="In brackets are the supported values for (gap open)/(gap extend)">
158 <option value="BLOSUM45">BLOSUM45 ((10-13)/3; (12-16)/2; (16-19)/1)</option>
159 <option value="BLOSUM50">BLOSUM50 ((9-13)/3; (12-16)/2; (15-19)/1)</option>
160 <option value="BLOSUM62" selected="True">BLOSUM62 ((6-11)/2; (9-13)/1)</option>
161 <option value="BLOSUM80">BLOSUM80 ((6-9)/2; 13/2; 25/2; (9-11)/1)</option>
162 <option value="BLOSUM90">BLOSUM90 ((6-9)/2; (9-11)/1)</option>
163 <option value="PAM250">PAM250 ((11-15)/3; (13-17)/2; (17-21)/1)</option>
164 <option value="PAM70">PAM70 ((6-8)/2; (9-11)/1)</option>
165 <option value="PAM30">PAM30 ((5-7)/2; (8-10)/1)</option>
166 </param>
167 <param argument="--seg" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Enable SEG masking of low complexity segments in the query?" help=""/>
168 <conditional name="hit_filter">
169 <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">
170 <option value="max">Maximum number of target sequences</option>
171 <option value="top">Percentage of top alignment score</option>
172 </param>
173 <when value="max">
174 <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to keep alignments for" help="" />
175 </when>
176 <when value="top">
177 <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="" />
178 </when>
179 </conditional>
97 <conditional name="filter_score"> 180 <conditional name="filter_score">
98 <param name="filter_score_select" type="select" label="Filter by score" help="(--evalue/--min-score)"> 181 <param name="filter_score_select" type="select" label="Method to filter?" help="(--evalue/--min-score)">
99 <option value="evalue">Maximum e-value to report alignments</option> 182 <option value="evalue">Maximum e-value to report alignments</option>
100 <option value="bit">Minimum bit score to report alignments</option> 183 <option value="min-score">Minimum bit score to report alignments</option>
101 </param> 184 </param>
102 <when value="evalue"> 185 <when value="evalue">
103 <param name="evalue" type="float" value="0.001" label="Filter by evalue" help="(--evalue)" /> 186 <param argument="--evalue" type="float" value="0.001" label="Maximum expected value to keep an alignment" />
104 </when> 187 </when>
105 <when value="bit"> 188 <when value="min-score">
106 <param name="bitscore" type="integer" value="0" label="Filter by bit score" help="(--min-score)" /> 189 <param name="min_score" argument="--min-score" type="integer" value="0" label="Minimum bit score to keep an alignment" help="(--min-score)" />
107 </when> 190 </when>
108 </conditional> 191 </conditional>
109 192 <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" />
110 <conditional name="hit_filter"> 193 <param name="query_cover" argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" />
111 <param name="hit_filter_select" type="select" label="Restrict number of hits by" help="(--max-target-seqs/--top)"> 194 <param name="block_size" argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" />
112 <option value="max">Maximum number of target sequences</option>
113 <option value="percentage">Percentage of top alignment score</option>
114 </param>
115 <when value="max">
116 <param name="max" type="integer" value="25" label="How many hits?" help="(--max-target-seqs)" />
117 </when>
118 <when value="percentage">
119 <param name="percentage" type="integer" value="0" label="How many percentage" help="(--top)" />
120 </when>
121 </conditional>
122
123 <param name="identity" type="integer" value="0" label="minimum identity to report an alignment" help="in percentage (--id)" />
124 <param name="salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="false"
125 label="Print subject titles into the blast tabular format" help="(--salltitles)"/>
126 <param name="seg" type="boolean" truevalue="--seg yes" falsevalue="--seg no" checked="true"
127 label="Enable SEG masking of queries" help="(--seg)"/>
128 <param name="sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="false"
129 label="Enable sensitive mode" help="(--sensitive)"/>
130 </inputs> 195 </inputs>
196
131 <outputs> 197 <outputs>
132 <!--data format="sam" name="sam_output"/--> 198 <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
133 <data format="tabular" name="blast_output"/> 199 <filter>output["outfmt"] == "5"</filter>
200 </data>
201 <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
202 <filter>output["outfmt"] == "6"</filter>
203 </data>
204 <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
205 <filter>output["outfmt"] == "101"</filter>
206 </data>
134 </outputs> 207 </outputs>
208
135 <tests> 209 <tests>
136 <test> 210 <test>
137 <param name="method" value="blastp"/> 211 <param name="method_select" value="blastp" />
138 <param name="query" value="protein.fasta" ftype="fasta"/> 212 <param name="query" value="protein.fasta" ftype="fasta"/>
139 <param name="reference_database" value="diamond_makedb_result1.dmnd" ftype="diamond_database"/>
140 <param name="db_source" value="history"/> 213 <param name="db_source" value="history"/>
141 <output name="blast_output" file="diamond_result1.tabular" ftpye="tabular"/> 214 <param name="reference_database" value="db.dmnd"/>
215 <param name="query_gencode" value="1"/>
216 <param name="outfmt" value="6"/>
217 <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore"/>
218 <param name="sensitive" value=""/>
219 <param name="more_sensitive" value=""/>
220 <param name="gapopen" value="11"/>
221 <param name="gapextend" value="1"/>
222 <param name="matrix" value="BLOSUM62"/>
223 <param name="seg" value="yes"/>
224 <param name="hit_filter_select" value="max"/>
225 <param name="max_target_seqs" value="25" />
226 <param name="filter_score_select" value="evalue"/>
227 <param name="evalue" value="0.001" />
228 <param name="id" value="0"/>
229 <param name="query_cover" value="0"/>
230 <param name="block_size" value="2"/>
231 <output name="blast_tabular" file="diamond_results.tabular"/>
142 </test> 232 </test>
143 </tests> 233 </tests>
234
144 <help> 235 <help>
145 <![CDATA[ 236 <![CDATA[
146
147 .. class:: infomark
148 237
149 **What it does** 238 **What it does**
150 239
151 DIAMOND_ is a new alignment tool for aligning short DNA sequencing reads to a protein reference database such as NCBI-NR. 240 DIAMOND_ is a new alignment tool for aligning short DNA sequencing reads to a protein reference database such as NCBI-NR.
152 On Illumina reads of length 100-150bp, in fast mode, DIAMOND is about 20,000 times faster than BLASTX, while reporting 241 On Illumina reads of length 100-150bp, in fast mode, DIAMOND is about 20,000 times faster than BLASTX, while reporting
153 about 80-90% of all matches that BLASTX finds, with an e-value of at most 1e-5. In sensitive mode, DIAMOND is about 2,500 242 about 80-90% of all matches that BLASTX finds, with an e-value of at most 1e-5. In sensitive mode, DIAMOND is about 2,500
154 times faster than BLASTX, finding more than 94% of all matches. 243 times faster than BLASTX, finding more than 94% of all matches.
155 244
245 The DIAMOND algorithm is designed for the alignment of large datasets. The algorithm is not efficient for a small number of query sequences or only a single one of them, and speed will be low. BLAST is recommended for small datasets.
246
156 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/ 247 .. _DIAMOND: http://ab.inf.uni-tuebingen.de/software/diamond/
248
249 **Input**
250
251 Input data is a large protein or nucleotide sequence file.
252
253
254 **Output**
255
256 Diamond gives you a tabular output file with 12 columns:
257
258 Column Description
259 1 Query Seq-id (ID of your sequence)
260 2 Subject Seq-id (ID of the database hit)
261 3 Percentage of identical matches
262 4 Alignment length
263 5 Number of mismatches
264 6 Number of gap openings
265 7 Start of alignment in query
266 8 End of alignment in query
267 9 Start of alignment in subject (database hit)
268 10 End of alignment in subject (database hit)
269 11 Expectation value (E-value)
270 12 Bit score
157 271
158 272
159 Supported values for gap open and gap extend parameters depending on the selected scoring matrix. 273 Supported values for gap open and gap extend parameters depending on the selected scoring matrix.
160 274
161 ======== ============================================ 275 ======== ============================================
172 ======== ============================================ 286 ======== ============================================
173 287
174 288
175 ]]> 289 ]]>
176 </help> 290 </help>
177 <citations> 291 <expand macro="citations" />
178 <citation type="doi">10.1038/nmeth.3176</citation>
179 </citations>
180 </tool> 292 </tool>