annotate hmmer.xml @ 1:66f8262e1686

add datatype
author eskirton@lbl.gov
date Mon, 05 Mar 2012 22:51:04 -0800
parents c16d8db9338a
children 376092ae10ed
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
1 <tool id="hmmer" name="hmmer" version="1.0.0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
2 <description>hmmscan/search seqs vs profiles</description>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
3 <command>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
4 $program
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
5 ##--cpu 8
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
6 --tblout $tblout
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
7 --domtblout $domtblout
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
8 $acc
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
9 $noali
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
10 --notextw
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
11 #if $threshold.select == 'E':
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
12 -E $threshold.profile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
13 --domE $threshold.dom
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
14 #else:
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
15 -T $threshold.profile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
16 --domT $threshold.dom
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
17 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
18 --incE $incE
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
19 --incdomE $incdomE
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
20 #if $acceleration.select == "1":
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
21 $acceleration.max
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
22 --F1 $acceleration.F1
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
23 --F2 $acceleration.F2
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
24 --F3 $acceleration.F3
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
25 $acceleration.nobias
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
26 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
27 #if $other.select == "1":
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
28 $other.nonull2
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
29 --seed $other.seed
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
30 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
31 -o $logfile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
32 #if $hmmdb.select == 'db':
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
33 $hmmdb.file
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
34 #else:
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
35 ${hmmdb.file.extra_files_path}/hmm
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
36 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
37 $seqfile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
38 </command>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
39 <inputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
40 <param name="program" type="select" display="radio" label="Query">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
41 <option value="hmmscan">Sequence (i.e. hmmscan)</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
42 <option value="hmmsearch">Profile (i.e. hmmsearch)</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
43 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
44
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
45 <param name="seqfile" type="data" format="fasta" label="Sequences" /> <!-- NYI embl, genbank -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
46
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
47 <conditional name="hmmdb">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
48 <param name="select" type="select" label="HMM Db">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
49 <option value="db" selected="True">Precompiled HMM Database</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
50 <option value="user">HMM Database in your History</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
51 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
52 <when value="db">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
53 <param name="file" type="select" label="Precompiled HMM database">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
54 <options from_file="hmmdb.loc">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
55 <column name="name" index="1"/>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
56 <column name="value" index="2"/>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
57 </options>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
58 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
59 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
60 <when value="user">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
61 <param name="file" type="data" format="hmmer" label="HMM database" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
62 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
63 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
64
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
65 <param name="acc" type="boolean" truevalue="--acc" falsevalue="" checked="false" label="[--acc] Prefer accessions over names in output" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
66 <param name="noali" type="boolean" truevalue="--noali" falsevalue="" checked="false" label="[--noali] Omit the alignment section from the main output" help="This can greatly reduce the output volume" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
67
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
68 <!-- OPTIONS FOR REPORTING THRESHOLDS -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
69 <conditional name="threshold">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
70 <param name="select" type="select" label="Select reporting threshold to control which hits are reported in output files">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
71 <option value="E">Using E-value thresholds</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
72 <option value="T">Using bit score thresholds</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
73 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
74 <when value="E">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
75 <param name="profile" type="float" value="10.0" label="[-E] Report target profiles with an E-value of &lt;= this value" help="The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it is really noise." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
76 <param name="dom" type="float" value="10.0" label="[--domE] In the per-domain output, for target profiles that have already satisfied the per-profile reporting threshold, report individual domains with a conditional E-value of &lt;= this value" help="The default value is 10.0. A conditional E-value means the expected number of additional false positive domains in the smaller search space of those comparisons that already satisfied the per-profile reporting threshold (and thus must have at least one homologous domain already)." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
77 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
78 <when value="T">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
79 <param name="profile" type="float" value="100" label="[-T] Report target profiles with a bit score of &gt;= this value" /> <!-- float: the -T option accepts a real-valued bit score, not only whole numbers -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
80 <param name="dom" type="float" value="100" label="[--domT] Report domains with a bit score &gt;= this value" /> <!-- float: the domT option accepts a real-valued bit score, not only whole numbers -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
81 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
82 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
83
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
84 <!-- OPTIONS FOR INCLUSION THRESHOLDS; incT & incdomT WERE DELIBERATELY EXCLUDED SINCE THEY ARE NOT RECOMMENDED -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
85 <param name="incE" type="float" value="0.01" label="[--incE] Use an E-value of &lt;= this value as the per-target inclusion threshold" help="The default is 0.01, meaning that on average, about 1 false positive would be expected in every 100 searches with different query sequences." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
86 <param name="incdomE" type="float" value="0.01" label="[--incdomE] Use a conditional E-value of &lt;= this value as the per-domain inclusion threshold, in targets that have already satisfied the overall per-target inclusion threshold" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
87
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
88 <!-- NYI: OPTIONS FOR MODEL-SPECIFIC SCORE THRESHOLDING -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
89
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
90 <!-- CONTROL OF THE ACCELERATION PIPELINE -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
91 <conditional name="acceleration">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
92 <param name="select" type="select" label="Control of the acceleration pipeline" help="HMMER3 searches are accelerated in a three-step filter pipeline: the MSV filter, the Viterbi filter, and the Forward filter. The first filter is the fastest and most approximate; the last is the full Forward scoring algorithm.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
93 There is also a bias filter step between MSV and Viterbi. Targets that pass all the steps in the acceleration pipeline are then subjected to postprocessing -- domain identification and scoring using the Forward/Backward algorithm. Changing filter thresholds only removes or includes targets from consideration;
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
94 changing filter thresholds does not alter bit scores, E-values, or alignments, all of which are determined solely in postprocessing.">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
95 <option value="0">Use defaults</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
96 <option value="1">Define options</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
97 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
98 <when value="0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
99 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
100 <when value="1">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
101 <param name="max" type="boolean" truevalue="--max" falsevalue="" label="[--max] Turn off all filters, including the bias filter, and run full Forward/Backward postprocessing on every target." help="This increases sensitivity somewhat, at a large cost in speed" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
102 <param name="F1" type="float" value="0.02" label="[--F1] Set the P-value threshold for the MSV filter step." help="The default is 0.02, meaning that roughly 2% of the highest scoring nonhomologous targets are expected to pass the filter" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
103 <param name="F2" type="float" value="0.001" label="[--F2] Set the P-value threshold for the Viterbi filter step." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
104 <param name="F3" type="float" value="0.00001" label="[--F3] Set the P-value threshold for the Forward filter step." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
105 <param name="nobias" type="boolean" truevalue="--nobias" falsevalue="" label="[--nobias] Turn off the bias filter." help="This increases sensitivity somewhat, but can come at a high cost in speed, especially if the query has biased residue composition (such as a repetitive sequence region, or if it is a membrane protein with large regions of hydrophobicity). Without the bias filter, too many sequences may pass the filter with biased queries, leading to slower than expected performance as the computationally intensive Forward/Backward algorithms shoulder an abnormally heavy load." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
106 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
107 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
108
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
109 <!-- OTHER OPTIONS -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
110 <conditional name="other">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
111 <param name="select" type="select" label="Other options">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
112 <option value="0">Use defaults</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
113 <option value="1">Define options</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
114 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
115 <when value="0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
116 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
117 <when value="1">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
118 <param name="nonull2" type="boolean" truevalue="--nonull2" falsevalue="" label="[--nonull2] Turn off the null2 score corrections for biased composition." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
119 <!-- NYI: Z, domZ -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
120 <param name="seed" type="integer" value="42" label="[--seed] Set the random number seed" help="Some steps in postprocessing require Monte Carlo simulation. The default is to use a fixed seed (42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses an arbitrarily chosen seed." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
121 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
122 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
123 </inputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
124 <outputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
125 <data name="logfile" format="txt" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
126 <data name="tblout" format="tabular" label="${tool.name} on $on_string: Per-sequence hits" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
127 <data name="domtblout" format="tabular" label="${tool.name} on $on_string: Per-domain hits" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
128 </outputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
129 <requirements>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
130 <requirement type="binary">hmmscan</requirement>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
131 <requirement type="binary">hmmsearch</requirement>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
132 </requirements>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
133 <tests>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
134 </tests>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
135 <help>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
136 .. class:: warningmark
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
137
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
138 **Note**. Hidden Markov Model (HMM) searches take a substantial amount of time.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
139 For large input datasets it is advisable to allow overnight processing.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
140
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
141 -----
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
142
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
143 **What it does**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
144
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
145 hmmscan is used to search sequences against collections of profiles. For each sequence in seqfile,
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
146 use that query sequence to search the target database of profiles in hmmdb, and output ranked lists of
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
147 the profiles with the most significant matches to the sequence.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
148
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
149 hmmsearch is used to search one or more profiles against a sequence database.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
150 For each profile in "hmmfile", use that query profile to search the target database of sequences in "seqdb",
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
151 and output ranked lists of the sequences with the most significant matches to the profile.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
152
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
153 If using a user-supplied profile database, it needs to be pressed using hmmpress before it can be searched with hmmscan.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
154
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
155 **Author**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
156
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
157 Sean Eddy, Howard Hughes Medical Institute and Dept. of Genetics, Washington University School of Medicine
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
158
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
159 http://www.genetics.wustl.edu/eddy/
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
160
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
161 **Manual**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
162
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
163 ftp://selab.janelia.org/pub/software/hmmer/CURRENT/Userguide.pdf
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
164 </help>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
165 </tool>