comparison hmmer.xml @ 0:c16d8db9338a

init repo
author eskirton@lbl.gov
date Mon, 05 Mar 2012 22:43:09 -0800
parents
children 376092ae10ed
comparison
equal deleted inserted replaced
-1:000000000000 0:c16d8db9338a
1 <tool id="hmmer" name="hmmer" version="1.0.0">
2 <description>hmmscan/search seqs vs profiles</description>
3 <command>
4 ## Runs the selected HMMER program as: PROGRAM [options] -o MAIN_OUTPUT HMMDB SEQFILE
5 $program
6 ##--cpu 8
7 --tblout $tblout
8 --domtblout $domtblout
9 $acc $noali --notextw
10 ## Reporting thresholds: E-value flags or bit-score flags, depending on the selected mode.
11 #if $threshold.select == 'E':
12 -E $threshold.profile --domE $threshold.dom
13 #else:
14 -T $threshold.profile --domT $threshold.dom
15 #end if
16 --incE $incE
17 --incdomE $incdomE
18 #if $acceleration.select == "1":
19 ## HMMER rejects --max combined with --F1/--F2/--F3/--nobias, so emit one set or the other.
20 #if str($acceleration.max) == "--max":
21 --max
22 #else:
23 --F1 $acceleration.F1 --F2 $acceleration.F2 --F3 $acceleration.F3
24 $acceleration.nobias
25 #end if
26 #end if
27 #if $other.select == "1":
28 $other.nonull2 --seed $other.seed
29 #end if
30 -o $logfile
31 ## Profile database: the value picked from hmmdb.loc, or the "hmm" file in the history dataset's extra files directory.
32 #if $hmmdb.select == 'db':
33 $hmmdb.file
34 #else:
35 ${hmmdb.file.extra_files_path}/hmm
36 #end if
37 $seqfile
38 </command>
39 <inputs>
40 <param type="select" name="program" display="radio" label="Query"> <!-- the chosen value is emitted as the first word of the command line -->
41 <option value="hmmscan">Sequence (i.e. hmmscan)</option>
42 <option value="hmmsearch">Profile (i.e. hmmsearch)</option>
43 </param>
44
45 <param type="data" name="seqfile" format="fasta" label="Sequences" /> <!-- Not yet implemented: embl, genbank -->
46
47 <conditional name="hmmdb"> <!-- source of the profile database: a list read from hmmdb.loc, or a dataset in the user's history -->
48 <param type="select" name="select" label="HMM Db">
49 <option selected="True" value="db">Precompiled HMM Database</option>
50 <option value="user">HMM Database in your History</option>
51 </param>
52 <when value="db">
53 <param type="select" name="file" label="Precompiled HMM database">
54 <options from_file="hmmdb.loc"> <!-- the column at index 1 is the displayed name; the column at index 2 is the value passed to the command -->
55 <column index="1" name="name"/>
56 <column index="2" name="value"/>
57 </options>
58 </param>
59 </when>
60 <when value="user">
61 <param type="data" name="file" format="hmmer" label="HMM database" /> <!-- the command reads the "hmm" file from this dataset's extra files directory -->
62 </when>
63 </conditional>
64
65 <param type="boolean" name="acc" checked="false" truevalue="--acc" falsevalue="" label="[--acc] Prefer accessions over names in output" /> <!-- an unchecked box contributes an empty string to the command -->
66 <param type="boolean" name="noali" checked="false" truevalue="--noali" falsevalue="" label="[--noali] Omit the alignment section from the main output" help="This can greatly reduce the output volume" />
67
68 <!-- REPORTING THRESHOLDS: the selected mode decides whether the E-value or the bit-score options are passed to HMMER -->
69 <conditional name="threshold">
70 <param name="select" type="select" label="Select reporting threshold to control which hits are reported in output files">
71 <option value="E">Using E-value thresholds</option>
72 <option value="T">Using bit score thresholds</option>
73 </param>
74 <when value="E">
75 <param name="profile" type="float" value="10.0" label="[-E] Report target profiles with an E-value of &lt;= this value" help="The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it is really noise." />
76 <param name="dom" type="float" value="10.0" label="[--domE] In the per-domain output, for target profiles that have already satisfied the per-profile reporting threshold, report individual domains with a conditional E-value of &lt;= this value" help="The default value is 10.0. A conditional E-value means the expected number of additional false positive domains in the smaller search space of those comparisons that already satisfied the per-profile reporting threshold (and thus must have at least one homologous domain already)." />
77 </when>
78 <when value="T"> <!-- bit scores are real numbers, so these are float rather than integer parameters -->
79 <param name="profile" type="float" value="100" label="[-T] Report target profiles with a bit score of &gt;= this value" />
80 <param name="dom" type="float" value="100" label="[--domT] Report domains with a bit score &gt;= this value" />
81 </when>
82 </conditional>
83
84 <!-- INCLUSION THRESHOLDS. The bit-score variants (incT, incdomT) were deliberately left out because they are not recommended. -->
85 <param type="float" name="incE" value="0.01" label="[--incE] Use an E-value of &lt;= this value as the per-target inclusion threshold" help="The default is 0.01, meaning that on average, about 1 false positive would be expected in every 100 searches with different query subsequences." />
86 <param type="float" name="incdomE" value="0.01" label="[--incdomE] Use a conditional E-value of &lt;= this value as the per-domain inclusion threshold, in targets that have already satisfied the overall per-target inclusion threshold" />
87
88 <!-- NOT YET IMPLEMENTED: OPTIONS FOR MODEL-SPECIFIC SCORE THRESHOLDING -->
89
90 <!-- ACCELERATION PIPELINE: filter settings are passed to HMMER only when "Define options" is selected -->
91 <conditional name="acceleration">
92 <param type="select" name="select" label="Control of the acceleration pipeline" help="HMMER3 searches are accelerated in a three-step filter pipeline: the MSV filter, the Viterbi filter, and the Forward filter. The first filter is the fastest and most approximate; the last is the full Forward scoring algorithm.
93 There is also a bias filter step between MSV and Viterbi. Targets that pass all the steps in the acceleration pipeline are then subjected to postprocessing -- domain identification and scoring using the Forward/Backward algorithm. Changing filter thresholds only removes or includes targets from consideration;
94 changing filter thresholds does not alter bit scores, E-values, or alignments, all of which are determined solely in postprocessing.">
95 <option value="0">Use defaults</option>
96 <option value="1">Define options</option>
97 </param>
98 <when value="0"> <!-- defaults: no acceleration flags are added to the command -->
99 </when>
100 <when value="1">
101 <param type="boolean" name="max" truevalue="--max" falsevalue="" label="[--max] Turn off all filters, including the bias filter, and run full Forward/Backward postprocessing on every target." help="This increases sensitivity somewhat, at a large cost in speed" />
102 <param type="float" name="F1" value="0.02" label="[--F1] Set the P-value threshold for the MSV filter step." help="The default is 0.02, meaning that roughly 2% of the highest scoring nonhomologous targets are expected to pass the filter" />
103 <param type="float" name="F2" value="0.001" label="[--F2] Set the P-value threshold for the Viterbi filter step." />
104 <param type="float" name="F3" value="0.00001" label="[--F3] Set the P-value threshold for the Forward filter step." />
105 <param type="boolean" name="nobias" truevalue="--nobias" falsevalue="" label="[--nobias] Turn off the bias filter." help="This increases sensitivity somewhat, but can come at a high cost in speed, especially if the query has biased residue composition (such as a repetitive sequence region, or if it is a membrane protein with large regions of hydrophobicity). Without the bias filter, too many sequences may pass the filter with biased queries, leading to slower than expected performance as the computationally intensive Forward/Backward algorithms shoulder an abnormally heavy load." />
106 </when>
107 </conditional>
108
109 <!-- OTHER OPTIONS: null2 correction and random seed, passed to HMMER only when "Define options" is selected -->
110 <conditional name="other">
111 <param type="select" name="select" label="Other options">
112 <option value="0">Use defaults</option>
113 <option value="1">Define options</option>
114 </param>
115 <when value="0"> <!-- defaults: neither flag is added to the command -->
116 </when>
117 <when value="1">
118 <param type="boolean" name="nonull2" truevalue="--nonull2" falsevalue="" label="[--nonull2] Turn off the null2 score corrections for biased composition." />
119 <!-- Not yet implemented: Z, domZ -->
120 <param type="integer" name="seed" value="42" label="[--seed] Set the random number seed" help="Some steps in postprocessing require Monte Carlo simulation. The default is to use a fixed seed (42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses an arbitrarily chosen seed." />
121 </when>
122 </conditional>
123 </inputs>
124 <outputs> <!-- the main text output (written via -o) plus the per-sequence and per-domain hit tables -->
125 <data name="logfile" format="txt" label="${tool.name} on $on_string: Main output" />
126 <data name="tblout" format="tabular" label="${tool.name} on $on_string: Per-sequence hits" />
127 <data name="domtblout" format="tabular" label="${tool.name} on $on_string: Per-domain hits" />
128 </outputs>
129 <requirements> <!-- both HMMER executables are listed because the Query choice decides which one is run -->
130 <requirement type="binary">hmmscan</requirement>
131 <requirement type="binary">hmmsearch</requirement>
132 </requirements>
133 <tests> <!-- no functional tests are defined for this tool yet -->
134 </tests>
135 <help>
136 .. class:: warningmark
137
138 **Note**. Hidden Markov Model (HMM) searches take a substantial amount of time.
139 For large input datasets it is advisable to allow overnight processing.
140
141 -----
142
143 **What it does**
144
145 hmmscan is used to search sequences against collections of profiles. For each sequence in seqfile,
146 use that query sequence to search the target database of profiles in hmmdb, and output ranked lists of
147 the profiles with the most significant matches to the sequence.
148
149 hmmsearch is used to search one or more profiles against a sequence database.
150 For each profile in "hmmfile", use that query profile to search the target database of sequences in "seqdb",
151 and output ranked lists of the sequences with the most significant matches to the profile.
152
153 If using a user-supplied profile database, it needs to be pressed using hmmpress before it can be searched with hmmscan.
154
155 **Author**
156
157 Sean Eddy, Howard Hughes Medical Institute and Dept. of Genetics, Washington University School of Medicine
158
159 http://www.genetics.wustl.edu/eddy/
160
161 **Manual**
162
163 ftp://selab.janelia.org/pub/software/hmmer/CURRENT/Userguide.pdf
164 </help>
165 </tool>