annotate hmmer.xml @ 2:376092ae10ed draft

reformat output to tabular
author Edward Kirton <eskirton@lbl.gov>
date Thu, 30 Aug 2012 11:58:27 -0700
parents c16d8db9338a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
1 <tool id="hmmer" name="hmmer" version="1.0.0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
2 <description>hmmscan/search seqs vs profiles</description>
2
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
3 <requirements>
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
4 <requirement type="package">hmmer</requirement>
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
5 </requirements>
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
6 <command interpreter="perl">
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
7 hmmer_wrapper.pl
0
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
8 $program
2
376092ae10ed reformat output to tabular
Edward Kirton <eskirton@lbl.gov>
parents: 0
diff changeset
9 --cpu 8
0
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
10 --tblout $tblout
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
11 --domtblout $domtblout
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
12 $acc
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
13 $noali
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
14 --notextw
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
15 #if $threshold.select == 'E':
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
16 -E $threshold.profile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
17 --domE $threshold.dom
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
18 #else:
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
19 -T $threshold.profile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
20 --domT $threshold.dom
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
21 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
22 --incE $incE
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
23 --incdomE $incdomE
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
24 #if $acceleration.select == "1":
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
25 $acceleration.max
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
26 --F1 $acceleration.F1
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
27 --F2 $acceleration.F2
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
28 --F3 $acceleration.F3
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
29 $acceleration.nobias
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
30 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
31 #if $other.select == "1":
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
32 $other.nonull2
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
33 --seed $other.seed
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
34 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
35 -o $logfile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
36 #if $hmmdb.select == 'db':
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
37 $hmmdb.file
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
38 #else:
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
39 ${hmmdb.file.extra_files_path}/hmm
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
40 #end if
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
41 $seqfile
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
42 </command>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
43 <inputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
44 <param name="program" type="select" display="radio" label="Query">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
45 <option value="hmmscan">Sequence (i.e. hmmscan)</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
46 <option value="hmmsearch">Profile (i.e. hmmsearch)</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
47 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
48
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
49 <param name="seqfile" type="data" format="fasta" label="Sequences" /> <!-- NYI embl, genbank -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
50
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
51 <conditional name="hmmdb">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
52 <param name="select" type="select" label="HMM Db">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
53 <option value="db" selected="True">Precompiled HMM Database</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
54 <option value="user">HMM Database in your History</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
55 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
56 <when value="db">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
57 <param name="file" type="select" label="Precompiled HMM database">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
58 <options from_file="hmmdb.loc">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
59 <column name="name" index="1"/>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
60 <column name="value" index="2"/>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
61 </options>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
62 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
63 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
64 <when value="user">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
65 <param name="file" type="data" format="hmmer" label="HMM database" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
66 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
67 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
68
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
69 <param name="acc" type="boolean" truevalue="--acc" falsevalue="" checked="false" label="[--acc] Prefer accessions over names in output" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
70 <param name="noali" type="boolean" truevalue="--noali" falsevalue="" checked="false" label="[--noali] Omit the alignment section from the main output" help="This can greatly reduce the output volume" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
71
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
72 <!-- OPTIONS FOR REPORTING THRESHOLDS -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
73 <conditional name="threshold">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
74 <param name="select" type="select" label="Select reporting threshold to control which hits are reported in output files">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
75 <option value="E">Using E-value thresholds</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
76 <option value="T">Using bit score thresholds</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
77 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
78 <when value="E">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
79 <param name="profile" type="float" value="10.0" label="[-E] Report target profiles with an E-value of &lt;= this value" help="The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it is really noise." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
80 <param name="dom" type="float" value="10.0" label="[--domE] In the per-domain output, for target profiles that have already satisfied the per-profile reporting threshold, report individual domains with a conditional E-value of &lt;= this value" help="The default value is 10.0. A conditional E-value means the expected number of additional false positive domains in the smaller search space of those comparisons that already satisfied the per-profile reporting threshold (and thus must have at least one homologous domain already)." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
81 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
82 <when value="T">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
83 <param name="profile" type="integer" value="100" label="[-T] Report target profiles with a bit score of &gt;= this value" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
84 <param name="dom" type="integer" value="100" label="[--domT] Report domains with a bit score &gt;= this value" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
85 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
86 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
87
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
88 <!-- OPTIONS FOR INCLUSION THRESHOLDS; incT & incdomT WERE DELIBERATELY EXCLUDED SINCE THEY ARE NOT RECOMMENDED -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
89 <param name="incE" type="float" value="0.01" label="[--incE] Use an E-value of &lt;= this value as the per-target inclusion threshold" help="The default is 0.01, meaning that on average, about 1 false positive would be expected in every 100 searches with different query sequences." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
90 <param name="incdomE" type="float" value="0.01" label="[--incdomE] Use a conditional E-value of &lt;= this value as the per-domain inclusion threshold, in targets that have already satisfied the overall per-target inclusion threshold" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
91
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
92 <!-- NYI: OPTIONS FOR MODEL-SPECIFIC SCORE THRESHOLDING -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
93
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
94 <!-- CONTROL OF THE ACCELERATION PIPELINE -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
95 <conditional name="acceleration">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
96 <param name="select" type="select" label="Control of the acceleration pipeline" help="HMMER3 searches are accelerated in a three-step filter pipeline: the MSV filter, the Viterbi filter, and the Forward filter. The first filter is the fastest and most approximate; the last is the full Forward scoring algorithm.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
97 There is also a bias filter step between MSV and Viterbi. Targets that pass all the steps in the acceleration pipeline are then subjected to postprocessing -- domain identification and scoring using the Forward/Backward algorithm. Changing filter thresholds only removes or includes targets from consideration;
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
98 changing filter thresholds does not alter bit scores, E-values, or alignments, all of which are determined solely in postprocessing.">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
99 <option value="0">Use defaults</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
100 <option value="1">Define options</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
101 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
102 <when value="0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
103 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
104 <when value="1">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
105 <param name="max" type="boolean" truevalue="--max" falsevalue="" label="[--max] Turn off all filters, including the bias filter, and run full Forward/Backward postprocessing on every target." help="This increases sensitivity somewhat, at a large cost in speed" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
106 <param name="F1" type="float" value="0.02" label="[--F1] Set the P-value threshold for the MSV filter step." help="The default is 0.02, meaning that roughly 2% of the highest scoring nonhomologous targets are expected to pass the filter" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
107 <param name="F2" type="float" value="0.001" label="[--F2] Set the P-value threshold for the Viterbi filter step." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
108 <param name="F3" type="float" value="0.00001" label="[--F3] Set the P-value threshold for the Forward filter step." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
109 <param name="nobias" type="boolean" truevalue="--nobias" falsevalue="" label="[--nobias] Turn off the bias filter." help="This increases sensitivity somewhat, but can come at a high cost in speed, especially if the query has biased residue composition (such as a repetitive sequence region, or if it is a membrane protein with large regions of hydrophobicity). Without the bias filter, too many sequences may pass the filter with biased queries, leading to slower than expected performance as the computationally intensive Forward/Backward algorithms shoulder an abnormally heavy load." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
110 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
111 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
112
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
113 <!-- OTHER OPTIONS -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
114 <conditional name="other">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
115 <param name="select" type="select" label="Other options">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
116 <option value="0">Use defaults</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
117 <option value="1">Define options</option>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
118 </param>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
119 <when value="0">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
120 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
121 <when value="1">
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
122 <param name="nonull2" type="boolean" truevalue="--nonull2" falsevalue="" label="[--nonull2] Turn off the null2 score corrections for biased composition." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
123 <!-- NYI: Z, domZ -->
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
124 <param name="seed" type="integer" value="42" label="[--seed] Set the random number seed" help="Some steps in postprocessing require Monte Carlo simulation. The default is to use a fixed seed (42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses an arbitrarily chosen seed." />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
125 </when>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
126 </conditional>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
127 </inputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
128 <outputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
129 <data name="logfile" format="txt" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
130 <data name="tblout" format="tabular" label="${tool.name} on $on_string: Per-sequence hits" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
131 <data name="domtblout" format="tabular" label="${tool.name} on $on_string: Per-domain hits" />
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
132 </outputs>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
133 <tests>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
134 </tests>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
135 <help>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
136 .. class:: warningmark
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
137
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
138 **Note**. Hidden Markov Model (HMM) searches take a substantial amount of time.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
139 For large input datasets it is advisable to allow overnight processing.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
140
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
141 -----
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
142
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
143 **What it does**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
144
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
145 hmmscan is used to search sequences against collections of profiles. For each sequence in seqfile,
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
146 use that query sequence to search the target database of profiles in hmmdb, and output ranked lists of
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
147 the profiles with the most significant matches to the sequence.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
148
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
149 hmmsearch is used to search one or more profiles against a sequence database.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
150 For each profile in "hmmfile", use that query profile to search the target database of sequences in "seqdb",
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
151 and output ranked lists of the sequences with the most significant matches to the profile.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
152
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
153 If using a user-supplied profile database, it needs to be pressed using hmmpress before it can be searched with hmmscan.
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
154
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
155 **Author**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
156
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
157 Sean Eddy, Howard Hughes Medical Institute and Dept. of Genetics, Washington University School of Medicine
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
158
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
159 http://www.genetics.wustl.edu/eddy/
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
160
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
161 **Manual**
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
162
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
163 ftp://selab.janelia.org/pub/software/hmmer/CURRENT/Userguide.pdf
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
164 </help>
c16d8db9338a init repo
eskirton@lbl.gov
parents:
diff changeset
165 </tool>