annotate cmbuild.xml @ 2:fac157e22e1b draft

Uploaded
author bgruening
date Fri, 13 Feb 2015 03:10:51 -0500
parents 652f9d550531
children 2c2c5e5e495b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.1">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
2 <description>from sequence alignments (cmbuild)</description>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
4 <requirements>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package">infernal</requirement>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
6 <requirement type="package" version="1.1">infernal</requirement>
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
7 <requirement type="package" version="8.22">gnu_coreutils</requirement>
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
8 </requirements>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
9 <command>
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
10 <![CDATA[
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
11 cmbuild -F
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
12 #if $is_summery_output:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
13 -o $summary_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
14 #end if
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
15
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
16 ## too many outputs, is that one really needed?
652f9d550531 Uploaded
bgruening
parents:
diff changeset
17 ##-O $annotated_source_alignment_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
18
652f9d550531 Uploaded
bgruening
parents:
diff changeset
19 $model_construction_opts.model_construction_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
20 #if $model_construction_opts.model_construction_opts_selector == '--fast':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
21 --symfrac $model_construction_opts.symfrac
652f9d550531 Uploaded
bgruening
parents:
diff changeset
22 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
23
652f9d550531 Uploaded
bgruening
parents:
diff changeset
24 $noss
652f9d550531 Uploaded
bgruening
parents:
diff changeset
25
652f9d550531 Uploaded
bgruening
parents:
diff changeset
26 $relative_weights_opts.relative_weights_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
27 #if $relative_weights_opts.relative_weights_opts_selector == '--wblosum':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
28 --wid $relative_weights_opts.wid
652f9d550531 Uploaded
bgruening
parents:
diff changeset
29 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
30
652f9d550531 Uploaded
bgruening
parents:
diff changeset
31 $effective_opts.effective_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
32 #if str($effective_opts.effective_opts_selector) == '--eent':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
33 --ere $effective_opts.ere
652f9d550531 Uploaded
bgruening
parents:
diff changeset
34 --eminseq $effective_opts.eminseq
652f9d550531 Uploaded
bgruening
parents:
diff changeset
35 --ehmmre $effective_opts.ehmmre
652f9d550531 Uploaded
bgruening
parents:
diff changeset
36 --eset $effective_opts.eset
652f9d550531 Uploaded
bgruening
parents:
diff changeset
37 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
38
652f9d550531 Uploaded
bgruening
parents:
diff changeset
39 #if str($refining_opts.refining_opts_selector) == '--refine':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
40 #if $refining_opts.refine_output:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
41 --refine $refined_multiple_alignment_output
652f9d550531 Uploaded
bgruening
parents:
diff changeset
42 #else:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
43 --refine /dev/null
652f9d550531 Uploaded
bgruening
parents:
diff changeset
44 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
45
652f9d550531 Uploaded
bgruening
parents:
diff changeset
46 $refining_opts.l
652f9d550531 Uploaded
bgruening
parents:
diff changeset
47 $refining_opts.gibbs_opts.gibbs_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
48
652f9d550531 Uploaded
bgruening
parents:
diff changeset
49 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
50 --seed $refining_opts.gibbs_opts.random_seed
652f9d550531 Uploaded
bgruening
parents:
diff changeset
51 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
52
652f9d550531 Uploaded
bgruening
parents:
diff changeset
53 $refining_opts.notrunc
652f9d550531 Uploaded
bgruening
parents:
diff changeset
54 $refining_opts.cyk
652f9d550531 Uploaded
bgruening
parents:
diff changeset
55 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
56
652f9d550531 Uploaded
bgruening
parents:
diff changeset
57 $cmfile_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
58 $alignment_infile
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
59
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
60 ]]>
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
61 </command>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
62 <inputs>
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
63 <!-- Stockholm or SELEX
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
64 SELEX is defined in EMBOSS datatypes
652f9d550531 Uploaded
bgruening
parents:
diff changeset
65 -->
652f9d550531 Uploaded
bgruening
parents:
diff changeset
66 <param name="alignment_infile" type="data" format="stockholm,selex" label="Sequence database"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
67
652f9d550531 Uploaded
bgruening
parents:
diff changeset
68 <conditional name="model_construction_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
69 <param name="model_construction_opts_selector" type="select" label="These options control how consensus columns are defined in an alignment" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
70 <option value="--fast" selected="true">automatic (--fast)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
71 <option value="--hand">user defined (--hand)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
72 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
73 <when value="--fast">
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
74 <param name="symfrac" type="float" value="0.5" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
75 label="Define the residue fraction threshold necessary to define a consensus (--symfrac)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
76 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
77 <when value="--hand"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
78 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
79
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
80 <param name="noss" truevalue="--noss" falsevalue="" checked="False" type="boolean"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
81 label="Ignore the secondary structure annotation, if any, in your multiple alignment file (--noss)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
82
652f9d550531 Uploaded
bgruening
parents:
diff changeset
83 <conditional name="relative_weights_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
84 <param name="relative_weights_opts_selector" type="select" label="Options controlling relative weights" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
85 <option value="--wpb" selected="true">Henikoff (--wpb)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
86 <option value="--wgsc">Gerstein/Sonnhammer/Chothia (--wgsc)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
87 <option value="--wnone">no sequence weighting (--wnone)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
88 <option value="--wgiven">Sequence weight from given in input file (--wgiven)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
89 <option value="--wblosum">BLOSUM filtering algorithm (--wblosum)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
90 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
91 <when value="--wpb"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
92 <when value="--wgsc"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
93 <when value="--wnone"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
94 <when value="--wgiven"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
95 <when value="--wblosum">
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
96 <param name="wid" type="float" value="0.5" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
97 label="Percent identity for clustering the alignment (--wid)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
98 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
99 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
100
652f9d550531 Uploaded
bgruening
parents:
diff changeset
101
652f9d550531 Uploaded
bgruening
parents:
diff changeset
102 <conditional name="effective_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
103 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
104 <option value="--eent" selected="true">entropy weighting strategy (--eent)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
105 <option value="--enone">Turn off the entropy weighting strategy (--enone)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
106 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
107 <when value="--enone"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
108 <when value="--eent">
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
109 <param name="ere" type="float" value="0.59" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
110 label="Set the target mean match state relative entropy (--ere)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
111
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
112 <param name="eminseq" type="integer" value="" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
113 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
114
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
115 <param name="ehmmre" type="float" value="" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
116 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
117
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
118 <param name="eset" type="integer" value="" size="5"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
119 label="Set the effective sequence number for entropy weighting (--eset)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
120 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
121 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
122
652f9d550531 Uploaded
bgruening
parents:
diff changeset
123
652f9d550531 Uploaded
bgruening
parents:
diff changeset
124 <conditional name="refining_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
125 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
126 <option value="" selected="true">No refinement</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
127 <option value="--refine">refine the input alignment</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
128 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
129 <when value=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
130 <when value="--refine">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
131
652f9d550531 Uploaded
bgruening
parents:
diff changeset
132 <conditional name="gibbs_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
133 <param name="gibbs_opts_selector" type="select" label="refinement mode" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
134 <option value="" selected="true">expectation-maximization (EM)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
135 <option value="--gibbs">Gibbs sampling</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
136 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
137 <when value=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
138 <when value="--gibbs">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
139 <param name="random_seed" type="integer" value="0" label="Random Seed" help="" />
652f9d550531 Uploaded
bgruening
parents:
diff changeset
140 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
141 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
142
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
143 <param name="l" truevalue="-l" falsevalue="" checked="False" type="boolean"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
144 label="Turn on the local alignment algorithm" help="... which allows the alignment to span two or more subsequences if necessary"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
145
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
146 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
147 label="Turn off the truncated alignment algorithm" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
148
652f9d550531 Uploaded
bgruening
parents:
diff changeset
149 <param name="cyk" type="select" label="Alignment algorithm used during refinement" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
150 <option value="" selected="true">optimal accuracy algorithm</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
151 <option value="--cyk">align with the CYK algorithm</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
152 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
153
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
154 <param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
155 label="Output the refined alignment file as it is used to build the covariance model" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
156
652f9d550531 Uploaded
bgruening
parents:
diff changeset
157 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
158 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
159
652f9d550531 Uploaded
bgruening
parents:
diff changeset
160
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
161 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
162 label="Output a summary file?" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
163
652f9d550531 Uploaded
bgruening
parents:
diff changeset
164 </inputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
165 <outputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
166
652f9d550531 Uploaded
bgruening
parents:
diff changeset
167 <data format="txt" name="summary_outfile" label="cmbuild summary on ${on_string}">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
168 <filter>is_summery_output is True</filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
169 </data>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
170 <!--<data format="stockholm" name="annotated_source_alignment_outfile" label="Annotated alignment from ${on_string}"/>-->
652f9d550531 Uploaded
bgruening
parents:
diff changeset
171 <data format="cm" name="cmfile_outfile" label="Covariance models from ${on_string}"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
172
652f9d550531 Uploaded
bgruening
parents:
diff changeset
173 <data format="stockholm" name="refined_multiple_alignment_output" label="refined alignment file of ${on_string}">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
174 <filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
175 ((
652f9d550531 Uploaded
bgruening
parents:
diff changeset
176 refining_opts['refining_opts_selector'] == "--refine" and
652f9d550531 Uploaded
bgruening
parents:
diff changeset
177 refining_opts['refine_output'] is True
652f9d550531 Uploaded
bgruening
parents:
diff changeset
178 ))
652f9d550531 Uploaded
bgruening
parents:
diff changeset
179 </filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
180 </data>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
181
652f9d550531 Uploaded
bgruening
parents:
diff changeset
182 </outputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
183 <help>
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
184 <![CDATA[
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
185
652f9d550531 Uploaded
bgruening
parents:
diff changeset
186
652f9d550531 Uploaded
bgruening
parents:
diff changeset
187 **What it does**
652f9d550531 Uploaded
bgruening
parents:
diff changeset
188
652f9d550531 Uploaded
bgruening
parents:
diff changeset
189 For each multiple sequence alignment build a covariance model.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
190 The alignment file must be in Stockholm or SELEX format, and must contain consensus secondary structure annotation.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
191 cmbuild uses the consensus structure to determine the architecture of the CM.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
192
652f9d550531 Uploaded
bgruening
parents:
diff changeset
193 In addition to writing CM(s) to CMFILE_OUT, cmbuild also outputs a single line for each model created to stdout. Each
652f9d550531 Uploaded
bgruening
parents:
diff changeset
194 line has the following fields: "aln": the index of the alignment used to build the CM; "idx": the index of the CM in the
652f9d550531 Uploaded
bgruening
parents:
diff changeset
195 CMFILE_OUT; "name": the name of the CM; "nseq": the number of sequences in the alignment used to build the CM;
652f9d550531 Uploaded
bgruening
parents:
diff changeset
196 "eff nseq": the effective number of sequences used to build the model; "alen": the length of the alignment used to build
652f9d550531 Uploaded
bgruening
parents:
diff changeset
197 the CM; "clen": the number of columns from the alignment defined as consensus (match) columns; "bps": the number
652f9d550531 Uploaded
bgruening
parents:
diff changeset
198 of basepairs in the CM; "bifs": the number of bifurcations in the CM; "rel entropy: CM": the total relative entropy of the
652f9d550531 Uploaded
bgruening
parents:
diff changeset
199 model divided by the number of consensus columns; "rel entropy: HMM": the total relative entropy of the model ignoring
652f9d550531 Uploaded
bgruening
parents:
diff changeset
200 secondary structure divided by the number of consensus columns. "description": description of the model/alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
201
652f9d550531 Uploaded
bgruening
parents:
diff changeset
202
652f9d550531 Uploaded
bgruening
parents:
diff changeset
203 Options controlling model construction
652f9d550531 Uploaded
bgruening
parents:
diff changeset
204 --------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
205
652f9d550531 Uploaded
bgruening
parents:
diff changeset
206 These options control how consensus columns are defined in an alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
207
652f9d550531 Uploaded
bgruening
parents:
diff changeset
208 * --fast Define consensus columns automatically as those that have a fraction >= symfrac of residues as opposed to gaps. (See below for the --symfrac option.) This is the default.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
209 * --hand Use reference coordinate annotation (#=GC RF line, in Stockholm) to determine which columns are consensus, and which are inserts. Any non-gap character indicates a consensus column. (For example, mark consensus columns with "x", and insert columns with ".".)
652f9d550531 Uploaded
bgruening
parents:
diff changeset
210 * --symfrac Define the residue fraction threshold necessary to define a consensus column when not using --hand. The default is 0.5. The symbol fraction in each column is calculated after taking relative sequence weighting into account. Setting this to 0.0 means that every alignment column will be assigned as consensus, which may be useful in some cases. Setting it to 1.0 means that only columns that include 0 gaps will be assigned as consensus.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
211 * --noss Ignore the secondary structure annotation, if any, in MSA-Infile and build a CM with zero basepairs. This model will be similar to a profile HMM and the cmsearch and cmscan programs will use HMM algorithms which are faster than CM ones for this model. Additionally, a zero basepair model need not be calibrated with cmcalibrate prior to running cmsearch with it. The --noss option must be used if there is no secondary structure annotation in MSA-Infile.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
212
652f9d550531 Uploaded
bgruening
parents:
diff changeset
213
652f9d550531 Uploaded
bgruening
parents:
diff changeset
214 Options controlling relative weights
652f9d550531 Uploaded
bgruening
parents:
diff changeset
215 ------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
216
652f9d550531 Uploaded
bgruening
parents:
diff changeset
217 cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly
652f9d550531 Uploaded
bgruening
parents:
diff changeset
218 related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example,
652f9d550531 Uploaded
bgruening
parents:
diff changeset
219 two identical sequences would typically each receive half the weight that one sequence would. These options control
652f9d550531 Uploaded
bgruening
parents:
diff changeset
220 which algorithm gets used.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
221
652f9d550531 Uploaded
bgruening
parents:
diff changeset
222 * --wpb Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
223 * --wgsc Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
652f9d550531 Uploaded
bgruening
parents:
diff changeset
224 * --wnone Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
225 * --wgiven Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
226 * --wblosum Use the BLOSUM filtering algorithm to weight the sequences, instead of the default GSC weighting. Cluster the sequences at a given percentage identity (see --wid); assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
227 * --wid Controls the behavior of the --wblosum weighting option by setting the percent identity for clustering the alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
228
652f9d550531 Uploaded
bgruening
parents:
diff changeset
229
652f9d550531 Uploaded
bgruening
parents:
diff changeset
230 Options controlling effective sequence number
652f9d550531 Uploaded
bgruening
parents:
diff changeset
231 ---------------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
232
652f9d550531 Uploaded
bgruening
parents:
diff changeset
233 After relative weights are determined, they are normalized to sum to a total effective sequence number, eff nseq. This
652f9d550531 Uploaded
bgruening
parents:
diff changeset
234 number may be the actual number of sequences in the alignment, but it is almost always smaller than that. The default
652f9d550531 Uploaded
bgruening
parents:
diff changeset
235 entropy weighting method (--eent) reduces the effective sequence number to reduce the information content (relative
652f9d550531 Uploaded
bgruening
parents:
diff changeset
236 entropy, or average expected score on true homologs) per consensus position. The target relative entropy is controlled
652f9d550531 Uploaded
bgruening
parents:
diff changeset
237 by a two-parameter function, where the two parameters are settable with --ere and --esigma.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
238
652f9d550531 Uploaded
bgruening
parents:
diff changeset
239 * --eent Use the entropy weighting strategy to determine the effective sequence number that gives a target mean match state relative entropy. This option is the default, and can be turned off with --enone. The default target mean match state relative entropy is 0.59 bits for models with at least 1 basepair and 0.38 bits for models with zero basepairs, but can be changed with --ere. The default of 0.59 or 0.38 bits is automatically changed if the total relative entropy of the model (summed match state relative entropy) is less than a cutoff, which is 6.0 bits by default, but can be changed with the expert, undocumented --eX option. If you really want to play with that option, consult the source code.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
240 * --enone Turn off the entropy weighting strategy. The effective sequence number is just the number of sequences in the alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
241 * --ere Set the target mean match state relative entropy. By default the target relative entropy per match position is 0.59 bits for models with at least 1 basepair and 0.38 for models with zero basepairs.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
242 * --eminseq Define the minimum allowed effective sequence number.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
243 * --ehmmre Set the target HMM mean match state relative entropy. Entropy for basepairing match states is calculated using marginalized basepair emission probabilities.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
244 * --eset Set the effective sequence number for entropy weighting.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
245
652f9d550531 Uploaded
bgruening
parents:
diff changeset
246
652f9d550531 Uploaded
bgruening
parents:
diff changeset
247
652f9d550531 Uploaded
bgruening
parents:
diff changeset
248 Options for refining the input alignment
652f9d550531 Uploaded
bgruening
parents:
diff changeset
249 ----------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
250
652f9d550531 Uploaded
bgruening
parents:
diff changeset
251 * --refine Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations).
652f9d550531 Uploaded
bgruening
parents:
diff changeset
252 * -l Turn on the local alignment algorithm, which allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
253 * --gibbs Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal, instead an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
254 * --seed Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
255 * --cyk With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
256 * --notrunc With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
257
652f9d550531 Uploaded
bgruening
parents:
diff changeset
258
652f9d550531 Uploaded
bgruening
parents:
diff changeset
259 For further questions please refer to the Infernal Userguide_.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
260
652f9d550531 Uploaded
bgruening
parents:
diff changeset
261 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf
652f9d550531 Uploaded
bgruening
parents:
diff changeset
262
652f9d550531 Uploaded
bgruening
parents:
diff changeset
263
652f9d550531 Uploaded
bgruening
parents:
diff changeset
264 How do I cite Infernal?
652f9d550531 Uploaded
bgruening
parents:
diff changeset
265 -----------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
266
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
267 The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches, Bioinformatics 29:2933-2935 (2013).
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
268
652f9d550531 Uploaded
bgruening
parents:
diff changeset
269 **Galaxy Wrapper Author**::
652f9d550531 Uploaded
bgruening
parents:
diff changeset
270
652f9d550531 Uploaded
bgruening
parents:
diff changeset
271 * Bjoern Gruening, University of Freiburg
652f9d550531 Uploaded
bgruening
parents:
diff changeset
272
2
fac157e22e1b Uploaded
bgruening
parents: 0
diff changeset
273 ]]>
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
274 </help>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
275 </tool>