comparison cmsearch.xml @ 5:6e18e0b098cd draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author bgruening
date Sat, 21 Jan 2017 17:36:57 -0500
parents 2c2c5e5e495b
children c9e29ac5d099
comparison
equal deleted inserted replaced
4:c47a7c52ac4f 5:6e18e0b098cd
1 <tool id="infernal_cmsearch" name="Search covariance model(s)" version="1.1.0.2"> 1 <tool id="infernal_cmsearch" name="cmsearch" version="@VERSION@.0">
2 <description>against a sequence database (cmsearch)</description> 2 <description>Search covariance model(s) against a sequence database </description>
3 <parallelism method="multi" split_inputs="seqdb" split_mode="to_size" split_size="500" shared_inputs="" merge_outputs="outfile,multiple_alignment_output"></parallelism> 3 <macros>
4 <requirements> 4 <import>macros.xml</import>
5 <requirement type="package">infernal</requirement> 5 </macros>
6 <requirement type="package" version="1.1">infernal</requirement> 6 <parallelism method="multi" split_inputs="seqdb" split_mode="to_size" split_size="500" merge_outputs="outfile,multiple_alignment_output"></parallelism>
7 <requirement type="package" version="8.22">gnu_coreutils</requirement> 7 <expand macro="requirements"/>
8 </requirements> 8 <expand macro="stdio" />
9 <command> 9 <command>
10 <![CDATA[ 10 <![CDATA[
11 ## a temp file is needed, because the standard tabular output from infernal is not usefull in Galaxy 11 ## a temp file is needed, because the standard tabular output from infernal is not usefull in Galaxy
12 ## it will be converted to a tab delimited file and piped to Galaxy 12 ## it will be converted to a tab delimited file and piped to Galaxy
13 temp_tabular_output=\$(mktemp); 13 temp_tabular_output=\$(mktemp) &&
14 14
15 cmsearch 15 cmsearch
16 ## Infernal Options 16 ## Infernal Options
17 --cpu "\${GALAXY_SLOTS:-12}" 17 --cpu "\${GALAXY_SLOTS:-2}"
18 -o /dev/null 18 -o /dev/null
19 --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip 19 --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip
20 $bottomonly 20 $bottomonly
21 $toponly 21 $toponly
22 $cyk 22 $cyk
23 $acyk
23 $notrunc 24 $notrunc
24 $max 25 $anytrunc
25 $nohmm 26 $nonull3
26 $mid 27 #if $smxsize <> 128.0
27 ##$bitscore_thresholds 28 --smxsize $smxsize
29 #end if
30 #if $mxsize <> 128.0
31 --mxsize $mxsize
32 #end if
28 --tblout \$temp_tabular_output 33 --tblout \$temp_tabular_output
29 $g 34 $g
35 #if $Z
36 -Z $Z
37 #end if
30 #if $A: 38 #if $A:
31 $A $multiple_alignment_output 39 $A '$multiple_alignment_output'
32 #end if 40 #end if
33
34 #if str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incE": 41 #if str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incE":
35 --incE $inclusion_thresholds_opts.incE 42 --incE $inclusion_thresholds_opts.incE
36 #elif str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incT": 43 #elif str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incT":
37 --incT $inclusion_thresholds_opts.incT 44 --incT $inclusion_thresholds_opts.incT
38 #end if 45 #end if
39
40 #if str($reporting_thresholds_opts.reporting_thresholds_selector) == "-E": 46 #if str($reporting_thresholds_opts.reporting_thresholds_selector) == "-E":
41 -E $reporting_thresholds_opts.E 47 -E $reporting_thresholds_opts.E
42 #elif str($reporting_thresholds_opts.reporting_thresholds_selector) == "-T": 48 #elif str($reporting_thresholds_opts.reporting_thresholds_selector) == "-T":
43 -T $reporting_thresholds_opts.T 49 -T $reporting_thresholds_opts.T
44 #end if 50 #end if
45 51 $model_thresholds.cut_ga
52 $model_thresholds.cut_nc
53 $model_thresholds.cut_tc
## Acceleration heuristics: "FZ" needs a numeric argument; every other selector value is itself the cmsearch flag and is passed through verbatim (--mid additionally takes --Fmid).
54 #if str($acceleration_huristics.acceleration_huristics_selector) == "FZ"
55 --FZ $acceleration_huristics.FZ
56 #else
57 $acceleration_huristics.acceleration_huristics_selector
58 #if str($acceleration_huristics.acceleration_huristics_selector) == "--mid"
59 --Fmid $acceleration_huristics.Fmid
60 #end if
61 #end if
46 ## CM file from the history or stored as database on disc 62 ## CM file from the history or stored as database on disc
47
48 #if str($cm_opts.cm_opts_selector) == "db": 63 #if str($cm_opts.cm_opts_selector) == "db":
49 $cm_opts.database.fields.path 64 $cm_opts.database.fields.path
50 #else: 65 #else:
51 $cm_opts.cmfile 66 $cm_opts.cmfile
52 #end if 67 #end if
53
54 ## sequence file 68 ## sequence file
55 $seqdb 69 '$seqdb'
56 2>&1 70 2>&1
57 ; 71 &&
58
59 ## 1. replace all lines starting # (comment lines) 72 ## 1. replace all lines starting # (comment lines)
60 ## 2. replace the first 18 spaces with tabs, 18th field is a free text field (can contain spaces) 73 ## 2. replace the first 18 spaces with tabs, 18th field is a free text field (can contain spaces)
61 sed -e 's/#.*$//' -e '/^$/d' -e 's/\s\+/\t/g' -e 's/\t/ /18g' \$temp_tabular_output > $outfile 74 sed -e 's/#.*$//' -e '/^$/d' -e 's/\s\+/\t/g' -e 's/\t/ /18g' \$temp_tabular_output > '$outfile'
62 75
63 ]]> 76 ]]>
64 </command> 77 </command>
65 <inputs> 78 <inputs>
66 79
67 <param name="seqdb" type="data" format="fasta" label="Sequence database"/> 80 <param name="seqdb" type="data" format="fasta" label="Sequence database"/>
68 81
69 <conditional name="cm_opts"> 82 <conditional name="cm_opts">
70 <param name="cm_opts_selector" type="select" label="Subject covariance models"> 83 <param name="cm_opts_selector" type="select" label="Subject covariance models">
71 <option value="db" selected="True">Locally installed covariance models</option> 84 <option value="db" selected="True">Locally installed covariance models</option>
72 <option value="histdb">Covariance model from your history</option> 85 <option value="histdb">Covariance model from your history</option>
73 </param>
74 <when value="db">
75 <param name="database" type="select" label="Covariance models">
76 <options from_file="infernal.loc">
77 <column name="value" index="0"/>
78 <column name="name" index="1"/>
79 <column name="path" index="2"/>
80 </options>
81 </param>
82 </when>
83 <when value="histdb">
84 <param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/>
85 </when>
86 </conditional>
87
88 <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean"
89 label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with respect to the target database."/>
90
91 <param name="bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean"
92 label="Only search the bottom (Crick) strand of target sequences" help="in the sequence database"/>
93 <param name="toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean"
94 label="Only search the top (Watson) strand of target sequences" help="in the sequence database"/>
95
96 <param name="cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
97 label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/>
98 <param name="--acyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
99 label="Use the CYK algorithm to align hits" help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/>
100
101 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
102 label="Turn off truncated hit detection" help=""/>
103
104 <!-- accelleration pipeline -->
105
106 <param name="max" truevalue="--max" falsevalue="" checked="False" type="boolean"
107 label="Turn off all filters, and run non-banded Inside on every full-length target sequence" help="This
108 increases sensitivity somewhat, at an extremely large cost in speed."/>
109
110 <param name="nohmm" truevalue="--nohmm" falsevalue="" checked="False" type="boolean"
111 label="Turn off all HMM filter stages " help=""/>
112
113 <param name="mid" truevalue="--mid" falsevalue="" checked="False" type="boolean"
114 label="Turn off the HMM SSV and Viterbi filter stages" help=""/>
115
116
117 <!-- Options for model-specific score thresholding -->
118 <!--
119 <param name="bitscore_thresholds" type="select" label="Bit score thresholds" help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone.">
120 <option value="" selected="true">None</option>
121 <option value=" - -cut_ga">GA (gathering) bit scores</option>
122 <option value=" - -cut_nc">NC (noise cutoff) bit score</option>
123 <option value=" - -cut_tc">TC (trusted cutoff) bit score</option>
124 </param> 86 </param>
125 --> 87 <when value="db">
126 <!-- Options for inclusion thresholds --> 88 <param name="database" type="select" label="Covariance models">
127 89 <options from_file="infernal.loc">
128 90 <column name="value" index="0"/>
129 <conditional name="inclusion_thresholds_opts"> 91 <column name="name" index="1"/>
130 <param name="inclusion_thresholds_selector" type="select" label="Inclusion thresholds" 92 <column name="path" index="2"/>
131 help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output."> 93 </options>
132 <option value="" selected="true">default</option> 94 </param>
133 <option value="--incE">Use E-value</option> 95 </when>
134 <option value="--incT">Use bit score</option> 96 <when value="histdb">
135 </param> 97 <param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/>
136 <when value=""/> 98 </when>
137 <when value="--incE"> 99 </conditional>
138 <param name="incE" type="float" value="0.01" label="Use E-value" help="of &lt;= X as the hit inclusion threshold."> 100
139 <sanitizer> 101 <param argument="-g" truevalue="-g" falsevalue="" checked="False" type="boolean"
140 <valid initial="string.printable"> 102 label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with respect to the target database."/>
141 <remove value="&apos;"/> 103 <param argument="-Z" type="float" min="0" optional="true" label="Calculate E-values as if the search space size is 'x' megabases (Mb)" help=""/>
142 </valid> 104
143 </sanitizer> 105 <param argument="--bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean"
144 </param> 106 label="Only search the bottom (Crick) strand of target sequences" help="in the sequence database"/>
145 </when> 107 <param argument="--toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean"
146 <when value="--incT"> 108 label="Only search the top (Watson) strand of target sequences" help="in the sequence database"/>
147 <param name="incT" type="integer" value="0" label="Use bit score" help="of >= X as the hit inclusion threshold."> 109
148 <sanitizer> 110 <param argument="--cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
149 <valid initial="string.printable"> 111 label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/>
150 <remove value="&apos;"/> 112 <param argument="--acyk" truevalue="--acyk" falsevalue="" checked="False" type="boolean"
151 </valid> 113 label="Use the CYK algorithm to align hits" help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/> <!-- truevalue must be the acyk flag itself; the cyk flag is the separate final-scoring switch declared just before this param -->
152 </sanitizer> 114
153 </param> 115 <param argument="--notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
154 </when> 116 label="Skip truncated hit detection" help=""/>
155 </conditional> 117 <param argument="--anytrunc" truevalue="--anytrunc" falsevalue="" checked="false" type="boolean"
156 118 label="Allow full and truncated hits anywhere within sequences" help=""/>
157 <!-- Options controlling reporting thresholds --> 119 <param argument="--nonull3" truevalue="--nonull3" falsevalue="" checked="false" type="boolean"
158 120 label="Turn off the null3 CM score corrections for biased composition" help="This correction is not used during the HMM filter stages."/>
159 <conditional name="reporting_thresholds_opts"> 121 <param argument="--mxsize" type="float" value="128.0" min="0.1"
160 <param name="reporting_thresholds_selector" type="select" label="reporting thresholds" 122 label="Set the maximum allowable CM DP matrix size to 'x' megabytes" help=""/>
161 help="Reporting thresholds control which hits are reported in output files"> 123 <param argument="--smxsize" type="float" value="128.0" min="0.1"
162 <option value="" selected="true">default</option> 124 label="Set the maximum allowable CM search DP matrix size to 'x' megabytes." help=""/>
163 <option value="-E">Use E-value</option> 125
164 <option value="-T">Use bit score</option> 126 <!-- accelleration pipeline -->
165 </param> 127 <conditional name="acceleration_huristics">
166 <when value=""/> 128 <param name="acceleration_huristics_selector" type="select" label="Options controlling acceleration heuristics" help="These options are, in order from least strict (slowest but most sensitive) to most strict (fastest but least sensitive)">
167 <when value="-E"> 129 <option value="--max">Turn all heuristic filters off (--max)</option>
168 <param name="E" type="float" value="10.0" label="Use E-value" help="of &lt;= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise."> 130 <option value="--nohmm">Skip all HMM filter stages, use only CM (--nohmm)</option>
169 <sanitizer> 131 <option value="--mid">Skip first two HMM filter stages (SSV and Vit) (--mid)</option>
170 <valid initial="string.printable"> 132 <option value="--default" selected="true">Run search space size-dependent pipeline (--default)</option>
171 <remove value="&apos;"/> 133 <option value="--rfam">Use a strict filtering strategy devised for large databases (more than 20 Gb) (--rfam)</option>
172 </valid> 134 <option value="--hmmonly">Use HMM only, don't use a CM at all (--hmmonly)</option>
173 </sanitizer> 135 <option value="FZ">set filters to defaults used for a search space of size 'x' Mb (--FZ)</option>
174 </param> 136 </param>
175 </when> 137 <when value="--max">
176 <when value="-T"> 138 </when>
177 <param name="T" type="integer" value="0" label="Use bit score" help="of >= X as the hit reporting threshold."> 139 <when value="--nohmm">
178 <sanitizer> 140 </when>
179 <valid initial="string.printable"> 141 <when value="--mid">
180 <remove value="&apos;"/> 142 <param argument="--Fmid" type="float" value="0.02" label="P-value threshold for HMM stages"/>
181 </valid> 143 </when>
182 </sanitizer> 144 <when value="--default">
183 </param> 145 </when>
184 </when> 146 <when value="--rfam">
185 </conditional> 147 </when>
186 148 <when value="--hmmonly">
187 <param name="A" truevalue="-A" falsevalue="" checked="False" type="boolean" 149 </when>
188 label="Save a multiple alignment of all significant hits" help="... those satisfying inclusion thresholds"/> 150 <when value="FZ">
189 151 <param argument="--FZ" type="float" value="" label="Size of search space in Mb"/>
190 </inputs> 152 </when>
153 </conditional>
154
155
156
157 <!-- Options for model-specific score thresholding -->
158
159 <section name="model_thresholds" title="Options controlling model-specific reporting thresholds" help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone.">
160 <param argument="--cut_ga" truevalue="--cut_ga" falsevalue="" checked="false" type="boolean"
161 label="Use CM's GA gathering cutoffs as reporting thresholds" help="GA thresholds are generally considered to be the reliable curated thresholds defining family membership"/>
162 <param argument="--cut_nc" truevalue="--cut_nc" falsevalue="" checked="false" type="boolean"
163 label="use CM's NC noise cutoffs as reporting thresholds" help="NC thresholds are generally considered to be the score of the highest-scoring known false positive."/>
164 <param argument="--cut_tc" truevalue="--cut_tc" falsevalue="" checked="false" type="boolean"
165 label="use CM's TC trusted cutoffs as reporting thresholds" help="TC thresholds are generally considered to be the score of the lowest-scoring known true positive that is above all known false positives."/>
166 </section>
167
168 <!-- Options for inclusion thresholds -->
169 <conditional name="inclusion_thresholds_opts">
170 <param name="inclusion_thresholds_selector" type="select" label="Inclusion thresholds"
171 help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output.">
172 <option value="" selected="true">default</option>
173 <option value="--incE">Use E-value</option>
174 <option value="--incT">Use bit score</option>
175 </param>
176 <when value=""/>
177 <when value="--incE">
178 <param name="incE" type="float" value="0.01" label="Use E-value" help="of &lt;= X as the hit inclusion threshold.">
179 <sanitizer>
180 <valid initial="string.printable">
181 <remove value="&apos;"/>
182 </valid>
183 </sanitizer>
184 </param>
185 </when>
186 <when value="--incT">
187 <param name="incT" type="integer" value="0" label="Use bit score" help="of >= X as the hit inclusion threshold.">
188 <sanitizer>
189 <valid initial="string.printable">
190 <remove value="&apos;"/>
191 </valid>
192 </sanitizer>
193 </param>
194 </when>
195 </conditional>
196
197 <!-- Options controlling reporting thresholds -->
198
199 <conditional name="reporting_thresholds_opts">
200 <param name="reporting_thresholds_selector" type="select" label="reporting thresholds"
201 help="Reporting thresholds control which hits are reported in output files">
202 <option value="" selected="true">default</option>
203 <option value="-E">Use E-value</option>
204 <option value="-T">Use bit score</option>
205 </param>
206 <when value=""/>
207 <when value="-E">
208 <param name="E" type="float" value="10.0" label="Use E-value" help="of &lt;= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise.">
209 <sanitizer>
210 <valid initial="string.printable">
211 <remove value="&apos;"/>
212 </valid>
213 </sanitizer>
214 </param>
215 </when>
216 <when value="-T">
217 <param name="T" type="integer" value="0" label="Use bit score" help="of >= X as the hit reporting threshold.">
218 <sanitizer>
219 <valid initial="string.printable">
220 <remove value="&apos;"/>
221 </valid>
222 </sanitizer>
223 </param>
224 </when>
225 </conditional>
226
227 <param argument="-A" truevalue="-A" falsevalue="" checked="False" type="boolean"
228 label="Save a multiple alignment of all significant hits" help="... those satisfying inclusion thresholds"/>
229 <param argument="--noali" type="boolean" truevalue="--noali" falsevalue="" checked="false"
230 label="Omit the alignment section from the main output" help="This can greatly reduce the output volume"/> <!-- NOTE(review): $noali is not referenced in the command template visible in this comparison, and the main output is sent to /dev/null there, so this switch presumably has no effect yet; confirm -->
231 <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false"
232 label="Include extra search pipeline statistics in the main output" help="They include filter survival statistics for truncated hit detection and number of envelopes discarded due to matrix size overflows."/>
233 </inputs>
191 <outputs> 234 <outputs>
192 235
193 <data format="tabular" name="outfile" label="cmsearch on ${on_string}"/> 236 <data format="tabular" name="outfile" label="cmsearch on ${on_string}"/>
194 <data format="tabular" name="multiple_alignment_output" label="cmsearch on ${on_string} (multi alignment)"> 237 <data format="tabular" name="multiple_alignment_output" label="cmsearch on ${on_string} (multi alignment)">
195 <filter>A is True</filter> 238 <filter>A is True</filter>
196 </data> 239 </data>
197 240
198 </outputs> 241 </outputs>
242 <tests>
243 <test>
244 <conditional name="cm_opts">
245 <param name="cm_opts_selector" value="histdb"/>
246 <param name="cmfile" value="cmsearch_input1.cm"/>
247 </conditional>
248 <param name="seqdb" value="cmsearch_input2.fa"/>
249 <output name="outfile" file="cmsearch_result.tabular"/>
250
251 </test>
252 </tests>
253
199 <help> 254 <help>
200 <![CDATA[ 255 <![CDATA[
201 256
202 257
203 **What it does** 258 **What it does**
208 To build CMs from multiple alignments, see cmbuild (build covariance models). 263 To build CMs from multiple alignments, see cmbuild (build covariance models).
209 264
210 265
211 **Input** 266 **Input**
212 267
213 The CM query file must have been calibrated for E-values with cmcalibrate. As a special exception, any models CM query files that have zero basepairs need not be calibrated. 268 The CM query file must have been calibrated for E-values with cmcalibrate. As a special exception, any models CM query files that have zero basepairs need not be calibrated.
214 269
215 270
216 **Options** 271 **Options**
217 272
218 - *Turn on the glocal alignment algorithm*: global with respect to the query model and local with respect to the target database. By default, the local alignment algorithm is used which is local with respect to both the target sequence and the model. In local mode, the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. Local mode performs better on empirical benchmarks and is significantly more sensitive for remote homology detection. Empirically, glocal searches return many fewer hits than local searches, so glocal may be desired for some applications. With *Turn on the glocal alignment algorithm*, all models must be calibrated, even those with zero basepairs. 273 - *Turn on the glocal alignment algorithm*: global with respect to the query model and local with respect to the target database. By default, the local alignment algorithm is used which is local with respect to both the target sequence and the model. In local mode, the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. Local mode performs better on empirical benchmarks and is significantly more sensitive for remote homology detection. Empirically, glocal searches return many fewer hits than local searches, so glocal may be desired for some applications. With *Turn on the glocal alignment algorithm*, all models must be calibrated, even those with zero basepairs.
221 276
222 - *Only search the top (Watson) strand of target sequences*: Hits can occur on either the top (Watson) or bottom (Crick) strand of the target sequence. By default, both strands are searched. 277 - *Only search the top (Watson) strand of target sequences*: Hits can occur on either the top (Watson) or bottom (Crick) strand of the target sequence. By default, both strands are searched.
223 278
224 - *Use the CYK algorithm, not Inside, to determine the final score of all hits*: If selecting "yes", the CYK algorithm instead of the CM Inside algorithm (the SCFG analog of the HMM Forward algorithm) is used. 279 - *Use the CYK algorithm, not Inside, to determine the final score of all hits*: If selecting "yes", the CYK algorithm instead of the CM Inside algorithm (the SCFG analog of the HMM Forward algorithm) is used.
225 280
226 - *Use the CYK algorithm to align hits*: By default, the Durbin/Holmes optimal accuracy algo- 281 - *Use the CYK algorithm to align hits*: By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues.
227 rithm is used, which finds the alignment that maximizes the expected accuracy of all aligned
228 residues.
229 282
230 - *Turn off truncated hit detection*: Turns off truncated hit detection and will reduce the running time most significantly for target files that include many short sequences. 283 - *Turn off truncated hit detection*: Turns off truncated hit detection and will reduce the running time most significantly for target files that include many short sequences.
231 284
232 - *Turn off all filters, and run non-banded Inside on every full-length target sequence*: This 285 - *Turn off all filters, and run non-banded Inside on every full-length target sequence*: This increases sensitivity somewhat, at an extremely large cost in speed.
233 increases sensitivity somewhat, at an extremely large cost in speed.
234 286
235 - *Turn off all HMM filter stages*: The CYK filter, using QDBs, will be run on every full-length target sequence and will enforce a P-value threshold of 0.0001. Each subsequence that survives CYK will be passed to Inside, which will also use QDBs (but a looser set). This increases sensitivity somewhat, at a very large cost in speed. 287 - *Turn off all HMM filter stages*: The CYK filter, using QDBs, will be run on every full-length target sequence and will enforce a P-value threshold of 0.0001. Each subsequence that survives CYK will be passed to Inside, which will also use QDBs (but a looser set). This increases sensitivity somewhat, at a very large cost in speed.
236 288
237 -*Turn off the HMM SSV and Viterbi filter stages*:Sets remaining HMM filter 289 - *Turn off the HMM SSV and Viterbi filter stages*:Sets remaining HMM filter thresholds to 0.02 by default. This may increase sensitivity, at a significant cost in speed.
238 thresholds to 0.02 by default. This may increase sensitivity, at a significant cost in speed. 290
239 291 - *Inclusion thresholds*: *Use E-value* - Use an E-value as the hit inclusion threshold. The default is 0.01, meaning that on average, about 1 false positive would be expected in every 100 searches with different query sequences. *Use Bit Score* - Instead of using E-values for setting the inclusion threshold, instead use a bit score as the hit inclusion threshold. By default this option is unset.
240 - *Inclusion thresholds*: *Use E-value* - Use an E-value as the hit inclusion threshold. The default is 0.01, meaning that on average, about 1 false positive would be expected in every 100 searches with different
241 query sequences. *Use Bit Score* - Instead of using E-values for setting the inclusion threshold, instead use a bit score as the hit inclusion threshold. By default this option is unset.
242 292
243 293
244 **Output Options** 294 **Output Options**
245 295
246 - *reporting thresholds*: Hits are ranked by statistical significance (E-value). By *default*, all hits with an E-value <= 10 are reported. The following options allow you to change the default *E-value* reporting thresholds, or to use *bit score* thresholds instead. 296 - *reporting thresholds*: Hits are ranked by statistical significance (E-value). By *default*, all hits with an E-value <= 10 are reported. The following options allow you to change the default *E-value* reporting thresholds, or to use *bit score* thresholds instead.
247 297
248 298
249 Output Example: 299 Output columns:
250 300
251 301 (1) rank
252 # cmsearch :: search CM(s) against a sequence database 302
253 # INFERNAL 1.1.1 (July 2014) 303 (2) E-value
254 # Copyright (C) 2014 Howard Hughes Medical Institute. 304
255 # Freely distributed under the GNU General Public License (GPLv3). 305 (3) score
256 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 306
257 # query CM file: tRNA5.cm 307 (4) bias
258 # target sequence database: tutorial/mrum-genome.fa 308
259 # number of worker threads: 8 309 (5) sequence
260 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 310
261 311 (6) start
262 312
263 The second section is a list of ranked top hits (sorted by E-value, most significant hit first): 313 (7) end
264 314
265 rank E-value score bias sequence start end mdl trunc gc description 315 (8) mdl
316
317 (9) trunc
318
319 (10) gc
320
321 (11) description
322
266 ---- --------- ------ ----- ----------- ------- ------- --- ----- ---- ----------- 323 ---- --------- ------ ----- ----------- ------- ------- --- ----- ---- -----------
267 (1) ! 1.3e-18 71.5 0.0 NC_013790.1 362026 361955 - cm no 0.50 Methanobrevibacter ruminantium M1 324
268 (2) ! 3.3e-18 70.2 0.0 NC_013790.1 2585265 2585193 - cm no 0.60 Methanobrevibacter ruminantium M1 325 ! 1.3e-18 71.5 0.0 NC_013790.1 362026 361955 - cm no 0.50 Methanobrevibacter ruminantium M1
269 326
270 327 ! 3.3e-18 70.2 0.0 NC_013790.1 2585265 2585193 - cm no 0.60 Methanobrevibacter ruminantium M1
271 328
272 For further questions please refere to the Infernal `Userguide <http://selab.janelia.org/software/infernal/Userguide.pdf>`_. 329
330 For further questions please refer to the Infernal `Userguide <http://eddylab.org/infernal/Userguide.pdf>`_.
273 331
274 ]]> 332 ]]>
275 </help> 333 </help>
276 <citations> 334 <expand macro="citations" />
277 <citation type="doi">10.1093/bioinformatics/btt509</citation> 335
278 <citation type="bibtex"> 336
279 @ARTICLE{bgruening_galaxytools,
280 Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche},
281 keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna},
282 title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}},
283 url = {https://github.com/bgruening/galaxytools}
284 }
285 </citation>
286 </citations>
287
288
289 </tool> 337 </tool>