comparison cmscan.xml @ 5:6e18e0b098cd draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author bgruening
date Sat, 21 Jan 2017 17:36:57 -0500
parents
children c9e29ac5d099
comparison
equal deleted inserted replaced
4:c47a7c52ac4f 5:6e18e0b098cd
1 <tool id="infernal_cmscan" name="cmscan" version="@VERSION@.0">
2 <description> Search sequences against collections of covariance models</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
<command>
<![CDATA[
    ## Infernal's own tabular output is not usable in Galaxy as-is, so cmscan
    ## writes it to a scratch file first; the sed call at the very end turns
    ## it into a tab-delimited file, which becomes the Galaxy output dataset.
    temp_tabular_output=\$(mktemp) &&

    ## A covariance model from the history is linked as cmdb.cm, the same base
    ## name that the index links below are built on.
    #if str($cm_opts.cm_opts_selector) == "histdb":
        ln -s '$cm_opts.cmfile' cmdb.cm &&
    #end if

    ## Unpack the auxiliary files and link each one under the cmdb.cm base name.
    tar xvf '$aux_files' &&
    ln -s `find *.i1f` cmdb.cm.i1f &&
    ln -s `find *.i1i` cmdb.cm.i1i &&
    ln -s `find *.i1m` cmdb.cm.i1m &&
    ln -s `find *.i1p` cmdb.cm.i1p &&

    cmscan
        --cpu "\${GALAXY_SLOTS:-2}"
        ## Double quotes, not single: the shell has to expand the variable
        ## assigned from mktemp above.
        --tblout "\$temp_tabular_output"
        -o /dev/null
        $g
        #if $Z
            -Z $Z
        #end if
        $verbose

        ## Section "Other options"
        $other_opts.notrunc
        $other_opts.anytrunc
        $other_opts.nonull3
        ## The matrix sizes are only passed when they differ from the form default.
        #if $other_opts.smxsize != 128.0
            --smxsize $other_opts.smxsize
        #end if
        #if $other_opts.mxsize != 128.0
            --mxsize $other_opts.mxsize
        #end if
        $other_opts.cyk
        $other_opts.acyk
        $other_opts.bottomonly
        $other_opts.toponly

        ## Inclusion thresholds (nothing is emitted for the "default" choice)
        #if str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incE":
            --incE $inclusion_thresholds_opts.incE
        #elif str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incT":
            --incT $inclusion_thresholds_opts.incT
        #end if

        ## Reporting thresholds (nothing is emitted for the "default" choice)
        #if str($reporting_thresholds_opts.reporting_thresholds_selector) == "-E":
            -E $reporting_thresholds_opts.E
        #elif str($reporting_thresholds_opts.reporting_thresholds_selector) == "-T":
            -T $reporting_thresholds_opts.T
        #end if

        ## Model-specific thresholds
        $model_thresholds.cut_ga
        $model_thresholds.cut_nc
        $model_thresholds.cut_tc

        ## Acceleration heuristics. The "FZ" choice carries a value and is
        ## spelled out by hand; every other choice is itself the flag text.
        #if str($acceleration_huristics.acceleration_huristics_selector) == "FZ"
            --FZ $acceleration_huristics.FZ
        #else
            $acceleration_huristics.acceleration_huristics_selector
            #if str($acceleration_huristics.acceleration_huristics_selector) == "--mid"
                --Fmid $acceleration_huristics.Fmid
            #end if
        #end if

        ## CM file: either the path stored in the data table or the link
        ## created above for a history dataset.
        #if str($cm_opts.cm_opts_selector) == "db":
            '$cm_opts.database.fields.path'
        #else:
            cmdb.cm
        #end if

        ## sequence file
        '$seqdb'
    &&
    ## Replace every run of two or more spaces with a tab, then drop the scratch file.
    sed 's/ \+ /\t/g' "\$temp_tabular_output" > '$outfile' &&
    rm -f "\$temp_tabular_output"
]]>
</command>
81 <inputs>
<!-- FASTA dataset holding the sequences to scan; the command template passes it to cmscan after the CM file. -->
<param name="seqdb"
       type="data"
       format="fasta"
       label="Sequence database &lt;seqfile&gt;"/>
83
<!-- Source of the covariance models: an entry listed in infernal.loc ("db")
     or a dataset taken from the user's history ("histdb", the default). -->
<conditional name="cm_opts">
    <param name="cm_opts_selector" type="select" label="Subject covariance models &lt;cmdb&gt; ">
        <option value="db">Locally installed covariance models</option>
        <option value="histdb" selected="True">Covariance model from your history</option>
    </param>

    <when value="db">
        <!-- Columns of infernal.loc: 0 = value, 1 = display name, 2 = path. -->
        <param name="database" type="select" label="Covariance models">
            <options from_file="infernal.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>
    </when>

    <when value="histdb">
        <param name="cmfile"
               type="data"
               format="cm"
               label="Covariance models file from the history."/>
    </when>
</conditional>
<!-- Tar archive with the four index files; the command template unpacks it
     and links the members next to the CM file before running cmscan. -->
<param name="aux_files"
       type="data"
       format="tar"
       label="Auxiliary files"
       help="A tar file containing the four auxiliary files suffixed .i1{fimp}. These files are generated by pressing the CM file with cmpress."/>
103
<!-- General switches. A ticked boolean contributes its truevalue text to the
     command line; an unticked one contributes an empty string. -->
<param argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="False"
       label="Turn on the glocal alignment algorithm"
       help="... global with respect to the query model and local with respect to the target database."/>

<!-- Optional value: the command template only adds this option when a value is given. -->
<param argument="-Z" type="float" min="0" optional="true"
       label="Search space size in *Mb* for E-value calculations"
       help="Without the use of this option, the search space size changes for each query sequence"/>

<param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="False"
       label="Be verbose"
       help="report extra information; mainly useful for debugging"/>
109
110
111
<!-- Inclusion thresholds: keep the program default, or set the threshold by
     E-value or by bit score. The selector value doubles as the flag text. -->
<conditional name="inclusion_thresholds_opts">
    <param name="inclusion_thresholds_selector"
           type="select"
           label="Inclusion thresholds"
           help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output.">
        <option value="" selected="true">default</option>
        <option value="--incE">Use E-value</option>
        <option value="--incT">Use bit score</option>
    </param>

    <!-- Default choice: no extra parameter. -->
    <when value=""/>

    <when value="--incE">
        <param name="incE" type="float" value="0.01"
               label="Use E-value"
               help="of &lt;= X as the hit inclusion threshold.">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
    </when>

    <when value="--incT">
        <param name="incT" type="integer" value="0"
               label="Use bit score"
               help="of >= X as the hit inclusion threshold.">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
    </when>
</conditional>
140
<!-- Reporting thresholds: keep the program default, or set the threshold by
     E-value or by bit score. The selector value doubles as the flag text. -->
<conditional name="reporting_thresholds_opts">
    <param name="reporting_thresholds_selector"
           type="select"
           label="reporting thresholds"
           help="Reporting thresholds control which hits are reported in output files">
        <option value="" selected="true">default</option>
        <option value="-E">Use E-value</option>
        <option value="-T">Use bit score</option>
    </param>

    <!-- Default choice: no extra parameter. -->
    <when value=""/>

    <when value="-E">
        <param name="E" type="float" value="10.0"
               label="Use E-value"
               help="of &lt;= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise.">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
    </when>

    <when value="-T">
        <param name="T" type="integer" value="0"
               label="Use bit score"
               help="of >= X as the hit reporting threshold.">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
    </when>
</conditional>
170
<!-- Three independent switches that select one of the cutoffs stored in the CM
     (GA, NC or TC) as reporting threshold. All are off by default. -->
<section name="model_thresholds"
         title="Options controlling model-specific reporting thresholds"
         help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone.">
    <param argument="--cut_ga" type="boolean" truevalue="--cut_ga" falsevalue="" checked="false"
           label="Use CM's GA gathering cutoffs as reporting thresholds"
           help="GA thresholds are generally considered to be the reliable curated thresholds defining family membership"/>
    <param argument="--cut_nc" type="boolean" truevalue="--cut_nc" falsevalue="" checked="false"
           label="use CM's NC noise cutoffs as reporting thresholds"
           help="NC thresholds are generally considered to be the score of the highest-scoring known false positive."/>
    <param argument="--cut_tc" type="boolean" truevalue="--cut_tc" falsevalue="" checked="false"
           label="use CM's TC trusted cutoffs as reporting thresholds"
           help="TC thresholds are generally considered to be the score of the lowest-scoring known true positive that is above all known false positives."/>
</section>
179
<!-- Filter pipeline selection. Every option value except "FZ" is the literal
     flag text that the command template prints; "FZ" and the mid choice each
     bring one extra numeric parameter. -->
<conditional name="acceleration_huristics">
    <param name="acceleration_huristics_selector"
           type="select"
           label="Options controlling acceleration heuristics"
           help="These options are, in order from least strict (slowest but most sensitive) to most strict (fastest but least sensitive)">
        <option value="--max">Turn all heuristic filters off (--max)</option>
        <option value="--nohmm">Skip all HMM filter stages, use only CM (--nohmm)</option>
        <option value="--mid">Skip first two HMM filter stages (SSV and Vit) (--mid)</option>
        <option value="--default" selected="true">Run search space size-dependent pipeline (--default)</option>
        <option value="--rfam">Use a strict filtering strategy devised for large databases (more than 20 Gb) (--rfam)</option>
        <option value="--hmmonly">Use HMM only, don't use a CM at all (--hmmonly)</option>
        <option value="FZ">set filters to defaults used for a search space of size 'x' Mb (--FZ)</option>
    </param>

    <when value="--max"/>
    <when value="--nohmm"/>
    <when value="--mid">
        <param argument="--Fmid" type="float" value="0.02"
               label="P-value threshold for HMM stages"/>
    </when>
    <when value="--default"/>
    <when value="--rfam"/>
    <when value="--hmmonly"/>
    <when value="FZ">
        <param argument="--FZ" type="float" value="125"
               label="Size of search space in Mb"/>
    </when>
</conditional>
207
<!-- Less frequently used switches. Each boolean prints its truevalue when
     ticked and nothing otherwise; the two matrix sizes are only added to the
     command line by the template when they differ from 128.0. -->
<section name="other_opts" title="Other options">
    <param argument="--notrunc" type="boolean" truevalue="--notrunc" falsevalue="" checked="False"
           label="Skip truncated hit detection" help=""/>
    <param argument="--anytrunc" type="boolean" truevalue="--anytrunc" falsevalue="" checked="false"
           label="Allow full and truncated hits anywhere within sequences" help=""/>
    <param argument="--nonull3" type="boolean" truevalue="--nonull3" falsevalue="" checked="false"
           label="Turn off the null3 CM score corrections for biased composition"
           help="This correction is not used during the HMM filter stages."/>

    <param argument="--mxsize" type="float" value="128.0" min="0.1"
           label="Set the maximum allowable CM DP matrix size to 'x' megabytes" help=""/>
    <param argument="--smxsize" type="float" value="128.0" min="0.1"
           label="Set the maximum allowable CM search DP matrix size to 'x' megabytes." help=""/>

    <param argument="--cyk" type="boolean" truevalue="--cyk" falsevalue="" checked="False"
           label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/>
    <!-- The truevalue has to be the acyk flag itself; it used to repeat the
         cyk flag of the parameter above, so this switch never reached cmscan. -->
    <param argument="--acyk" type="boolean" truevalue="--acyk" falsevalue="" checked="False"
           label="Use the CYK algorithm to align hits"
           help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/>

    <param argument="--bottomonly" type="boolean" truevalue="--bottomonly" falsevalue="" checked="False"
           label="Only search the bottom (Crick) strand of target sequences"
           help="in the sequence database"/>
    <param argument="--toponly" type="boolean" truevalue="--toponly" falsevalue="" checked="False"
           label="Only search the top (Watson) strand of target sequences"
           help="in the sequence database"/>
</section>
229
230
231
232 </inputs>
<outputs>
    <!-- Receives the tab-delimited table written by the sed step of the command. -->
    <data name="outfile"
          format="tabular"
          label="cmscan on ${on_string}"/>
</outputs>
<tests>
    <!-- Single test: CM file and tar archive supplied as history datasets, all
         other parameters left at their defaults; the output must mention one
         expected sequence identifier. -->
    <test>
        <param name="seqdb" value="metag-example.fa"/>
        <param name="aux_files" value="minifam.tar" ftype="tar"/>
        <conditional name="cm_opts">
            <param name="cm_opts_selector" value="histdb"/>
            <param name="cmfile" value="minifam.cm"/>
        </conditional>
        <output name="outfile">
            <assert_contents>
                <has_text text="AAGA01015927.1"/>
            </assert_contents>
        </output>
    </test>
</tests>
252 <help>
253 <![CDATA[
254
255
256 **What it does**
257
258 cmscan is used to search sequences against collections of covariance models.
259 For each sequence in <seqfile>, use that query sequence to search the target database of CMs in <cmdb>,
260 and output ranked lists of the CMs with the most significant matches to the sequence.
261
262 **Input format**
263
264 The <seqfile> may contain more than one query sequence. It can be in FASTA format, or several other common
265 sequence file formats (genbank, embl, among others), or in alignment file formats (stockholm, aligned fasta, and
266 others).
267
268 The <cmdb> needs to be press’ed using cmpress before it can be searched with cmscan. This creates four binary
269 files, suffixed .i1{fimp}. Additionally, <cmdb> must have been calibrated for E-values with cmcalibrate before being
270 press’ed with cmpress.
271
272 NOTE: Please provide a tar file that contains the .cm file in addition to the four binary files, suffixed .i1{fimp},
273 and specify the file type as "tar" before uploading the file. Otherwise Galaxy will not read the binary files properly.
274
275 **Output format**
276
277 The output format is designed to be human-readable.
278
279 For further questions please refer to the Infernal `Userguide <http://eddylab.org/infernal/Userguide.pdf>`_.
280
281
282 ]]>
283 </help>
284
285 <expand macro="citations" />
286
287 </tool>