comparison cmalign.xml @ 5:6e18e0b098cd draft

planemo upload for repository commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author bgruening
date Sat, 21 Jan 2017 17:36:57 -0500
equal deleted inserted replaced
4:c47a7c52ac4f 5:6e18e0b098cd
1 <tool id="infernal_cmalign" name="cmalign" version="@VERSION@.0">
<!-- Short summary of the tool. -->
<description>Align sequences to a covariance model against a sequence database</description>
<!-- The macros expanded below are imported from macros.xml (contents not visible here). -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Expands the "requirements" macro; presumably the tool dependencies (confirm in macros.xml). -->
<expand macro="requirements"/>
<!-- Expands the "stdio" macro; presumably the exit-code / stderr handling (confirm in macros.xml). -->
<expand macro="stdio"/>
<command>
<![CDATA[
    ## Assemble the cmalign command line from the tool form.
    ## Boolean parameters expand to their truevalue/falsevalue strings, so most
    ## flags are emitted simply by referencing the parameter.
    cmalign
        ## Infernal options
        --cpu "\${GALAXY_SLOTS:-2}"
        -o '$outfile'

        ## Top-level -g toggle from the form: expands to "-g" when checked, otherwise to nothing.
        $g

        ## Alignment algorithm; --seed is only offered (and only emitted) for --sample.
        $alignment_algorithm_cont.alignment_algorithm.alignment_algorithm_selector
        #if str($alignment_algorithm_cont.alignment_algorithm.alignment_algorithm_selector) == "--sample"
            --seed $alignment_algorithm_cont.alignment_algorithm.seed
        #end if

        ## Truncation handling; --sub is only offered together with --notrunc.
        $alignment_algorithm_cont.notrunc_opt.notrunc
        #if str($alignment_algorithm_cont.notrunc_opt.notrunc) == "--notrunc"
            $alignment_algorithm_cont.notrunc_opt.sub
        #end if

        ## HMM banding and the matrix-size limit that goes with either mode.
        $speed_memory_cont.hmm_banding.banding_selector
        #if str($speed_memory_cont.hmm_banding.banding_selector) == "--hbanded"
            --tau $speed_memory_cont.hmm_banding.tau
            --mxsize $speed_memory_cont.hmm_banding.mxsize
            $speed_memory_cont.hmm_banding.fixedtau
            --maxtau $speed_memory_cont.hmm_banding.maxtau
        #end if
        #if str($speed_memory_cont.hmm_banding.banding_selector) == "--nonbanded"
            --mxsize $speed_memory_cont.hmm_banding.mxsize
        #end if

        ## Optional: include the alignment the CM was built from.
        #if $other_opts.mapali_opt.mapali_cond
            --mapali '$other_opts.mapali_opt.mapali'
            $other_opts.mapali_opt.mapstr
        #end if

        --informat $seqdb.ext
        --outformat $other_opts.outformat_selector
        $other_opts.dnaout
        $other_opts.noprob
        $other_opts.matchonly
        $other_opts.ileaved

        ## --small requirements according to cmalign.c: --cyk, --noprob, --nonbanded and --notrunc
        ## must all be on the command line. The noprob checkbox is inverted (checked emits nothing,
        ## unchecked emits --noprob), so compare the emitted string instead of testing truthiness.
        ## The condition is kept on a single line to avoid relying on directive line continuation.
        #if str($alignment_algorithm_cont.alignment_algorithm.alignment_algorithm_selector) == "--cyk" and str($other_opts.noprob) == "--noprob" and str($speed_memory_cont.hmm_banding.banding_selector) == "--nonbanded" and str($alignment_algorithm_cont.notrunc_opt.notrunc) == "--notrunc"
            $speed_memory_cont.small
        #end if

        ## CM file from the history or stored as database on disc.
        ## The on-disc path is quoted like the other file arguments so spaces cannot split it.
        #if str($cm_opts.cm_opts_selector) == "db"
            '$cm_opts.database.fields.path'
        #else
            '$cm_opts.cmfile'
        #end if

        ## sequence file
        '$seqdb'
]]>
</command>
67 <inputs>
<!-- Sequences to align: passed to cmalign as the final positional argument, and the
     dataset extension is passed as the input format. -->
<param name="seqdb"
       type="data"
       format="fasta"
       label="Sequence database"/>
<!-- Source of the covariance model: an entry from infernal.loc ("db") or a
     dataset from the user's history ("histdb"). -->
<conditional name="cm_opts">
    <param name="cm_opts_selector" type="select" label="Subject covariance models">
        <option value="db" selected="True">Locally installed covariance models</option>
        <option value="histdb">Covariance model from your history</option>
    </param>

    <!-- Locally installed models: infernal.loc columns are value, name, path
         (the command template reads the "path" column). -->
    <when value="db">
        <param name="database" type="select" label="Covariance models">
            <options from_file="infernal.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>
    </when>

    <!-- Model supplied as a history dataset of type "cm". -->
    <when value="histdb">
        <param name="cmfile" type="data" format="cm"
               label="Covariance models file from the history."/>
    </when>
</conditional>
<!-- Checkbox for the -g flag; unchecked by default and expands to an empty string when off. -->
<param argument="-g"
       type="boolean"
       truevalue="-g"
       falsevalue=""
       checked="False"
       label="Turn on the glocal alignment algorithm"
       help="... global with respect to the query model and local with respect to the target database."/>
<!-- Choice of alignment algorithm and of truncated vs. non-truncated alignment. -->
<section name="alignment_algorithm_cont" title="Options controlling alignment algorithm">

    <!-- The option values are the literal cmalign flags; only the sampling
         algorithm exposes an extra parameter (the RNG seed). -->
    <conditional name="alignment_algorithm">
        <param name="alignment_algorithm_selector" type="select" label="Options controlling alignment algorithm" help="">
            <option value="--optacc" selected="true">The Holmes/Durbin optimal accuracy algorithm (--optacc)</option>
            <option value="--cyk"> The CYK algorithm (--cyk)</option>
            <option value="--sample">Sample alignment of each seq from posterior distribution (--sample)</option>
        </param>
        <when value="--optacc"/>
        <when value="--cyk"/>
        <when value="--sample">
            <param argument="--seed" type="integer" value="0"
                   label="set RNG seed to 'n'"
                   help="if 0: one-time arbitrary seed"/>
        </when>
    </conditional>

    <!-- Inverted checkbox: checked (the default) emits nothing and keeps the
         truncated algorithm; unchecked emits the notrunc flag, which in turn
         makes the sub-CM option available. -->
    <conditional name="notrunc_opt">
        <param argument="--notrunc" type="boolean" truevalue="" falsevalue="--notrunc" checked="true"
               label="Use truncated alignment algorithm" help=""/>
        <when value=""/>
        <when value="--notrunc">
            <param argument="--sub" type="boolean" truevalue="--sub" falsevalue="" checked="false"
                   label="Build sub CM for columns b/t HMM predicted start/end points" help=""/>
        </when>
    </conditional>

</section>
<!-- Banding mode, DP matrix size limit and the small-memory switch. -->
<section name="speed_memory_cont" title="Options controlling speed and memory requirements">

    <!-- The option values are the literal cmalign flags. Both options now carry
         display text; they were previously empty and rendered as blank choices. -->
    <conditional name="hmm_banding">
        <param name="banding_selector" type="select" label="HMM banding">
            <option value="--hbanded" selected="true">Use HMM bands to accelerate alignment (--hbanded)</option>
            <option value="--nonbanded">Do not use HMM bands (--nonbanded)</option>
        </param>

        <!-- Banded mode: tau settings plus the matrix size limit. -->
        <when value="--hbanded">
            <param argument="--tau" type="float" value="1e-7" min="1e-18" max="1.0"
                   label="Tail loss probability for HMM bands"
                   help="This is the amount of probability mass within the HMM posterior probabilities that is considered negligible."/>
            <param argument="--mxsize" type="integer" value="1028" min="1"
                   label="Set the maximum allowable CM DP matrix size to 'x' megabytes" help=""/>
            <param argument="--fixedtau" type="boolean" truevalue="--fixedtau" falsevalue="" checked="false"
                   label="Turn off the HMM band tightening strategy"
                   help="Cmalign will attempt to iteratively tighten the HMM bands it uses to constrain the alignment by raising the tau parameter and recalculating the bands until the total matrix size needed falls below 'x' megabytes or the maximum allowable tau value (0.05 by default, but changeable with --maxtau) is reached."/>
            <param argument="--maxtau" type="float" value="0.05" min="1e-18" max="1.0"
                   label="Set the maximum allowed value for tau during band tightening" help=""/>
        </when>

        <!-- Non-banded mode: only the matrix size limit applies. -->
        <when value="--nonbanded">
            <param argument="--mxsize" type="integer" value="1028" min="1"
                   label="Set the maximum allowable CM DP matrix size to 'x' megabytes" help=""/>
        </when>
    </conditional>

    <!-- The command template only emits this flag when the other required options are selected. -->
    <param argument="--small" type="boolean" truevalue="--small" falsevalue="" checked="false"
           label="Use small memory divide and conquer algorithm"
           help="Allows CM alignment within practical memory limits, reducing the memory required for alignment. WARNING: It can only be used when --cyk,--noprob,--nonbanded,--notrunc are all enabled"/>

</section>
<!-- Output formatting options and the optional "map to the original alignment" mode. -->
<section name="other_opts" title="Other options">

    <!-- When enabled, the alignment the CM was built from is passed to cmalign
         together with the optional structure-propagation flag. -->
    <conditional name="mapali_opt">
        <param name="mapali_cond" type="boolean" checked="false"
               label="Read the aligment file that used to build the CM"
               help="The alignment from the file is held fixed. This allows you to align sequences to a model with cmalign and view them in the context of an existing trusted multiple alignment."/>
        <when value="true">
            <!-- The file must be the alignment the CM was built from, which is a
                 Stockholm alignment, so a FASTA dataset is not a valid choice here. -->
            <param argument="--mapali" type="data" format="stockholm"
                   label="The aligment file that used to build the CM" help=""/>
            <!-- This checkbox previously emitted the small-memory flag and carried
                 that option's label (copy-paste error); it now emits its own flag. -->
            <param argument="--mapstr" type="boolean" truevalue="--mapstr" falsevalue="" checked="true"
                   label="Propagate structure annotation from the --mapali alignment to the output"
                   help="Propagates structural information for any pseudoknots that exist in the --mapali alignment to the output alignment."/>
        </when>
        <when value="false"/>
    </conditional>

    <!-- Values are the format names cmalign accepts for its output format option. -->
    <param name="outformat_selector" type="select" label="Output alignment format" argument="--outformat">
        <option value="Stockholm" selected="true">Stockholm</option>
        <option value="Pfam">Pfam</option>
        <option value="AFA">AFA: aligned fasta</option>
        <option value="A2M">A2M</option>
        <option value="Clustal">Clustal</option>
        <option value="Phylip">Phylip</option>
    </param>

    <param argument="--dnaout" type="boolean" truevalue="--dnaout" falsevalue="" checked="false"
           label="Output the alignments as DNA sequence alignments" help="... instead of RNA ones."/>

    <!-- Inverted checkbox: checked (the default) emits nothing; unchecked emits the noprob flag. -->
    <param argument="--noprob" type="boolean" truevalue="" falsevalue="--noprob" checked="true"
           label="Annotate the output alignment with posterior probabilities" help=""/>

    <param argument="--matchonly" type="boolean" truevalue="--matchonly" falsevalue="" checked="false"
           label="Only include match columns in the output alignment"
           help="Do not include any insertions relative to the consensus model."/>

    <param argument="--ileaved" type="boolean" truevalue="--ileaved" falsevalue="" checked="false"
           label="Output the alignment in interleaved Stockholm format of a fixed width"
           help="will only work for alignments of up to 100,000 sequences or a total of 100,000,000 aligned nucleotides"/>

</section>
174 </inputs>
<outputs>
    <!-- The alignment written via the -o option. The default output format is
         Stockholm (Pfam is a Stockholm variant, so it keeps that datatype);
         the other selectable formats switch the dataset type accordingly.
         "text" is not a Galaxy datatype extension, so it is no longer used. -->
    <data name="outfile" format="stockholm" label="cmalign on ${on_string}">
        <change_format>
            <when input="other_opts.outformat_selector" value="AFA" format="fasta"/>
            <when input="other_opts.outformat_selector" value="A2M" format="txt"/>
            <when input="other_opts.outformat_selector" value="Clustal" format="clustal"/>
            <when input="other_opts.outformat_selector" value="Phylip" format="phylip"/>
        </change_format>
    </data>
</outputs>
<tests>
    <!-- Aligns the tRNA FASTA fixture against a covariance model taken from the
         history and checks that a known fragment of the alignment appears in the output. -->
    <test>
        <param name="seqdb" value="cmalign_input_mrum_tRNAs10.fa"/>
        <conditional name="cm_opts">
            <param name="cm_opts_selector" value="histdb"/>
            <!-- NOTE(review): the value is empty, so no covariance model file is
                 supplied to this test; presumably a CM fixture from test-data
                 belongs here. Confirm the file name before relying on this test. -->
            <param name="cmfile" value=""/>
        </conditional>
        <output name="outfile">
            <assert_contents>
                <has_text text="GGAGCUAUAGCUCAAU..GGC"/>
            </assert_contents>
        </output>
    </test>
</tests>
192 <help>
193 <![CDATA[
196 **What it does**
198 cmalign aligns the RNA sequences to the covariance model (CM).
201 The sequence file must be in FASTA or Genbank format. cmalign
202 uses an HMM banding technique to accelerate alignment by default. By default,
203 cmalign computes the alignment with maximum expected accuracy that is consistent with constraints
204 (bands) derived from an HMM, using a banded version of the Durbin/Holmes optimal accuracy algorithm. cmalign takes special care to correctly align truncated sequences, where some nucleotides from the beginning (5’) and/or end (3’) of the actual full length biological sequence are not present in the input sequence. This behavior is on by default.
209 **Output format**
212 (1) target name: The name of the target sequence or profile.
213 (2) accession: The accession of the target sequence or profile, or ’-’ if none.
214 (3) query name: The name of the query sequence or profile.
215 (4) accession: The accession of the query sequence or profile, or ’-’ if none.
216 (5) mdl (model): Which type of model was used to compute the final score. Either ’cm’ or ’hmm’. A CM is used to compute the final hit scores unless the model has zero basepairs or the --hmmonly option is used, in which case a HMM will be used.
217 (6) mdl from (model coord): The start of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.
218 (7) mdl to (model coord): The end of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.
219 (8) seq from (ali coord): The start of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
220 (9) seq to (ali coord): The end of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
221 (10) strand: The strand on which the hit occurs on the sequence. ’+’ if the hit is on the top (Watson) strand, ’-’ if the hit is on the bottom (Crick) strand. If on the top strand, the “seq from” value will be less than or equal to the “seq to” value, else it will be greater than or equal to it.
222 (11) trunc: Indicates if this is predicted to be a truncated CM hit or not. This will be “no” if it is a CM hit that is not predicted to be truncated by the end of the sequence, “5’ ” or “3’ ” if the hit is predicted to have one or more 5’ or 3’ residues missing due to an artificial truncation of the sequence, or “5’&3’” if the hit is predicted to have one or more 5’ residues missing and one or more 3’ residues missing. If the hit is an HMM hit, this will always be ’-’.
223 (12) pass: Indicates what “pass” of the pipeline the hit was detected on. This is probably only useful for testing and debugging. Non-truncated hits are found on the first pass, truncated hits are found on successive passes.
224 (13) gc: Fraction of G and C nucleotides in the hit.
225 (14) bias: The biased-composition correction: the bit score difference contributed by the null3 model for CM hits, or the null2 model for HMM hits. High bias scores may be a red flag for a false positive. It is difficult to correct for all possible ways in which nonrandom but nonhomologous biological sequences can appear to be similar, such as short-period tandem repeats, so there are cases where the bias correction is not strong enough (creating false positives).
226 (15) score: The score (in bits) for this target/query comparison. It includes the biased-composition correction (the “null3” model for CM hits, or the “null2” model for HMM hits).
227 (16) E-value: The expectation value (statistical significance) of the target. This is a per query E-value; i.e. calculated as the expected number of false positives achieving this comparison’s score for a single query against the search space Z. For cmsearch Z is defined as the total number of nucleotides in the target dataset multiplied by 2 because both strands are searched. For cmscan Z is the total number of nucleotides in the query sequence multiplied by 2 because both strands are searched and multiplied by the number of models in the target database. If you search with multiple queries and if you want to control the overall false positive rate of that search rather than the false positive rate per query, you will want to multiply this per-query E-value by how many queries you’re doing.
228 (17) inc: Indicates whether or not this hit achieves the inclusion threshold: ’!’ if it does, ’?’ if it does not (and rather only achieves the reporting threshold). By default, the inclusion threshold is an E-value of 0.01 and the reporting threshold is an E-value of 10.0, but these can be changed with command line options as described in the manual pages.
229 (18) description of target: The remainder of the line is the target’s description line, as free text.
232 For further questions please refer to the Infernal `Userguide <http://eddylab.org/infernal/Userguide.pdf>`_.
235 ]]>
236 </help>
238 <expand macro="citations" />
241 </tool>