comparison cpt_progressivemauve/progressivemauve.xml @ 0:69795939c29b draft

Uploaded
author cpt
date Fri, 10 Jun 2022 08:41:20 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:69795939c29b
1 <?xml version="1.0"?>
2 <tool id="progressivemauve" name="progressiveMauve" version="19.1.0.0" profile="16.04">
3 <description>constructs multiple genome alignments</description>
4 <macros>
5 <import>macros.xml</import>
6 <import>cpt-macros.xml</import>
7 </macros>
8 <requirements> <!-- Declares the single package dependency of this tool; no version attribute is given -->
9 <requirement type="package">progressivemauve</requirement>
10 </requirements>
11 <command detect_errors="aggressive"><![CDATA[
12 ## Symlink each input dataset into the working directory under its own basename
13 #for $file in $sequences:
14 ln -s '$file' `basename '$file'`;
15 #end for
16
17 progressiveMauve
18 ## Input Options (each optional value is wrapped in #if so it is passed only when it evaluates as true)
19
20 #if $apply_backbone:
21 --apply-backbone='$apply_backbone'
22 #end if
23 --island-gap-size=$island_gap_size
24 $mums
25
26 #if $seed_weight:
27 --seed-weight=$seed_weight
28 #end if
29
30 #if $max_gapped_aligner_length:
31 --max-gapped-aligner-length=$max_gapped_aligner_length
32 #end if
33
34 #if $match_input:
35 --match-input='$match_input'
36 #end if
37 ## Forward the optional guide tree parameter declared in the inputs section
38 #if $input_guide_tree:
39 --input-guide-tree='$input_guide_tree'
40 #end if
41
42 $collinear
43 --scoring-scheme=$scoring_scheme
44 $no_weight_scaling
45 --max-breakpoint-distance-scale=$max_breakpoint_distance_scale
46 --conservation-distance-scale=$conservation_distance_scale
47 $skip_refinement
48 $skip_gapped_alignment
49
50 #if $bp_dist_estimate_min_score:
51 --bp-dist-estimate-min-score=$bp_dist_estimate_min_score
52 #end if
53 #if $gap_open:
54 --gap-open=$gap_open
55 #end if
56 #if $gap_extend:
57 --gap-extend=$gap_extend
58 #end if
59 ## Forward the repeat penalty select declared in the inputs section (always has a value)
60 --repeat-penalty=$repeat_penalty
61 #if $weight:
62 --weight=$weight
63 #end if
64 #if $min_scaled_penalty:
65 --min-scaled-penalty=$min_scaled_penalty
66 #end if
67
68 --hmm-p-go-homologous=$hmm_p_go_homologous
69 --hmm-p-go-unrelated=$hmm_p_go_unrelated
70 --hmm-identity=$hmm_identity
71
72 $seed_family
73 $solid_seeds
74 $coding_seeds
75 $no_recursion
76 $disable_backbone
77
78 ## Outputs (the two optional files are requested only when their switch is set)
79 --output='$output'
80 #if $output_guide_tree:
81 --output-guide-tree='$output_guide_tree_file'
82 #end if
83 #if $output_backbone:
84 --backbone-output='$output_backbone_file'
85 #end if
86
87 ## Sequences, referenced by the symlink names created at the top
88 #for $file in $sequences:
89 `basename "${file}"`
90 #end for
91
92 ]]></command>
93 <inputs> <!-- Form parameters; parameter names correspond to the $variables used in the command template -->
94 <param type="data" format="fasta" name="sequences" multiple="True"
95 label="Select sequences to align" help="in fasta format" />
96 <param type="data" format="xmfa" label="Apply Backbone" name="apply_backbone" optional="True"
97 help="Read an existing sequence alignment in XMFA format and apply backbone statistics to it (--apply-backbone)" />
98
99 <param type="integer" label="Island gap size" value="20" name="island_gap_size"
100 help="Alignment gaps above this size in nucleotides are considered to be islands (--island-gap-size)"/>
101 <!-- Boolean switches whose truevalue is a literal command-line flag are inserted into the command verbatim; their empty falsevalue emits nothing when unchecked -->
102 <param type="boolean" truevalue="--disable-backbone" falsevalue="" name="disable_backbone"
103 label="Disable backbone" help="Disable backbone detection (--disable-backbone)" />
104 <!-- output_guide_tree and output_backbone are not flags themselves: the command template and the output filters test them to enable the optional result files -->
105 <param type="boolean" truevalue="True" falsevalue="" name="output_guide_tree"
106 label="Output Guide Tree" help="Write out the guide tree used for alignment to a file (--output-guide-tree)" />
107
108 <param type="boolean" truevalue="True" falsevalue="" name="output_backbone"
109 label="Output Backbone" help="Write out the backbone to a file (--backbone-output)" />
110
111 <param type="boolean" truevalue="--mums" falsevalue="" label="MUMs" name="mums"
112 help="Find MUMs only, do not attempt to determine locally collinear blocks (LCBs) (--mums)" />
113 <!-- Optional integers default to 0; the command template wraps each in an #if test before emitting its option -->
114 <param type="integer" label="Seed weight" name="seed_weight" value="0" optional="True"
115 help="Use the specified seed weight for calculating initial anchors (--seed-weight)" />
116
117 <param type="data" format="tabular" label="Match Input" name="match_input" optional="True"
118 help="Use specified match file instead of searching for matches (--match-input)" />
119
120 <!--<param type="file" label="input-id-matrix" help="An identity matrix describing similarity among all pairs of input sequences/alignments (- -input-id-matrix)" />-->
121 <param type="integer" label="Max gapped aligner length" value="0" optional="True" name="max_gapped_aligner_length"
122 help="Maximum number of base pairs to attempt aligning with the gapped aligner (--max-gapped-aligner-length)" />
123
124 <param type="data" format="nhx" label="input-guide-tree" optional="True" name="input_guide_tree"
125 help="A phylogenetic guide tree in Newick format that describes the order in which sequences will be aligned (--input-guide-tree)" />
126
127 <param type="boolean" truevalue="--collinear" falsevalue="" label="Collinear inputs" name="collinear"
128 help="Assume that input sequences are collinear--they have no rearrangements (--collinear)" />
129
130 <param type="select" label="Scoring scheme" name="scoring_scheme" help="Selects the anchoring score function. (--scoring-scheme)" >
131 <option value="sp" selected="True">Extant sum-of-pairs (sp)</option>
132 <option value="ancestral_sp">Sum-of-pairs + Ancestral (ancestral_sp)</option>
133 <option value="ancestral">Ancestral (ancestral)</option>
134 </param>
135
136 <param type="boolean" truevalue="--no-weight-scaling" falsevalue="" label="No weight scaling" name="no_weight_scaling"
137 help="Don't scale LCB weights by conservation distance and breakpoint distance (--no-weight-scaling)" />
138 <!-- Scaling factors, limited to the range 0 to 1 by min/max -->
139 <param type="float" min="0" max="1" label="max-breakpoint-distance-scale" value="0.5" name="max_breakpoint_distance_scale"
140 help="Set the maximum weight scaling by breakpoint distance. (--max-breakpoint-distance-scale)" />
141
142 <param type="float" min="0" max="1" label="conservation-distance-scale" value="0.5" name="conservation_distance_scale"
143 help="Scale conservation distances by this amount. (--conservation-distance-scale)" />
144
145 <param type="boolean" truevalue="--skip-refinement" falsevalue="" label="Skip refinement" name="skip_refinement"
146 help="Do not perform iterative refinement (--skip-refinement)" />
147 <param type="boolean" truevalue="--skip-gapped-alignment" falsevalue="" label="Skip gapped alignment" name="skip_gapped_alignment"
148 help="Do not perform gapped alignment (--skip-gapped-alignment)" />
149 <param type="integer" label="BP dist estimate min score" name="bp_dist_estimate_min_score" value="0" optional="True"
150 help="Minimum LCB score for estimating pairwise breakpoint distance (--bp-dist-estimate-min-score)" />
151
152 <param type="integer" label="Gap open" name="gap_open" value="0" optional="True"
153 help="Gap open penalty (--gap-open)" />
154
155 <param type="select" label="Repeat penalty" name="repeat_penalty"
156 help="Sets whether the repeat scores go negative or go to zero for highly repetitive sequences. (--repeat-penalty)">
157 <option value="negative" selected="True">Negative</option>
158 <option value="zero">Zero</option>
159 </param>
160
161 <param type="integer" label="Gap extend" name="gap_extend" value="0" optional="True"
162 help="Gap extend penalty (--gap-extend)" />
163
164 <!--<param type="data" label="Substitution matrix" -->
165 <!--help="Nucleotide substitution matrix in NCBI format (- -substitution-matrix)" />-->
166
167 <param type="integer" label="Weight" name="weight" value="0" optional="True"
168 help="Minimum pairwise LCB score (--weight)" />
169 <param type="integer" label="Min scaled penalty" name="min_scaled_penalty" value="0" optional="True"
170 help="Minimum breakpoint penalty after scaling the penalty by expected divergence (--min-scaled-penalty)" />
171 <!-- HMM parameters, each limited to the range 0 to 1 by min/max -->
172 <param type="float" label="HMM p go homologous" name="hmm_p_go_homologous" min="0" max="1" value="0.00001"
173 help="Probability of transitioning from the unrelated to the homologous state (--hmm-p-go-homologous)" />
174 <param type="float" label="HMM p go unrelated" name="hmm_p_go_unrelated" min="0" max="1" value="0.000000001"
175 help="Probability of transitioning from the homologous to the unrelated state (--hmm-p-go-unrelated)" />
176 <param type="float" label="HMM identity" name="hmm_identity" min="0" max="1" value="0.7"
177 help="Expected level of sequence identity among pairs of sequences (--hmm-identity)" />
178 <!-- Seed and anchor-search switches -->
179 <param type="boolean" truevalue="--seed-family" falsevalue="" label="Seed family" name="seed_family"
180 help="Use a family of spaced seeds to improve sensitivity (--seed-family)" />
181 <param type="boolean" truevalue="--solid-seeds" falsevalue="" label="Solid seeds" name="solid_seeds"
182 help="Use solid seeds. Do not permit substitutions in anchor matches. (--solid-seeds)" />
183 <param type="boolean" truevalue="--coding-seeds" falsevalue="" label="Coding seeds" name="coding_seeds"
184 help="Use coding pattern seeds. Useful to generate matches in coding regions with 3rd codon position degeneracy. (--coding-seeds)" />
185 <param type="boolean" truevalue="--no-recursion" falsevalue="" label="No recursion" name="no_recursion"
186 help="Disable recursive anchor search (--no-recursion)" />
187 </inputs>
188 <outputs> <!-- One alignment dataset plus two optional datasets guarded by filters -->
189 <data name="output" format="xmfa" label="${tool.name} alignment of ${on_string}"> <!-- main result -->
190 <change_format> <!-- the datatype becomes tabular when the mums switch carries its flag value -->
191 <when input="mums" value="--mums" format="tabular"/>
192 </change_format>
193 </data>
194 <data name="output_guide_tree_file" format="nhx" label="${tool.name} alignment of ${on_string}: Guide tree">
195 <filter>output_guide_tree</filter> <!-- filtered on the output_guide_tree switch -->
196 </data>
197 <data name="output_backbone_file" format="tabular" label="${tool.name} alignment of ${on_string}: Backbone">
198 <filter>output_backbone</filter> <!-- filtered on the output_backbone switch -->
199 </data>
200 </outputs>
201 <tests> <!-- Input and expected files are referenced by name only; they are not part of this file -->
202 <test> <!-- two separate fasta inputs, default options -->
203 <param name="sequences" value="phagey.fa,karma.fa"/>
204 <output name="output" file="1.xmfa" lines_diff="20"/>
205 </test>
206 <test> <!-- single fasta input, default options -->
207 <param name="sequences" value="merged.fa"/>
208 <output name="output" file="2.xmfa" lines_diff="20"/>
209 </test>
210 <test> <!-- guide tree output switched on; both datasets are compared -->
211 <param name="sequences" value="merged.fa"/>
212 <param name="output_guide_tree" value="True"/>
213 <output name="output" file="3.xmfa" lines_diff="20"/>
214 <output name="output_guide_tree_file" file="3.nhx"/>
215 </test>
216 <test> <!-- mums switch on; result compared by size with a delta of 1000 -->
217 <param name="sequences" value="merged.fa"/>
218 <param name="mums" value="True"/>
219 <output name="output" file="4.mums" compare="sim_size" delta="1000"/>
220 </test>
221 <test> <!-- feeds 4.mums back in through match_input -->
222 <param name="sequences" value="merged.fa"/>
223 <param name="match_input" value="4.mums"/>
224 <output name="output" file="5.xmfa" lines_diff="24"/>
225 </test>
226 </tests>
227 <help><![CDATA[
228 What it does
229 ============
230
231 Mauve is a system for efficiently constructing multiple genome alignments in
232 the presence of large-scale evolutionary events such as rearrangement and
233 inversion. Multiple genome alignment provides a basis for research into
234 comparative genomics and the study of evolutionary dynamics. Aligning whole
235 genomes is a fundamentally different problem than aligning short sequences.
236
237 Mauve has been developed with the idea that a multiple genome aligner should
238 require only modest computational resources. It employs algorithmic techniques
239 that scale well in the amount of sequence being aligned. For example, a pair of
240 Y. pestis genomes can be aligned in under a minute, while a group of 9
241 divergent Enterobacterial genomes can be aligned in a few hours.
242
243
244 Example Usage
245 =============
246
247 +-----------------------------------+-------------+
248 | Usage | Notes |
249 +===================================+=============+
250 | Align genomes |Simply |
251 | |select as |
252 | |many fasta |
253 | |files with |
254 | |one or more |
255 | |sequences as |
256 | |necessary |
257 +-----------------------------------+-------------+
258 | Align genomes but also save |Use the |
259 | the guide tree and produce a |**Output |
260 | backbone file |Guide Tree** |
261 | |and **Output |
262 | |Backbone** |
263 | |options |
264 +-----------------------------------+-------------+
265 | Align genomes, but do not |Use the |
266 | detect forced alignment of |**Disable |
267 | unrelated sequences |backbone** |
268 | |option |
269 +-----------------------------------+-------------+
270 | Detect forced alignment of |Use the |
271 | unrelated sequence in the |**Apply |
272 | alignment produced |Backbone** |
273 | in previous example, use |option and |
274 | custom Homology HMM transition |specify the |
275 | parameters. |XMFA file |
276 | |produced |
277 | |in the |
278 | |previous |
279 | |example |
280 +-----------------------------------+-------------+
281 | Compute ungapped |Use the |
282 | local-multiple alignments among |**MUMs** |
283 | the input sequences |option |
284 +-----------------------------------+-------------+
285 | Compute an alignment of the |Set the |
286 | same genomes, using previously |**Match |
287 | computed local-multiple |Input** to |
288 | alignments |the tabular |
289 | |MUMs file |
290 | |produced in |
291 | |the previous |
292 | |example |
293 +-----------------------------------+-------------+
294 | Set a minimum scaled |Use the |
295 | breakpoint penalty to cope with |**Min Scaled |
296 | the case where most genomes |Penalty** and|
297 | are aligned correctly, but manual |set to a |
298 | inspection reveals that |value like |
299 | a divergent genome has too |5000 |
300 | many predicted rearrangements. | |
301 +-----------------------------------+-------------+
302 | Globally align a set of |Use the |
303 | collinear virus |**Collinear |
304 | genomes, using seed families |inputs** and |
305 | to improve anchoring sensitivity |**Seed |
306 | in regions below 70% sequence |Family** |
307 | identity. |options |
308 +-----------------------------------+-------------+
309
310 ]]></help>
311 <citations> <!-- Both entries are macro expansions; the macros are not defined in this file (presumably in the imported macros.xml / cpt-macros.xml; verify) -->
312 <expand macro="citation/progressive_mauve" />
313 <expand macro="citation/mijalisrasche" />
314 </citations>
315 </tool>