comparison lastz.xml @ 5:bd84ff27bc16 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lastz commit a0a0480a8df511d23ed6101a489ca06337f5ed56
author devteam
date Mon, 26 Feb 2018 15:37:53 -0500
parents
children b6d7308c3728
comparison
equal deleted inserted replaced
4:60afcc2c1d05 5:bd84ff27bc16
1 <tool id="lastz_wrapper_2" name="LASTZ" version="1.3">
2 <description>: align long sequences</description>
3 <macros>
4 <import>lastz_macros.xml</import>
5 </macros>
6 <requirements>
7 <requirement type="package" version="@LASTZ_CONDA_VERSION@">lastz</requirement>
8 <requirement type="package" version="1.7">samtools</requirement>
9 <requirement type="package" version="3.4.2">r-base</requirement>
10 </requirements>
11 <command detect_errors="exit_code"><![CDATA[
12 lastz
13 @TARGET_INPUT_COMMAND_LINE@
14 ## Without --self the query dataset is passed; with --self the query is ignored and --nomirror may be added
15 #if not $where_to_look.self
16 '${query}'
17 #else
18 '${where_to_look.self}'
19 #if $where_to_look.nomirror
20 '${where_to_look.nomirror}'
21 #end if
22 #end if
23
24 ## WHERE TO LOOK --------------------------------
25
26 '${where_to_look.strand}'
27 #if str( $where_to_look.qhsplimit.qhsplimit_selector ) == "yes":
28 #if $where_to_look.qhsplimit.qhsplimit_n:
29 ## The "keep" checkbox is labelled as also suppressing warnings, so it is always
30 ## emitted as keep,nowarn, even when the separate nowarn checkbox is left unticked.
31 #if $where_to_look.qhsplimit.qhsplimit_keep:
32 '--queryhsplimit=keep,nowarn:${where_to_look.qhsplimit.qhsplimit_n}'
33 #elif $where_to_look.qhsplimit.qhsplimit_nowarn:
34 '--queryhsplimit=nowarn:${where_to_look.qhsplimit.qhsplimit_n}'
35 #else:
36 '--queryhsplimit=${where_to_look.qhsplimit.qhsplimit_n}'
37 #end if
38 #end if
39 #end if
40
41 #if $where_to_look.qhspbest:
42 '--queryhspbest=${where_to_look.qhspbest}'
43 #end if
44
45 #if str( $where_to_look.qdepth.qdepth_selector ) == "yes":
46 #if $where_to_look.qdepth.qdepth_n:
47 #if $where_to_look.qdepth.qdepth_nowarn and not $where_to_look.qdepth.qdepth_keep:
48 '--querydepth=${where_to_look.qdepth.qdepth_nowarn}:${where_to_look.qdepth.qdepth_n}'
49 #elif not $where_to_look.qdepth.qdepth_nowarn and $where_to_look.qdepth.qdepth_keep:
50 '--querydepth=${where_to_look.qdepth.qdepth_keep}:${where_to_look.qdepth.qdepth_n}'
51 #elif $where_to_look.qdepth.qdepth_nowarn and $where_to_look.qdepth.qdepth_keep:
52 '--querydepth=${where_to_look.qdepth.qdepth_keep},${where_to_look.qdepth.qdepth_nowarn}:${where_to_look.qdepth.qdepth_n}'
53 #else:
54 '--querydepth=${where_to_look.qdepth.qdepth_n}'
55 #end if
56 #end if
57 #end if
58
59 ## SCORING: score file, match/mismatch scores, gap penalties, ambiguity handling
60
61 #if $scoring.score_file
62 '--scores=${scoring.score_file}'
63 #end if
64
65 #if str($scoring.match.match_selector) == 'yes'
66 #if $scoring.match.match_reward
67 #if not $scoring.match.match_penalty
68 '--match=${scoring.match.match_reward}'
69 #else
70 '--match=${scoring.match.match_reward},${scoring.match.match_penalty}'
71 #end if
72 #end if
73 #end if
74 #if str($scoring.gap.gap_selector) == 'yes'
75 #if $scoring.gap.gap_extend
76 #if not $scoring.gap.gap_open
77 '--gap=${scoring.gap.gap_extend}'
78 #else
79 '--gap=${scoring.gap.gap_open},${scoring.gap.gap_extend}'
80 #end if
81 #end if
82 #end if
83 #if $scoring.ambigN
84 '${scoring.ambigN}'
85 #end if
86 #if $scoring.ambigIUPAC
87 '${scoring.ambigIUPAC}'
88 #end if
89
90 ## SEEDING --------------------------------
91
92 #if str( $seeding.seed.seed_selector ) == "pre_set":
93 '${seeding.seed.pre_set_seeds}'
94 #elif str( $seeding.seed.seed_selector ) == "len":
95 '--seed=match${seeding.seed.seed_match}'
96 #elif str( $seeding.seed.seed_selector ) == "half_len":
97 '--seed=half${seeding.seed.seed_half}'
98 #elif str( $seeding.seed.seed_selector ) == "pattern":
99 '--seed=${seeding.seed.pattern}'
100 #end if
101 #if str( $seeding.transitions ) != "--transition":
102 '${seeding.transitions}'
103 #end if
104 #if str( $seeding.seed_filer.seed_filer_selector ) == "yes" and $seeding.seed_filer.filter_match:
105 ## filter_tr is compared as a string: 0 transversions is a real limit,
106 ## but a plain truth test would treat 0 like "not set" and drop it.
107 #if str( $seeding.seed_filer.filter_tr ) != "":
108 '--filter=${seeding.seed_filer.filter_tr},${seeding.seed_filer.filter_match}'
109 #else:
110 '--filter=${seeding.seed_filer.filter_match}'
111 #end if
112 #end if
113
114 ## FINDING HSPs: gap-free extension switches, extension method, entropy handling
115
116 #if $hsp.gfextend
117 '${hsp.gfextend}'
118 #end if
119 #if $hsp.nogfextend:
120 '${hsp.nogfextend}'
121 #end if
122 #if str($hsp.hsp_method.hsp_method_selector) == 'match'
123
124 #if $hsp.hsp_method.exact
125 '--exact=${hsp.hsp_method.exact}'
126 #end if
127 #elif str($hsp.hsp_method.hsp_method_selector) == 'mismatch'
128
129 #if $hsp.hsp_method.mismatch_length and $hsp.hsp_method.mismatch_count
130 '--mismatch=${hsp.hsp_method.mismatch_count},${hsp.hsp_method.mismatch_length}'
131 #end if
132 #elif str($hsp.hsp_method.hsp_method_selector) == 'x'
133
134 #if $hsp.hsp_method.xdrop
135 '--xdrop=${hsp.hsp_method.xdrop}'
136 #end if
137
138 #if $hsp.hsp_method.hspthresh
139 '--hspthresh=${hsp.hsp_method.hspthresh}'
140 #end if
141
142 #if $hsp.hsp_method.hspthresh_top
143 '--hspthresh=top${hsp.hsp_method.hspthresh_top}'
144 #end if
145
146 #if $hsp.hsp_method.hspthresh_top_percent
147 '--hspthresh=top${hsp.hsp_method.hspthresh_top_percent}%'
148 #end if
149 #end if
150 #if $hsp.entropy
151 '${hsp.entropy}'
152 #end if
153 #if $hsp.entropy_report
154 '${hsp.entropy_report}'
155 #end if
156 #if $hsp.noentropy
157 '${hsp.noentropy}'
158 #end if
159
160 ## CHAINING: plain --chain flag and/or --chain with diagonal,anti-diagonal penalties
161
162 #if $chaining.chain
163 '${chaining.chain}'
164 #end if
165 #if str($chaining.chaning_penalties.chaning_penalties_selector) == 'yes'
166 #if $chaining.chaning_penalties.anti and $chaining.chaning_penalties.diag
167 '--chain=${chaining.chaning_penalties.diag},${chaining.chaning_penalties.anti}'
168 #end if
169 #end if
170
171 ## GAPPED EXTENSION: each option is emitted only when its parameter is set
172
173 #if $gap_ext.gapped
174 '${gap_ext.gapped}'
175 #end if
176 #if $gap_ext.nogapped
177 '${gap_ext.nogapped}'
178 #end if
179 #if $gap_ext.ydrop
180 '--ydrop=${gap_ext.ydrop}'
181 #end if
182 #if $gap_ext.noytrim
183 '${gap_ext.noytrim}'
184 #end if
185 #if $gap_ext.gappedthresh
186 '--gappedthresh=${gap_ext.gappedthresh}'
187 #end if
188 #if $gap_ext.allgappedbounds
189 '${gap_ext.allgappedbounds}'
190 #end if
191
192 ## FILTERING --------------------------------
193 ## Parameters that allow 0 are tested with str(...) != "" because a legitimate 0 would fail a plain truth test
194 #if str( $filters.identity.id_min ) != "":
195 #if str( $filters.identity.id_max ) != "":
196 '--filter=identity:${filters.identity.id_min}..${filters.identity.id_max}'
197 #else:
198 '--filter=identity:${filters.identity.id_min}'
199 #end if
200 #end if
201 #if str( $filters.continuity.cont_min ) != "":
202 #if str( $filters.continuity.cont_max ) != "":
203 '--filter=continuity:${filters.continuity.cont_min}..${filters.continuity.cont_max}'
204 #else:
205 '--filter=continuity:${filters.continuity.cont_min}'
206 #end if
207 #end if
208 #if str( $filters.coverage.cov_min ) != "":
209 #if str( $filters.coverage.cov_max ) != "":
210 '--filter=coverage:${filters.coverage.cov_min}..${filters.coverage.cov_max}'
211 #else:
212 '--filter=coverage:${filters.coverage.cov_min}'
213 #end if
214 #end if
215 #if $filters.filter_nmatch:
216 '--filter=nmatch:${filters.filter_nmatch}'
217 #end if
218 #if $filters.filter_nmatch_percent:
219 '--filter=nmatch:${filters.filter_nmatch_percent}%'
220 #end if
221 #if str( $filters.filter_nmismatch ) != "":
222 '--filter=nmismatch:0..${filters.filter_nmismatch}'
223 #end if
224 #if str( $filters.filter_ngap ) != "":
225 '--filter=ngap:0..${filters.filter_ngap}'
226 #end if
227 #if str( $filters.filter_cgap ) != "":
228 '--filter=cgap:0..${filters.filter_cgap}'
229 #end if
230 #if $filters.notrivial:
231 '${filters.notrivial}'
232 #end if
233
234 ## INTERPOLATION: pass --inner only when a threshold was entered
235
236 #if $interpolation.inner
237 '--inner=${interpolation.inner}'
238 #end if
239
240 ## OUTPUT FORMATS --------------------------------
241 ## Every Galaxy-provided path below is single-quoted, including the R config file, so an unusual path cannot split into several words
242 #if str( $output_format.out.format ) == "bam":
243 '--format=${output_format.out.bam_options}'
244 #elif str( $output_format.out.format ) == "general_def":
245 --format=general-
246 #elif str( $output_format.out.format ) == "maf":
247 '--format=${output_format.out.maf_type}'
248 #elif str( $output_format.out.format ) == "blastn":
249 --format=BLASTN-
250 #elif str( $output_format.out.format ) == "general_full":
251 '--format=general-:${output_format.out.fields}'
252 #end if
253 --action:target=multiple
254 --rdotplot=plot.r
255 #if str( $output_format.out.format ) == "bam":
256 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '${output}' &&
257 #else:
258 > '${output}' &&
259 #end if
260 Rscript '$r_plot' > /dev/null 2>&1
261
262 ]]>
263 </command>
264 <configfiles>
265 <configfile name="r_plot">
266 <![CDATA[
267 #!/usr/bin/env Rscript
268 ## On error: print the message to stderr and quit with status 1
269 options( show.error.messages=FALSE, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, FALSE ) } )
270 ## Skip the image entirely when 'plot.r' is empty
271 if (file.info('plot.r')\$size > 0) {
272 dots <- read.table("plot.r", as.is=TRUE)
273 png("${out_plot}", width=640, height=640, res=72)
274 plot(dots, type='l', xlab="target", ylab="query", main="plot")
275 dev.off()
276 }
277 ]]>
278 </configfile>
279 </configfiles>
280 <inputs>
281 <expand macro="target_input" />
282 <param name="query" type="data" format="fasta,fasta.gz,fastq.gz" label="Select QUERY sequence(s)" help="These are the sequences that you are aligning against TARGET" /> <!-- not passed to lastz when self-alignment is selected -->
283
284 <section name="where_to_look" expanded="False" title="Where to look"> <!-- strand choice, self-alignment, and per-query HSP / depth limits -->
285 <param name="strand" type="select" display="radio" label="which strand to search" argument="--strand" help="Search both strands or choose plus or minus">
286 <option value="--strand=both" selected="True">Both</option>
287 <option value="--strand=plus">Plus</option>
288 <option value="--strand=minus">Minus</option>
289 </param>
290 <param name="self" type="boolean" display="radio" truevalue="--self" falsevalue="" checked="False" argument="--self" label="Perform a self-alignment: the target sequence is also the query." help="Computation is more efficient than it would be without this option, since only one of each mirror-image pair of alignment blocks is processed (the other, redundant one is skipped during processing, but re-created in the output). Also, the trivial self-alignment block along the main diagonal is omitted from the output. THIS OPTION CANNOT BE USED IF THE TARGET IS COMPRISED OF MULTIPLE SEQUENCES"/>
291 <param name="nomirror" type="boolean" display="radio" truevalue="--nomirror" falsevalue="" checked="False" label="Inhibit the re-creation of mirror-image alignments." argument="--nomirror" help="Output consists of only one copy of each meaningful alignment block in a self-alignment. This option is only applicable when the ‑‑self option is used."/>
292 <conditional name="qhsplimit">
293 <param name="qhsplimit_selector" type="select" display="radio" label="Set HSP limit" argument="--queryhsplimit">
294 <option value="yes">Yes</option>
295 <option value="no" selected="true">No</option>
296 </param>
297 <when value="yes">
298 <param name="qhsplimit_n" optional="true" type="integer" label="Discard queries that have more than N HSPs" help="Any queries that exceed this limit are reported as a warning (to stderr), and no alignments are reported. This is useful for mapping reads to a reference genome, when some reads align to too many places in the reference."/>
299 <param name="qhsplimit_nowarn" type="boolean" truevalue="nowarn" display="radio" checked="False" label="Withhold warnings for queries that exceed the limit set above"/>
300 <param name="qhsplimit_keep" type="boolean" truevalue="keep" display="radio" checked="False" label="Keep queries that exceed the limit and suppress warnings" help="For such a query, the first N HSPs found are passed along to downstream processing. Note that the HSPs reported are not the best N HSPs. They are simply the first N found; they very likely have a positional bias."/>
301 </when>
302 <when value="no">
303 <!-- Do nothing -->
304 </when>
305 </conditional>
306 <param name="qhspbest" type="integer" optional="true" label="For queries that have more than N HSPs, discard any HSPs that score below the Nth best." help="This is useful for mapping reads to a reference genome, when some reads align to too many places in the reference."/>
307 <conditional name="qdepth">
308 <param name="qdepth_selector" type="select" display="radio" label="Set ratio of aligned bases to query length" argument="--querydepth">
309 <option value="yes">Yes</option>
310 <option value="no" selected="true">No</option>
311 </param>
312 <when value="yes">
313 <param name="qdepth_n" optional="true" type="float" label="Stop processing gapped alignments for a query/strand if its ratio of aligned bases to query length exceeds" help="This is a real number that corresponds to a depth of coverage threshold. For example, a value of 5.0 would cause termination once a query/strand has an average of five alignments for every base in the query. The numerator is the number of matches or substitutions (but not gaps); the denominator is the length of the query sequence. The purpose of this option is one of saving time. It is useful for automatically terminating the processing of queries with high repeat content, for which other methods of dealing with repetitive content fail. Moreover, back-end filtering options are not considered. In other words, matches are counted for any alignment that meets the scoring threshold, regardless of whether that alignment would be reported. The justification is that we are trying to abort the processing of queries that have too many bounding alignments in the DP matrix, and back-end filtering occurs later in the process."/> <!-- float because the help describes a real-valued ratio such as 5.0 -->
314 <param name="qdepth_keep" type="boolean" truevalue="keep" display="radio" checked="False" label="Report any alignments discovered for this query/strand before it exceeds the threshold" help="Note that the alignments reported are not guaranteed to be the highest scoring alignments that would achieve the threshold. They are simply the first alignments found. In other words, the purpose of this option is one of saving time, not one of finding optimal alignments."/> <!-- label describes what the "keep" truevalue does -->
315 <param name="qdepth_nowarn" type="boolean" truevalue="nowarn" display="radio" checked="False" label="Withhold warnings for queries that exceed the limit"/> <!-- label describes what the "nowarn" truevalue does -->
316 </when>
317 <when value="no">
318 <!-- Do nothing -->
319 </when>
320 </conditional>
321 </section>
322 <section name="scoring" expanded="false" title="Scoring"> <!-- score file, match/mismatch scores, gap penalties, ambiguity handling -->
323 <param name="score_file" type="data" format="txt" optional="true" argument="--scores" label="Read the substitution scores and gap penalties (and possibly other options) from a scoring file (see help below for file format description)." help="This option cannot be used in conjunction with ‑‑match or inference."/> <!--TODO EDIT INFERENCE -->
324 <conditional name="match">
325 <param name="match_selector" type="select" display="radio" label="Set the score values for a match and mismatch" argument="--match" help="Note that specifying ‑‑match changes the defaults for some of the other options (e.g. the scoring penalties for gaps, and various extension thresholds), as described in respective sections of LASTZ manual. The regular defaults are chosen for compatibility with BLASTZ, but since BLASTZ doesn't support ‑‑match, LASTZ infers that you are not expecting BLASTZ compatibility for this run, so it is free to use improved defaults. THIS OPTION CANNOT BE USED WITH --scores">
326 <option value="yes">Yes</option>
327 <option value="no" selected="true">No</option>
328 </param>
329 <when value="yes">
330 <param name="match_reward" type="integer" optional="true" label="Score value for a match"/>
331 <param name="match_penalty" type="integer" optional="true" label="Score value for a mismatch"/>
332 </when>
333 <when value="no">
334 <!-- Do nothing -->
335 </when>
336 </conditional>
337 <conditional name="gap">
338 <param name="gap_selector" type="select" display="radio" label="Set the score penalties for opening and extending a gap" argument="--gap" help="These are specified as positive values; subtraction is implicitly assumed. Note that the first base in a gap incurs the sum of both penalties. This option is only valid if gapped extension is being performed, and cannot be used in conjunction with inference. These values specified on the command line override any corresponding values from a file provided with ‑‑scores.">
339 <option value="yes">Yes</option>
340 <option value="no" selected="true">No</option>
341 </param>
342 <when value="yes">
343 <param name="gap_open" type="integer" optional="true" label="Gap opening penalty"/>
344 <param name="gap_extend" type="integer" optional="true" label="Gap extension penalty"/>
345 </when>
346 <when value="no">
347 <!-- Do nothing -->
348 </when>
349 </conditional>
350 <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
351 <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="false" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
352 </section>
353 <section name="seeding" expanded="false" title="Seeding"> <!-- seed pattern, allowed transitions, and seed-hit filtering -->
354 <conditional name="seed">
355 <param name="seed_selector" type="select" display="radio" label="Select seed type">
356 <option value="defaults" selected="true">Use defaults</option>
357 <option value="pre_set">Use pre-set values</option>
358 <option value="len">Set length</option>
359 <option value="half_len">Set half length</option>
360 <option value="pattern">Set pattern</option>
361 </param>
362 <when value="defaults">
363 <!-- Do nothing -->
364 </when>
365 <when value="pre_set">
366 <param name="pre_set_seeds" type="select" display="radio" label="Select seed pattern" help="Select between 12of19 seed (19-bp word with matches in 12 specific positions: 1110100110010101111) or 14of22 seed (22-bp word with matches in 14 specific positions: 1110101100110010101111). 0 = mismatch, 1 = match" argument="--seed=12of19,--seed=14of22">
367 <option value="--seed=12of19">12of19</option>
368 <option value="--seed=14of22">14of22</option>
369 </param>
370 </when>
371 <when value="len">
372 <param name="seed_match" type="integer" optional="true" label="Set seed length" help="Seeds require a N-bp word with matches in all positions" argument="--seed=match" />
373 </when>
374 <when value="half_len">
375 <param name="seed_half" type="integer" optional="true" label="Set seed length" help="Seeds require a N-bp word with matches or transitions in all positions" argument="--seed=half" />
376 </when>
377 <when value="pattern">
378 <param name="pattern" type="text" size="25" optional="true" argument="--seed=PATTERN" label="Specify an arbitrary pattern" help="Use 1s, 0s, and Ts for seed discovery (e.g., 1110100110010101111 where 1 = match, 0 = mismatch, T = transition)" />
379 </when>
380 </conditional>
381 <param name="transitions" type="select" display="radio" label="Allow transitions" help="In each seed, specify how many match positions can be transition(s) instead">
382 <option value="--transition" selected="true">One</option>
383 <option value="--transition=2">Two</option>
384 <option value="--notransition">None</option>
385 </param>
386 <conditional name="seed_filer">
387 <param name="seed_filer_selector" type="select" display="radio" label="Filter seeds" argument="--filter">
388 <option value="yes">Yes</option>
389 <option value="no" selected="true">No</option>
390 </param>
391 <when value="yes">
392 <param name="filter_tr" type="integer" optional="true" label="Number of transversions" help="Allowing no more than this number of transversions. If not specified, any number of transversions is allowed (they are not limited)" /> <!-- label agrees with the help text: the limit is on transversions -->
393 <param name="filter_match" type="integer" optional="true" label="Number of matches" help="Require at least this many exact matches"/>
394 </when>
395 <when value="no">
396 <!-- Do nothing -->
397 </when>
398 </conditional>
399 </section>
400 <section name="hsp" expanded="false" title="HSPs (Gap-free extension)"> <!-- gap-free extension switches, extension method, entropy handling -->
401 <param name="gfextend" type="boolean" truevalue="--gfextend" checked="false" argument="--gfextend" label="Perform gap-free extension of seeds to HSPs" help="This will take into account other parameters in this section"/>
402 <param name="nogfextend" type="boolean" truevalue="--nogfextend" argument="--nogfextend" label="Skip the gap-free extension stage" help="Instead, pass the seeds along to the next specified stage. It is not recommended to use --nogfextend without also using --nogapped."/>
403 <conditional name="hsp_method">
404 <param name="hsp_method_selector" type="select" display="radio" label="Select HSP finding method">
405 <option value="none" selected="true">None</option>
406 <option value="match">Match extension</option>
407 <option value="mismatch">Mismatch extension</option>
408 <option value="x">X-drop extension</option>
409 </param>
410 <when value="none">
411 <!-- Do nothing -->
412 </when>
413 <when value="match">
414 <param name="exact" type="integer" optional="true" argument="--exact" label="Find HSPs using the exact match extension method with the given length threshold" help="This is instead of using the x-drop method"/>
415 </when>
416 <when value="mismatch">
417 <param name="mismatch_count" type="integer" optional="true" label="Specify number of mismatches"/>
418 <param name="mismatch_length" type="integer" min="1" max="50" optional="true" label="Specify length threshold" help="Find HSPs using the mismatch extension method with the given length threshold and allowing specified number of mismatches" argument="--mismatch"/>
419 </when>
420 <when value="x">
421 <param name="xdrop" type="integer" optional="true" argument="--xdrop" label="Find HSPs using the x-drop extension method with this threshold" help="The dropoff setting determines the endpoints of each gap-free segment: the extension of each seed is stopped when its cumulative score drops off by more than the given threshold from the maximum seen so far."/>
422 <param name="hspthresh" type="integer" optional="true" argument="--hspthresh" label="Set the score threshold for the x-drop extension method" help="HSPs scoring lower are discarded"/>
423 <param name="hspthresh_top" type="integer" optional="true" argument="--hspthresh=top" label="Set an adaptive score threshold for the x-drop extension method" help="HSPs scoring lower are discarded. The score threshold is chosen to limit the number of target sequence bases in HSPs to about this value (or possibly a little higher in case of ties, etc.)."/>
424 <param name="hspthresh_top_percent" type="integer" optional="true" argument="--hspthresh=top%" label="Set an adaptive score threshold for the x-drop extension method" help="HSPs scoring lower are discarded. The score threshold is chosen to limit the number of target sequence bases in HSPs to about this percentage value (or possibly a little higher in case of ties, etc.)."/>
425 </when>
426 </conditional>
427 <param name="entropy" type="boolean" truevalue="--entropy" checked="false" label="Adjust for entropy when qualifying HSPs in the x-drop extension method" help="Those that score just slightly above the HSP threshold are adjusted downward according to the entropy of their nucleotides, and any that then fall below the threshold are discarded."/>
428 <param name="entropy_report" type="boolean" truevalue="--entropy=report" checked="false" label="Adjust for entropy when qualifying HSPs in the x-drop extension method, and report (to stderr) any HSPs that are discarded as a result." help="Reported HSPs are printed to stderr"/>
429 <param name="noentropy" type="boolean" truevalue="--noentropy" checked="false" label="Don't adjust for entropy when qualifying HSPs"/>
430 </section>
431 <section name="chaining" title="Chaining" expanded="false"> <!-- plain chaining flag plus optional diagonal / anti-diagonal penalties -->
432 <param name="chain" type="boolean" argument="--chain" truevalue="--chain" checked="false" label="Perform chaining of HSPs with no penalties" />
433 <conditional name="chaning_penalties">
434 <param name="chaning_penalties_selector" type="select" argument="--chain=" display="radio" label="Perform chaining with penalties">
435 <option value="yes">Yes</option>
436 <option selected="true" value="no">No</option>
437 </param>
438 <when value="yes">
439 <param name="diag" optional="true" type="integer" label="Penalty for diagonal in dynamic programming matrix" />
440 <param name="anti" optional="true" type="integer" label="Penalty for anti-diagonal in dynamic programming matrix" help="These are specified as positive values; subtraction from the score is implicitly assumed." />
441 </when>
442 <when value="no">
443 <!-- No penalties requested: nothing to configure -->
444 </when>
445 </conditional>
446 </section>
447 <section name="gap_ext" title="Gapped extension" expanded="false"> <!-- switches and thresholds for the gapped extension stage -->
448 <param name="gapped" type="boolean" argument="--gapped" truevalue="--gapped" label="Perform gapped extension of HSPs" help="Extension of HSPs (or seeds, if gap-free extension is not performed), is performed after first reducing them to anchor points." />
449 <param name="nogapped" type="boolean" argument="--nogapped" truevalue="--nogapped" label="Skip the gapped extension stage" help="This means that interpolation must also be skipped, since it is not allowed without gapped extension" />
450 <param name="ydrop" type="integer" argument="--ydrop" optional="true" label="Set the threshold for terminating gapped extension" help="This restricts the endpoints of each local alignment by limiting the local region around each anchor in which extension is performed. The boundary of this region in the dynamic programming matrix is formed by the points where the cumulative score has dropped off by more than the given threshold from the maximum seen so far." />
451 <param name="noytrim" type="boolean" argument="--noytrim" truevalue="--noytrim" label="If y-drop extension encounters the end of the sequence, extend the alignment to the end of the sequence rather than trimming it back to the location giving the maximum score" help="This is highly recommended when either the target or query sequences are short reads (say, less than 100 bases), to prevent y-drop mismatch shadow." />
452 <param name="gappedthresh" type="integer" argument="--gappedthresh" optional="true" label="Set the threshold for gapped extension" help="Alignments scoring lower than that value are discarded. When used along with the x-drop method for gap-free extension, this value is generally set at least as high as the HSP threshold. Setting it lower has no effect, since at worst the HSP itself would always qualify (both extension stages use the same scoring matrix)." />
453 <param name="allgappedbounds" type="boolean" argument="--allgappedbounds" truevalue="--allgappedbounds" label="Revert to handling bounding alignments the way they were handled in BLASTZ." />
454 </section>
455 <section name="filters" title="Filtering" expanded="false"> <!-- back-end filters on identity, continuity, coverage, match/mismatch/gap counts -->
456 <section name="identity" title="Filter alignments by percent identity" expanded="true">
457 <param name="id_min" type="integer" optional="true" min="0" max="100" label="Minimum identity" />
458 <param name="id_max" type="integer" optional="true" min="0" max="100" argument="--filter=identity" label="Maximum identity (optional)" help="0 ≤ min ≤ max ≤ 100 percent. Identity is the percentage of aligned bases that are matches. Alignment blocks outside the given range are discarded." />
459 </section>
460 <section name="continuity" title="Filter alignments by continuity" expanded="false">
461 <param name="cont_min" type="integer" optional="true" min="0" max="100" label="Minimum continuity" />
462 <param name="cont_max" type="integer" optional="true" min="0" max="100" argument="--filter=continuity" label="Maximum continuity (optional)" help="0 ≤ min ≤ max ≤ 100 percent. Continuity is the percentage of alignment columns that are not gaps. Alignment blocks outside the given range are discarded." />
463 </section>
464 <section name="coverage" title="Filter alignments by coverage" expanded="false">
465 <param name="cov_min" type="integer" optional="true" min="0" max="100" label="Minimum coverage" />
466 <param name="cov_max" type="integer" optional="true" min="0" max="100" argument="--filter=coverage" label="Maximum coverage (optional)" help=" 0 ≤ min ≤ max ≤ 100 percent. Coverage is the percentage of the entire target or query sequence (whichever is shorter) that is included in the alignment block. Blocks outside the given range are discarded." />
467 </section>
468 <param name="filter_nmatch" type="integer" optional="true" min="1" argument="--filter=nmatch" label="Filter alignments by how many bases match" help="Requiring at least this number of matched bases, min > 0. Match count is the number of matched bases in the alignment." />
469 <param name="filter_nmatch_percent" type="integer" optional="true" min="1" argument="--filter=nmatch%" label="Filter alignments by how many bases match expressed as percentage" help="e.g., percentage of the query length." />
470 <param name="filter_nmismatch" type="integer" optional="true" min="0" argument="--filter=nmismatch:0.." label="Filter alignments by the number of mismatches" help="Allow no more than this number of mismatched bases. Mismatch count, or nmismatch, is the number of aligned bases in the alignment that are mismatches (substitutions)." />
471 <param name="filter_ngap" type="integer" optional="true" min="0" argument="--filter=ngap:0.." label="Filter alignments by the number of gaps" help="Allow no more than this number of gaps. Gap count, or ngap, is the number of runs of gapped columns in the alignment (each run is counted as one gap)." />
472 <param name="filter_cgap" type="integer" optional="true" min="0" argument="--filter=cgap:0.." label="Filter alignments by the number of gap columns" help="Allow no more than this number of gaps. Gap column count, or cgap, is the number of gapped columns in the alignment (each column is counted as one gap)." />
473 <param name="notrivial" type="boolean" argument="--notrivial" truevalue="--notrivial" label="Do not output a trivial self-alignment block if the target and query sequences are identical" help="Note that using ‑‑self automatically enables this option." />
474 </section>
475 <section name="interpolation" title="Interpolation" expanded="false"> <!-- single optional threshold passed as --inner -->
476 <param name="inner" type="integer" argument="--inner" optional="true" label="Perform additional alignment between the gapped alignment blocks, using (presumably) more sensitive alignment parameters" help="This value is used as the threshold for both the gap-free and gapped extension sub-stages; see the discussion of interpolation for more details. This option is only valid if gapped extension is performed." />
477 </section>
478 <section name="output_format" expanded="false" title="Output">
479 <conditional name="out">
480 <param name="format" type="select" display="radio" label="Specify the output format"> <!-- option values are the keys the command template branches on -->
481 <option value="bam" selected="true">BAM (--format=sam)</option>
482 <option value="general_def">General default (--format=general)</option>
483 <option value="general_full">Customized general (‑‑format=general[:fields])</option>
484 <option value="maf">MAF (--format=maf)</option>
485 <option value="blastn">blastn (--format=BLASTN)</option>
486 </param>
487 <when value="bam"> <!-- the selected value is inserted after "--format=" and the result is piped through samtools sort -->
488 <param name="bam_options" type="select" display="radio" argument="--format=sam, --format=softsam" label="Select a BAM flavor to output" help="Lastz actually outputs SAM data but Galaxy converts it into BAM to save space. For alignments that don't reach the end of a query, ‑‑format=sam uses 'hard clipping', while ‑‑format=softsam uses 'soft clipping'. See the section on 'clipped alignment' in the SAM specification for an explanation of what this means. The options ‑‑format=sam- and ‑‑format=softsam- suppress the SAM header lines. This makes them suitable for concatenating output from multiple runs. If you need to specify readgroup information: use AddOrReplaceReadGroups from Picard package">
489 <option value="sam" selected="true">BAM</option>
490 <option value="softsam">soft-clipped BAM</option>
491 <option value="sam-">BAM without header</option>
492 <option value="softsam-">soft-clipped BAM without header</option>
493 </param>
494 </when>
495 <when value="general_def">
496 <!-- Do nothing -->
497 </when>
498 <when value="general_full">
499 <param name="fields" type="select" display="checkboxes" multiple="true" label="Select which fields to include" argument="--format=general-[:fields]">
500 <option value="score" selected="true">score: Score of the alignment block</option>
501 <option value="name1" selected="true">name1: Name of the target sequence</option>
502 <option value="number1">number1: Number of the target sequence within the target file</option>
503 <option value="strand1" selected="true">strand1: Target sequence strand </option>
504 <option value="size1" selected="true">size1: Size of the entire target sequence</option>
505 <option value="start1">start1: Starting position of the alignment block in the target, origin-one</option>
506 <option value="zstart1" selected="true">zstart1: Starting position of the alignment block in the target, origin-zero</option>
507 <option value="end1" selected="true">end1: Ending position of the alignment block in the target</option>
508 <option value="length1">length1: Length of the alignment block in the target (excluding gaps)</option>
509 <option value="text1">text1: Aligned characters in the target, including gap characters</option>
510 <option value="qalign1">qalign1: The target quality sequence (if there is one) corresponding to aligned characters</option>
511 <option value="nucs1">nucs1: The entire target sequence</option>
512 <option value="name2" selected="true">name2: Name of the query sequence</option>
513 <option value="number2">number2: Number of the query sequence within the query file</option>
514 <option value="strand2" selected="true">strand2: Query sequence strand</option>
515 <option value="size2" selected="true">size2: Size of the entire query sequence</option>
516 <option value="start2">start2: Starting position of the alignment block in the query, origin-one</option>
517 <option value="zstart2" selected="true">zstart2: Starting position of the alignment block in the query, origin-zero</option>
518 <option value="end2" selected="true">end2: Ending position of the alignment block in the query</option>
519 <option value="length2">length2: Length of the alignment block in the query (excluding gaps)</option>
520 <option value="text2">text2: Aligned characters in the query, including gap characters</option>
521 <option value="qalign2">qalign2: The query quality sequence (if there is one) corresponding to aligned characters</option>
522 <option value="nucs2">nucs2: The entire query sequence</option>
523 <option value="nmatch">nmatch: Match count</option>
524 <option value="nmismatch">nmismatch: Mismatch count</option>
525 <option value="ncolumn">ncolumn: Number of columns in the block. This includes matches, mismatches (substitutions), and gaps</option>
526 <option value="npair">npair: Number of aligned bases in the block that are matches or mismatches (substitutions)</option>
527 <option value="ngap">ngap: Gap count, the number of gaps in the block, counting each run of gapped columns as a single gap</option>
528 <option value="cgap">cgap: Gap column count, the number of gaps in the block, counting each gapped column as a separate gap</option>
529 <option value="diff">diff: Differences between what would be written for text1 and text2</option>
530 <option value="cigar">cigar: A CIGAR-like representation of the alignment’s path</option>
531 <option value="cigarx">cigarx: Same as cigar, but uses a newer syntax that distinguishes matches from substitutions</option>
532 <option value="identity" selected="true">identity: Fraction of aligned bases in the block that are matches </option>
533 <option value="idfrac">idfrac: Fraction of aligned bases in the block that are matches </option>
534 <option value="id%" selected="true">id%: Fraction of aligned bases in the block that are matches (as %)</option>
535 <option value="blastid%">blastid%: Fraction of the alignment block that is matches, as would be reported by NCBI BLAST</option>
536 <option value="continuity">continuity: Rate of non-gaps (non-indels) in the alignment block</option>
537 <option value="confrac">confrac: Rate of non-gaps (non-indels) in the alignment block (as fraction)</option>
538 <option value="con%">con%: Rate of non-gaps (non-indels) in the alignment block (as %)</option>
539 <option value="coverage" selected="true">coverage: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block</option>
540 <option value="covfrac">covfrac: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as fraction)</option>
541 <option value="cov%" selected="true">cov%: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as %)</option>
542 <option value="diagonal">diagonal: The diagonal of the start of the alignment block in the DP matrix, expressed as an identifying number start1-start2</option>
543 <option value="shingle">shingle: A measurement of the shingle overlap between the target and the query</option>
544 <option value="number">number: The alignment number, counted as alignments are written to output (1-base)</option>
545 <option value="znumber">znumber: The alignment number, counted as alignments are written to output (0-base)</option>
546 <sanitizer invalid_char="">
547 <valid initial="string.letters,string.digits"><add value="%" /> </valid>
548 </sanitizer>
549 </param>
550 </when>
551 <when value="maf">
552 <param name="maf_type" type="select" display="radio" argument="--format=maf" label="Select MAF flavor" help="MAF is a multiple alignment format developed at UCSC">
553 <option value="maf">MAF</option>
554 <option value="maf+">MAF with additional stats</option>
555 <option value="maf-">MAF without header and comments</option>
556 </param>
557 </when>
558 <when value="blastn">
559 <!-- Do nothing -->
560 </when>
561 </conditional>
562 </section>
563 </inputs>
564 <outputs>
565 <data format="tabular" name="output" label="${tool.name} on ${on_string}: mapped reads">
566 <change_format>
567 <when input="output_format.out.format" value="bam" format="bam" />
568 <when input="output_format.out.format" value="maf" format="maf" />
569 </change_format>
570 </data>
571 <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot"/>
572 </outputs>
573 <tests>
574 <test>
575 <param name="ref_source" value="cached" />
576 <param name="target_2bit" value="phiX174" />
577 <param name="query" value="phiX_split.fasta" />
578 <param name="strand" value="--strand=both" />
579 <param name="format" value="general_def" />
580 <output name="output" value="test1.out" />
581 </test>
582 <test>
583 <param name="ref_source" value="history" />
584 <param name="target" value="phiX_split.fasta" />
585 <param name="query" value="phiX.fasta" />
586 <param name="strand" value="--strand=both" />
587 <param name="format" value="general_def" />
588 <output name="output" value="test2.out" />
589 </test>
590 <test>
591 <param name="ref_source" value="history" />
592 <param name="target" value="phiX_split.fasta" />
593 <param name="query" value="phiX.fasta" />
594 <param name="strand" value="--strand=both" />
595 <param name="score_file" value="score_file.txt" />
596 <param name="format" value="general_def" />
597 <output name="output" value="test3.out" />
598 </test>
599 </tests>
600
601 <help><![CDATA[
602
603 **What it does**
604
605 LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State.
606
607 .. class:: warningmark
608
609 **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to get an initial idea of how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. So RTFM!
610
611 **About LASTZ parameters**
612
613 Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow the parameter descriptions in the `manual <https://lastz.github.io/lastz/#syntax>`_.
614
615 **Defaults**
616
617 Here are the defaults for some of the most important parameters::
618
619 --seed=<pattern> set seed pattern (12of19, 14of22, or general pattern)
620 (default is 1110100110010101111)
621 SEE "Seeding" SECTION -> "Select seed type"
622
623 --[no]transition allow (or don't) one transition in a seed hit
624 (by default a transition is allowed)
625 SEE "Seeding" SECTION -> "Allow transitions"
626
627 --[no]chain perform chaining
628 (by default no chaining is performed)
629 SEE "Chaining" SECTION
630
631 --[no]gapped perform gapped alignment (instead of gap-free)
632 (by default gapped alignment is performed)
633 SEE "Gapped extension" SECTION
634
635 --strand=both search both strands
636 --strand=plus search + strand only (matching strand of query spec)
637 (by default both strands are searched)
638 SEE "Where to look" SECTION
639
640 --scores=<file> read substitution and gap scores from a file
641 SEE "Scoring" SECTION
642
643 --xdrop=<score> set x-drop threshold (default is 10sub[A][A])
644 SEE "HSPs" SECTION
645
646 --ydrop=<score> set y-drop threshold (default is open+300extend)
647 SEE "Gapped extension" SECTION
648
649 --hspthresh=<score> set threshold for high scoring pairs (default is 3000)
650 ungapped extensions scoring lower are discarded
651 <score> can also be a percentage or base count
652 SEE "HSPs" SECTION
653
654 --gappedthresh=<score> set threshold for gapped alignments
655 gapped extensions scoring lower are discarded
656 <score> can also be a percentage or base count
657 (default is to use same value as --hspthresh)
658 SEE "Gapped extension" SECTION
659
660
661 **Substitution matrix**
662
663 By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_)::
664
665 bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X']
666 fill_score = -100 # used when sub[*][*] is not defined
667 gap_open_penalty = 400
668 gap_extend_penalty = 30
669
670 A C G T
671 A 91 -114 -31 -123
672 C -114 100 -125 -31
673 G -31 -125 100 -114
674 T -123 -31 -114 91
675
676 A matrix can be supplied as input to the **Read the substitution scores** parameter in the *Scoring* section. A substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores).
677
678 **Output**
679
680 This version of LASTZ produces two outputs by default: a BAM alignment file and a dot-plot in PNG format. Other formats can be configured in the *Output* section. This incarnation of LASTZ produces outputs without comment lines starting with '#'. To learn the identity of each column, consult the `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_.
681
682 ]]>
683 </help>
684 <expand macro="citations"/>
685 </tool>