comparison lastz.xml @ 8:e7f19d6a9af8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lastz commit a7e9d5b3906b7ebb35b1c29c3a8e8203b2cefccd
author iuc
date Fri, 18 May 2018 16:58:38 -0400
parents 10aca14c2332
children 2ff111fac1d7
comparison
equal deleted inserted replaced
7:10aca14c2332 8:e7f19d6a9af8
1 <tool id="lastz_wrapper_2" name="LASTZ" version="1.3.1"> 1 <tool id="lastz_wrapper_2" name="LASTZ" version="1.3.2">
2 <description>: align long sequences</description> 2 <description>: align long sequences</description>
3 <macros> 3 <macros>
4 <import>lastz_macros.xml</import> 4 <import>lastz_macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
236 236
237 #if $interpolation.inner: 237 #if $interpolation.inner:
238 '--inner=${interpolation.inner}' 238 '--inner=${interpolation.inner}'
239 #end if 239 #end if
240 240
241 ## HOUSEKEEPING ----------------------------------
242
243 --traceback=160M
244
245
241 ## OUTPUT FORMATS -------------------------------- 246 ## OUTPUT FORMATS --------------------------------
242 247
243 #if str( $output_format.out.format ) == "bam": 248 #if str( $output_format.out.format ) == "bam":
244 '--format=${output_format.out.bam_options}' 249 '--format=${output_format.out.bam_options}'
245 #elif str( $output_format.out.format ) == "general_def": 250 #elif str( $output_format.out.format ) == "general_def":
250 --format=BLASTN- 255 --format=BLASTN-
251 #elif str( $output_format.out.format ) == "general_full": 256 #elif str( $output_format.out.format ) == "general_full":
252 '--format=general-:${output_format.out.fields}' 257 '--format=general-:${output_format.out.fields}'
253 #end if 258 #end if
254 --action:target=multiple 259 --action:target=multiple
255 --rdotplot=plot.r 260 $output_format.rplot
256 #if str( $output_format.out.format ) == "bam": 261 #if str( $output_format.out.format ) == "bam":
257 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '${output}' && 262 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '${output}'
258 #else: 263 #else:
259 > '${output}' && 264 > '${output}'
260 #end if 265 #end if
261 Rscript $r_plot > /dev/null 2>&1 266 #if $output_format.rplot:
267 &&
268 Rscript $r_plot > /dev/null 2>&1
269 #end if
262 270
263 ]]> 271 ]]>
264 </command> 272 </command>
265 <configfiles> 273 <configfiles>
266 <configfile name="r_plot"> 274 <configfile name="r_plot">
347 <when value="no"> 355 <when value="no">
348 <!-- Do nothing --> 356 <!-- Do nothing -->
349 </when> 357 </when>
350 </conditional> 358 </conditional>
351 <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> 359 <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
352 <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="false" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/> 360 <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="true" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>
353 </section> 361 </section>
354 <section name="seeding" expanded="false" title="Seeding"> 362 <section name="seeding" expanded="false" title="Seeding">
355 <conditional name="seed"> 363 <conditional name="seed">
356 <param name="seed_selector" type="select" display="radio" label="Select seed type"> 364 <param name="seed_selector" type="select" display="radio" label="Select seed type">
357 <option value="defaults" selected="true">Use defaults</option> 365 <option value="defaults" selected="true">Use defaults</option>
558 </when> 566 </when>
559 <when value="blastn"> 567 <when value="blastn">
560 <!-- Do nothing --> 568 <!-- Do nothing -->
561 </when> 569 </when>
562 </conditional> 570 </conditional>
571 <param name="rplot" type="boolean" truevalue="--rdotplot=plot.r" falsevalue="" checked="false" argument="--rdotplot" label="Create a dotplot representation of alignments?" help="The dotplot is only useful if query and target contain exactly one sequence each"/>
563 </section> 572 </section>
564 </inputs> 573 </inputs>
565 <outputs> 574 <outputs>
566 <data format="tabular" name="output" label="${tool.name} on ${on_string}: mapped reads"> 575 <data format="tabular" name="output" label="${tool.name} on ${on_string}: mapped reads">
567 <change_format> 576 <change_format>
568 <when input="output_format.out.format" value="bam" format="bam" /> 577 <when input="output_format.out.format" value="bam" format="bam" />
569 <when input="output_format.out.format" value="maf" format="maf" /> 578 <when input="output_format.out.format" value="maf" format="maf" />
570 </change_format> 579 </change_format>
571 </data> 580 </data>
572 <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot"/> 581 <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot">
582 <filter>output_format['rplot']</filter>
583 </data>
573 </outputs> 584 </outputs>
574 <tests> 585 <tests>
575 <test> 586 <test>
576 <param name="ref_source" value="cached" /> 587 <param name="ref_source" value="cached" />
577 <param name="target_2bit" value="phiX174" /> 588 <param name="target_2bit" value="phiX174" />
607 </test> 618 </test>
608 <test> 619 <test>
609 <param name="ref_source" value="history" /> 620 <param name="ref_source" value="history" />
610 <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz" /> 621 <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz" />
611 <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2" /> 622 <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2" />
623 <param name="traceback" value="83886080" />
624 <param name="word" value="28" />
612 <param name="strand" value="--strand=both" /> 625 <param name="strand" value="--strand=both" />
613 <param name="format" value="blastn" /> 626 <param name="format" value="blastn" />
614 <output name="output" value="test5.out" /> 627 <output name="output" value="test5.out" />
615 </test> 628 </test>
616 <test> 629 <test>
631 </test> 644 </test>
632 </tests> 645 </tests>
633 646
634 <help><![CDATA[ 647 <help><![CDATA[
635 648
636 **What it does** 649 **What it does**
637 650
638 LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State. 651 LASTZ is designed to preprocess one sequence or set of sequences (which we collectively call the *TARGET*) and then align several *QUERY* sequences to it. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State.
639 652
640 .. class:: warningmark 653 .. class:: infomark
641 654
642 **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to obtain the initial idea about how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. So RTFM! 655 **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. Default parameters may be sufficient to obtain the initial idea about how similar your sequences are, but to produce reliable alignments you may need to tweak the parameters. So RTFM!
643 656
644 **About LASTZ parameters** 657 .. class:: warningmark
645 658
646 Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow parameter description in the `manual <https://lastz.github.io/lastz/#syntax>`_. 659 Galaxy version of LASTZ sets **--ambiguous=iupac** as default (see **Scoring** section). This prevents LASTZ from erroring out if one of the DNA inputs contains "non-standard" nucleotides.
647 660
648 **Defaults** 661 **About LASTZ parameters**
649 662
650 here are defaults for some of the most important parameters:: 663 Galaxy's version of LASTZ has nine parameter sections (*Where to look*, *Scoring*, *Seeding*, *HSPs*, *Chaining*, *Gapped extension*, *Filtering*, *Interpolation*, and *Output*). These sections closely follow parameter description in the `manual <https://lastz.github.io/lastz/#syntax>`_.
651 664
652 --seed=<pattern> set seed pattern (12of19, 14of22, or general pattern) 665 **Defaults**
653 (default is 1110100110010101111) 666
654 SEE "Seeding" SECTION -> "Select seed type" 667 here are defaults for some of the most important parameters::
655 668
656 --[no]transition allow (or don't) one transition in a seed hit 669 --seed=<pattern> set seed pattern (12of19, 14of22, or general pattern)
657 (by default a transition is allowed) 670 (default is 1110100110010101111)
658 SEE "Seeding" SECTION -> "Allow transitions" 671 SEE "Seeding" SECTION -> "Select seed type"
659 672
660 --[no]chain perform chaining 673 --[no]transition allow (or don't) one transition in a seed hit
661 (by default no chaining is performed) 674 (by default a transition is allowed)
662 SEE "Chaining" SECTION 675 SEE "Seeding" SECTION -> "Allow transitions"
663 676
664 --[no]gapped perform gapped alignment (instead of gap-free) 677 --[no]chain perform chaining
665 (by default gapped alignment is performed) 678 (by default no chaining is performed)
666 SEE "Gapped extension" SECTION 679 SEE "Chaining" SECTION
667 680
668 --strand=both search both strands 681 --[no]gapped perform gapped alignment (instead of gap-free)
669 --strand=plus search + strand only (matching strand of query spec) 682 (by default gapped alignment is performed)
670 (by default both strands are searched) 683 SEE "Gapped extension" SECTION
671 SEE "Where to look" SECTION 684
672 685 --strand=both search both strands
673 --scores=<file> read substitution and gap scores from a file 686 --strand=plus search + strand only (matching strand of query spec)
674 SEE "Scoring" SECTION 687 (by default both strands are searched)
675 688 SEE "Where to look" SECTION
676 --xdrop=<score> set x-drop threshold (default is 10sub[A][A]) 689
677 SEE "HSPs" SECTION 690 --scores=<file> read substitution and gap scores from a file
678 691 SEE "Scoring" SECTION
679 --ydrop=<score> set y-drop threshold (default is open+300extend) 692
680 SEE "Gapped extension" SECTION 693 --xdrop=<score> set x-drop threshold (default is 10sub[A][A])
681 694 SEE "HSPs" SECTION
682 --hspthresh=<score> set threshold for high scoring pairs (default is 3000) 695
683 ungapped extensions scoring lower are discarded 696 --ydrop=<score> set y-drop threshold (default is open+300extend)
684 <score> can also be a percentage or base count 697 SEE "Gapped extension" SECTION
685 SEE "HSPs" SECTION 698
686 699 --hspthresh=<score> set threshold for high scoring pairs (default is 3000)
687 --gappedthresh=<score> set threshold for gapped alignments 700 ungapped extensions scoring lower are discarded
688 gapped extensions scoring lower are discarded 701 <score> can also be a percentage or base count
689 <score> can also be a percentage or base count 702 SEE "HSPs" SECTION
690 (default is to use same value as --hspthresh) 703
691 SEE "Gapped extension" SECTION 704 --gappedthresh=<score> set threshold for gapped alignments
692 705 gapped extensions scoring lower are discarded
693 706 <score> can also be a percentage or base count
694 **Substitution matrix** 707 (default is to use same value as --hspthresh)
695 708 SEE "Gapped extension" SECTION
696 By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_):: 709
697 710
698 bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X'] 711 **Substitution matrix**
699 fill_score = -100 # used when sub[*][*] is not defined 712
700 gap_open_penalty = 400 713 By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_)::
701 gap_extend_penalty = 30 714
702 715 bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X']
703 A C G T 716 fill_score = -100 # used when sub[*][*] is not defined
704 A 91 -114 -31 -123 717 gap_open_penalty = 400
705 C -114 100 -125 -31 718 gap_extend_penalty = 30
706 G -31 -125 100 -114 719
707 T -123 -31 -114 91 720 A C G T
708 721 A 91 -114 -31 -123
709 Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores). 722 C -114 100 -125 -31
710 723 G -31 -125 100 -114
711 **Output** 724 T -123 -31 -114 91
712 725
713 This version of LASTZ produces two outputs by default: a BAM alignment file and a dot-plot in PNG format. Other formats can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with '#'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_. 726 Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores).
727
728 **Output**
729
730 This version of LASTZ produces one output by default: a BAM alignment file. Other formats as well as a Dot Plot can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with '#'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_.
714 731
715 ]]> 732 ]]>
716 </help> 733 </help>
717 <expand macro="citations"/> 734 <expand macro="citations"/>
718 </tool> 735 </tool>