Galaxy |

Changeset 8:e7f19d6a9af8 (2018-05-18)

Previous changeset 7:10aca14c2332 (2018-05-17) | Next changeset 9:2ff111fac1d7 (2021-04-02)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lastz commit a7e9d5b3906b7ebb35b1c29c3a8e8203b2cefccd

modified:
lastz.xml
lastz_d.xml
test-data/test4.out

diff -r 10aca14c2332 -r e7f19d6a9af8 lastz.xml
--- a/lastz.xml Thu May 17 04:29:13 2018 -0400
+++ b/lastz.xml Fri May 18 16:58:38 2018 -0400

[

b'@@ -1,4 +1,4 @@\n-<tool id="lastz_wrapper_2" name="LASTZ" version="1.3.1">\n+<tool id="lastz_wrapper_2" name="LASTZ" version="1.3.2">\n <description>: align long sequences</description>\n <macros>\n <import>lastz_macros.xml</import>\n@@ -238,6 +238,11 @@\n \'--inner=${interpolation.inner}\'\n #end if\n \n+## HOUSEKEEPING ----------------------------------\n+\n+ --traceback=160M\n+\n+\n ## OUTPUT FORMATS --------------------------------\n \n #if str( $output_format.out.format ) == "bam":\n@@ -252,13 +257,16 @@\n \'--format=general-:${output_format.out.fields}\'\n #end if\n --action:target=multiple\n- --rdotplot=plot.r\n+ $output_format.rplot\n #if str( $output_format.out.format ) == "bam":\n- | samtools sort -@\\${GALAXY_SLOTS:-2} -O bam -o \'${output}\' &&\n+ | samtools sort -@\\${GALAXY_SLOTS:-2} -O bam -o \'${output}\'\n #else:\n- > \'${output}\' &&\n+ > \'${output}\'\n #end if\n- Rscript $r_plot > /dev/null 2>&1\n+ #if $output_format.rplot:\n+ &&\n+ Rscript $r_plot > /dev/null 2>&1\n+ #end if\n \n ]]>\n </command>\n@@ -349,7 +357,7 @@\n </when>\n </conditional>\n <param name="ambigN" type="boolean" truevalue="--ambiguous=n" checked="false" label="Treat each N in the input sequences as an ambiguous nucleotide" argument="--ambiguous=n" help="Substitutions with N are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>\n- <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="false" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." 
argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>\n+ <param name="ambigIUPAC" type="boolean" truevalue="--ambiguous=iupac" checked="true" label="Treat each of the IUPAC-IUB ambiguity codes (B, D, H, K, M, R, S, V, W, and Y, as well as N) in the input sequences as a completely ambiguous nucleotide." argument="--ambiguous=iupac" help="Substitutions with these characters are scored as zero, instead of using the fill_score value from the scoring file (which is -100 by default)."/>\n </section>\n <section name="seeding" expanded="false" title="Seeding">\n <conditional name="seed">\n@@ -560,6 +568,7 @@\n \n </when>\n </conditional>\n+ <param name="rplot" type="boolean" truevalue="--rdotplot=plot.r" falsevalue="" checked="false" argument="--rdotplot" label="Create a dotplot representation of alignments?" help="The dotplot is only useful if query and target contain exactly one sequence each"/>\n </section>\n </inputs>\n <outputs>\n@@ -569,7 +578,9 @@\n <when input="output_format.out.format" value="maf" format="maf" />\n </change_format>\n </data>\n- <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot"/>\n+ <data format="png" name="out_plot" label="${tool.name} on ${on_string}: dot plot">\n+ <filter>output_format[\'rplot\']</filter>\n+ </data>\n </outputs>\n <tests>\n <test>\n@@ -609,6 +620,8 @@\n <param name="ref_source" value="history" />\n <param name="target" ftype="fasta.gz" value="chrM_human.fa.gz" />\n <param name="query" ftype="fastq.bz2" value="chrM_mouse.fq.bz2" />\n+ <param name="traceback" value="83886080" />\n+ <param name="word" value="28" />\n <param name="strand" value="--strand=both" />\n <param name="format" value="blastn" />\n <output name="output" value="test5.out'..b'\n- --ydrop=<score> set y-drop threshold (default is open+300extend)\n- SEE "Gapped extension" SECTION\n+ --ydrop=<score> set 
y-drop threshold (default is open+300extend)\n+ SEE "Gapped extension" SECTION\n \n- --hspthresh=<score> set threshold for high scoring pairs (default is 3000)\n- ungapped extensions scoring lower are discarded\n- <score> can also be a percentage or base count\n- SEE "HSPs" SECTION\n+ --hspthresh=<score> set threshold for high scoring pairs (default is 3000)\n+ ungapped extensions scoring lower are discarded\n+ <score> can also be a percentage or base count\n+ SEE "HSPs" SECTION\n \n- --gappedthresh=<score> set threshold for gapped alignments\n- gapped extensions scoring lower are discarded\n- <score> can also be a percentage or base count\n- (default is to use same value as --hspthresh)\n- SEE "Gapped extension" SECTION\n+ --gappedthresh=<score> set threshold for gapped alignments\n+ gapped extensions scoring lower are discarded\n+ <score> can also be a percentage or base count\n+ (default is to use same value as --hspthresh)\n+ SEE "Gapped extension" SECTION\n \n \n- **Substitution matrix**\n+**Substitution matrix**\n \n- By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_)::\n+By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_)::\n \n- bad_score = X:-1000 # used for sub[\'X\'][*] and sub[*][\'X\']\n- fill_score = -100 # used when sub[*][*] is not defined\n- gap_open_penalty = 400\n- gap_extend_penalty = 30\n+ bad_score = X:-1000 # used for sub[\'X\'][*] and sub[*][\'X\']\n+ fill_score = -100 # used when sub[*][*] is not defined\n+ gap_open_penalty = 400\n+ gap_extend_penalty = 30\n \n- A C G T\n- A 91 -114 -31 -123\n- C -114 100 -125 -31\n- G -31 -125 100 -114\n- T -123 -31 -114 91\n+ A C G T\n+ A 91 -114 -31 -123\n+ C -114 100 -125 -31\n+ G -31 -125 100 -114\n+ T -123 -31 -114 91\n \n- Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. 
Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores).\n+Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores).\n \n- **Output**\n+**Output**\n \n- This version of LASTZ produces two outputs by default: a BAM alignment file and a dot-plot in PNG format. Other formats can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with \'#\'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_.\n+This version of LASTZ produces one output by default: a BAM alignment file. Other formats as well as a Dot Plot can be configured in *Output* section. This incarnation of LASTZ produces outputs without comment line starting with \'#\'. To learn identity of each column, consult `formats section of LASTZ manual <https://lastz.github.io/lastz/#formats>`_.\n \n ]]>\n </help>\n'

diff -r 10aca14c2332 -r e7f19d6a9af8 lastz_d.xml
--- a/lastz_d.xml Thu May 17 04:29:13 2018 -0400
+++ b/lastz_d.xml Fri May 18 16:58:38 2018 -0400

[

@@ -1,4 +1,4 @@
-<tool id="lastz_d_wrapper" name="LASTZ_D" version="1.3.1">
+<tool id="lastz_d_wrapper" name="LASTZ_D" version="1.3.2">
     <description>: estimate substitution scores matrix</description>
     <macros>
      <import>lastz_macros.xml</import>
@@ -45,39 +45,39 @@

     <help><![CDATA[

-        **What is does**
+**What is does**

-        LASTZ_D is a non-integer (**D** stands for Double) version of LASTZ that can be used to estimate substitution matrix that will be used to score alignments. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State as a part of LASTZ. Matrix computed by this tool is to be used by LASTZ (see below).
+LASTZ_D is a non-integer (**D** stands for Double) version of LASTZ that can be used to estimate substitution matrix that will be used to score alignments. It was developed by `Bob Harris <http://www.bx.psu.edu/~rsharris/>`_ in the lab of Webb Miller at Penn State as a part of LASTZ. Matrix computed by this tool is to be used by LASTZ (see below).

-        .. class:: warningmark
+.. class:: warningmark

-        **Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. The two sections that are particularly relevant to the inference of substitution matrix are `Inferring Score Sets <http://www.bx.psu.edu/~rsharris/lastz/README.lastz-1.04.00.html#adv_inference>`_ and `Inference Control File <http://www.bx.psu.edu/~rsharris/lastz/README.lastz-1.04.00.html#fmt_inference>`_.
+**Read documentation** before proceeding. LASTZ is a complex tool with many parameter options. Fortunately, there is a `great manual <https://lastz.github.io/lastz/>`_ maintained by its author. The two sections that are particularly relevant to the inference of substitution matrix are `Inferring Score Sets <http://www.bx.psu.edu/~rsharris/lastz/README.lastz-1.04.00.html#adv_inference>`_ and `Inference Control File <http://www.bx.psu.edu/~rsharris/lastz/README.lastz-1.04.00.html#fmt_inference>`_.

-        **Notes on the inference**
+**Notes on the inference**

-        Inference is achieved by computing the probability of each of the 18 different alignment events (gap open, gap extend, and 16 substitutions). These probabilities are estimated from alignments of the sequences. Of course, at first we don't have alignments, so the process begins by using a generic scoring set to create alignments, infer scores from those, then realign, and so on, until the scores stabilize or "converge". Ungapped alignments are performed until the substitution scores converge, then gapped alignments are performed (holding the substitution scores constant) until the gap penalties converge. In the end you get a matrix like this::
+Inference is achieved by computing the probability of each of the 18 different alignment events (gap open, gap extend, and 16 substitutions). These probabilities are estimated from alignments of the sequences. Of course, at first we don't have alignments, so the process begins by using a generic scoring set to create alignments, infer scores from those, then realign, and so on, until the scores stabilize or "converge". Ungapped alignments are performed until the substitution scores converge, then gapped alignments are performed (holding the substitution scores constant) until the gap penalties converge. In the end you get a matrix like this::

-            # (a LASTZ scoring set, created by "LASTZ --infer")
+    # (a LASTZ scoring set, created by "LASTZ --infer")

-            bad_score          = X:-1781 # used for sub[X][*] and sub[*][X]
-            fill_score         = -178    # used when sub[*][*] not otherwise defined
-            gap_open_penalty   = 400
-            gap_extend_penalty = 30
+    bad_score          = X:-1781 # used for sub[X][*] and sub[*][X]
+    fill_score         = -178    # used when sub[*][*] not otherwise defined
+    gap_open_penalty   = 400
+    gap_extend_penalty = 30

-                  A     C     G     T
-            A    72   -79   -49   -97
-            C   -79   100  -178   -49
-            G   -49  -178   100   -79
-            T   -97   -49   -79    72
+          A     C     G     T
+    A    72   -79   -49   -97
+    C   -79   100  -178   -49
+    G   -49  -178   100   -79
+    T   -97   -49   -79    72

-        This dataset can then be used as an input to the **Read the substitution scores** parameter of LASTZ (Parameter section *Scoring*).
+This dataset can then be used as an input to the **Read the substitution scores** parameter of LASTZ (Parameter section *Scoring*).

-        The iterative process can fail if there's not a lot of sequence to align.  E.g. if after the 4th iteration there's nothing in the central 50% denominators go to zero and the process fails.
+The iterative process can fail if there's not a lot of sequence to align.  E.g. if after the 4th iteration there's nothing in the central 50% denominators go to zero and the process fails.

-        If the sequences you are aligning have GC content different than the usual ACGT 30-20-20-30 split, scoring inference should discover this and give you better alignments.
+If the sequences you are aligning have GC content different than the usual ACGT 30-20-20-30 split, scoring inference should discover this and give you better alignments.

-        ]]>
+]]>
     </help>
     <expand macro="citations"/>
</tool>

diff -r 10aca14c2332 -r e7f19d6a9af8 test-data/test4.out
--- a/test-data/test4.out Thu May 17 04:29:13 2018 -0400
+++ b/test-data/test4.out Fri May 18 16:58:38 2018 -0400

@@ -1,1 +1,1 @@
-chrM chrM 70.79 16186 4208 152 577 16569 1 15860 0 14129.5
+chrM chrM 70.79 16186 4208 152 577 16569 1 15860 0 14131.6