changeset 5:17e61517c166 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/minimap2 commit 31d0c015b36d7aa93f586c566ceeac56324863ad
author iuc
date Fri, 31 Aug 2018 07:44:15 -0400
parents 6f50f36e4481
children 3f4d6399997b
files minimap2.xml test-data/mini_reads.paf
diffstat 2 files changed, 25 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/minimap2.xml	Tue Aug 07 07:59:53 2018 -0400
+++ b/minimap2.xml	Fri Aug 31 07:44:15 2018 -0400
@@ -1,9 +1,12 @@
 <?xml version="1.0"?>
-<tool id="minimap2" name="Map with minimap2" version="2.5+gx1" profile="17.01">
+<tool id="minimap2" name="Map with minimap2" version="@TOOL_VERSION@" profile="17.01">
     <description>A fast pairwise aligner for genomic and spliced nucleotide sequences</description>
+    <macros>
+        <token name="@TOOL_VERSION@">2.12</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="2.5">minimap2</requirement>
-        <requirement type="package" version="1.6">samtools</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">minimap2</requirement>
+        <requirement type="package" version="1.9">samtools</requirement>
     </requirements>
     <version_command>minimap2 --version</version_command>
     <command>
@@ -16,6 +19,7 @@
     minimap2
     -x $analysis_type_selector
     ## indexing options
+    $indexing_options.H
     #if $indexing_options.k:
         -k $indexing_options.k
     #end if
@@ -29,6 +33,9 @@
     #if $mapping_options.f:
         -f $mapping_options.f
     #end if
+    #if $mapping_options.min_occ_floor:
+        --min-occ-floor $mapping_options.min_occ_floor
+    #end if
     #if $mapping_options.g:
         -g $mapping_options.g
     #end if
@@ -113,6 +120,7 @@
         | samtools sort
         -@\${GALAXY_SLOTS:-2}
         -O $io_options.output_format
+        $io_options.eqx
         --reference reference.fa
         --output-fmt-option no_ref
         -o '$alignment_output'
@@ -140,9 +148,7 @@
             </when>
         </conditional>
         <section name="indexing_options" title="Indexing options">
-            <!-- Homopolymer setting seems to not properly overwrite sr preset
             <param argument="-H" name="H" type="boolean" optional="true" truevalue="-H" falsevalue="" label="Use homopolymer-compressed k-mer ?"/>
-            -->
             <param argument="-k" type="integer" min="4" max="28" optional="true"  label="k-mer size" help=""/>
             <param argument="-w" type="integer" min="1" optional="true"  label="minimizer window size" help=""/>
             <param argument="-I" type="integer" min="1" optional="true"  label="split index for every N input gigabases" help=""/>
@@ -171,17 +177,19 @@
         </conditional>
         <!-- end unchanged copy from bwa-mem -->
         <param name="analysis_type_selector" type="select" label="Select analysis mode (sets default)">
-            <option value="map-pb">-Hk19 (PacBio vs reference mapping)</option>
-            <option value="map-ont">-k15 (Oxford Nanopore vs reference mapping)</option>
-            <option value="asm5">-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5% div.)</option>
-            <option value="asm10">-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10% div.)</option>
-            <option value="ava-pb">-Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)</option>
-            <option value="ava-ont">-k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)</option>
-            <option value="splice">long-read spliced alignment</option>
-            <option value="sr">short single-end reads without splicing</option>
+            <option value="map-pb">PacBio/Oxford Nanopore read to reference mapping (-Hk19)</option>
+            <option value="map-ont">Oxford Nanopore read to reference mapping. Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For PacBio reads, HPC minimizers consistently lead to faster performance and more sensitive results in comparison to normal minimizers. For Oxford Nanopore data, normal minimizers are better, though not much. The effectiveness of HPC is determined by the sequencing error mode.</option>
+            <option value="ava-pb">PacBio all-vs-all overlap mapping (-Hk19 -Xw5 -m100 -g10000 --max-chain-skip 25)</option>
+            <option value="ava-ont">Oxford Nanopore all-vs-all overlap mapping (-k15 -Xw5 -m100 -g10000 -r2000 --max-chain-skip 25). Similarly, the major difference from ava-pb is that this preset is not using HPC minimizers.</option>
+            <option value="asm5">Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 --min-occ-floor=100). Typically, the alignment will not extend to regions with 5% or higher sequence divergence. Only use this preset if the average divergence is far below 5%.</option>
+            <option value="asm10">Long assembly to reference mapping (-k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 --min-occ-floor=100). Up to 10% sequence divergence.</option>
+            <option value="asm20">Long assembly to reference mapping (-k19 -w10 -A1 -B6 -O6,26 -E2,1 -s200 -z200 --min-occ-floor=100). Up to 20% sequence divergence.</option>
+            <option value="splice">Long-read spliced alignment (-k15 -w5 --splice -g2000 -G200k  -A1 -B2  -O2,32  -E1,0  -C9  -z200  -ub  --splice-flank=yes). In the splice mode, 1) long deletions are taken as  introns  and  represented as the `N' CIGAR operator 2) long insertions are disabled 3) deletion and insertion gap costs are different during chaining 4) the computation of the `ms' tag ignores introns to demote hits to pseudogenes.</option>
+            <option value="sr">Short single-end reads without splicing (-k21 -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200 -2K50m --heap-sort=yes --secondary=no)</option>
         </param>
-        <section name="mapping_options" title="Set advanced mapping options" help="Sets -f, -g, -G, -F, -r, -n, -m, -X, -p and -N options." expanded="False">
+        <section name="mapping_options" title="Set advanced mapping options" help="Sets -f, -g, -G, -F, -r, -n, -m, -X, -p, -N and --min-occ-floor options." expanded="False">
             <param argument="-f" type="float" value="" optional="true" label="filter out top FLOAT fraction of repetitive minimizers" help="default=0.0002"/>
+            <param argument="--min-occ-floor" name="min_occ_floor" type="integer" label="force minimap2 to always use k-mers occurring this many times or fewer" help="Maximum occurrence is the number of repetitive minimizers determined by '-f' or this value, whichever is higher." optional="true" />
             <param argument="-g" type="integer" value="" optional="true" label="stop chain enlongation if there are no minimizers in INT-bp" help="default=5000"/>
             <param argument="-G" type="integer" value="" optional="true" label="max intron length in thousand (effective with -xsplice; changing -r)" help="default=200"/>
             <param argument="-F" type="integer" value="" optional="true" label="max fragment length (effective with -xsr or in the fragment mode)" help="default=800" />
@@ -221,6 +229,7 @@
                 <option value="short">short</option>
                 <option value="long">long</option>
             </param>
+            <param argument="--eqx" type="boolean" truevalue="--eqx" falsevalue="" label="write =/X CIGAR operators"/>
             <param argument="-Y" type="boolean" truevalue="-Y" falsevalue="" label="use soft clipping for supplementary alignments ?"/>
         </section>
     </inputs>
--- a/test-data/mini_reads.paf	Tue Aug 07 07:59:53 2018 -0400
+++ b/test-data/mini_reads.paf	Fri Aug 31 07:44:15 2018 -0400
@@ -1,2 +1,2 @@
-m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1263/0_8655	8655	755	8475	+	m140213_230323_42129_c100520410120000001823082509281362_s1_X0/817/0_20440	20440	5798	13009	395	7727	0	tp:A:S	cm:i:43	s1:i:294
-m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1447/0_15191	15191	12916	14825	-	m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1447/15237_17783	2546	347	2303	152	1985	0	tp:A:S	cm:i:15	s1:i:133
+m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1263/0_8655	8655	755	8475	+	m140213_230323_42129_c100520410120000001823082509281362_s1_X0/817/0_20440	20440	5798	13009	395	7727	0	tp:A:S	cm:i:43	s1:i:294	dv:f:0.2746
+m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1447/0_15191	15191	12916	14825	-	m140213_230323_42129_c100520410120000001823082509281362_s1_X0/1447/15237_17783	2546	347	2303	152	1985	0	tp:A:S	cm:i:15	s1:i:133	dv:f:0.2503