diff minimap2.xml @ 12:037c6e54df11 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/minimap2 commit 3617059cec3cc200dc22450347c070c4207b8852"
author iuc
date Fri, 28 May 2021 21:09:55 +0000
parents f54f5baedfdd
children 1f06dccdc5d1
line wrap: on
line diff
--- a/minimap2.xml	Fri Mar 19 09:40:54 2021 +0000
+++ b/minimap2.xml	Fri May 28 21:09:55 2021 +0000
@@ -1,20 +1,12 @@
 <?xml version="1.0"?>
-<tool id="minimap2" name="Map with minimap2" version="@TOOL_VERSION@+galaxy4" profile="17.01">
+<tool id="minimap2" name="Map with minimap2" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01">
     <description>A fast pairwise aligner for genomic and spliced nucleotide sequences</description>
     <macros>
-        <token name="@TOOL_VERSION@">2.17</token>
-        <xml name="pe_anaylsis_fixed_selector">
-            <param name="analysis_type_selector" type="select"
-            label="Presets for PE reads alignment"
-            help="These are the minimap2 preset options for PE alignment of short reads. You can customize these and other settings in the indexing, mapping and alignment options sections below.">
-                <option value="sr">Short reads without splicing (-k21 -w11 --sr -F800 -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200 -2K50m --heap-sort=yes --secondary=no) (sr)</option>
-            </param>
-        </xml>
+        <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">minimap2</requirement>
-        <requirement type="package" version="1.9">samtools</requirement>
-    </requirements>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <expand macro="pe_anaylsis_fixed_selector"/>
     <stdio>
         <exit_code range="1:" level="fatal" />
         <regex match="\[ERROR\]" source="stderr" level="fatal" />
@@ -78,7 +70,12 @@
     #if str($mapping_options.N):
         -N $mapping_options.N
     #end if
-
+    #if str($mapping_options.mask_len):
+        --mask-len $mapping_options.mask_len
+    #end if
+    #if str($mapping_options.kmer_ocurrence_interval.interval):
+        -U $mapping_options.kmer_ocurrence_interval.lower_limit,$mapping_options.kmer_ocurrence_interval.upper_limit
+    #end if
     ## Alignment options
     #if str($alignment_options.splicing.splice_mode) == '--splice':
         --frag=no --splice
@@ -209,6 +206,7 @@
                 help="Each profile comes with the preconfigured settings mentioned in parentheses. You can customize each profile further in the indexing, mapping and alignment options sections below. If you do not select a profile here, the tool will use the per-parameter defaults listed in the below sections unless you customize them." >
                     <option value="map-pb">PacBio/Oxford Nanopore read to reference mapping (-Hk19) (map-pb)</option>
                     <option value="map-ont">Oxford Nanopore read to reference mapping. Slightly more sensitive for Oxford Nanopore to reference mapping (-k15). For PacBio reads, HPC minimizers consistently leads to faster performance and more sensitive results in comparison to normal minimizers. For Oxford Nanopore data, normal minimizers are better, though not much. The effectiveness of HPC is determined by the sequencing error mode. (map-ont)</option>
+                    <option value="map-hifi">PacBio HiFi reads vs reference mapping (-k19 -w19 -U50,500 -g10k -A1 -B4 -O6,26 -E2,1 -s200) (map-hifi)</option>
                     <option value="ava-pb">PacBio all-vs-all overlap mapping (-Hk19 -Xw5 -m100 -g10000 --max-chain-skip 25) (ava-pb)</option>
                     <option value="ava-ont">Oxford Nanopore all-vs-all overlap mapping (-k15 -Xw5 -m100 -g10000 -r2000 --max-chain-skip 25). Similarly, the major difference from ava-pb is that this preset is not using HPC minimizers. (ava-ont)</option>
                     <option value="asm5">Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 --min-occ-floor=100). Typically, the alignment will not extend to regions with 5% or higher sequence divergence. Only use this preset if the average divergence is far below 5%. (asm5)</option>
@@ -236,29 +234,42 @@
         </conditional>
         <section name="indexing_options" title="Indexing options">
             <param argument="-H" name="H" type="boolean" optional="true" truevalue="-H" falsevalue="" label="Use homopolymer-compressed k-mer ?"/>
-            <param argument="-k" type="integer" min="4" max="28" optional="true"  label="k-mer size" help=""/>
-            <param argument="-w" type="integer" min="1" optional="true"  label="minimizer window size" help=""/>
-            <param argument="-I" type="integer" min="1" optional="true"  label="split index for every N input gigabases" help=""/>
+            <param argument="-k" type="integer" min="4" max="28" optional="true"  label="K-mer size" help=""/>
+            <param argument="-w" type="integer" min="1" optional="true"  label="Minimizer window size" help=""/>
+            <param argument="-I" type="integer" min="1" optional="true"  label="Split index for every N input gigabases" help=""/>
         </section>
         <section name="mapping_options" title="Mapping options" help="Sets -f, -g, -F, -r, -n, -m, -X, -p, -N and --min-occ-floor options." expanded="False">
-            <param argument="-N" type="integer" min="0" optional="true" label="retain at most INT secondary alignments" help="default=5"/>
+            <param argument="-N" type="integer" min="0" optional="true" label="Retain at most INT secondary alignments" help="default=5"/>
             <param argument="-F" type="integer" min="0" value="" optional="true"
             label="Max fragment length for PE alignment"
             help="The maximum apparent fragment length up to which paired-end reads are aligned together; at higher fragment lengths the mates will be aligned independent of each other; effective only for paired-end data and when spliced alignment mode is turned off; default=800" />
-            <param argument="-f" type="float" value="" optional="true" label="filter out top FLOAT fraction of repetitive minimizers" help="default=0.0002"/>
-            <param argument="--min-occ-floor" name="min_occ_floor" type="integer" label="force minimap2 to always use k-mers occuring this many times or fewer" help="Maximum occurence is the number of repetitive minimizers determined by '-f' or this value, whichever is higher." optional="true" />
-            <param argument="-g" type="integer" value="" optional="true" label="stop chain enlongation if there are no minimizers in INT-bp" help="default=5000"/>
-            <param argument="-r" type="integer" value="" optional="true" label="bandwidth used in chaining and DP-based alignment" help="default=500" />
-            <param argument="-n" type="integer" value="" optional="true" label="minimal number of minimizers on a chain" help="default=3"/>
-            <param argument="-m" type="integer" value="" optional="true" label="minimal chaining score (matching bases minus log gap penalty)" help="default=40"/>
+            <param argument="-f" type="float" value="" optional="true" label="Filter out top FLOAT fraction of repetitive minimizers" help="default=0.0002"/>
+            <conditional name="kmer_ocurrence_interval">
+                <param name="interval" argument="-U" type="select" label="Specify an interval of k-mer occurrences" help="Allows specifying an interval of k-mer occurrences with -U. For repeat-rich genomes, the automatic k-mer occurrence threshold determined by -f may be too large and make alignment impractically slow. The new option protects against such cases. Enabled for asm* and map-hifi.">
+                    <option value="enabled">Enabled</option>
+                    <option value="" selected="true">Disabled</option>
+                </param>
+                <when value="enabled">
+                    <param name="lower_limit" type="integer" min="0" max="10000" value="" label="K-mer occurrence lower limit"/>
+                    <param name="upper_limit" type="integer" min="0" max="10000" value="" label="K-mer occurrence upper limit"/>
+                </when>
+                <when value="">
+                </when>
+            </conditional>
+            <param argument="--min-occ-floor" name="min_occ_floor" type="integer" label="Force minimap2 to always use k-mers occurring this many times or fewer" help="Maximum occurrence is the number of repetitive minimizers determined by '-f' or this value, whichever is higher." optional="true" />
+            <param argument="-g" type="integer" value="" optional="true" label="Stop chain elongation if there are no minimizers in INT-bp" help="default=5000"/>
+            <param argument="-r" type="integer" value="" optional="true" label="Bandwidth used in chaining and DP-based alignment" help="default=500" />
+            <param argument="-n" type="integer" value="" optional="true" label="Minimal number of minimizers on a chain" help="default=3"/>
+            <param argument="-m" type="integer" value="" optional="true" label="Minimal chaining score (matching bases minus log gap penalty)" help="default=40"/>
             <param argument="--max-chain-skip" type="integer" value="" optional="true"
             label="Maximum seed skips during chaining"
             help="A heuristics that stops chaining early. Minimap2 uses dynamic programming for chaining. The time complexity is quadratic in the number of seeds. This option makes minimap2 exit the inner loop if it repeatedly sees seeds already on chains. Set to a large number to switch off this heurstics effectively. default=25" />
             <param argument="--max-chain-iter" type="integer" value="" optional="true"
             label="Maximum number of partial chains checked during chaining"
             help="A heuristics to avoid quadratic time complexity in the worst case. default=5000" />
-            <param argument="-X" type="boolean" truevalue="-X" falsevalue="" optional="true" label="skip self and dual mappings (for the all-vs-all mode)"/>
-            <param argument="-p" type="float" value="" max="1" optional="true" label="min secondary-to-primary score ratio" help="default=0.8"/>
+            <param argument="-X" type="boolean" truevalue="-X" falsevalue="" optional="true" label="Skip self and dual mappings (for the all-vs-all mode)"/>
+            <param argument="-p" type="float" value="" max="1" optional="true" label="Min secondary-to-primary score ratio" help="default=0.8"/>
+            <param argument="--mask-len" type="integer" min="0" max="10000" value="" optional="true" label="Fine-tune the removal of redundant hits" help="Keep an alignment if dropping it leaves an unaligned region on the query longer than INT"/>
         </section>
         <section name="alignment_options" title="Alignment options" help="Sets -A, -B, -O, -E, -z, -s, and spliced alignments options." expanded="False">
             <conditional name="splicing">
@@ -279,11 +290,11 @@
                     label="Cost of non-canonical (non-GT-AG) splicing"
                     help="default=0" />
                     <param argument="-u" type="select" optional="true"
-                    label="how to find GT-AG"
+                    label="How to find GT-AG"
                     help="default=n (don't match GT-AG)">
-                        <option value="n">don't match GT-AG (-un)</option>
-                        <option value="f">transcript strand (-uf)</option>
-                        <option value="b">both strands (-ub)</option>
+                        <option value="n">Don't match GT-AG (-un)</option>
+                        <option value="f">Transcript strand (-uf)</option>
+                        <option value="b">Both strands (-ub)</option>
                     </param>
                     <param argument="--splice-flank" type="boolean" truevalue="--splice-flank=yes" falsevalue="--splice-flank=no" checked="true"
                     label="Assume conserved flanking region of splice sites?"
@@ -328,7 +339,7 @@
             label="Z-drop threshold for reverse-complementing the query"
             help="Decrease to find small inversions at the cost of performance and false positives. default=200" />
             <param argument="-s" type="integer" min="0" optional="true"
-            label="minimal peak DP alignment score" help="default=80"/>
+            label="Minimal peak DP alignment score" help="default=80"/>
             <param name="no_end_flt" type="boolean" falsevalue="--no-end-flt" truevalue="" checked="true"
             label="Filter seeds towards the ends of chains before performing base-level alignment?" />
         </section>
@@ -336,11 +347,11 @@
             <param name="output_format" type="select" label="Select an output format">
                 <option value="BAM">BAM</option>
                 <option value="CRAM">CRAM</option>
-                <option value="paf">paf</option>
+                <option value="paf">PAF</option>
             </param>
-            <param argument="-Q" type="boolean" truevalue="-Q" falsevalue="" label="don't output base quality"/>
-            <param argument="-L" type="boolean" truevalue="-L" falsevalue="" label="write CIGAR with >65535 ops to the CG tag" help="Useful for very long reads in SAM/BAM format"/>
-            <param argument="-K" type="integer" optional="true" label="minibatch size for mapping (in megabyte)" help="default=500M"/>
+            <param argument="-Q" type="boolean" truevalue="-Q" falsevalue="" label="Don't output base quality"/>
+            <param argument="-L" type="boolean" truevalue="-L" falsevalue="" label="Write CIGAR with >65535 ops to the CG tag" help="Useful for very long reads in SAM/BAM format"/>
+            <param argument="-K" type="integer" optional="true" label="Minibatch size for mapping (in megabyte)" help="default=500M"/>
             <param argument="--cs" type="select" optional="true" label="Output cs tag?" help="The cs tag is a more compact standalone representation of the MD tag, see help below.">
                 <option value="none">no</option>
                 <option value="short">short</option>
@@ -349,8 +360,8 @@
             <param argument="-c" type="boolean" truevalue="-c" falsevalue="" label="Generate CIGAR"
                 help="In PAF, the CIGAR is written to the ‘cg’ custom tag." />
 
-            <param argument="--eqx" type="boolean" truevalue="--eqx" falsevalue="" label="write =/X CIGAR operators"/>
-            <param argument="-Y" type="boolean" truevalue="-Y" falsevalue="" label="use soft clipping for supplementary alignments ?"/>
+            <param argument="--eqx" type="boolean" truevalue="--eqx" falsevalue="" label="Write =/X CIGAR operators"/>
+            <param argument="-Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments ?"/>
         </section>
     </inputs>
     <outputs>
@@ -480,6 +491,41 @@
             <param name="analysis_type_selector" value="self-homology" />
             <output name="alignment_output" ftype="bam" file="minimap2-self-homology.bam" lines_diff="2" />
         </test>
+        <test>
+            <!-- test mask-len option -->
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
+            <param name="fastq_input_selector" value="single"/>
+            <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/>
+            <param name="analysis_type_selector" value="sr"/>
+            <section name="mapping_options">
+                <param name="mask_len" value="100"/>
+            </section>
+            <output name="alignment_output" ftype="bam" file="minimap2-test-mask_len.bam" lines_diff="2" />
+        </test>
+        <test>
+            <!-- test map-hifi -->
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="pacbio_hifi_assembly.fa.gz"/>
+            <param name="fastq_input_selector" value="single"/>
+            <param name="fastq_input1" ftype="fastqsanger" value="pacbio_hifi_reads.fasta.gz"/>
+            <param name="analysis_type_selector" value="map-hifi"/>
+            <output name="alignment_output" ftype="bam" file="minimap2-test_hifi-fasta.bam" lines_diff="2" />
+        </test>
+        <test>
+            <!-- test kmer occurrence interval option -->
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
+            <param name="fastq_input_selector" value="single"/>
+            <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/>
+            <param name="analysis_type_selector" value="sr"/>
+            <conditional name="kmer_ocurrence_interval">
+                <param name="interval" value="enabled"/>
+                <param name="lower_limit" value="10"/>
+                <param name="upper_limit" value="30"/>
+            </conditional>
+            <output name="alignment_output" ftype="bam" file="minimap2-test-kmer_ocurrence.bam" lines_diff="2" />
+        </test>
     </tests>
     <help>
 
@@ -546,7 +592,6 @@
 Map long mRNA/cDNA reads
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
-
 There are different long-read RNA-seq technologies, including
 tranditional full-length cDNA, EST, PacBio Iso-seq, Nanopore 2D cDNA-seq
 and Direct RNA-seq. They produce data of varying quality and properties.
@@ -581,7 +626,6 @@
 Map short accurate genomic reads
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-
 When two read files are specified, minimap2 reads from each file in turn
 and merge them into an interleaved stream internally. Two reads are
 considered to be paired if they are adjacent in the input stream and
@@ -711,15 +755,5 @@
    should not be a big concern because even the optimal alignment may be
    wrong in such regions.
     </help>
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/btp324</citation>
-        <citation type="doi">10.1093/bioinformatics/btp698</citation>
-        <citation type="bibtex">@misc{1303.3997,
-            Author = {Heng Li},
-            Title = {Minimap2: fast pairwise alignment for long nucleotide sequences},
-            Year = {2017},
-            Eprint = {arXiv:1708.01492},
-            url = {https://arxiv.org/abs/1708.01492},
-            }</citation>
-    </citations>
+    <expand macro="citations"/>
 </tool>