view ngmlr.xml @ 1:bb51c75a0157 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngmlr commit 4a774c70198144dda9f63d4c4de600aa5c866b4c
author iuc
date Wed, 27 Jul 2022 09:34:02 +0000
parents 95e4757d8da7
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="ngmlr" name="CoNvex" version="@VERSION@">
    <!-- Short tagline displayed after the tool name. -->
    <description>Gap-cost alignMents for Long Reads</description>
    <macros>
        <!-- Wrapped ngmlr release; reused by the tool version attribute and the package requirement. -->
        <token name="@VERSION@">0.2.7</token>
        <!-- Shared validator for the read group text fields: the value must be printable
             and contain neither a double nor a single quote, because the command template
             wraps these values in single quotes. -->
        <xml name="rg_validator">
            <validator type="expression" message="This field can only accept printable characters, with the exception of single and double quotes">value.isprintable() and '"' not in value and "'" not in value</validator>
        </xml>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry for this tool. -->
    <xrefs>
        <xref type="bio.tools">ngmlr</xref>
    </xrefs>
    <!-- Software package needed at run time, pinned to the version token above. -->
    <requirements>
        <requirement type="package" version="@VERSION@">ngmlr</requirement>
    </requirements>
    <!--
        Cheetah template that assembles the ngmlr invocation.
        * The thread count is read from the GALAXY_SLOTS environment variable (1 when unset);
          the leading backslash leaves that expansion to the shell instead of Cheetah.
        * Alignments are written to the 'alignments' output dataset.
        * The three boolean params (bam_fix, no_smallinv, no_lowqualitysplit) expand to
          their flag or to an empty string.
        * Read group options are appended only when header_data is 'yes', and each
          individual option only when its text field is non-empty.
        A non-zero exit code marks the job as failed (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code"><![CDATA[
        ngmlr -t \${GALAXY_SLOTS:-1} --no-progress --skip-write --output '$alignments'
            $bam_fix $no_smallinv $no_lowqualitysplit
            --reference '$reference'
            --query '$query'
            --presets $presets
            --min-identity $min_identity
            --min-residues $min_residues
            --match $match
            --mismatch $mismatch
            --gap-open $gap_open
            --gap-extend-max $gap_extend_max
            --gap-extend-min $gap_extend_min
            --gap-decay $gap_decay
            --kmer-length $kmer_length
            --kmer-skip $kmer_skip
            --bin-size $bin_size
            --max-segments $max_segments
            --subread-length $subread_length
            --subread-corridor $subread_corridor

            #if str($sam_header.header_data) == 'yes':
                #if $sam_header.rg_id:
                    --rg-id '$sam_header.rg_id'
                #end if
                #if $sam_header.rg_sm:
                    --rg-sm '$sam_header.rg_sm'
                #end if
                #if $sam_header.rg_lb:
                    --rg-lb '$sam_header.rg_lb'
                #end if
                #if $sam_header.rg_pl:
                    --rg-pl '$sam_header.rg_pl'
                #end if
                #if $sam_header.rg_ds:
                    --rg-ds '$sam_header.rg_ds'
                #end if
                #if $sam_header.rg_dt:
                    --rg-dt '$sam_header.rg_dt'
                #end if
                #if $sam_header.rg_pu:
                    --rg-pu '$sam_header.rg_pu'
                #end if
                #if $sam_header.rg_pi:
                    --rg-pi '$sam_header.rg_pi'
                #end if
                #if $sam_header.rg_pg:
                    --rg-pg '$sam_header.rg_pg'
                #end if
                #if $sam_header.rg_cn:
                    --rg-cn '$sam_header.rg_cn'
                #end if
                #if $sam_header.rg_fo:
                    --rg-fo '$sam_header.rg_fo'
                #end if
                #if $sam_header.rg_ks:
                    --rg-ks '$sam_header.rg_ks'
                #end if
            #end if
        ]]>
    </command>
    <!-- User-facing parameters. Each param is referenced in the command template by the
         name derived from its argument (leading dashes dropped, inner dashes turned
         into underscores). -->
    <inputs>
        <!-- Input datasets -->
        <param argument="--reference" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Reference genome" />
        <param argument="--query" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Reads to align" />
        <param argument="--bam-fix" type="boolean" truevalue="--bam-fix" falsevalue="" checked="true" label="Report reads with &gt; 64k CIGAR operations as unmapped" help="Required to be compatible with the BAM format" />
        <!-- Optional read group header fields; the first option ("no") is the default. -->
        <conditional name="sam_header">
            <param name="header_data" type="select" label="SAM Read Group" help="If a field is left blank it will be omitted from the output SAM">
                <option value="no">Do not set</option>
                <option value="yes">Set values</option>
            </param>
            <when value="yes">
                <!-- Every field shares rg_validator because its value is single-quoted on the command line. -->
                <param argument="--rg-id" type="text" label="Value for the read group ID field in the SAM/BAM header">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-sm" type="text" label="RG: Sample">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-lb" type="text" label="RG: Library">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-pl" type="text" label="RG: Platform">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-ds" type="text" label="RG: Description">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-dt" type="text" label="RG: Date" help="YYYY-MM-DD">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-pu" type="text" label="RG: Platform unit">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-pi" type="text" label="RG: Median insert size">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-pg" type="text" label="RG: Programs">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-cn" type="text" label="RG: Sequencing center">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-fo" type="text" label="RG: Flow order">
                    <expand macro="rg_validator" />
                </param>
                <param argument="--rg-ks" type="text" label="RG: Key sequence">
                    <expand macro="rg_validator" />
                </param>
            </when>
            <when value="no" />
        </conditional>
        <!-- Alignment presets and filtering thresholds -->
        <param argument="--presets" type="select" label="Presets to use for alignment">
            <option value="pacbio">PacBio</option>
            <option value="ont">Nanopore</option>
        </param>
        <param argument="--min-identity" type="float" min="0" max="1" value="0.65" label="Alignments with an identity lower than this threshold will be discarded" />
        <!-- NOTE(review): the label speaks of "n residues" while the accepted range is 0-1;
             presumably the value is a fraction of the read length. Confirm against the ngmlr usage text. -->
        <param argument="--min-residues" type="float" min="0" max="1" value="0.25" label="Alignments containing less than n residues will be discarded" />
        <param argument="--no-smallinv" type="boolean" truevalue="--no-smallinv" falsevalue="" label="Don't detect small inversions" />
        <!-- Ticking the box passes the no-lowqualitysplit flag, so the label is phrased as a
             negative, in line with no-smallinv above. -->
        <param argument="--no-lowqualitysplit" type="boolean" truevalue="--no-lowqualitysplit" falsevalue="" label="Don't split alignments with poor quality" />
        <!-- Scoring scheme -->
        <param argument="--match" type="float" value="2" label="Match score" />
        <param argument="--mismatch" type="float" value="-5" label="Mismatch score" />
        <param argument="--gap-open" type="float" value="-5" label="Gap open score" />
        <param argument="--gap-extend-max" type="float" value="-5" label="Gap open extend max" />
        <param argument="--gap-extend-min" type="float" value="-1" label="Gap open extend min" />
        <param argument="--gap-decay" type="float" value="0.15" label="Gap extend decay" />
        <!-- Seeding and candidate search -->
        <param argument="--kmer-length" type="integer" min="10" max="15" value="13" label="K-mer length in bases" />
        <param argument="--kmer-skip" type="integer" value="2" label="Number of k-mers to skip when building the lookup table from the reference" />
        <param argument="--bin-size" type="integer" value="4" label="Sets the size of the grid used during candidate search" />
        <param argument="--max-segments" type="integer" value="1" label="Max number of segments allowed for a read per kb" />
        <param argument="--subread-length" type="integer" value="256" label="Length of fragments reads are split into" />
        <param argument="--subread-corridor" type="integer" value="40" label="Length of corridor sub-reads are aligned with" />
    </inputs>
    <!-- Single SAM dataset; the command template hands its path to ngmlr as the output file. -->
    <outputs>
        <data name="alignments" format="sam" label="${tool.name} on ${on_string} (Alignments)" />
    </outputs>
    <!-- Functional tests. Each expected SAM file is compared after sorting, and up to two
         differing lines are tolerated. -->
    <tests>
        <!-- Default parameters, first reference/read pair. -->
        <test>
            <param name="reference" value="ngmlr-ref1.fa" />
            <param name="query" value="ngmlr-in1.fa" />
            <output name="alignments" file="ngmlr-out1.sam" sort="true" lines_diff="2" />
        </test>
        <!-- Default parameters, second reference/read pair. -->
        <test>
            <param name="reference" value="ngmlr-ref2.fa" />
            <param name="query" value="ngmlr-in2.fa" />
            <output name="alignments" file="ngmlr-out2.sam" sort="true" lines_diff="2" />
        </test>
        <!-- Gzipped inputs with a lowered min_residues threshold. -->
        <test>
            <param name="reference" value="ngmlr-ref3.fa.gz" />
            <param name="query" value="ngmlr-in3.fa.gz" />
            <param name="min_residues" value="0.01" />
            <output name="alignments" file="ngmlr-out3.sam" sort="true" lines_diff="2" />
        </test>
        <!-- Same gzipped inputs with default parameters. -->
        <test>
            <param name="reference" value="ngmlr-ref3.fa.gz" />
            <param name="query" value="ngmlr-in3.fa.gz" />
            <output name="alignments" file="ngmlr-out4.sam" sort="true" lines_diff="2" />
        </test>
    </tests>
    <help>
        <![CDATA[
CoNvex Gap-cost alignMents for Long Reads (ngmlr) is a long-read mapper designed to sensitively align PacBio or Oxford Nanopore reads to (large) reference genomes. It was designed to quickly and correctly align the reads, including those spanning (complex) structural variations. Ngmlr uses an SV-aware k-mer search to find approximate mapping locations for a read and then a banded Smith-Waterman alignment algorithm to compute the final alignment. Ngmlr uses a convex gap cost model that penalizes gap extensions for longer gaps less than for shorter ones to compute precise alignments. The gap model allows ngmlr to account for both the sequencing error and real genomic variations at the same time and makes it especially effective at more precisely identifying the position of breakpoints stemming from structural variations. The k-mer search helps to detect and split reads that cannot be aligned linearly, enabling ngmlr to reliably align reads to a wide range of different structural variations including nested SVs (e.g. inversions flanked by deletions).
        ]]>
    </help>
    <!-- Publication users are asked to cite when they use this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1038/s41592-018-0001-7</citation>
    </citations>
</tool>