view metabuli.xml @ 1:70129c1e86a8 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metabuli commit 4bb153f5820d234c08ea0f21e3e59938d1ab10b9
author iuc
date Tue, 04 Jun 2024 12:18:48 +0000
parents b01d6b38f779
children
line wrap: on
line source

<tool id="metabuli_classify" name="metabuli classify" version="@VERSION@+galaxy0" profile="20.01">
    <description>metagenomic sequences</description>
    <!-- macros.xml is the only import, so the macros expanded in this file are expected to be defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">metabuli</xref>
    </xrefs>
    <expand macro="requirements"/>
    <expand macro="version_command"/>

    <command detect_errors="exit_code"><![CDATA[
        ## Work out the --max-ram budget (in GB) from the memory Galaxy grants the job:
        ## 8 GB when GALAXY_MEMORY_MB is unset, otherwise the allotment minus 1 GB of
        ## headroom, clamped so the value never drops below 1.
        if [ -z "\$GALAXY_MEMORY_MB" ] ; then
            METABULI_GALAXY_MEMORY_GB=8 ;
        else
            METABULI_GALAXY_MEMORY_GB=\$(( GALAXY_MEMORY_MB / 1024 - 1 )) ;
            if [ "\$METABULI_GALAXY_MEMORY_GB" -lt 1 ] ; then
                METABULI_GALAXY_MEMORY_GB=1 ;
            fi ;
        fi ;

        ## Link the inputs under fixed names whose extension reflects gzip compression.
        ## NOTE(review): only a '.gz' suffix is detected here, so a fastqsanger.bz2
        ## dataset is linked as plain '.fastq' - confirm metabuli copes with that.
        #set ext = '.fastq'
        #if str($library.lib_type) == "paired"
            #if $library.r1.ext.endswith('.gz')
                #set ext = $ext + '.gz'
            #end if
            #set read1_file = 'r1' + $ext
            #set read2_file = 'r2' + $ext
            ln -s '$library.r1' '$read1_file' &&
            ln -s '$library.r2' '$read2_file' &&
        #else if str($library.lib_type) == "single"
            #if $library.single.ext.endswith('.gz')
                #set ext = $ext + '.gz'
            #end if
            #set se_read = 'sr' + $ext
            ln -s '$library.single' '$se_read' &&
        #else if str($library.lib_type) == "collection"
            #if $library.input1.forward.ext.endswith('.gz')
                #set ext = $ext + '.gz'
            #end if
            #set read1_file = 'r1' + $ext
            #set read2_file = 'r2' + $ext
            ln -s '$library.input1.forward' '$read1_file' &&
            ln -s '$library.input1.reverse' '$read2_file' &&
        #end if

        metabuli classify

        ## Positional arguments: read file(s), database directory, output directory, job id.
        #if str($library.lib_type) == "single"
            '$se_read'
            --mask $library.mask
        #else
            ## Both "paired" and "collection" supply a forward/reverse pair.
            '$read1_file'
            '$read2_file'
        #end if
        '$metabuli_databases.fields.path'
        out
        mtbl
        #if str($taxonomy_options.taxonomy_path) == 'local'
            --taxonomy-path '$taxonomy_options.taxonomy.fields.path'
        #end if
        --threads \${GALAXY_SLOTS:-4}
        --min-score $min_score
        --min-cov $min_cov
        --min-cons-cnt $min_cons_cnt
        --min-cons-cnt-euk $min_cons_cnt_euk
        --min-sp-score $min_sp_score
        --hamming-margin $hamming_margin
        --match-per-kmer $match_per_kmer
        #if $accession_level
            --accession-level 1
        #end if
        #if str($library.lib_type) == "single"
            --seq-mode $library.seq_mode
        #end if
        --max-ram \${METABULI_GALAXY_MEMORY_GB:-8}
        >
        '$log'

    ]]>    </command>
    <inputs>
        <!-- READ INPUT: the selected layout decides which read parameters are shown and how the command links them. -->
        <conditional name="library">
            <param name="lib_type" type="select" label="Input reads type or collection" help="Select 'Paired End' for one pair of read files, 'Single End' for a single short- or long-read file, or 'Paired Collection' for a paired-end dataset collection">
                <option value="paired" selected="true">Paired End</option>
                <option value="single">Single End</option>
                <option value="collection">Paired Collection</option>
            </param>
            <when value="paired">
                <param name="r1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
                <param name="r2" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
            </when>
            <when value="single">
                <param name="single" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Read" help="Single end/long read"/>
                <!-- Passed to metabuli as the seq-mode option (single-end layout only). -->
                <param name="seq_mode" type="select" label="Sequence Mode">
                    <option value="3" selected="true">Long Read</option>
                    <option value="1">Single End Short Read</option>
                </param>
                <param argument="--mask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Mask low complexity sequences in k-mer stage"/>
            </when>
            <when value="collection">
                <param name="input1" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="Paired collection" help="A paired dataset collection holding the forward and reverse reads"/>
            </when>
        </conditional>

        <!-- DATABASE INPUT -->
        <param label="Select a database" name="metabuli_databases" type="select">
            <options from_data_table="metabuli_databases">
                <validator message="No database is available" type="no_options" />
            </options>
        </param>

        <!-- TAXONOMY: "local" adds a taxonomy-path option to the command; "embed" adds nothing. -->
        <conditional name="taxonomy_options">
            <param name="taxonomy_path" type="select" label="Select taxonomy path from either local or accompanied metabuli db?">
                <option value="local">Use a local taxonomy</option>
                <option value="embed" selected="true">Use one from a metabuli database</option>
            </param>
            <when value="local">
                <param name="taxonomy" label="Select taxonomy" type="select">
                    <options from_data_table="taxonomy">
                        <validator message="No database is available" type="no_options" />
                    </options>
                </param>
            </when>
            <when value="embed">
            </when>
        </conditional>

        <!-- CLASSIFICATION THRESHOLDS: each value is forwarded to the metabuli option named in "argument". -->
        <param argument="--min-score" type="float" min="0" max="1" value="0.000" label="Min sequence similarity score." help="Value for precision mode: ONT(0.008), PacBio Sequel II (0.005), PacBio HiFi (0.07), Illumina (0.15)"/>
        <param argument="--min-sp-score" type="float" min="0" max="1" value="0.0" label="Min. score for species-level or lower-level classification." help="Value for precision mode: PacBio HiFi (0.3), Illumina (0.5)"/>
        <param argument="--min-cov" type="float" min="0" max="1" value="0.0" label="Min query coverage."/>
        <param argument="--min-cons-cnt" type="integer" min="0" max="100" value="4" label="Min. number of consecutive matches for prokaryote/virus classification."/>
        <param argument="--min-cons-cnt-euk" type="integer" min="0" max="100" value="9" label="Min. number of consecutive matches for eukaryote classification."/>
        <param argument="--hamming-margin" type="integer" min="0" max="100" value="0" label="Extra Hamming distance allowed beyond the minimum distance."/>
        <param argument="--match-per-kmer" type="integer" min="0" max="100" value="0" label="Num. of matches per query k-mer. Larger values assign more memory for storing k-mer matches."/>
        <param argument="--accession-level" type="boolean" truevalue="1" falsevalue="0" checked="false" label="search a database for accession-level classification."/>
    </inputs>

    <outputs>
        <!-- Result files are collected from the "out" job directory; their names carry the "mtbl" prefix passed on the command line. -->
        <data name="report" format="tabular" from_work_dir="out/mtbl_report.tsv" label="${tool.name} on ${on_string} report file"/>
        <data name="classification" format="tabular" from_work_dir="out/mtbl_classifications.tsv" label="${tool.name} on ${on_string}: classification"/>
        <data name="krona" format="html" from_work_dir="out/mtbl_krona.html" label="${tool.name} on ${on_string}: Krona report"/>
        <!-- Receives the standard output that the command redirects into it. -->
        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log file"/>
    </outputs>

    <tests>
        <!-- Test 1: paired-end Illumina reads with a local taxonomy -->
        <test expect_num_outputs="4">
            <conditional name="library">
                <param name="lib_type" value="paired"/>
                <param name="r1" value="reads/illumina_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="r2" value="reads/illumina_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <param name="metabuli_databases" value="RefSeqTest"/>
            <conditional name="taxonomy_options">
                <param name="taxonomy_path" value="local"/>
                <param name="taxonomy" value="taxonomy"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_text text="Escherichia coli str. K-12 substr. MG1655"/>
                    <has_text text="511145"/>
                    <has_text text="66.6667"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: ONT long reads, default thresholds -->
        <test expect_num_outputs="4">
            <conditional name="library">
                <param name="lib_type" value="single"/>
                <param name="single" value="reads/ont.r10.4.1.fastq.gz" ftype="fastq.gz"/>
            </conditional>
            <param name="metabuli_databases" value="RefSeqTest"/>
            <conditional name="taxonomy_options">
                <param name="taxonomy_path" value="local"/>
                <param name="taxonomy" value="taxonomy"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_text text="Escherichia coli str. K-12 substr. MG1655"/>
                    <has_text text="30.4348"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: ONT long reads in precision mode (min_score raised to 0.008) -->
        <test expect_num_outputs="4">
            <conditional name="library">
                <param name="lib_type" value="single"/>
                <param name="single" value="reads/ont.r10.4.1.fastq.gz" ftype="fastq.gz"/>
            </conditional>
            <param name="metabuli_databases" value="RefSeqTest"/>
            <conditional name="taxonomy_options">
                <param name="taxonomy_path" value="local"/>
                <param name="taxonomy" value="taxonomy"/>
            </conditional>
            <param name="min_score" value="0.008"/>
            <output name="report">
                <assert_contents>
                    <has_text text="21.7391"/>
                    <has_text text="Salmonella enterica subsp. enterica serovar Typhimurium str. LT2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: PacBio HiFi long reads; checks both the report and the per-read classification -->
        <test expect_num_outputs="4">
            <conditional name="library">
                <param name="lib_type" value="single"/>
                <param name="single" value="reads/pacbio.hifi.fastq.gz" ftype="fastq.gz"/>
            </conditional>
            <param name="metabuli_databases" value="RefSeqTest"/>
            <conditional name="taxonomy_options">
                <param name="taxonomy_path" value="local"/>
                <param name="taxonomy" value="taxonomy"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_text text="Salmonella enterica subsp. enterica serovar Typhimurium str. LT2"/>
                    <has_text text="32.5000"/>
                </assert_contents>
            </output>
            <output name="classification">
                <assert_contents>
                    <has_text text="a4d8d18d-afde-b01a-d552-476bc9831614"/>
                    <has_text text="208964:931"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
        
        Metabuli_ performs specific and sensitive metagenomic classification via joint analysis of DNA and amino acid sequences.

        .. _Metabuli: https://github.com/steineggerlab/Metabuli/tree/master?tab=readme-ov-file#classification

    ]]>
    </help>

    <!-- Expands the "citations" macro; macros.xml is the only macro import in this file, so it is expected to be defined there. -->
    <expand macro="citations" />
</tool>