view metaeuk_easy_predict.xml @ 2:be6bbdeafcb8 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaeuk commit 3309bdde7b03c5c250a020b0aee221133d9ea79b"
author iuc
date Thu, 21 Oct 2021 16:10:35 +0000
parents f91548912113
children
line wrap: on
line source

<tool id="metaeuk_easy_predict" name="MetaEuk Easy Predict" version="@TOOL_VERSION@+galaxy0">
    <!-- Short summary of what the wrapped program does. -->
    <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
    <!-- Cross-reference of type bio.tools for this tool. -->
    <xrefs>
        <xref type="bio.tools">MetaEuk</xref>
    </xrefs>
    <!-- @TOOL_VERSION@ is defined once here and reused by the tool's version
         attribute and by the package requirement below, so both stay in step. -->
    <macros>
        <token name="@TOOL_VERSION@">5.34c21f2</token>
    </macros>
    <!-- The metaeuk package is required at exactly the version held by the token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement>
    </requirements>
    <!-- Runs "metaeuk easy-predict" with ./tmp/metaeuk_working as its working
         directory, then moves output.fasta.codon.fas and output.fasta.gff onto the
         two Galaxy output datasets. The intron limits are only passed when the
         advanced section is switched on. -->
    <command detect_errors="aggressive"><![CDATA[
        mkdir -p ./tmp &&
        metaeuk easy-predict
            '${contigs}'
            '${query}'
            output.fasta
            "./tmp/metaeuk_working"
            --threads \${GALAXY_SLOTS:-1}
            --min-length '${min_length}'
            -e '${segment_eval}'
            --metaeuk-eval '${metaeuk_eval}'
            --metaeuk-tcov '${metaeuk_tcov}'
            #if $adv.adv_options == "yes"
                --max-intron '${adv.max_intron}'
                --min-intron '${adv.min_intron}'
            #end if
            --write-frag-coords ${write_frag_coords}
        && mv output.fasta.codon.fas '${output}'
        && mv output.fasta.gff '${gff_output}'
    ]]></command>
    <inputs>
        <!-- Sequence inputs; both are handed to metaeuk easy-predict as positional arguments. -->
        <param name="contigs" type="data" format="fasta" label="Contigs to search" />
        <param name="query" type="data" format="fasta" label="Proteins to search against contigs" />
        <!-- Thresholds that are always written to the command line. -->
        <param argument="-e" name="segment_eval" type="float" value="100" label="Maximum e-value of individual match segment" />
        <param argument="--min-length" name="min_length" type="integer" value="15" label="Minimum number of codons in predicted open reading frame" />
        <param argument="--metaeuk-eval" name="metaeuk_eval" type="float" value="0.001" label="Maximum e-value of combined exon set" />
        <param argument="--metaeuk-tcov" name="metaeuk_tcov" type="float" value="0.5" label="Minimum length ratio of combined exon set to target" />
        <!-- The intron size limits only reach the command line when "Yes" is selected. -->
        <conditional name="adv">
            <param type="select" name="adv_options" label="Show advanced options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--max-intron" name="max_intron" value="10000" type="integer" label="Maximum intron size" />
                <param argument="--min-intron" name="min_intron" value="15" type="integer" label="Minimum intron size" />
            </when>
            <when value="no" />
        </conditional>
        <!-- The selected value is passed verbatim to the write-frag-coords flag, so
             1 must be the "Yes" choice; 0 ("No") remains the default. -->
        <param argument="--write-frag-coords" name="write_frag_coords" type="select" label="Write the contig coords of the stop-to-stop fragment in which putative exon lies" help="The fragment coordinates in square brackets refer to the original fragment in which the exon was found.">
            <option value="0" selected="true">No</option>
            <option value="1">Yes</option>
        </param>
    </inputs>
    <outputs>
        <!-- Explicit labels keep the two result datasets distinguishable in the
             history; the command fills "output" from output.fasta.codon.fas and
             "gff_output" from output.fasta.gff. -->
        <data name="output" format="fasta" label="${tool.name} on ${on_string}: codon FASTA" />
        <data name="gff_output" format="gff" label="${tool.name} on ${on_string}: GFF" />
    </outputs>

    <tests>
        <!-- Defaults only: the advanced intron flags must not appear in the command. -->
        <test expect_num_outputs="2">
            <param name="contigs" ftype="fasta" value="contigs.fna" />
            <param name="query" ftype="fasta" value="proteins.faa" />
            <assert_command>
                <not_has_text text="--max-intron" />
                <has_text text="--write-frag-coords 0" />
            </assert_command>
            <output name="output" ftype="fasta" value="output.fasta" />
            <output name="gff_output" ftype="gff" value="output.gff" />
        </test>
        <!-- Advanced options plus fragment coordinates: both settings must reach the command. -->
        <test expect_num_outputs="2">
            <param name="contigs" ftype="fasta" value="contigs.fna" />
            <param name="query" ftype="fasta" value="proteins.faa" />
            <param name="write_frag_coords" value="1" />
            <conditional name="adv">
                <param name="adv_options" value="yes" />
                <param name="max_intron" value="1000" />
            </conditional>
            <assert_command>
                <has_text text="--max-intron '1000'" />
                <has_text text="--write-frag-coords 1" />
            </assert_command>
            <output name="output" ftype="fasta" value="output_w_frag_coords.fasta" />
            <output name="gff_output" ftype="gff" value="output.gff" />
        </test>
    </tests>
    <help><![CDATA[

    MetaEuk_ is a modular toolkit designed for large-scale gene discovery and
    annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and
    sensitive homology search capabilities of MMseqs2_ with a dynamic programming
    procedure to recover optimal exon sets. It reduces redundancies in multiple
    discoveries of the same gene and resolves conflicting gene predictions on
    the same strand. 

    This tool implements the easy-predict command from metaeuk, which combines
    metaeuk modules into a pipeline for protein alignment prediction. Input is
    the contigs you want to search for protein hits and the proteins you want
    to search against those contigs. Output is FASTA format predicted ORFs, with
    exons annotated in the header according to the metaeuk header format_, plus
    a GFF file written by the same run.

    .. _MetaEuk: https://github.com/soedinglab/metaeuk
    .. _MMseqs2: https://github.com/soedinglab/MMseqs2
    .. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header
    ]]></help>
    <!-- Publication to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/s40168-020-00808-x</citation>
    </citations>
</tool>