view helixer.xml @ 5:c2fc4ac35199 draft default tip

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/helixer commit 54445b041f46b8b3a8e25e90e84f034cdddd2ab1
author genouest
date Tue, 09 Jul 2024 13:09:23 +0000
parents e3846dc36c4d
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="helixer" name="Helixer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Short tagline displayed next to the tool name. -->
    <description>gene calling</description>
    <!-- macros.xml is expected to supply the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens
         and the "requirements" / "citation" macros expanded in this file
         (not visible here; confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">helixer</xref>
    </xrefs>
    <!-- Dependencies are declared once in the shared "requirements" macro. -->
    <requirements>
        <expand macro="requirements"/>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## Run the model-fetching helper first; Helixer.py is only started if it succeeds.
        /usr/local/bin/fetch_helixer_models.py &&
        Helixer.py
        --fasta-path '$input'
        --species '$species'
        --gff-output-path '$output'
        --temporary-dir ./

        ## Per-lineage fallback values, used whenever the matching text field is left empty.
        #set default_subsequence_length = {"fungi": 21384, "land_plant": 106920, "invertebrate": 213840, "vertebrate": 213840}
        #set subsequence_len = default_subsequence_length.get(str($lineages))
        #set default_overlap_offset = {"fungi": 10692, "land_plant": 53460, "invertebrate": 106920, "vertebrate": 106920}
        #set overlap_off = default_overlap_offset.get(str($lineages))
        #set default_overlap_core_length = {"fungi": 16038, "land_plant": 80190, "invertebrate": 160380, "vertebrate": 160380}
        #set overlap_core_len = default_overlap_core_length.get(str($lineages))

        #if str($input_model) != "" and str($input_model) != "None":
            ## A user-supplied model replaces the lineage; the subsequence length
            ## is passed exactly as entered (no per-lineage fallback applies here).
            --model-filepath '$input_model'
            --subsequence-length '$subsequence_length'
        #else:
            --lineage '$lineages'
            #if str($subsequence_length) == "":
                --subsequence-length '$subsequence_len'
            #else:
                --subsequence-length '$subsequence_length'
            #end if

            #if str($option_overlap.use_overlap) == "true":
                ## Parameters nested in a conditional must be referenced through the
                ## conditional's name, otherwise the template lookup fails.
                #if str($option_overlap.overlap_offset) == "":
                    --overlap-offset '$overlap_off'
                #else:
                    --overlap-offset '$option_overlap.overlap_offset'
                #end if

                #if str($option_overlap.overlap_core_length) == "":
                    --overlap-core-length '$overlap_core_len'
                #else:
                    --overlap-core-length '$option_overlap.overlap_core_length'
                #end if
            #end if
        #end if

        ## Overlapping is on by default in Helixer, so it has to be switched off
        ## explicitly when the user answers "No".
        #if str($option_overlap.use_overlap) != "true":
            --no-overlap
        #end if

        --batch-size $size
        --window-size $post_processing.window_size
        --min-coding-length $post_processing.min_coding_length
        --edge-threshold $post_processing.edge_threshold
        --peak-threshold $post_processing.peak_threshold

    ]]></command>

    <inputs>
        <!-- Genome to annotate and, optionally, a custom model that replaces the built-in lineage models. -->
        <param argument="--fasta-path" name="input" type="data" format="fasta,fasta.gz" label="Genomic sequence"></param>
        <param argument="--model-filepath" optional="true" name="input_model" type="data" format="h5" label="Lineage model" help="Import your lineage model to replace the default lineage. Please enter a value for the --subsequence_length parameter."></param>
        <param argument="--lineage" name="lineages" type="select" label="Available lineages" help="Choose the model to use for the annotation">
            <option value="land_plant">land plant</option>
            <option value="vertebrate">vertebrate</option>
            <option value="invertebrate">invertebrate</option>
            <option value="fungi">fungi</option>
        </param>

        <!-- Species name is restricted to letters, digits and underscore; other characters are stripped. -->
        <param argument="--species" type="text" optional="true" label="Species name">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_" />
                </valid>
            </sanitizer>
            <validator type="regex">[0-9a-zA-Z_]+</validator>
        </param>
        <!-- A batch needs at least one element, hence min="1". -->
        <param name="size" argument="--batch-size" type="integer" min="1" value="8" label="Batch size" help="It may be necessary to reduce it if the GPU runs out of memory" />
        <section name="post_processing" title="Post-processing">
            <param argument="--window-size" type="integer" min="0" value="100" label="Window size" help="This determines the number of bases averaged during the sliding window approach"/>
            <param argument="--edge-threshold" type="float" min="0" max="1" value="0.1" label="Edge threshold" help="This threshold specifies the genic score which defines the start / end boundaries of each candidate region"/>
            <param argument="--peak-threshold" type="float" min="0" max="1" value="0.8" label="Peak threshold" help="This threshold specifies the minimum peak genic score required to accept the candidate region"/>
            <param argument="--min-coding-length" type="integer"  min="0" value="100" label="Minimum coding length"/>
        </section>

        <!-- The three fields below stay free-text so that "empty" can mean "use the lineage default";
             the validators accept only an empty value or digits. -->
        <param name="subsequence_length" type="text" label="Subsequence length: how much of the genome the Neural Network can see at once" help="If you do not want to keep the default value, please enter the new value. Default values are 21384 for fungi, 106920 for land plant, and 213840 for vertebrates and invertebrates">
            <validator type="regex" message="Leave empty to use the default, or enter a whole number (digits only)">^[0-9]*$</validator>
        </param>

        <conditional name="option_overlap">
            <param name="use_overlap" type="select" label="Enable overlapping step after predictions" help="This step combines predictions made on each subsequences, to improve quality near start and end of subsequences.">
                <option value="true" selected="true">Yes</option>
                <option value="false">No</option>
            </param>
            <when value="true">
                <param name="overlap_offset" type="text"  label="Overlap offset: Smaller values may lead to better predictions but will take longer" help="If you do not want to keep the default value, please enter the new value. Default values are 10692 for fungi, 53460 for land plant, and 106920 for vertebrates and invertebrates.">
                    <validator type="regex" message="Leave empty to use the default, or enter a whole number (digits only)">^[0-9]*$</validator>
                </param>
                <param name="overlap_core_length" type="text" label="Overlap core length: Predicted subsequences will be cut to this length to increase prediction quality. Smaller values may lead to better predictions but will take longer" help="If you do not want to keep the default value, please enter the new value. Default values are 16038 for fungi, 80190 for land plant, and 160380 for vertebrates and invertebrates.">
                    <validator type="regex" message="Leave empty to use the default, or enter a whole number (digits only)">^[0-9]*$</validator>
                </param>
            </when>
            <when value="false"/>
        </conditional>
    </inputs>

    <outputs>
        <!-- Single GFF3 dataset; the command writes it through gff-output-path. -->
        <data name="output" format="gff3"/>
    </outputs>
    <tests>
        <!-- Built-in land_plant lineage with a species name; all length fields left empty
             so the per-lineage defaults are used. -->
        <test expect_num_outputs="1">
            <param name="input" value="sequence.fasta"/>
            <param name="lineages" value="land_plant"/>
            <param name="species" value="Arabidopsis"/>
            <param name="size" value="8"/>
            <param name="subsequence_length" value=""/>
            <section name="post_processing">
                <param name="window_size" value="100"/>
                <param name="edge_threshold" value="0.1"/>
                <param name="peak_threshold" value="0.8"/>
                <param name="min_coding_length" value="100"/>
            </section>
            <conditional name="option_overlap">
                <param name="use_overlap" value="true"/>
                <param name="overlap_offset" value=""/>
                <param name="overlap_core_length" value=""/>
            </conditional>
            <!-- Expected file name kept exactly as referenced (sic) so it keeps matching the test data. -->
            <output name="output" value="ouput_species.gff3" ftype="gff3" compare="sim_size" delta="100"/>
        </test>

        <!-- Built-in vertebrate lineage, no species name. -->
        <test expect_num_outputs="1">
            <param name="input" value="sequence.fasta"/>
            <param name="lineages" value="vertebrate"/>
            <param name="size" value="8"/>
            <param name="subsequence_length" value=""/>
            <section name="post_processing">
                <param name="window_size" value="100"/>
                <param name="edge_threshold" value="0.1"/>
                <param name="peak_threshold" value="0.8"/>
                <param name="min_coding_length" value="100"/>
            </section>
            <conditional name="option_overlap">
                <param name="use_overlap" value="true"/>
                <param name="overlap_offset" value=""/>
                <param name="overlap_core_length" value=""/>
            </conditional>
            <output name="output" value="vertebrate.gff3" ftype="gff3" lines_diff="2"/>
        </test>

        <!-- Custom model file with an explicit subsequence length; the run is expected to fail. -->
        <test expect_failure="true">
            <param name="input" value="sequence.fasta"/>
            <param name="input_model" value="test.h5"/>
            <param name="size" value="8"/>
            <param name="subsequence_length" value="1000"/>
            <section name="post_processing">
                <param name="window_size" value="100"/>
                <param name="edge_threshold" value="0.1"/>
                <param name="peak_threshold" value="0.8"/>
                <param name="min_coding_length" value="100"/>
            </section>
            <conditional name="option_overlap">
                <param name="use_overlap" value="true"/>
                <param name="overlap_offset" value=""/>
                <param name="overlap_core_length" value=""/>
            </conditional>
        </test>
    </tests>

    <!-- Help text (reStructuredText): a one-line summary plus a named hyperlink
         target pointing at the upstream Helixer repository. -->
    <help><![CDATA[
        Helixer_: Gene calling with Deep Neural Networks.

        .. _Helixer: https://github.com/weberlab-hhu/Helixer
    ]]></help>
    <!-- Citation entries come from the shared "citation" macro. -->
    <expand macro="citation"></expand>
</tool>