<!--
view mirnature.xml @ 0:dd2f87ccbd2c draft default tip

planemo upload for repository https://github.com/Bierinformatik/miRNAture commit 9e4e624a3a486cbd805bc3beb00e9903f315efe0
author iuc
date Sun, 11 Dec 2022 22:28:10 +0000
parents
children
line wrap: on
line source
-->

<tool id="mirnature" name="miRNAture" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>Computational detection of canonical microRNAs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="aggressive"><![CDATA[
        ## Job directory layout:
        ##   uncompress/       unpacked pre-calculated miRNAture dataset
        ##   queries_to_test/  BLAST query sequences and their description file
        ##   temporal/         staged copies of the user-supplied inputs
        ##   output/           miRNAture working directory, scanned by the output collections
        mkdir -p uncompress queries_to_test temporal output &&

        ## speG accepts several datasets, so append each one to a single genome FASTA
        ## (a plain cp breaks as soon as more than one dataset is selected).
        #for $genome in $speG
            cat '$genome' >> temporal/genome.fasta &&
        #end for

        ## Optional list(s) of miRNA models, merged into one file that is handed to -sublist.
        #if $subset_models
            #for $model_list in $subset_models
                cat '$model_list' >> temporal/sublist.txt &&
            #end for
        #end if

        ## BLAST queries are only staged when at least one blast strategy is selected.
        #if $blast_strategy:
            #for $query in $queries_to_blast
                cat '$query' >> queries_to_test/Unknown_species.fa &&
            #end for
            echo "Unknown_species.fa miRNA Unknown species" > queries_to_test/queries_description.txt &&
        #end if

        ## The dataset parameter is declared inside the database_source conditional, so it is
        ## addressed by its fully-qualified name in both branches.
        #if str($database_source.database_source_selector) == "cached":
            ln -s '$database_source.dataset.fields.path' uncompress/data.gz &&
        #else:
            ln -s '$database_source.dataset' uncompress/data.gz &&
        #end if
        tar -xf uncompress/data.gz --directory uncompress/ &&
        rm uncompress/data.gz &&
        ## Change name of user folder to Dataset
        mv uncompress/* uncompress/Dataset &&

        miRNAture
        -stage '$stage'
        #if $subset_models
            -sublist 'temporal/sublist.txt'
        #end if
        -nbitscore_cut '$nbitscore'
        -dataF 'uncompress/Dataset/'
        -speG 'temporal/genome.fasta'
        -speN '$speN'
        -speT '$speT'
        -pe '1'
        -workdir 'output/'
        ## The selected homology modes are joined with commas and "final" is always appended.
        -m "${",".join(map(str, $homology_mode)) + ",final"}"
        #if $blast_strategy:
            ## The selected strategies are joined with commas and "ALL" is always appended.
            -strategy "${",".join(map(str, $blast_strategy)) + ",ALL"}"
            -blstq 'queries_to_test/'
        #end if
        -rep '$repeat_filter' > '$std_output'
        ]]></command>

    <inputs>
        <!-- Target genome / sequence dataset(s) -->
        <param argument="-speG" format="fasta" multiple="true" type="data"
            label="Input genome or sequence"
            help="Input sequence to be processed by miRNAture" />

        <!-- Species identification -->
        <param argument="-speN" type="text" label="Scientific species name written separated by '_', such as: Homo_sapiens" >
            <!-- The message lists exactly what the expression accepts: word characters and parentheses -->
            <validator type="regex" message="Only letters, digits, underscores and parentheses are allowed">^[\(\w\)]+$</validator>
        </param>
        <param argument="-speT" type="text" label="Species tag to identify through experiment (i.e short 4 letters tag)" >
            <validator type="regex" message="Please write only letters">^[a-zA-Z]+$</validator>
        </param>

        <!-- Numeric threshold -->
        <param argument="-nbitscore_cut" name="nbitscore" type="float" label="nBitscore"
            value="1" min="0" max="1" help="Control of normalized bitscore (nbitscore) threshold to filter Rfam candidates" />

        <!-- Each option value is passed verbatim to -rep -->
        <param argument="-rep" name="repeat_filter" type="select" label="Repeats filter" help="Repetition cutoff" >
            <option value="relax,150,100">relax</option>
            <option value="default,200,100">default</option>
        </param>

        <!-- Optional model list; the command only adds -sublist when a dataset is supplied -->
        <param format="txt" multiple="true" name="subset_models" type="data"
            label="List of miRNA models to be searched"
            help="Optional text file listing the miRNA models to search (passed to miRNAture as -sublist)" optional="true"/>

        <!-- Pre-calculated miRNAture dataset: from a data table or from the history -->
        <conditional name="database_source">
            <param argument="-dataF" name="database_source_selector" type="select" label="Choose the source for the database">
                <option value="cached">Pre-defined datasets</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="dataset" type="select" help="Select a pre-calculated dataset to run miRNAture (i.e. https://doi.org/10.5281/zenodo.7180160)" label="Pre-calculated datasets" >
                    <options from_data_table="mirnature_selection" />
                    <validator message="No reference dataset is available for miRNAture" type="no_options" />
                </param>
            </when>
            <when value="history">
                <param name="dataset" format="gz" type="data" label="Input user Pre-calculated dataset" help="Please submit the pre-calculated data to run miRNAture (i.e https://doi.org/10.5281/zenodo.7180160)" />
            </when>
        </conditional>

        <!-- Pipeline stage; the output collections are filtered on this value -->
        <param argument="-stage" type="select" label="Select one miRNAture stage">
            <option value="complete">complete</option>
            <option value="homology">homology</option>
        </param>

        <!-- At least one mode is required: the command joins the selection unconditionally,
             and a multiple select would otherwise default to optional -->
        <param argument="-mode" name="homology_mode" type="select" multiple="true" optional="false" label="Homology mode">
            <option value="blast">blast</option>
            <option value="rfam">rfam</option>
            <option value="mirbase">mirbase</option>
            <option value="hmm">hmm</option>
        </param>

        <!-- Selecting a strategy also makes the command stage the query sequences below -->
        <param argument="-strategy" name="blast_strategy" optional="true" type="select" multiple="true"
            label="Select one or more pre-calculated set of blastn parameters, accordingly to the miRNAture publication.">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
            <option value="4">4</option>
            <option value="5">5</option>
            <option value="6">6</option>
            <option value="7">7</option>
            <option value="8">8</option>
            <option value="9">9</option>
        </param>
        <param argument="-blstq" format="fasta" multiple="true" name="queries_to_blast" type="data" label="Query sequences"
            help="Query sequences to search in target genome" optional="true" />
    </inputs>

<outputs>
    <!-- Captured standard output of the miRNAture run (always produced) -->
    <data name="std_output" format="txt" label="Standard output miRNAture" />

    <!-- Collections produced when stage == "homology" -->
    <collection name="gff3_files_h" type="list" label="${tool.name} on ${on_string}: GFF3 file">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.gff3"
            directory="output/miRNA_prediction/Final_Candidates/"
            format="gff3"
            visible="true" />
        <filter> stage == "homology" </filter>
    </collection>
    <collection name="tab_files_h" type="list" label="${tool.name} on ${on_string}: Additional Files">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\-potential\.txt"
            directory="output/miRNA_prediction/Final_Candidates/"
            format="txt"
            visible="true" />
        <filter> stage == "homology" </filter>
    </collection>
    <collection name="yaml_files_h" type="list" label="${tool.name} on ${on_string}: YAML file">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.yaml"
            directory="output"
            format="yaml"
            visible="true"/>
        <filter> stage == "homology" </filter>
    </collection>

    <!-- Collections produced when stage == "complete" -->
    <collection name="gff3_files_c" type="list" label="${tool.name} on ${on_string}: GFF3 file">
        <!-- GFF3 files are gathered from both the homology candidates and the final evaluation -->
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.gff3"
            directory="output/miRNA_prediction/Final_Candidates/"
            format="gff3"
            visible="true" />
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.gff3"
            directory="output/Final_miRNA_evaluation/"
            ext="gff3"
            format="gff3"
            visible="true" />
        <filter> stage == "complete" </filter>
    </collection>
    <collection name="bed_files_c" type="list" label="${tool.name} on ${on_string}: BED file">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.bed"
            directory="output/Final_miRNA_evaluation/"
            ext="bed"
            format="bed"
            visible="true" />
        <filter> stage == "complete" </filter>
    </collection>
    <collection name="fasta_files_c" type="list" label="${tool.name} on ${on_string}: Fasta files">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.fasta"
            directory="output/Final_miRNA_evaluation/Fasta"
            ext="fasta"
            format="fasta"
            visible="true" />
        <filter> stage == "complete" </filter>
    </collection>
    <collection name="statistics_files_c" type="list" label="${tool.name} on ${on_string}: Summary file">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.txt"
            directory="output/Final_miRNA_evaluation/"
            ext="txt"
            format="txt"
            visible="true" />
        <filter> stage == "complete" </filter>
    </collection>
    <collection name="yaml_files_c" type="list" label="${tool.name} on ${on_string}: YAML file">
        <discover_datasets
            pattern="(?P&lt;designation&gt;.+)\.yaml"
            directory="output"
            ext="yaml"
            format="yaml"
            visible="true"/>
        <filter> stage == "complete" </filter>
    </collection>
</outputs>

<tests>
    <!-- All tests run the homology stage on a history-supplied dataset archive. -->

    <!-- HMM Rfam search -->
    <test expect_exit_code="0">
        <!-- genome and species -->
        <param name="speG" value="genome.fasta"/>
        <param name="speN" value="Unknown_species"/>
        <param name="speT" value="Unsg"/>
        <!-- database -->
        <param name="database_source_selector" value="history" />
        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
        <!-- search settings -->
        <param name="stage" value="homology"/>
        <param name="homology_mode" value="hmm"/>
        <param name="subset_models" value="fam.txt"/>
        <param name="nbitscore" value="1.0"/>
        <param name="repeat_filter" value="relax"/>
        <!-- expectations -->
        <output name="std_output" file="test_hmm.txt" ftype="txt"/>
        <output_collection name="gff3_files_h" count="1"/>
        <output_collection name="yaml_files_h" count="1"/>
        <output_collection name="tab_files_h" count="1"/>
    </test>

    <!-- Infernal Rfam search -->
    <test expect_exit_code="0">
        <!-- genome and species -->
        <param name="speG" value="genome.fasta"/>
        <param name="speN" value="Unknown_species"/>
        <param name="speT" value="Unsp"/>
        <!-- database -->
        <param name="database_source_selector" value="history" />
        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
        <!-- search settings -->
        <param name="stage" value="homology"/>
        <param name="homology_mode" value="rfam"/>
        <param name="subset_models" value="fam.txt"/>
        <param name="nbitscore" value="1.0"/>
        <param name="repeat_filter" value="relax"/>
        <!-- expectations -->
        <output name="std_output" file="test_rfam.txt" ftype="txt"/>
        <output_collection name="gff3_files_h" count="1"/>
        <output_collection name="yaml_files_h" count="1"/>
        <output_collection name="tab_files_h" count="1"/>
    </test>

    <!-- Infernal miRBase search -->
    <test expect_exit_code="0">
        <!-- genome and species -->
        <param name="speG" value="genome.fasta"/>
        <param name="speN" value="Unknown_species"/>
        <param name="speT" value="Unsd"/>
        <!-- database -->
        <param name="database_source_selector" value="history" />
        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
        <!-- search settings -->
        <param name="stage" value="homology"/>
        <param name="homology_mode" value="mirbase"/>
        <param name="subset_models" value="fam.txt"/>
        <param name="nbitscore" value="1.0"/>
        <param name="repeat_filter" value="relax"/>
        <!-- expectations -->
        <output name="std_output" file="test_mirbase.txt" ftype="txt"/>
        <output_collection name="gff3_files_h" count="1"/>
        <output_collection name="yaml_files_h" count="1"/>
        <output_collection name="tab_files_h" count="1"/>
    </test>

    <!-- Combined all homology search -->
    <test expect_exit_code="0">
        <!-- genome and species -->
        <param name="speG" value="genome.fasta"/>
        <param name="speN" value="Unknown_species"/>
        <param name="speT" value="Unsp"/>
        <!-- database -->
        <param name="database_source_selector" value="history" />
        <param name="dataset" value="Dataset_mirnature_tutorial.tar.gz"/>
        <!-- search settings, including the blast strategies and their query sequences -->
        <param name="stage" value="homology"/>
        <param name="homology_mode" value="blast,hmm,rfam,mirbase"/>
        <param name="blast_strategy" value="8,9"/>
        <param name="queries_to_blast" value="test.fasta"/>
        <param name="subset_models" value="fam.txt"/>
        <param name="nbitscore" value="1.0"/>
        <param name="repeat_filter" value="relax"/>
        <!-- expectations -->
        <output name="std_output" file="test_all_homology.txt" ftype="txt"/>
        <output_collection name="gff3_files_h" count="1"/>
        <output_collection name="yaml_files_h" count="1"/>
        <output_collection name="tab_files_h" count="1"/>
    </test>
</tests>

<help><![CDATA[
    
**miRNAture** detects *bona fide* miRNA candidates through sequence homology searches and validation steps using structural alignments
with pre-defined and/or modified miRNA-specific covariance models. The miRNAture pipeline is composed of three modules:

    #. Homology search operating on miRNA precursors
    #. Prediction of the positioning of mature miRNAs within the precursor mature annotation
    #. Evaluation scheme designed to identify false positive miRNA annotations.

This multi-stage approach generates annotation files in BED/GFF3 format for the precursors and the detected mature regions, together with the corresponding FASTA files.
In addition, it produces a summary file reporting the MFE, precursor length and number of loci of each annotated miRNA family.

    ]]></help>
    <expand macro="creators" />
    <expand macro="citations" />
</tool>