<!--
view repeat_annotate_custom.xml @ 11:5366d5ea04bc draft

planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
author petr-novak
date Fri, 04 Aug 2023 12:35:32 +0000
parents 276efc4cb17f
children 755a4d643184
line wrap: on
line source
-->

<tool id="repeat_annotate" name="Library Based Assembly Annotation" version="0.1.6" python_template_version="3.5">
    <requirements>
        <!-- Supplies the RepeatMasker executable called in the command section. -->
        <requirement version="4.1.2.p1" type="package">repeatmasker</requirement>
        <!-- NOTE(review): no version is pinned for this package; presumably it is needed by clean_rm_output.R. Confirm and pin. -->
        <requirement type="package">bioconductor-rtracklayer</requirement>
    </requirements>
    <required_files>
        <!-- R script that the command section runs with Rscript after RepeatMasker finishes. -->
        <include path="clean_rm_output.R" type="literal"/>
    </required_files>
    <command detect_errors="exit_code"><![CDATA[
        ## Step 1: annotate the assembly with RepeatMasker against the
        ## user-supplied library. Results land in the job working directory.
        ## The number of parallel jobs follows the scheduler allocation
        ## (GALAXY_SLOTS, falling back to 1) instead of a hard-coded value.
        RepeatMasker
            -dir "\$(pwd)"
            '$input'
            -pa "\${GALAXY_SLOTS:-1}"
            -lib '$repeat_library'
            -xsmall
            $nolow
            -no_is
            -e ncbi
            ## "default" means no extra flag; otherwise the select value
            ## (-s or -q) is passed through as-is.
            #if str($sensitivity) != 'default'
                $sensitivity
            #end if
        &&
        ## Step 2: list the working directory on stderr for troubleshooting.
        ls -l * >&2
        &&
        ## Step 3: RepeatMasker names its table after the input file's
        ## basename plus ".out"; copy it to the raw-output dataset.
        cp "\$(basename '$input').out" '$output2'
        &&
        ## Step 4: post-process the raw table with the bundled R script,
        ## which writes the first output dataset.
        Rscript '${__tool_directory__}/clean_rm_output.R' '$output2' '$output1'
        ]]></command>
    <inputs>
        <!-- Assembly handed to RepeatMasker as the sequence file to annotate. -->
        <param name="input" type="data" format="fasta" label="Genome assembly to annotate"/>
        <!-- Repeat library handed to RepeatMasker through its -lib option. -->
        <param name="repeat_library" type="data" format="fasta" label="RepeatExplorer based Library of Repetitive Sequences"
               help="custom database of repetitive sequences should be provided in fasta format. Sequence header should specify repeat class:
                     >sequence_id#classification_level1/classification_level2/..." />
        <!-- "default" adds no flag; any other value is appended to the RepeatMasker command line as-is. -->
        <param name="sensitivity" type="select" label="sensitivity">
            <option selected="true" value="default">Default sensitivity </option>
            <option value="-s">Slow search, more sensitive </option>
            <option value="-q">Quick search, less sensitive </option>
        </param>
        <!-- Inverted flag: checked emits nothing, unchecked (the initial state) emits -nolow. -->
        <param argument="-nolow"
               type="boolean"
               checked="false"
               truevalue=""
               falsevalue="-nolow"
               label="Perform masking of low complexity and simple repeats"/>
    </inputs>
    <outputs>
        <!-- Written by clean_rm_output.R, which receives output2 as its first argument and this dataset as its second. -->
        <data name="output1" label="Repeat Annotation (GFF3) on ${on_string}" format="gff3"/>
        <!-- Copy of the .out file that RepeatMasker leaves in the working directory. -->
        <data name="output2" label="Raw output from RepeatMasker on ${on_string}" format="tabular"/>
    </outputs>
    <help><![CDATA[
        This tool uses RepeatMasker to annotate repetitive sequences in genome assemblies using a custom library of repeats created either from RepeatExplorer output or from a DANTE_LTR transposable element library.
        A library of repeats can be created from the contigs and TAREAN consensus sequences in RepeatExplorer output.
        The FASTA-formatted library of repeats must have sequence headers that specify the classification of each repeat as **>sequence_id#classification_level1/classification_level2/...**

        Classification in a RepeatExplorer-based library follows predetermined classification levels. Users can, however, specify additional classification levels or custom classifications.
        Conflicts in annotations are resolved based on the classification hierarchy.
    ]]></help>
</tool>