diff repeat_annotate_custom.xml @ 0:ea6a3059a6af draft

Uploaded
author petr-novak
date Mon, 18 Oct 2021 11:01:20 +0000
parents
children 7f1032da7a0a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repeat_annotate_custom.xml	Mon Oct 18 11:01:20 2021 +0000
@@ -0,0 +1,32 @@
+<tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.1" python_template_version="3.5">
+    <requirements> <!-- Packages requested for the job environment; no version attributes are set, so versions are not pinned. -->
+        <requirement type="package">repeatmasker</requirement> <!-- the command section invokes the RepeatMasker executable -->
+        <requirement type="package">bioconductor-rtracklayer</requirement> <!-- presumably needed by clean_rm_output.R (script not shown here); TODO confirm -->
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Mask the input with the custom library; -pa follows the slots Galaxy allocated to the job instead of a fixed 32.
+        RepeatMasker -dir \$(pwd) '$input' -pa "\${GALAXY_SLOTS:-1}" -lib '$repeat_library' -xsmall -nolow -no_is -e ncbi -s &&
+        ls -l * >&2 &&
+        ## The RepeatMasker table is picked up as [basename of input].out in the working directory and kept as the raw output.
+        cp "\$(basename '$input').out" '$output2' &&
+        ## clean_rm_output.R (shipped in the tool directory) receives the raw table and the path of the cleaned GFF3 output.
+        Rscript '${__tool_directory__}/clean_rm_output.R' '$output2' '$output1'
+        ]]></command>
+    <inputs>
+        <!-- Both datasets are FASTA: 'input' is the sequence to mask, 'repeat_library' is handed to RepeatMasker via -lib. -->
+        <param type="data" name="input" format="fasta" label="Genome/ Assembly to annotate" />
+        <param type="data" name="repeat_library" format="fasta" label="RepeatExplorer based Library of Repetitive Sequences"
+               help="custom database of repetitive sequences should be provided in fasta format. Sequence header should specify repeat class: &gt;sequence_id#classification_level1/classification_level2/..." />
+    </inputs>
+    <outputs>
+        <data name="output1" format="gff3"  label="Repeat Annotation on ${on_string}, cleaned gff"/> <!-- passed as the second argument to clean_rm_output.R, which presumably writes the cleaned GFF3 here; TODO confirm -->
+        <data name="output2" format="tabular" label="Raw output from RepeatMasker on ${on_string}" /> <!-- receives a copy of the RepeatMasker .out table (see the cp step in the command) -->
+    </outputs>
+    <help><![CDATA[
+        This tool uses RepeatMasker to annotate repetitive sequences in genome assemblies using a custom library of repeats created from RepeatExplorer output.
+        The library of repeats can be created from the contigs and TAREAN consensus sequences in RepeatExplorer output. The FASTA-formatted library of repeats must contain headers with information about the classification of repeats, in the form **>sequence_id#classification_level1/classification_level2/...**
+
+        Classification in a RepeatExplorer-based library follows predetermined classification levels. Users can, however, specify additional classification levels or completely custom classifications. Conflicts in annotations are resolved based on the classification hierarchy.
+    ]]></help>
+</tool>
+