annotate repeat_annotate_custom.xml @ 2:3f8ae272f4f3 draft

Uploaded
author petr-novak
date Thu, 07 Oct 2021 07:29:59 +0000
parents cf3cea0a3039
children e955b40ad3a4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
1 <tool id="repeat_annotate" name="RepeatExplorer Based Assembly Annotation" version="0.1.0" python_template_version="3.5">
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
2 <requirements>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
3 <requirement type="package">repeatmasker</requirement>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
4 <requirement type="package">bioconductor-rtracklayer</requirement>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
5 </requirements>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
6 <command detect_errors="exit_code"><![CDATA[
2
3f8ae272f4f3 Uploaded
petr-novak
parents: 0
diff changeset
7 RepeatMasker -dir "\$(pwd)" '$input' -pa \${GALAXY_SLOTS:-1} -lib '$repeat_library' -xsmall -nolow -no_is -e ncbi -s
0
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
8 &&
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
9 ls -l * >&2 &&
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
10 cp "\$(basename '$input').out" '$output2'
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
11 &&
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
12 Rscript '${__tool_directory__}/clean_rm_output.R' '$output2' '$output1'
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
13
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
14 ]]></command>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
15 <inputs>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
16 <param type="data" name="input" format="fasta" label="Genome/ Assembly to annotate" />
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
17 <param type="data" name="repeat_library" format="fasta" label="RepeatExplorer based Library of Repetitive Sequences"
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
18 help="custom database of repetitive sequences should be provided in fasta format. Sequence header should specify repeat class:
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
19 >sequence_id#classification_level1/classification_level2/..." />
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
20 </inputs>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
21 <outputs>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
22 <data name="output1" format="gff3" label="Repeat Annotation on ${on_string}, cleaned gff"/>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
23 <data name="output2" format="tabular" label="RepeatMasker on ${on_string}, original output" />
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
24 </outputs>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
25 <help><![CDATA[
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
26 This tool uses RepeatMasker to annotate repetitive sequences in a genome assembly using a custom library of repeats created from RepeatExplorer output.
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
27 The library of repeats created from RepeatExplorer output consists of contigs and TAREAN consensus sequences in FASTA format, where each sequence header contains information about the classification of the repeat as **>sequence_id#classification_level1/classification_level2/...**
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
28
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
29 Classification in a RepeatExplorer-based library follows predetermined classification levels. The user can, however, specify additional classification levels or completely custom classifications. Conflicts in annotations are resolved based on the classification hierarchy.
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
30 ]]></help>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
31 </tool>
cf3cea0a3039 Uploaded
petr-novak
parents:
diff changeset
32