0
|
1 <tool id="annotate_contigs" name="Format Repeat Library" version="0.1.0" python_template_version="3.5">
|
|
2 <requirements> <!-- Package dependency of the tool; presumably loaded by annotate_contigs.R (confirm against the script) -->
|
|
3 <requirement version="2.60.0" type="package">bioconductor-biostrings</requirement>
|
|
4 </requirements>
|
|
5 <command detect_errors="exit_code"><![CDATA[
|
|
6 Rscript '$__tool_directory__/annotate_contigs.R' '$contigs' '$cluster_table' '$annotated_contigs'
|
|
7 ]]></command> <!-- Runs annotate_contigs.R with three positional arguments: contigs FASTA, cluster table, output FASTA. Rscript is called explicitly so the script needs no executable bit, and the script path is single-quoted so install directories containing spaces do not split the command line. A non-zero exit code marks the job as failed. -->
|
|
8 <inputs> <!-- Two dataset inputs; their paths become the first and second command-line arguments -->
|
|
9 <param name="contigs" type="data" format="fasta" label="Contigs - Library of Repeats from TAREAN/RepeatExplorer pipeline"/>
|
|
10 <param name="cluster_table" type="data" format="txt" label="CLUSTER_TABLE from RepeatExplorer pipeline" help="CLUSTER_TABLE which contain annotation of clusters from RepeatExplorer pipeline"/>
|
|
11 </inputs>
|
|
12 <outputs> <!-- Single FASTA dataset; its path is the third command-line argument, and its label cites the history ids of both inputs -->
|
|
13 <data format="fasta" name="annotated_contigs" label="Annotated Repeat Library based on ${contigs.hid} and ${cluster_table.hid}"/>
|
|
14 </outputs>
|
|
15 <help><![CDATA[
|
|
16 **What this tool does**
|
|
17
|
|
18 Contigs from RepeatExplorer archive are annotated based on the classification of repeats from cluster_table.
|
|
19
|
|
20 Preformatted CLUSTER_TABLE can be extracted from the RepeatExplorer archive and modified accordingly. By default, the "Final_annotation" column is used to append annotation to contigs in the repeat library (based on the cluster id). If the "Final_annotation" column is incomplete, the "Automatic_annotation" column is used instead.
|
|
21
|
|
22 Example of tab delimited CLUSTER_TABLE::
|
|
23
|
|
24
|
|
25 "Number_of_reads_in_clusters" 3886
|
|
26 "Number_of_clusters" 822
|
|
27 "Number_of_superclusters" 821
|
|
28 "Number_of_singlets" 6114
|
|
29 "Number_of_analyzed_reads" 10000
|
|
30
|
|
31 "Cluster" "Supercluster" "Size" "Size_adjusted" "Automatic_annotation" "TAREAN_annotation" "Final_annotation"
|
|
32 1 1 260 260 "All/repeat/satellite" "Putative satellites (low confidence)" ""
|
|
33 2 2 157 157 "All/repeat/satellite" "Putative satellites (low confidence)" ""
|
|
34 3 4 100 100 "All" "Other" ""
|
|
35 4 5 83 83 "All" "Other" ""
|
|
36 5 3 77 77 "All" "Other" ""
|
|
37 6 3 65 65 "All" "Other" ""
|
|
38 7 6 61 61 "All" "Other" ""
|
|
39 8 7 58 58 "All" "Other" ""
|
|
40 9 8 53 53 "All" "Other" ""
|
|
41 10 9 53 53 "All" "Other" ""
|
|
42 11 10 51 51 "All" "Other" ""
|
|
43 12 11 45 45 "All" "Other" ""
|
|
44 13 12 44 44 "All" "Other" ""
|
|
45 14 13 44 44 "All" "Other" ""
|
|
46 15 14 39 39 "All" "Other" ""
|
|
47 16 15 37 37 "All" "Other" ""
|
|
48 17 16 30 30 "All/repeat/satellite" "Putative satellites (low confidence)" ""
|
|
49 18 17 28 28 "All/repeat/satellite" "Putative satellites (low confidence)" ""
|
|
50 19 18 26 26 "All/repeat/satellite" "Putative satellites (high confidence)" ""
|
|
51 20 19 23 23 "All/repeat/../CRM" "Other" ""
|
|
52 21 20 21 21 "All" "Other" ""
|
|
53 22 21 21 21 "All" "Other" ""
|
|
54 23 22 21 21 "All" "Other" ""
|
|
55 24 23 21 21 "All" "Other" ""
|
|
56 25 24 20 20 "All/repeat/../Ogre" "Other" ""
|
|
57
|
|
58
|
|
59 Only the Cluster and Automatic_annotation/Final_annotation columns are mandatory.
|
|
60
|
|
61 Clusters with a higher number than those in CLUSTER_TABLE are removed from the repeat library.
|
|
62
|
|
63 Contigs are provided in the following format::
|
|
64
|
|
65
|
|
66 >CL25Contig1
|
|
67 AGATCAAGATGGCGCCGGAGGACATGGAGAAAACGACGTTTATCACTCCCTGGGGAACATTTTGCTACAAGGTAATGCCT
|
|
68 TTCGGTCTGAAGAACGCAGGGGCCACTTACCAACGAGCAATGGTAACTT
|
|
69 >CL1Contig4#All/repeat/satellite
|
|
70 ACCCGAAGGCCGGCTCAACCCGAAGTTGAGAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGA
|
|
71 A
|
|
72 >CL1Contig5
|
|
73 TGAGAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGAACCCGAAGGCCGGCTCAACCCGAAGT
|
|
74 TGATAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGAACCCGAAGGCCGGCTCAACACGAAGT
|
|
75 TGAGAGGAACATCTGACCTCGCCGTCAGGCATCTGTTAAA
|
|
76
|
|
77
|
|
78 The resulting repeat library will have the following format::
|
|
79
|
|
80 >CL25Contig1#All/repeat/mobile_element/Class_I/LTR/Ty3_gypsy/non-chromovirus/OTA/Tat/Ogre
|
|
81 AGATCAAGATGGCGCCGGAGGACATGGAGAAAACGACGTTTATCACTCCCTGGGGAACATTTTGCTACAAGGTAATGCCT
|
|
82 TTCGGTCTGAAGAACGCAGGGGCCACTTACCAACGAGCAATGGTAACTT
|
|
83 >CL1Contig4#All/repeat/satellite
|
|
84 ACCCGAAGGCCGGCTCAACCCGAAGTTGAGAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGA
|
|
85 A
|
|
86 >CL1Contig5#All/repeat/satellite
|
|
87 TGAGAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGAACCCGAAGGCCGGCTCAACCCGAAGT
|
|
88 TGATAAGAACATCTGACCTCGCCGTCAGGCATCTGTTAAACAAACAGGCATCGAACCCGAAGGCCGGCTCAACACGAAGT
|
|
89 TGAGAGGAACATCTGACCTCGCCGTCAGGCATCTGTTAAA
|
|
90
|
|
91
|
|
92
|
|
93
|
|
94 ]]></help>
|
|
95 </tool>
|