annotate TEannot.xml @ 0:b126ea31824f draft default tip

1st Uploaded
author vmarcon
date Mon, 06 Feb 2017 13:37:49 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
1 <tool id="teannot" name="REPET Lite - TEannot" version="1.5.0">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
2
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
3 <!-- [REQUIRED] Tool description displayed after the tool name -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
4 <description> Genome annotation for masking transposable elements</description>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
5
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
6 <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
7 <requirements>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
8 <requirement type="binary">python</requirement>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
9 <requirement type="package" version="2.5">repet</requirement>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
10 </requirements>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
11
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
12 <!-- [STRONGLY RECOMMENDED] Exit code rules -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
13 <stdio>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
14 <!-- Anything other than zero is an error -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
15 <exit_code range="1:" level="fatal"/>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
16 <exit_code range=":-1" level="fatal"/>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
17
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
18 </stdio>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
19
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
20
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
21 <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
22 <version_command>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
23 <!--
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
24 tool_binary -v
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
25 -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
26 </version_command>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
27
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
28 <!-- [REQUIRED] The command to execute -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
29 <command interpreter="bash">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
30 TEannot.sh $fasta $library $outputfile $outputmaskedfile $outputlog $outputconfig
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
31 #if str( $withStats ) == "yes":
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
32 $outputstatsfile
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
33 #else :
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
34 $withStats
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
35 #end if
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
36 $classif
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
37 $outputmasked_SSRmaskfile
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
38 </command>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
39
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
40 <!-- [REQUIRED] Input files and tool parameters -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
41 <inputs>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
42 <param name="fasta" type="data" format="fasta" optional="false" label="Fasta alignment input" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
43 <param name="library" type="data" format="fasta" optional="false" label="Fasta TE library [from TEdenovo]" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
44 <param name="classif" type="data" format="tabular" optional="true" label="Classification file" help="To add classification information in the output file." />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
45 <param name="label" type="text" label="Output name" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
46 <param name="withStats" type="select" label="Get statistical file">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
47 <option value="yes" selected="true">Yes</option>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
48 <option value="no">No</option>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
49 </param>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
50 </inputs>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
51
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
52 <!-- [REQUIRED] Output files -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
53 <outputs>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
54 <data name="outputlog" type="data" format="txt" label="TEannot-#if str($label)=='' then $fasta.name else $label #.log" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
55 <data name="outputfile" type="data" format="gff3" label="TEannot-#if str($label)=='' then $fasta.name else $label #.gff3" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
56 <data name="outputmaskedfile" type="data" format="fasta" label="TEannot-#if str($label)=='' then $fasta.name else $label #_masked.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
57 <data name="outputmasked_SSRmaskfile" type="data" format="fasta" label="TEannot-#if str($label)=='' then $fasta.name else $label #_SSRmask.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
58 <data name="outputstatsfile" type="data" format="txt" label="TEannot-#if str($label)=='' then $fasta.name else $label #.stats" >
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
59 <filter>(withStats == 'yes')</filter>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
60 </data>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
61 <data name="outputconfig" type="data" format="txt" label="TEannot-#if str($label)=='' then $fasta.name else $label #.cfg" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
62 </outputs>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
63
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
64
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
65 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
66 <tests>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
67 <!-- [HELP] Test files have to be in the ~/test-data directory -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
68 <test>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
69 <param name="fasta" value="alignment.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
70 <param name="library" value="libTE.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
71 <output name="outputfile" >
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
72 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
73 <has_line_matching expression="^##gff-version 3" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
74 <has_n_columns n="9" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
75 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
76 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
77 <output name="outputmaskedfile" >
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
78 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
79 <has_line_matching expression="^>\w+" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
80 <has_line_matching expression="[ACTGX]{60}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
81 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
82 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
83 <output name="outputstatsfile">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
84 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
85 <has_line_matching expression="^nb of sequences:" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
86 <has_line_matching expression="^mean of median length percentage of all families:" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
87 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
88 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
89 <output name="outputlog">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
90 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
91 <has_line_matching expression="^step 7 finished successfully" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
92 <has_line_matching expression="^END time: \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
93 <has_line_matching expression="^Writing fasta file" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
94 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
95 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
96 <output name="outputconfig">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
97 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
98 <has_line_matching expression="^project_name: \d{8}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
99 <has_line_matching expression="^repet_version: 2.5" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
100 <has_line_matching expression="^tmpDir:" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
101 <has_line_matching expression="^clean: yes" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
102 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
103 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
104 </test>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
105 <test>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
106 <param name="fasta" value="alignment.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
107 <param name="library" value="libTE.fa" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
108 <param name="classif" value="libTE.classif" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
109 <param name="withStats" value="no"/>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
110 <output name="outputfile">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
111 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
112 <has_line_matching expression="^##gff-version 3" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
113 <has_n_columns n="9" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
114 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
115 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
116 <output name="outputmaskedfile">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
117 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
118 <has_line_matching expression="^>\w+" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
119 <has_line_matching expression="[ACTGX]{60}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
120 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
121 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
122 <output name="outputlog">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
123 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
124 <has_line_matching expression="^step 7 finished successfully" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
125 <has_line_matching expression="^END time: \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
126 <has_line_matching expression="^Writing fasta file" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
127 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
128 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
129 <output name="outputconfig">
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
130 <assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
131 <has_line_matching expression="^project_name: \d{8}" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
132 <has_line_matching expression="^repet_version: 2.5" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
133 <has_line_matching expression="^tmpDir:" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
134 <has_line_matching expression="^clean: yes" />
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
135 </assert_contents>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
136 </output>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
137 </test>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
138 </tests>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
139
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
140 <!-- [OPTIONAL] Help displayed in Galaxy -->
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
141 <help>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
142 <![CDATA[
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
143 .. class:: infomark
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
144
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
145 **Authors**
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
146 Gwendoline Andres
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
147 Valentin Marcon
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
148 Veronique Jamilloux
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
149 Olivier Inizan
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
150
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
151 ---------------------------------------------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
152
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
153 .. class:: infomark
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
154
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
155 **Please cite** If you use this tool, please cite the publications listed in the citations of this tool.
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
156
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
157 ---------------------------------------------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
158
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
159 ==============
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
160 TEannot Lite
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
161 ==============
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
162
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
163 -----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
164 Description
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
165 -----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
166
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
167 REPET is a package for the detection and annotation of transposable elements (TEs). The light version available on Galaxy is specialised in transposable element masking.
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
168 TEannot is the second and last step to mask TE on the genome.
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
169 For a detailed description of each parameter used, please consult the Galaxy page in "Shared Data > Published Pages"
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
170
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
171 -----------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
172 Workflow position
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
173 -----------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
174
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
175 **Upstream tools**
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
176
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
177 =========== ========================== =======
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
178 Name output file(s) format
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
179 =========== ========================== =======
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
180 TEdenovo Fasta file with TE library fasta
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
181 =========== ========================== =======
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
182
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
183
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
184 ----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
185 Input file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
186 ----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
187
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
188 Fasta file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
189 Genome file in fasta format
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
190
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
191 Library file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
192 Fasta file with a library of transposable elements from TEdenovo.
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
193
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
194 ----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
195 Parameters
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
196 ----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
197
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
198 Masked file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
199 To get an additional output file: Masked fasta file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
200
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
201
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
202 ------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
203 Output files
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
204 ------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
205
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
206 Output_gff3
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
207 GFF3 file with transposable elements
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
208 Output_masked_fasta
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
209 Input fasta file with the annotated TEs masked
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
210 Output_config
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
211 File to show which params have been used
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
212 Output_stats
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
213 File with statistics on TE library
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
214
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
215 ------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
216 Dependencies
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
217 ------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
218
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
219
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
220 ---------------------------------------------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
221
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
222 ---------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
223 Working example
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
224 ---------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
225
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
226 Input files
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
227 ===========
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
228
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
229 Fasta file
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
230 ----------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
231
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
232 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
233
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
234 >dmel_chr4
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
235 GAGAACCGTCCTGTAAGTACTCTTGCTTTAAATACGAAAGTAATACTAATCCATGACGCTTAAGTCGAAGAGAGAATAAGTCAATATTTAATTGGACTCATCGCTTATGTTCATCATGAATCTATAGTTAACTTGATGTTGTGCTCCATGTACGATATAAAAAGTTAGATA
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
236
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
237
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
238 Fasta Library
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
239 -------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
240
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
241 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
242
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
243 >DTX-incomp_20150325110123-B-G1-Map3
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
244 ATACAGCTGCGGTTAAAATAATAGCACTACTGCAGGTGGAAAGTTGATTTCCTAAAAAAA
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
245 ATTATTAAATGTTTATATTTTTTTAAGTCAGATTGCATGAATAATAAGTACCATATGTTG
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
246 GCTCTCTGAGCAAGAAATTTTTAGTCTCT
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
247 >DTX-incomp_20150325110123-B-P1.0-Map3
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
248 CTTGTGTCCGCACTTCGTGCCTCAAGATATGAACAAAGCAAAGACACTAGAATAATTCTA
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
249 GTGTATTACTTTGATATTACTTTTGCAATAAACAGTTATCATATTTTTA
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
250
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
251
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
252 Output files
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
253 ============
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
254
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
255 GFF3 output :
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
256 -------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
257
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
258 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
259
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
260 ##gff-version 3
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
261 dmel_chr4 test_REPET_TEs match 971161 971469 0.0 - . ID=ms1_dmel_chr4_DTX-incomp_DmelChr4-B-G1-Map3;Target=DTX-incomp_DmelChr4-B-G1-Map3 45 542
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
262 dmel_chr4 test_REPET_TEs match_part 971161 971271 0.0 - . ID=mp1-1_dmel_chr4_DTX-incomp_DmelChr4-B-G1-Map3;Parent=ms1_dmel_chr4_DTX-incomp_DmelChr4-B-G1-Map3;Target=DTX-incomp_DmelChr4-B-G1-Map3 435 542;Identity=94.4
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
263
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
264 Masked fasta output :
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
265 ---------------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
266
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
267 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
268
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
269 >dmel_chr4
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
270 GAGAACCGTCCTGTAAGTACTCTTGCTTTAAATACGXXXXXXXXXXXXXXXXXXXXACGCTTAAGTCGAAGAGAGAATAAGTCAATATTTAATTGGACTCATCGCTTATGTTCATCATGAATCTATAGTTAACTTGATGTTGTGCTCCATGTACGATATAAAAAGTTAGATA
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
271
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
272 Config file :
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
273 -------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
274
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
275 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
276
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
277 [repet_env]
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
278 repet_version: 2.4
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
279 repet_host: ******
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
280 repet_user: ******
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
281
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
282 Statistics file :
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
283 -----------------
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
284
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
285 ::
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
286
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
287 nb of sequences: 8
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
288 nb of matched sequences: 8
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
289 cumulative coverage: 133656 bp
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
290
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
291 ]]>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
292 </help>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
293
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
294 <citations>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
295 <citation type="bibtex"><![CDATA[@article{10.1371/journal.pone.0016526,
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
296 author = {Flutre, Timothée AND Duprat, Elodie AND Feuillet, Catherine AND Quesneville, Hadi},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
297 journal = {PLoS ONE},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
298 publisher = {Public Library of Science},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
299 title = {Considering Transposable Element Diversification in <italic>De Novo</italic> Annotation Approaches},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
300 year = {2011},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
301 month = {01},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
302 volume = {6},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
303 url = {http://dx.doi.org/10.1371%2Fjournal.pone.0016526},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
304 pages = {e16526},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
305 abstract = {
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
306 <p>Transposable elements (TEs) are mobile, repetitive DNA sequences that are almost ubiquitous in prokaryotic and eukaryotic genomes. They have a large impact on genome structure, function and evolution. With the recent development of high-throughput sequencing methods, many genome sequences have become available, making possible comparative studies of TE dynamics at an unprecedented scale. Several methods have been proposed for the <italic>de novo</italic> identification of TEs in sequenced genomes. Most begin with the detection of genomic repeats, but the subsequent steps for defining TE families differ. High-quality TE annotations are available for the <italic>Drosophila melanogaster</italic> and <italic>Arabidopsis thaliana</italic> genome sequences, providing a solid basis for the benchmarking of such methods. We compared the performance of specific algorithms for the clustering of interspersed repeats and found that only a particular combination of algorithms detected TE families with good recovery of the reference sequences. We then applied a new procedure for reconciling the different clustering results and classifying TE sequences. The whole approach was implemented in a pipeline using the REPET package. Finally, we show that our combined approach highlights the dynamics of well defined TE families by making it possible to identify structural variations among their copies. This approach makes it possible to annotate TE families and to study their diversification in a single analysis, improving our understanding of TE dynamics at the whole-genome scale and for diverse species.</p>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
307 },
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
308 number = {1},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
309 doi = {10.1371/journal.pone.0016526}
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
310 }]]></citation>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
311 <citation type="bibtex"><![CDATA[@article{10.1371/journal.pone.0094101,
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
312 author = {Maumus, Florian AND Quesneville, Hadi},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
313 journal = {PLoS ONE},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
314 publisher = {Public Library of Science},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
315 title = {Deep Investigation of <italic>Arabidopsis thaliana</italic> Junk DNA Reveals a Continuum between Repetitive Elements and Genomic Dark Matter},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
316 year = {2014},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
317 month = {04},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
318 volume = {9},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
319 url = {http://dx.doi.org/10.1371%2Fjournal.pone.0094101},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
320 pages = {e94101},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
321 abstract = {<p>Eukaryotic genomes contain highly variable amounts of DNA with no apparent function. This so-called junk DNA is composed of two components: repeated and repeat-derived sequences (together referred to as the repeatome), and non-annotated sequences also known as genomic dark matter. Because of their high duplication rates as compared to other genomic features, transposable elements are predominant contributors to the repeatome and the products of their decay is thought to be a major source of genomic dark matter. Determining the origin and composition of junk DNA is thus important to help understanding genome evolution as well as host biology. In this study, we have used a combination of tools enabling to show that the repeatome from the small and reducing <italic>A. thaliana</italic> genome is significantly larger than previously thought. Furthermore, we present the concepts and results from a series of innovative approaches suggesting that a significant amount of the <italic>A. thaliana</italic> dark matter is of repetitive origin. As a tentative standard for the community, we propose a deep compendium annotation of the <italic>A. thaliana</italic> repeatome that may help addressing farther genome evolution as well as transcriptional and epigenetic regulation in this model plant.</p>},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
322 number = {4},
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
323 doi = {10.1371/journal.pone.0094101}
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
324 }]]></citation>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
325 </citations>
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
326
b126ea31824f 1st Uploaded
vmarcon
parents:
diff changeset
327 </tool>