comparison gffcompare.xml @ 16:24d0a81802d6 draft

Uploaded
author enios
date Sat, 22 May 2021 07:30:49 +0000
parents
children
comparison
equal deleted inserted replaced
15:97abe4dc3ee3 16:24d0a81802d6
<tool name="GffCompare" id="gffcompare">
    <!-- One-line summary of the tool. -->
    <description>compare assembled transcripts to a reference annotation</description>
    <!-- The tool's only declared requirement is this Docker image. -->
    <requirements>
        <container type="docker">bianca7/lncrna:gffcompare</container>
    </requirements>
    <!-- Version detection is currently disabled; the original command is kept for reference. -->
    <!--version_command>gffcompare -v | awk '{print $2}'</version_command-->
<command detect_errors="aggressive"><![CDATA[
    #import re

    ## Symlink each input under a name built from its element identifier, with
    ## every character other than word characters and '-' replaced by '_'.
    ## The same names are passed to gffcompare at the end of the command line.
    #set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs]
    #for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers):
        ln -s '$input' '$escaped_element_identifier' &&
    #end for

    ## Optional reference sequence: history dataset or cached index, linked as ref_seq.fa
    #if $seq_data.use_seq_data == "Yes":
        #if $seq_data.seq_source.index_source == "history":
            ln -s '${seq_data.seq_source.ref_file}' ref_seq.fa &&
        #else:
            ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
        #end if
    #end if

    ## Optional reference annotation: history dataset or cached gene set, linked as ref_annotation
    #if $annotation.use_ref_annotation == "Yes":
        #if $annotation.ref_source.ref_source_sel == "history":
            ln -s '${annotation.ref_source.reference_annotation}' ref_annotation &&
        #else:
            ln -s '${annotation.ref_source.index.fields.path}' ref_annotation &&
        #end if
    #end if

    gffcompare
    ## Use annotation reference?
    #if $annotation.use_ref_annotation == "Yes":
        -r ref_annotation
        $annotation.ignore_nonoverlapping_reference
        $annotation.ignore_nonoverlapping_transfrags
        $annotation.strict_match
    #end if

    ## Pass -T only when neither tmap nor refmap output was selected.
    ## The selection is tested for truthiness instead of being compared to an
    ## empty string, so that an empty multi-select is recognised whether it is
    ## exposed as an empty string, an empty list or None.
    #if not $annotation.refmap_tmap:
        -T
    #end if

    ## Use sequence data?
    #if $seq_data.use_seq_data == "Yes":
        -s ref_seq.fa
    #end if

    $discard_single_exon
    $discard_duplicates
    $no_merge
    -e $max_dist_exon
    -d $max_dist_group
    $chr_stats
    -p '$adv_output.p'
    $adv_output.A
    $adv_output.C
    $adv_output.X
    $adv_output.K

    ## Query files, in the same order as the symlinks created above
    #for $escaped_element_identifier in $escaped_element_identifiers:
        '$escaped_element_identifier'
    #end for

]]></command>
<inputs>
    <param name="gffinputs" type="data" format="gtf" multiple="true" label="GTF inputs for comparison" help="" />
    <!-- Both branches of this conditional define refmap_tmap, because the command
         template reads annotation.refmap_tmap regardless of the chosen branch. -->
    <conditional name="annotation">
        <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
            <option value="No">No</option>
            <option value="Yes">Yes</option>
        </param>
        <when value="Yes">
            <conditional name="ref_source">
                <param name="ref_source_sel" type="select" label="Choose the source for the reference annotation">
                    <option value="cached">Locally cached</option>
                    <option value="history">History</option>
                </param>
                <when value="cached">
                    <param name="index" argument="-r" type="select" label="Using reference annotation">
                        <options from_data_table="gene_sets">
                            <!--filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /-->
                        </options>
                        <validator type="no_options" message="No reference annotation is available for the build associated with the selected input dataset" />
                    </param>
                </when>
                <when value="history">
                    <param name="reference_annotation" argument="-r" type="data" format="gff3,gtf" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format." />
                </when>
            </conditional>
            <param name="ignore_nonoverlapping_reference" argument="-R" type="boolean" truevalue="-R" falsevalue=""
                   label="Ignore reference transcripts that are not overlapped by any input transfrags"
                   help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
            <param name="ignore_nonoverlapping_transfrags" argument="-Q" type="boolean" truevalue="-Q" falsevalue=""
                   label="Ignore input transcripts that are not overlapped by any reference transcripts"
                   help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
            <param name="strict_match" argument="--strict-match" type="boolean" checked="false" truevalue="--strict-match" falsevalue=""
                   label="the match code '=' is only assigned when all exon boundaries match"
                   help="code '~' is assigned for intron chain match or single-exon" />
            <param name="refmap_tmap" argument="-T" type="select" multiple="true" label="Generate tmap or refmap file for each input file">
                <option value="refmap" selected="true">refmap</option>
                <option value="tmap" selected="true">tmap</option>
            </param>
        </when>
        <when value="No">
            <param name="refmap_tmap" argument="-T" type="select" multiple="true" label="Generate tmap file for each input file">
                <option value="tmap" selected="true">tmap</option>
            </param>
        </when>
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" type="select" label="Use Sequence Data"
               help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
            <option value="No">No</option>
            <option value="Yes">Yes</option>
        </param>
        <when value="No"/>
        <when value="Yes">
            <conditional name="seq_source">
                <param name="index_source" type="select" label="Choose the source for the reference sequence">
                    <option value="cached">Locally cached</option>
                    <option value="history">History</option>
                </param>
                <when value="cached">
                    <param name="index" argument="-s" type="select" label="Using reference genome">
                        <options from_data_table="fasta_indexes">
                            <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" />
                        </options>
                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
                    </param>
                </when>
                <when value="history">
                    <param name="ref_file" argument="-s" type="data" format="fasta" label="Using reference file" />
                </when>
            </conditional>
        </when>
    </conditional>
    <param name="discard_single_exon" argument="-M/-N" type="select" label="Discard single-exon transcripts">
        <option value="" selected="true">No</option>
        <option value="-M">Discard single-exon transfrags and reference transcripts</option>
        <option value="-N">Discard single-exon reference transcripts</option>
    </param>
    <!-- The note about -S combined with strict matching belongs to this option,
         not to the single-exon selector above. -->
    <param name="discard_duplicates" type="select" label="Discard duplicates"
           help="If -S and also --strict-match is given, exact matching of all exon boundaries is required">
        <option value="">None</option>
        <option value="-D">discard 'duplicate' query transfrags within a single sample (-D)</option>
        <option value="-S">Only discard 'duplicate' query or reference transcripts if their boundaries are fully contained within other, larger or identical transfrags (-S)</option>
    </param>
    <param name="no_merge" argument="--no-merge" type="boolean" checked="false" truevalue="--no-merge" falsevalue=""
           label="Disable close-exon merging"
           help="Default: merge exons separated by 'introns' shorter than 5 bases" />
    <param name="max_dist_exon" argument="-e" type="integer" value="100"
           label="Max. Distance for assessing exon accuracy"
           help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
    <param name="max_dist_group" argument="-d" type="integer" value="100"
           label="Max distance for transcript grouping"
           help="max. distance (range) for grouping transcript start sites. Default: 100" />
    <param name="chr_stats" argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue=""
           label="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" />
    <section name="adv_output" title="Options for the combined GTF output file">
        <param argument="-p" type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf" />
        <param argument="-C" type="boolean" checked="false" truevalue="-C" falsevalue=""
               label="discard matching and 'contained' transfrags"
               help="i.e. collapse intron-redundant transfrags across all query files" />
        <param argument="-A" type="boolean" checked="false" truevalue="-A" falsevalue=""
               label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon"
               help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" />
        <param argument="-X" type="boolean" checked="false" truevalue="-X" falsevalue=""
               label="discard the 'contained' transfrags also if ends stick out within the container's introns"
               help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
        <param argument="-K" type="boolean" checked="false" truevalue="-K" falsevalue=""
               label="do NOT discard any redundant transfrag matching a reference"
               help="for -C/-A/-X" />
    </section>
</inputs>
<outputs>
    <!-- These three datasets have no filter and are always created. -->
    <data name="transcripts_stats" format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" />
    <data name="transcripts_loci" format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" />
    <data name="transcripts_tracking" format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${gffinputs[0].hid} tracking file" />
    <!-- Combined GTF: kept when more than one input is given or no reference annotation is used. -->
    <data name="transcripts_combined" format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts">
        <filter>(isinstance(gffinputs, list) and len(gffinputs) > 1) or annotation['use_ref_annotation'] == "No"</filter>
    </data>
    <!-- Annotated GTF: the complementary case, i.e. a single input together with a reference annotation. -->
    <data name="transcripts_annotated" format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts">
        <filter>not (isinstance(gffinputs, list) and len(gffinputs) > 1) and annotation['use_ref_annotation'] == "Yes"</filter>
    </data>
    <!-- Per-input refmap/tmap files are collected only when the matching option is selected;
         the part of the file name between "gffcmp." and the extension becomes the element name. -->
    <collection name="refmap_output" type="list" label="${tool.name} on ${on_string}: refmap">
        <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" />
        <filter>annotation['refmap_tmap'] is not None and 'refmap' in annotation['refmap_tmap']</filter>
    </collection>
    <collection name="tmap_output" type="list" label="${tool.name} on ${on_string}: tmap">
        <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" />
        <filter>annotation['refmap_tmap'] is not None and 'tmap' in annotation['refmap_tmap']</filter>
    </collection>
</outputs>
170 <tests>
<!-- Two inputs, no reference annotation, no sequence data, all options at their defaults -->
<test expect_num_outputs="5">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="No" />
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <not_has_text text="--strict-match " />
        <not_has_text text="-T " />
        <!-- ln is also called with -s, so a plain text check cannot be used;
             the regexp checks that -s does not occur after "gffcompare" -->
        <has_text_matching expression="^.*gffcompare((?!-s).)*$" />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <not_has_text text="-D " />
        <not_has_text text="--no-merge " />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out1.stats" />
    <output name="transcripts_loci" file="gffcompare_out1.loci" />
    <output name="transcripts_tracking" file="gffcompare_out1.tracking" />
    <output name="transcripts_combined" file="gffcompare_out1.gtf" />
    <output_collection name="tmap_output" type="list" count="2"/>
</test>
<!-- Two inputs, no reference annotation, reference sequence from the history,
     default options except that tmap output is disabled -->
<test expect_num_outputs="4">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="No" />
        <param name="refmap_tmap" value=""/>
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="Yes" />
        <conditional name="seq_source">
            <param name="index_source" value="history"/>
            <param name="ref_file" ftype="fasta" value="sequence.fa"/>
        </conditional>
    </conditional>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <has_text text="-T " />
        <!-- ln is also called with -s, so the regexp checks that -s occurs after "gffcompare" -->
        <has_text_matching expression="gffcompare.*-s " />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out1.stats" compare="sim_size" />
    <output name="transcripts_loci" file="gffcompare_out1.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out1.tracking" compare="sim_size" />
    <output name="transcripts_combined" file="gffcompare_out1.gtf" compare="sim_size" />
</test>
<!-- Two inputs, no reference annotation, cached reference sequence,
     default options except that tmap output is disabled -->
<test expect_num_outputs="4">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="No" />
        <param name="refmap_tmap" value=""/>
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="Yes" />
        <conditional name="seq_source">
            <param name="index_source" value="cached"/>
            <param name="index" value="test_buildid"/>
        </conditional>
    </conditional>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <has_text text="-T " />
        <has_text_matching expression="gffcompare.*-s " />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out1.stats" compare="sim_size" />
    <output name="transcripts_loci" file="gffcompare_out1.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out1.tracking" compare="sim_size" />
    <output name="transcripts_combined" file="gffcompare_out1.gtf" compare="sim_size" />
</test>
<!-- Two inputs with a reference annotation from the history, default options;
     both the refmap and the tmap collection are checked element by element -->
<test expect_num_outputs="6">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="Yes" />
        <conditional name="ref_source">
            <param name="ref_source_sel" value="history"/>
            <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
        </conditional>
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <not_has_text text="--strict-match " />
        <not_has_text text="-T " />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <not_has_text text="-D " />
        <not_has_text text="--no-merge " />
        <not_has_text text="--chr-stats" />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out2.stats" />
    <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out2.tracking" />
    <output name="transcripts_combined" file="gffcompare_out2.gtf" />
    <output_collection name="refmap_output" type="list" count="2">
        <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.refmap" ftype="tabular" />
        <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.refmap" ftype="tabular" />
    </output_collection>
    <output_collection name="tmap_output" type="list" count="2">
        <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.tmap" ftype="tabular" />
        <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.tmap" ftype="tabular" />
    </output_collection>
</test>
<!-- Two inputs with a cached reference annotation, non default options, refmap output only -->
<test expect_num_outputs="5">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="Yes" />
        <conditional name="ref_source">
            <param name="ref_source_sel" value="cached"/>
            <param name="index" value="test_buildid"/>
        </conditional>
        <param name="ignore_nonoverlapping_reference" value="Yes" />
        <param name="ignore_nonoverlapping_transfrags" value="Yes" />
        <param name="strict_match" value="--strict-match" />
        <param name="refmap_tmap" value="refmap" />
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <param name="discard_single_exon" value="-M"/>
    <param name="discard_duplicates" value="-D"/>
    <param name="no_merge" value="--no-merge" />
    <param name="max_dist_exon" value="101" />
    <param name="max_dist_group" value="99" />
    <param name="chr_stats" value="--chr-stats" />
    <assert_command>
        <has_text text="-R " />
        <has_text text="-Q " />
        <has_text text="--strict-match " />
        <not_has_text text="-T " />
        <has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 101 " />
        <has_text text="-d 99 " />
        <has_text text="-D " />
        <has_text text="--no-merge " />
        <has_text text="--chr-stats" />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out2.stats" compare="sim_size" />
    <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out2.tracking" compare="sim_size" />
    <output name="transcripts_combined" file="gffcompare_out2.gtf" compare="sim_size" delta="50000"/>
    <!-- because of -M no refmaps are created -->
    <output_collection name="refmap_output" type="list" count="0"/>
</test>
<!-- Two inputs with a reference annotation from the history, non default
     advanced output options, tmap output only -->
<test expect_num_outputs="5">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="Yes" />
        <conditional name="ref_source">
            <param name="ref_source_sel" value="history"/>
            <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
        </conditional>
        <param name="refmap_tmap" value="tmap" />
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <section name="adv_output">
        <param name="p" value="OTHER" />
        <param name="C" value="-C" />
        <param name="A" value="-A" />
        <param name="X" value="-X" />
        <param name="K" value="-K" />
    </section>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <not_has_text text="--strict-match " />
        <not_has_text text="-T " />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <not_has_text text="-D " />
        <not_has_text text="--no-merge " />
        <not_has_text text="--chr-stats" />
        <has_text text="-p 'OTHER' " />
        <has_text text="-C " />
        <has_text text="-A " />
        <has_text text="-X " />
        <has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out2.stats" compare="sim_size" />
    <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out2.tracking" compare="sim_size" />
    <output name="transcripts_combined" file="gffcompare_out2.gtf" compare="sim_size" delta="50000"/>
    <output_collection name="tmap_output" type="list" count="2"/>
</test>
<!-- Two inputs with a reference annotation from the history, default options,
     neither tmap nor refmap output requested -->
<test expect_num_outputs="4">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="Yes" />
        <conditional name="ref_source">
            <param name="ref_source_sel" value="history"/>
            <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
        </conditional>
        <param name="refmap_tmap" value="" />
    </conditional>
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <assert_command>
        <not_has_text text="-R " />
        <not_has_text text="-Q " />
        <not_has_text text="--strict-match " />
        <has_text text="-T " />
        <not_has_text text="-M " />
        <not_has_text text="-N " />
        <has_text text="-e 100 " />
        <has_text text="-d 100 " />
        <not_has_text text="-D " />
        <not_has_text text="--no-merge " />
        <not_has_text text="--chr-stats" />
        <has_text text="-p 'TCONS' " />
        <not_has_text text="-C " />
        <not_has_text text="-A " />
        <not_has_text text="-X " />
        <not_has_text text="-K " />
    </assert_command>
    <output name="transcripts_stats" file="gffcompare_out2.stats" lines_diff="2" />
    <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out2.tracking" />
    <output name="transcripts_combined" file="gffcompare_out2.gtf" />
</test>
445
<!-- Single input with a reference annotation from the history: the annotated GTF
     is expected instead of the combined GTF; neither tmap nor refmap output requested -->
<test expect_num_outputs="4">
    <param name="gffinputs" ftype="gtf" value="gffcompare_in4.gtf" />
    <conditional name="annotation">
        <param name="use_ref_annotation" value="Yes" />
        <conditional name="ref_source">
            <param name="ref_source_sel" value="history"/>
            <param name="reference_annotation" ftype="gtf" value="gffcompare_in5.gtf" />
        </conditional>
        <param name="ignore_nonoverlapping_reference" value="Yes" />
        <param name="ignore_nonoverlapping_transfrags" value="No" />
        <param name="refmap_tmap" value="" />
    </conditional>
    <!-- use_seq_data is nested in its conditional, as in all other tests -->
    <conditional name="seq_data">
        <param name="use_seq_data" value="No" />
    </conditional>
    <param name="discard_single_exon" value="" />
    <param name="max_dist_exon" value="100" />
    <param name="max_dist_group" value="100" />
    <output name="transcripts_stats" file="gffcompare_out3.stats"/>
    <output name="transcripts_loci" file="gffcompare_out3.loci" compare="sim_size" />
    <output name="transcripts_tracking" file="gffcompare_out3.tracking" />
    <output name="transcripts_annotated" file="gffcompare_out3.gtf" />
</test>
467 </tests>
<help>
<![CDATA[
**GffCompare Overview**

GffCompare can be used to:

* compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, StringTie).
* collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulting from assembly of different samples)
* classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in an
  annotation file (also in GTF/GFF3 format)

More information can be found here: https://ccb.jhu.edu/software/stringtie/gffcompare.shtml.

The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
(see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
are supported by GffCompare, while new features will likely be added to GffCompare in the future.

A notable difference of GffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus
be used as a simple way of annotating a set of transcripts.

Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
and is simply ignored when given).
]]>
</help>
<citations>
    <!-- Cufflinks suite (Trapnell et al. 2010), which distributes the original CuffCompare program -->
    <citation type="doi">10.1038/nbt.1621</citation>
    <!-- GFF Utilities: GffRead and GffCompare (Pertea and Pertea 2020) -->
    <citation type="doi">10.12688/f1000research.23297.1</citation>
</citations>
499 </tool>