<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for GffCompare.

    The command template symlinks every query GTF (and, when requested, a
    reference annotation and/or a reference FASTA) into the job working
    directory and then runs gffcompare on them. Outputs are collected from
    the gffcmp.* files left in the working directory.
-->
<tool id="gffcompare" name="GffCompare" version="1.0.0">
    <description>compare assembled transcripts to a reference annotation</description>
    <requirements>
        <container type="docker">bianca7/lncrna:gffcompare</container>
    </requirements>
    <!--version_command>gffcompare -v | awk '{print $2}'</version_command-->
    <command detect_errors="aggressive"><![CDATA[
        #import re

        ## Link each query file under a shell-safe name derived from its element
        ## identifier; the per-input refmap/tmap collections are discovered by these names.
        #set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs]
        #for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers):
            ln -s '$input' '$escaped_element_identifier' &&
        #end for

        ## Optional reference sequence, taken from the history or from a cached data table entry
        #if $seq_data.use_seq_data == "Yes":
            #if $seq_data.seq_source.index_source == "history":
                ln -s '$seq_data.seq_source.ref_file' ref_seq.fa &&
            #else
                ln -s '$seq_data.seq_source.index.fields.path' ref_seq.fa &&
            #end if
        #end if

        ## Optional reference annotation, taken from the history or from a cached data table entry
        #if $annotation.use_ref_annotation == "Yes":
            #if $annotation.ref_source.ref_source_sel == "history":
                ln -s '$annotation.ref_source.reference_annotation' ref_annotation &&
            #else
                ln -s '$annotation.ref_source.index.fields.path' ref_annotation &&
            #end if
        #end if

        gffcompare
        ## Use annotation reference?
        #if $annotation.use_ref_annotation == "Yes":
            -r ref_annotation
            $annotation.ignore_nonoverlapping_reference
            $annotation.ignore_nonoverlapping_transfrags
            $annotation.strict_match
        #end if
        ## Nothing selected in the refmap/tmap select: ask gffcompare to skip the per-input files
        #if $annotation.refmap_tmap == "":
            -T
        #end if

        ## Use sequence data?
        #if $seq_data.use_seq_data == "Yes":
            -s ref_seq.fa
        #end if

        $discard_single_exon
        $discard_duplicates
        $no_merge
        -e $max_dist_exon
        -d $max_dist_group
        $chr_stats
        -p '$adv_output.p'
        $adv_output.A
        $adv_output.C
        $adv_output.X
        $adv_output.K

        ## Query files, in the same order as the symlinks created above
        #for $escaped_element_identifier in $escaped_element_identifiers:
            '$escaped_element_identifier'
        #end for

    ]]></command>
    <inputs>
        <param name="gffinputs" type="data" format="gtf" multiple="true" label="GTF inputs for comparison" help="" />
        <conditional name="annotation">
            <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="Yes">
                <conditional name="ref_source">
                    <param name="ref_source_sel" type="select" label="Choose the source for the reference annotation">
                        <option value="cached">Locally cached</option>
                        <option value="history">History</option>
                    </param>
                    <when value="cached">
                        <param name="index" argument="-r" type="select" label="Using reference annotation">
                            <options from_data_table="gene_sets">
                                <!--filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /-->
                            </options>
                            <validator type="no_options" message="No reference annotation is available for the build associated with the selected input dataset" />
                        </param>
                    </when>
                    <when value="history">
                        <param name="reference_annotation" argument="-r" type="data" format="gff3,gtf" label="Reference Annotation" help="Requires an annotation file in GFF3 or GTF format." />
                    </when>
                </conditional>
                <param name="ignore_nonoverlapping_reference" argument="-R" type="boolean" truevalue="-R" falsevalue="" label="Ignore reference transcripts that are not overlapped by any input transfrags" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
                <param name="ignore_nonoverlapping_transfrags" argument="-Q" type="boolean" truevalue="-Q" falsevalue="" label="Ignore input transcripts that are not overlapped by any reference transcripts" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
                <param name="strict_match" argument="--strict-match" type="boolean" checked="false" truevalue="--strict-match" falsevalue="" label="the match code '=' is only assigned when all exon boundaries match" help="code '~' is assigned for intron chain match or single-exon" />
                <param name="refmap_tmap" argument="-T" type="select" multiple="True" label="Generate tmap or refmap file for each input file">
                    <option value="refmap" selected="True">refmap</option>
                    <option value="tmap" selected="True">tmap</option>
                </param>
            </when>
            <when value="No">
                <param name="refmap_tmap" argument="-T" type="select" multiple="True" label="Generate tmap file for each input file">
                    <option value="tmap" selected="True">tmap</option>
                </param>
            </when>
        </conditional>
        <conditional name="seq_data">
            <param name="use_seq_data" type="select" label="Use Sequence Data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No"/>
            <when value="Yes">
                <conditional name="seq_source">
                    <param name="index_source" type="select" label="Choose the source for the reference sequence">
                        <option value="cached">Locally cached</option>
                        <option value="history">History</option>
                    </param>
                    <when value="cached">
                        <param name="index" argument="-s" type="select" label="Using reference genome">
                            <options from_data_table="fasta_indexes">
                                <filter type="data_meta" ref="gffinputs" key="dbkey" column="dbkey" />
                            </options>
                            <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
                        </param>
                    </when>
                    <when value="history">
                        <param name="ref_file" argument="-s" type="data" format="fasta" label="Using reference file" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="discard_single_exon" argument="-M/-N" type="select" label="Discard single-exon transcripts">
            <option value="" selected="True">No</option>
            <option value="-M">Discard single-exon transfrags and reference transcripts</option>
            <option value="-N">Discard single-exon reference transcripts</option>
        </param>
        <!-- The note about combining -S with strict matching belongs here, next to the -S option. -->
        <param name="discard_duplicates" type="select" label="Discard duplicates" help="If -S and also --strict-match is given, exact matching of all exon boundaries is required">
            <option value="">None</option>
            <option value="-D">discard 'duplicate' query transfrags within a single sample (-D)</option>
            <option value="-S">Only discard 'duplicate' query or reference transcripts if their boundaries are fully contained within other, larger or identical transfrags (-S)</option>
        </param>
        <param name="no_merge" argument="--no-merge" type="boolean" checked="false" truevalue="--no-merge" falsevalue="" label="Disable close-exon merging" help="Default: merge exons separated by 'introns' shorter than 5 bases" />
        <param name="max_dist_exon" argument="-e" type="integer" value="100" label="Max. Distance for assessing exon accuracy" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
        <param name="max_dist_group" argument="-d" type="integer" value="100" label="Max distance for transcript grouping" help="max. distance (range) for grouping transcript start sites. Default: 100" />
        <param name="chr_stats" argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue="" label="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" />
        <section name="adv_output" title="Options for the combined GTF output file">
            <param argument="-p" type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf" />
            <param argument="-C" type="boolean" checked="false" truevalue="-C" falsevalue="" label="discard matching and 'contained' transfrags" help="i.e. collapse intron-redundant transfrags across all query files" />
            <param argument="-A" type="boolean" checked="false" truevalue="-A" falsevalue="" label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" />
            <param argument="-X" type="boolean" checked="false" truevalue="-X" falsevalue="" label="discard the 'contained' transfrags also if ends stick out within the container's introns" help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
            <param argument="-K" type="boolean" checked="false" truevalue="-K" falsevalue="" label="do NOT discard any redundant transfrag matching a reference" help="for -C/-A/-X" />
        </section>
    </inputs>
    <outputs>
        <data name="transcripts_stats" format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" />
        <data name="transcripts_loci" format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" />
        <data name="transcripts_tracking" format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${gffinputs[0].hid} tracking file" />
        <!-- Combined GTF: several query files, or no reference annotation -->
        <data name="transcripts_combined" format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts">
            <filter>(isinstance(gffinputs, list) and len(gffinputs) > 1) or annotation['use_ref_annotation'] == "No"</filter>
        </data>
        <!-- Annotated GTF: exactly the complementary case (one query file plus a reference annotation) -->
        <data name="transcripts_annotated" format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts">
            <filter>not (isinstance(gffinputs, list) and len(gffinputs) > 1) and annotation['use_ref_annotation'] == "Yes"</filter>
        </data>
        <!-- "<" and ">" of the named group must be written as entities inside an attribute value -->
        <collection name="refmap_output" type="list" label="${tool.name} on ${on_string}: refmap">
            <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" />
            <filter>annotation['refmap_tmap'] is not None and 'refmap' in annotation['refmap_tmap']</filter>
        </collection>
        <collection name="tmap_output" type="list" label="${tool.name} on ${on_string}: tmap">
            <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" />
            <filter>annotation['refmap_tmap'] is not None and 'tmap' in annotation['refmap_tmap']</filter>
        </collection>
    </outputs>
    <tests>
        <!-- 2 inputs, no reference, default options -->
        <test expect_num_outputs="5">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="No" />
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <not_has_text text="--strict-match " />
                <not_has_text text="-T " />
                <has_text_matching expression="^.*gffcompare((?!-s).)*$" /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is not set -->
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <not_has_text text="-D " />
                <not_has_text text="--no-merge " />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out1.stats" />
            <output name="transcripts_loci" file="gffcompare_out1.loci" />
            <output name="transcripts_tracking" file="gffcompare_out1.tracking" />
            <output name="transcripts_combined" file="gffcompare_out1.gtf" />
            <output_collection name="tmap_output" type="list" count="2"/>
        </test>
        <!-- 2 inputs, no reference, with refsequence, default options (but disable tmap output) -->
        <test expect_num_outputs="4">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="No" />
                <param name="refmap_tmap" value=""/>
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="Yes" />
                <conditional name="seq_source">
                    <param name="index_source" value="history"/>
                    <param name="ref_file" ftype="fasta" value="sequence.fa"/>
                </conditional>
            </conditional>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <has_text text="-T " />
                <has_text_matching expression="gffcompare.*-s " /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is set -->
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out1.stats" compare="sim_size" />
            <output name="transcripts_loci" file="gffcompare_out1.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out1.tracking" compare="sim_size" />
            <output name="transcripts_combined" file="gffcompare_out1.gtf" compare="sim_size" />
        </test>
        <!-- 2 inputs, no reference, with cached refsequence, default options (but disable tmap output) -->
        <test expect_num_outputs="4">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="No" />
                <param name="refmap_tmap" value=""/>
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="Yes" />
                <conditional name="seq_source">
                    <param name="index_source" value="cached"/>
                    <param name="index" value="test_buildid"/>
                </conditional>
            </conditional>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <has_text text="-T " />
                <has_text_matching expression="gffcompare.*-s " />
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out1.stats" compare="sim_size" />
            <output name="transcripts_loci" file="gffcompare_out1.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out1.tracking" compare="sim_size" />
            <output name="transcripts_combined" file="gffcompare_out1.gtf" compare="sim_size" />
        </test>
        <!-- 2 inputs and reference, default options -->
        <test expect_num_outputs="6">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="Yes" />
                <conditional name="ref_source">
                    <param name="ref_source_sel" value="history"/>
                    <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
                </conditional>
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <not_has_text text="--strict-match " />
                <not_has_text text="-T " />
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <not_has_text text="-D " />
                <not_has_text text="--no-merge " />
                <not_has_text text="--chr-stats" />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out2.stats" />
            <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out2.tracking" />
            <output name="transcripts_combined" file="gffcompare_out2.gtf" />
            <output_collection name="refmap_output" type="list" count="2">
                <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.refmap" ftype="tabular" />
                <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.refmap" ftype="tabular" />
            </output_collection>
            <output_collection name="tmap_output" type="list" count="2">
                <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.tmap" ftype="tabular" />
                <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.tmap" ftype="tabular" />
            </output_collection>
        </test>
        <!-- 2 inputs and reference (cached), non default options, only refmap output -->
        <test expect_num_outputs="5">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="Yes" />
                <conditional name="ref_source">
                    <param name="ref_source_sel" value="cached"/>
                    <param name="index" value="test_buildid"/>
                </conditional>
                <param name="ignore_nonoverlapping_reference" value="Yes" />
                <param name="ignore_nonoverlapping_transfrags" value="Yes" />
                <param name="strict_match" value="--strict-match" />
                <param name="refmap_tmap" value="refmap" />
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <param name="discard_single_exon" value="-M"/>
            <param name="discard_duplicates" value="-D"/>
            <param name="no_merge" value="--no-merge" />
            <param name="max_dist_exon" value="101" />
            <param name="max_dist_group" value="99" />
            <param name="chr_stats" value="--chr-stats" />
            <assert_command>
                <has_text text="-R " />
                <has_text text="-Q " />
                <has_text text="--strict-match " />
                <not_has_text text="-T " />
                <has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 101 " />
                <has_text text="-d 99 " />
                <has_text text="-D " />
                <has_text text="--no-merge " />
                <has_text text="--chr-stats" />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out2.stats" compare="sim_size" />
            <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out2.tracking" compare="sim_size" />
            <output name="transcripts_combined" file="gffcompare_out2.gtf" compare="sim_size" delta="50000"/>
            <output_collection name="refmap_output" type="list" count="0"/> <!-- because of -M no refmaps are created -->
        </test>
        <!-- 2 inputs and reference, non default advanced options, only tmap output -->
        <test expect_num_outputs="5">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="Yes" />
                <conditional name="ref_source">
                    <param name="ref_source_sel" value="history"/>
                    <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
                </conditional>
                <param name="refmap_tmap" value="tmap" />
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <section name="adv_output">
                <param name="p" value="OTHER" />
                <param name="C" value="-C" />
                <param name="A" value="-A" />
                <param name="X" value="-X" />
                <param name="K" value="-K" />
            </section>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <not_has_text text="--strict-match " />
                <not_has_text text="-T " />
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <not_has_text text="-D " />
                <not_has_text text="--no-merge " />
                <not_has_text text="--chr-stats" />
                <has_text text="-p 'OTHER' " />
                <has_text text="-C " />
                <has_text text="-A " />
                <has_text text="-X " />
                <has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out2.stats" compare="sim_size" />
            <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out2.tracking" compare="sim_size" />
            <output name="transcripts_combined" file="gffcompare_out2.gtf" compare="sim_size" delta="50000"/>
            <output_collection name="tmap_output" type="list" count="2"/>
        </test>
        <!-- 2 inputs and reference, default options, no tmap or refmap output -->
        <test expect_num_outputs="4">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="Yes" />
                <conditional name="ref_source">
                    <param name="ref_source_sel" value="history"/>
                    <param name="reference_annotation" ftype="gtf" value="gffcompare_in3.gtf" />
                </conditional>
                <param name="refmap_tmap" value="" />
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <assert_command>
                <not_has_text text="-R " />
                <not_has_text text="-Q " />
                <not_has_text text="--strict-match " />
                <has_text text="-T " />
                <not_has_text text="-M " />
                <not_has_text text="-N " />
                <has_text text="-e 100 " />
                <has_text text="-d 100 " />
                <not_has_text text="-D " />
                <not_has_text text="--no-merge " />
                <not_has_text text="--chr-stats" />
                <has_text text="-p 'TCONS' " />
                <not_has_text text="-C " />
                <not_has_text text="-A " />
                <not_has_text text="-X " />
                <not_has_text text="-K " />
            </assert_command>
            <output name="transcripts_stats" file="gffcompare_out2.stats" lines_diff="2" />
            <output name="transcripts_loci" file="gffcompare_out2.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out2.tracking" />
            <output name="transcripts_combined" file="gffcompare_out2.gtf" />
        </test>
        <!-- 1 input and reference, no tmap or refmap output: yields the annotated GTF instead of the combined one -->
        <test expect_num_outputs="4">
            <param name="gffinputs" ftype="gtf" value="gffcompare_in4.gtf" />
            <conditional name="annotation">
                <param name="use_ref_annotation" value="Yes" />
                <conditional name="ref_source">
                    <param name="ref_source_sel" value="history"/>
                    <param name="reference_annotation" ftype="gtf" value="gffcompare_in5.gtf" />
                </conditional>
                <param name="ignore_nonoverlapping_reference" value="Yes" />
                <param name="ignore_nonoverlapping_transfrags" value="No" />
                <param name="refmap_tmap" value="" />
            </conditional>
            <conditional name="seq_data">
                <param name="use_seq_data" value="No" />
            </conditional>
            <param name="discard_single_exon" value="" />
            <param name="max_dist_exon" value="100" />
            <param name="max_dist_group" value="100" />
            <output name="transcripts_stats" file="gffcompare_out3.stats"/>
            <output name="transcripts_loci" file="gffcompare_out3.loci" compare="sim_size" />
            <output name="transcripts_tracking" file="gffcompare_out3.tracking" />
            <output name="transcripts_annotated" file="gffcompare_out3.gtf" />
        </test>
    </tests>
    <help>
<![CDATA[
**GffCompare Overview**

GffCompare can be used to:

* compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, StringTie).
* collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulting from assembly of different samples)
* classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in an
  annotation file (also in GTF/GFF3 format)

More information can be found here: https://ccb.jhu.edu/software/stringtie/gffcompare.shtml.

The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
(see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
are supported by GffCompare, while new features will likely be added to GffCompare in the future.

A notable difference of GffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus
be used as a simple way of annotating a set of transcripts.

Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
and is simply ignored when given).
]]>
    </help>
    <citations>
        <citation type="doi">10.1038/nbt.1621</citation>
    </citations>
</tool>