comparison gffcompare.xml @ 5:f99dd58de04f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffcompare commit c8028c2640d2d213da5097df2341a8281fe0b7c8
author iuc
date Fri, 03 Feb 2023 10:57:30 +0000
parents 0f710191a66d
children
comparison
equal deleted inserted replaced
4:0f710191a66d 5:f99dd58de04f
1 <tool id="gffcompare" name="GffCompare" version="@GFFCOMPARE_VERSION@"> 1 <tool id="gffcompare" name="GffCompare" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>compare assembled transcripts to a reference annotation</description> 2 <description>compare assembled transcripts to a reference annotation</description>
3 <macros> 3 <macros>
4 <token name="@GFFCOMPARE_VERSION@">0.11.2</token> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <xrefs>
7 <requirement type="package" version="@GFFCOMPARE_VERSION@">gffcompare</requirement> 7 <xref type="bio.tools">gffcompare</xref>
8 </requirements> 8 </xrefs>
9 <expand macro="requirements" />
9 <version_command>gffcompare -v | awk '{print $2}'</version_command> 10 <version_command>gffcompare -v | awk '{print $2}'</version_command>
10 <command detect_errors="aggressive"><![CDATA[ 11 <command detect_errors="aggressive"><![CDATA[
11 #import re 12 #import re
12 13
13 #set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs] 14 #set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs]
14 #for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers): 15 #for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers):
15 ln -s '$input' '$escaped_element_identifier' && 16 ln -s '$input' '$escaped_element_identifier' &&
16 #end for 17 #end for
17 #if $seq_data.use_seq_data == "Yes": 18 #if $conditional_annotation.selector == "yes":
18 #if $seq_data.seq_source.index_source == "history": 19 #if $conditional_annotation.ref_source.selector == "history":
19 ln -s '$seq_data.seq_source.ref_file' ref_seq.fa && 20 ln -s '$conditional_annotation.ref_source.reference_annotation' reference_annotation &&
20 #else: 21 #else:
21 ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa && 22 ln -s '${conditional_annotation.ref_source.index.fields.path}' reference_annotation &&
22 #end if 23 #end if
23 #end if 24 #end if
24 25 #if $seq_data.selector == "Yes":
25 #if $annotation.use_ref_annotation == "Yes": 26 #if $seq_data.seq_source.index_source == "history":
26 #if $annotation.ref_source.ref_source_sel == "history": 27 ln -s '$seq_data.seq_source.ref_genome' ref_seq.fa &&
27 ln -s '$annotation.ref_source.reference_annotation' ref_annotation && 28 samtools faidx ref_seq.fa &&
28 #else 29 #else:
29 ln -s '$annotation.ref_source.index.fields.path' ref_annotation && 30 ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
30 #end if 31 #end if
31 #end if 32 #end if
32 33 gffcompare -V
33 gffcompare 34 #if $conditional_annotation.selector == "yes":
34 ## Use annotation reference? 35 -r reference_annotation
35 #if $annotation.use_ref_annotation == "Yes": 36 $conditional_annotation.R
36 -r ref_annotation 37 $conditional_annotation.Q
37 $annotation.ignore_nonoverlapping_reference 38 $conditional_annotation.conditional_strict.selector
38 $annotation.ignore_nonoverlapping_transfrags 39 #if $conditional_annotation.conditional_strict.selector == '--strict-match'
39 $annotation.strict_match 40 -e $conditional_annotation.conditional_strict.e
40 #end if 41 #end if
41 #if $annotation.refmap_tmap == "": 42 $conditional_annotation.discard_single_exon
42 -T 43 $conditional_annotation.conditional_duplication.selector
43 #end if 44 #if $conditional_annotation.conditional_duplication.selector == "-D"
44 45 $conditional_annotation.conditional_duplication.S
45 ## Use sequence data? 46 #end if
46 #if $seq_data.use_seq_data == "Yes": 47 $conditional_annotation.no_merge
47 -s ref_seq.fa 48 #end if
48 #end if 49 $refmap_tmap
49 50 #if $seq_data.selector == "Yes":
50 $discard_single_exon 51 -s ref_seq.fa
51 $discard_duplicates 52 #end if
52 $no_merge 53 -d $max_dist_group
53 -e $max_dist_exon 54 $chr_stats
54 -d $max_dist_group 55 -p '$adv_output.p'
55 $chr_stats 56 $adv_output.A
56 -p '$adv_output.p' 57 $adv_output.C
57 $adv_output.A 58 $adv_output.X
58 $adv_output.C 59 $adv_output.K
59 $adv_output.X 60 #for $escaped_element_identifier in $escaped_element_identifiers:
60 $adv_output.K 61 '$escaped_element_identifier'
61 62 #end for
62 #for $escaped_element_identifier in $escaped_element_identifiers: 63 #if len($gffinputs) == 1 and $refmap_tmap == 'true'
63 '$escaped_element_identifier' 64 && mv *tmap output.tmap
64 #end for 65 #if $seq_data.selector == "Yes"
65 66 && mv *refmap output.refmap
67 #end if
68 #end if
66 ]]></command> 69 ]]></command>
67 <inputs> 70 <inputs>
68 <param format="gtf" name="gffinputs" type="data" label="GTF inputs for comparison" help="" multiple="true" /> 71 <param format="gtf,gff3" name="gffinputs" type="data" label="GTF inputs for comparison" help="" multiple="true" />
69 <conditional name="annotation"> 72 <conditional name="conditional_annotation">
70 <param label="Use Reference Annotation" name="use_ref_annotation" type="select"> 73 <param name="selector" type="select" label="Use reference annotation">
71 <option value="No">No</option> 74 <option value="no">No</option>
72 <option value="Yes">Yes</option> 75 <option value="yes">Yes</option>
73 </param> 76 </param>
74 <when value="Yes"> 77 <when value="yes">
75 <conditional name="ref_source"> 78 <conditional name="ref_source">
76 <param label="Choose the source for the reference annotation" name="ref_source_sel" type="select"> 79 <param label="Choose the source for the reference annotation" name="selector" type="select">
77 <option value="cached">Locally cached</option> 80 <option value="cached">Locally cached</option>
78 <option value="history">History</option> 81 <option value="history" selected="true">History</option>
79 </param> 82 </param>
80 <when value="cached"> 83 <when value="cached">
81 <param argument="-r" label="Using reference annotation" name="index" type="select"> 84 <param argument="-r" label="Using reference annotation" name="index" type="select">
82 <options from_data_table="gene_sets"> 85 <options from_data_table="gene_sets">
83 <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /> 86 <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" />
84 </options> 87 </options>
85 <validator message="No reference annotation is available for the build associated with the selected input dataset" type="no_options" /> 88 <validator message="No reference annotation is available for the build associated with the selected input dataset" type="no_options" />
86 </param> 89 </param>
87 </when> 90 </when>
88 <when value="history"> 91 <when value="history">
89 <param argument="-r" format="gff3,gtf" help="Requires an annotation file in GFF3 or GTF format." label="Reference Annotation" name="reference_annotation" type="data" /> 92 <param argument="-r" name="reference_annotation" type="data" format="gff3,gtf" label="Reference annotation"
93 help="Requires an annotation file in GFF3 or GTF format"/>
90 </when> 94 </when>
91 </conditional> 95 </conditional>
92 <param argument="-R" falsevalue="" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" label="Ignore reference transcripts that are not overlapped by any input transfrags" name="ignore_nonoverlapping_reference" truevalue="-R" type="boolean" /> 96 <param argument="-R" falsevalue="" truevalue="-R" type="boolean" label="Sn correction" help="Consider only the reference transcripts that
93 <param argument="-Q" falsevalue="" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" label="Ignore input transcripts that are not overlapped by any reference transcripts" name="ignore_nonoverlapping_transfrags" truevalue="-Q" type="boolean" /> 97 overlap any of the input transfrags"/>
94 <param argument="--strict-match" name="strict_match" type="boolean" checked="false" truevalue="--strict-match" falsevalue="" label="the match code '=' is only assigned when all exon boundaries match" help="code '~' is assigned for intron chain match or single-exon" /> 98 <param argument="-Q" falsevalue="" truevalue="-Q" type="boolean" label="Sp correction" help="Consider only the input transcripts that overlap
95 <param argument="-T" name="refmap_tmap" label="Generate tmap or refmap file for each input file" type="select" multiple="True"> 99 any of the reference transcripts. Warning: this will discard all 'novel' loci!"/>
96 <option value="refmap" selected="True">refmap</option> 100 <conditional name="conditional_strict">
97 <option value="tmap" selected="True">tmap</option> 101 <param name="selector" argument="--strict-match" type="select" label="Strict match" help="Make the accuracy estimation
102 at transcript level much more stringent by only allowing a limited variation of the outer coordinates of the terminal exons. Transcript
103 matching takes into account the -e range for terminal exons; code '=' is only assigned if transcript ends are within that range, otherwise code
104 '~' is assigned for intron chain match or single-exon">
105 <option value="">No</option>
106 <option value="--strict-match">Yes</option>
107 </param>
108 <when value=""/>
109 <when value="--strict-match">
110 <param argument="-e" label="Maximum range of variation for the free ends of terminal exons" type="integer" value="100" />
111 </when>
112 </conditional>
113 <param name="discard_single_exon" argument="-M/-N" type="select" label="Discard single-exon transcripts" help="If -S and also --strict-match is given,
114 exact matching of all exon boundaries is required">
115 <option value="" selected="true">No</option>
116 <option value="-M">Discard single-exon transfrags and reference transcripts</option>
117 <option value="-N">Discard single-exon reference transcripts</option>
98 </param> 118 </param>
119 <conditional name="conditional_duplication">
120 <param name="selector" argument="-D" type="select" label="Discard duplicate query transfrags" help="Discard duplicate query transfrags (i.e. same
121 intron chain) within a single sample (disable annotation mode for a single file); this option is automatically enabled when multiple query files are provided">
122 <option value="">No</option>
123 <option value="-D">Yes</option>
124 </param>
125 <when value=""/>
126 <when value="-D">
127 <param argument="-S" type="boolean" truevalue="-S" falsevalue="" checked="false" label="Strict duplicate checking" help="When -D is enabled (or
128 multiple query files are provided), perform a more strict duplicate checking: only discard matching (same intron chain) query transcripts from
129 the same sample if their boundaries are fully contained within (or same with) matching transcripts; if --strict-match is also given, exact match
130 of all exons is required" />
131 </when>
132 </conditional>
133 <param argument="--no-merge" type="boolean" checked="false" truevalue="--no-merge" falsevalue="" label="Disable close-exon merging"
134 help="Default: merge exons separated by 'introns' shorter than 5 bases" />
99 </when> 135 </when>
100 <when value="No"> 136 <when value="no"/>
101 <param argument="-T" name="refmap_tmap" label="Generate tmap file for each input file" type="select" multiple="True">
102 <option value="tmap" selected="True">tmap</option>
103 </param>
104 </when>
105 </conditional> 137 </conditional>
106 <conditional name="seq_data"> 138 <conditional name="seq_data">
107 <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." label="Use Sequence Data" name="use_seq_data" type="select"> 139 <param name="selector" type="select" label="Use sequence data" help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." >
108 <option value="No">No</option> 140 <option value="no">No</option>
109 <option value="Yes">Yes</option> 141 <option value="yes">Yes</option>
110 </param> 142 </param>
111 <when value="No"/> 143 <when value="no"/>
112 <when value="Yes"> 144 <when value="yes">
113 <conditional name="seq_source"> 145 <conditional name="seq_source">
114 <param label="Choose the source for the reference sequence" name="index_source" type="select"> 146 <param label="Choose the source for the reference sequence" name="index_source" type="select">
115 <option value="cached">Locally cached</option> 147 <option value="cached">Locally cached</option>
116 <option value="history">History</option> 148 <option value="history" selected="true">History</option>
117 </param> 149 </param>
118 <when value="cached"> 150 <when value="cached">
119 <param argument="-s" label="Using reference genome" name="index" type="select"> 151 <param argument="-s" label="Using reference genome" name="index" type="select">
120 <options from_data_table="fasta_indexes"> 152 <options from_data_table="fasta_indexes">
121 <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /> 153 <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" />
122 </options> 154 </options>
123 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> 155 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" />
124 </param> 156 </param>
125 </when> 157 </when>
126 <when value="history"> 158 <when value="history">
127 <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" /> 159 <param argument="-s" name="ref_genome" type="data" format="fasta" label="Reference genome" help="Optional. Repeats must be soft-masked (lower case) in order to be able to classify
160 transfrags as repeats"/>
128 </when> 161 </when>
129 </conditional> 162 </conditional>
130 </when> 163 </when>
131 </conditional> 164 </conditional>
132 <param name="discard_single_exon" argument="-M/-N" type="select" label="Discard single-exon transcripts" help="If -S and also --strict-match is given, exact matching of all exon boundaries is required"> 165 <param argument="-d" name="max_dist_group" type="integer" value="100" min="0" help="Maximum distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" />
133 <option selected="True" value="">No</option> 166 <param argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue="" label="Stats per reference contig/chromosome" help="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" />
134 <option value="-M">Discard single-exon transfrags and reference transcripts</option> 167 <param argument="-T" name="refmap_tmap" type="boolean" truevalue="" falsevalue="-T" checked="true" label="Generate TMAP and RefMap files for each input" help="TMAP are tabular files that store the information regarding the best match for each prediction in the reference.
135 <option value="-N">Discard single-exon reference transcripts</option> 168 RefMap files are tabular files which store the information regarding the best match for each reference transcript, among all possible prediction models. More information in the help section"/>
136 </param> 169 <section name="adv_output" title="Combined GTF output parameters">
137 <param label="Discard duplicates" name="discard_duplicates" type="select"> 170 <param argument="-p" type="text" value="TCONS" label="Name prefix for consensus transcripts">
138 <option value="">None</option> 171 <sanitizer invalid_char="">
139 <option value="-D">discard 'duplicate' query transfrags within a single sample (-D)</option> 172 <valid initial="string.letters,string.digits">
140 <option value="-S">Only discard 'duplicate' query or reference transcripts if their boundaries are fully contained within other, larger or identical transfrags (-S)</option> 173 <add value="_" />
141 </param> 174 <add value="-" />
142 <param name="no_merge" argument="--no-merge" type="boolean" checked="false" truevalue="--no-merge" falsevalue="" label="Disable close-exon merging" help="Default: merge exons separated by 'introns' shorter than 5 bases" /> 175 </valid>
143 <param argument="-e" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" label="Max. Distance for assessing exon accuracy" name="max_dist_exon" type="integer" value="100" /> 176 </sanitizer>
144 <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" /> 177 <validator type="regex">[0-9a-zA-Z_-]+</validator>
145 <param name="chr_stats" argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue="" label="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" /> 178 </param>
146 <section name="adv_output" title="Options for the combined GTF output file"> 179 <param argument="-C" type="boolean" checked="false" truevalue="-C" falsevalue="" label="Discard matching and 'contained' transfrags" help="I.e. collapse intron-redundant transfrags across all query files" />
147 <param argument="-p" type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf" /> 180 <param argument="-A" type="boolean" checked="false" truevalue="-A" falsevalue="" label="Discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="Like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" />
148 <param argument="-C" type="boolean" checked="false" truevalue="-C" falsevalue="" label="discard matching and 'contained' transfrags" help="i.e. collapse intron-redundant transfrags across all query files" /> 181 <param argument="-X" type="boolean" checked="false" truevalue="-X" falsevalue="" label="Discard the 'contained' transfrags also if ends stick out within the container's introns" help="Like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
149 <param argument="-A" type="boolean" checked="false" truevalue="-A" falsevalue="" label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" /> 182 <param argument="-K" type="boolean" checked="false" truevalue="-K" falsevalue="" label="Do NOT discard any redundant transfrag matching a reference" help="For -C/-A/-X" />
150 <param argument="-X" type="boolean" checked="false" truevalue="-X" falsevalue="" label="discard the 'contained' transfrags also if ends stick out within the container's introns" help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
151 <param argument="-K" type="boolean" checked="false" truevalue="-K" falsevalue="" label="do NOT discard any redundant transfrag matching a reference" help="for -C/-A/-X" />
152 </section> 183 </section>
153 </inputs> 184 </inputs>
154 <outputs> 185 <outputs>
155 <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" /> 186 <data name="transcripts_annotated" format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts">
156 <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" /> 187 <filter>conditional_annotation['selector'] == "yes"</filter>
157 <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${gffinputs[0].hid} tracking file" name="transcripts_tracking" /> 188 <filter>len(gffinputs) == 1</filter>
158 <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined">
159 <filter>(isinstance(gffinputs, list) and len(gffinputs) > 1) or annotation['use_ref_annotation'] == "No"</filter>
160 </data> 189 </data>
161 <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated"> 190 <data name="transcripts_combined" format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" >
162 <filter>not (isinstance(gffinputs, list) and len(gffinputs) > 1) and annotation['use_ref_annotation'] == "Yes"</filter> 191 <filter>len(gffinputs) > 1</filter>
163 </data> 192 </data>
164 <collection name="refmap_output" type="list" label="${tool.name} on ${on_string}: refmap"> 193 <collection name="refmap_output_collection" type="list" label="${tool.name} on ${on_string}: RefMap">
165 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" /> 194 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" />
166 <filter>annotation['refmap_tmap'] != None and 'refmap' in annotation['refmap_tmap']</filter> 195 <filter>conditional_annotation['selector'] == 'yes'</filter>
196 <filter>len(gffinputs) > 1</filter>
197 <filter>refmap_tmap</filter>
167 </collection> 198 </collection>
168 <collection name="tmap_output" type="list" label="${tool.name} on ${on_string}: tmap"> 199 <data name="refmap_output" format="tabular" from_work_dir="output.refmap" label="${tool.name} on ${on_string}: RefMap">
200 <filter>conditional_annotation['selector'] == 'yes'</filter>
201 <filter>len(gffinputs) == 1</filter>
202 <filter>refmap_tmap</filter>
203 </data>
204 <collection name="tmap_output_collection" type="list" label="${tool.name} on ${on_string}: TMAP">
169 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" /> 205 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" />
170 <filter>annotation['refmap_tmap'] != None and 'tmap' in annotation['refmap_tmap']</filter> 206 <filter>refmap_tmap</filter>
207 <filter>len(gffinputs) > 1</filter>
171 </collection> 208 </collection>
209 <data name="tmap_output" format="tabular" from_work_dir="output.tmap" label="${tool.name} on ${on_string}: TMAP">
210 <filter>refmap_tmap</filter>
211 <filter>len(gffinputs) == 1</filter>
212 </data>
213 <data name="transcripts_stats" format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: accuracy stats" />
214 <data name="transcripts_loci" format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci file" />
215 <data name="transcripts_tracking" format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: tracking file" />
172 </outputs> 216 </outputs>
173 <tests> 217 <tests>
174 <!-- 2 inputs, no reference, default options --> 218 <!-- Test 01: 2 inputs, no reference, default options -->
175 <test expect_num_outputs="5"> 219 <test expect_num_outputs="5">
176 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> 220 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
177 <conditional name="annotation"> 221 <conditional name="conditional_annotation">
178 <param name="use_ref_annotation" value="No" /> 222 <param name="selector" value="no"/>
179 </conditional> 223 </conditional>
180 <conditional name="seq_data"> 224 <conditional name="seq_data">
181 <param name="use_seq_data" value="No" /> 225 <param name="selector" value="no" />
182 </conditional> 226 </conditional>
183 <assert_command> 227 <assert_command>
184 <not_has_text text="-R " /> 228 <not_has_text text="-R " />
185 <not_has_text text="-Q " /> 229 <not_has_text text="-Q " />
186 <not_has_text text="--strict-match " /> 230 <not_has_text text="--strict-match " />
187 <not_has_text text="-T " /> 231 <not_has_text text="-T " />
188 <has_text_matching expression="^.*gffcompare((?!-s).)*$" /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is not set --> 232 <has_text_matching expression="^.*gffcompare((?!-s).)*$" /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is not set -->
189 <not_has_text text="-M " /> 233 <not_has_text text="-M " />
190 <not_has_text text="-N " /> 234 <not_has_text text="-N " />
191 <has_text text="-e 100 " />
192 <has_text text="-d 100 " /> 235 <has_text text="-d 100 " />
193 <not_has_text text="-D " /> 236 <not_has_text text="-D " />
194 <not_has_text text="--no-merge " /> 237 <not_has_text text="--no-merge " />
195 <has_text text="-p 'TCONS' " /> 238 <has_text text="-p 'TCONS' " />
196 <not_has_text text="-C " /> 239 <not_has_text text="-C " />
200 </assert_command> 243 </assert_command>
201 <output file="gffcompare_out1.stats" name="transcripts_stats" /> 244 <output file="gffcompare_out1.stats" name="transcripts_stats" />
202 <output file="gffcompare_out1.loci" name="transcripts_loci" /> 245 <output file="gffcompare_out1.loci" name="transcripts_loci" />
203 <output file="gffcompare_out1.tracking" name="transcripts_tracking" /> 246 <output file="gffcompare_out1.tracking" name="transcripts_tracking" />
204 <output file="gffcompare_out1.gtf" name="transcripts_combined" /> 247 <output file="gffcompare_out1.gtf" name="transcripts_combined" />
205 <output_collection name="tmap_output" type="list" count="2"/> 248 <output_collection name="tmap_output_collection" type="list" count="2"/>
206 </test> 249 </test>
207 <!-- 2 inputs, no reference, with refsequence, default options (but disable tmap output) --> 250 <!-- Test 02: 2 inputs, no reference, with refsequence, default options (but disable tmap output) -->
208 <test expect_num_outputs="4"> 251 <test expect_num_outputs="4">
209 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> 252 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
210 <conditional name="annotation"> 253 <param name="refmap_tmap" value="false"/>
211 <param name="use_ref_annotation" value="No" /> 254 <conditional name="conditional_annotation">
212 <param name="refmap_tmap" value=""/> 255 <param name="selector" value="no"/>
213 </conditional> 256 </conditional>
214 <conditional name="seq_data"> 257 <conditional name="seq_data">
215 <param name="use_seq_data" value="Yes" /> 258 <param name="selector" value="Yes" />
216 <conditional name="seq_source"> 259 <conditional name="seq_source">
217 <param name="index_source" value="history"/> 260 <param name="index_source" value="history"/>
218 <param name="ref_file" ftype="fasta" value="sequence.fa"/> 261 <param name="ref_file" ftype="fasta" value="sequence.fa"/>
219 </conditional> 262 </conditional>
220 </conditional> 263 </conditional>
223 <not_has_text text="-Q " /> 266 <not_has_text text="-Q " />
224 <has_text text="-T " /> 267 <has_text text="-T " />
225 <has_text_matching expression="gffcompare.*-s " /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is set --> 268 <has_text_matching expression="gffcompare.*-s " /> <!-- since ln also has -s a more complicated regexp is needed here to check if -s is set -->
226 <not_has_text text="-M " /> 269 <not_has_text text="-M " />
227 <not_has_text text="-N " /> 270 <not_has_text text="-N " />
228 <has_text text="-e 100 " />
229 <has_text text="-d 100 " /> 271 <has_text text="-d 100 " />
230 <has_text text="-p 'TCONS' " /> 272 <has_text text="-p 'TCONS' " />
231 <not_has_text text="-C " /> 273 <not_has_text text="-C " />
232 <not_has_text text="-A " /> 274 <not_has_text text="-A " />
233 <not_has_text text="-X " /> 275 <not_has_text text="-X " />
236 <output file="gffcompare_out1.stats" name="transcripts_stats" compare="sim_size" /> 278 <output file="gffcompare_out1.stats" name="transcripts_stats" compare="sim_size" />
237 <output file="gffcompare_out1.loci" name="transcripts_loci" compare="sim_size" /> 279 <output file="gffcompare_out1.loci" name="transcripts_loci" compare="sim_size" />
238 <output file="gffcompare_out1.tracking" name="transcripts_tracking" compare="sim_size" /> 280 <output file="gffcompare_out1.tracking" name="transcripts_tracking" compare="sim_size" />
239 <output file="gffcompare_out1.gtf" name="transcripts_combined" compare="sim_size" /> 281 <output file="gffcompare_out1.gtf" name="transcripts_combined" compare="sim_size" />
240 </test> 282 </test>
241 <!-- 2 inputs, no reference, with cached refsequence, default options (but disable tmap output) --> 283 <!-- Test 03: 2 inputs, no reference, with cached refsequence, default options (but disable tmap output) -->
242 <test expect_num_outputs="4"> 284 <test expect_num_outputs="4">
243 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" /> 285 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
244 <conditional name="annotation"> 286 <param name="refmap_tmap" value="false"/>
245 <param name="use_ref_annotation" value="No" /> 287 <conditional name="conditional_annotation">
246 <param name="refmap_tmap" value=""/> 288 <param name="selector" value="no"/>
247 </conditional> 289 </conditional>
248 <conditional name="seq_data"> 290 <conditional name="seq_data">
249 <param name="use_seq_data" value="Yes" /> 291 <param name="selector" value="yes" />
250 <conditional name="seq_source"> 292 <conditional name="seq_source">
251 <param name="index_source" value="cached"/> 293 <param name="index_source" value="cached"/>
252 <param name="index" value="test_buildid"/> 294 <param name="index" value="test_buildid"/>
253 </conditional> 295 </conditional>
254 </conditional> 296 </conditional>
257 <not_has_text text="-Q " /> 299 <not_has_text text="-Q " />
258 <has_text text="-T " /> 300 <has_text text="-T " />
259 <has_text_matching expression="gffcompare.*-s " /> 301 <has_text_matching expression="gffcompare.*-s " />
260 <not_has_text text="-M " /> 302 <not_has_text text="-M " />
261 <not_has_text text="-N " /> 303 <not_has_text text="-N " />
262 <has_text text="-e 100 " />
263 <has_text text="-d 100 " /> 304 <has_text text="-d 100 " />
264 <has_text text="-p 'TCONS' " /> 305 <has_text text="-p 'TCONS' " />
265 <not_has_text text="-C " /> 306 <not_has_text text="-C " />
266 <not_has_text text="-A " /> 307 <not_has_text text="-A " />
267 <not_has_text text="-X " /> 308 <not_has_text text="-X " />
268 <not_has_text text="-K " /> 309 <not_has_text text="-K " />
269 </assert_command> 310 </assert_command>
270 <output file="gffcompare_out1.stats" name="transcripts_stats" compare="sim_size" /> 311 <output file="gffcompare_out1.stats" name="transcripts_stats" compare="sim_size" lines_diff="2"/>
271 <output file="gffcompare_out1.loci" name="transcripts_loci" compare="sim_size" /> 312 <output file="gffcompare_out1.loci" name="transcripts_loci" compare="sim_size" />
272 <output file="gffcompare_out1.tracking" name="transcripts_tracking" compare="sim_size" /> 313 <output file="gffcompare_out1.tracking" name="transcripts_tracking" compare="sim_size" />
273 <output file="gffcompare_out1.gtf" name="transcripts_combined" compare="sim_size" /> 314 <output file="gffcompare_out1.gtf" name="transcripts_combined" compare="sim_size" />
274 </test> 315 </test>
275 <!-- 2 inputs and reference, default options --> 316 <!-- Test 04: 2 inputs and reference, default options -->
276 <test expect_num_outputs="6"> 317 <test expect_num_outputs="6">
277 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> 318 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
278 <conditional name="annotation"> 319 <conditional name="conditional_annotation">
279 <param name="use_ref_annotation" value="Yes" /> 320 <param name="selector" value="yes"/>
280 <conditional name="ref_source"> 321 <conditional name="ref_source">
281 <param name="ref_source_sel" value="history"/> 322 <param name="ref_source_sel" value="history"/>
282 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" /> 323 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
283 </conditional> 324 </conditional>
325 <conditional name="conditional_strict">
326 <param name="selector" value="--strict-match"/>
327 <param name="e" value="100"/>
328 </conditional>
284 </conditional> 329 </conditional>
285 <conditional name="seq_data"> 330 <conditional name="seq_data">
286 <param name="use_seq_data" value="No" /> 331 <param name="seletor" value="no" />
287 </conditional> 332 </conditional>
288 <assert_command> 333 <assert_command>
289 <not_has_text text="-R " /> 334 <not_has_text text="-R " />
290 <not_has_text text="-Q " /> 335 <not_has_text text="-Q " />
291 <not_has_text text="--strict-match " /> 336 <has_text text="--strict-match " />
292 <not_has_text text="-T " /> 337 <not_has_text text="-T " />
293 <not_has_text text="-M " /> 338 <not_has_text text="-M " />
294 <not_has_text text="-N " /> 339 <not_has_text text="-N " />
295 <has_text text="-e 100 " /> 340 <has_text text="-e 100 " />
296 <has_text text="-d 100 " /> 341 <has_text text="-d 100 " />
301 <not_has_text text="-C " /> 346 <not_has_text text="-C " />
302 <not_has_text text="-A " /> 347 <not_has_text text="-A " />
303 <not_has_text text="-X " /> 348 <not_has_text text="-X " />
304 <not_has_text text="-K " /> 349 <not_has_text text="-K " />
305 </assert_command> 350 </assert_command>
306 <output file="gffcompare_out2.stats" name="transcripts_stats" /> 351 <output file="gffcompare_out2.stats" name="transcripts_stats" compare="sim_size" />
307 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" /> 352 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
308 <output file="gffcompare_out2.tracking" name="transcripts_tracking" /> 353 <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
309 <output file="gffcompare_out2.gtf" name="transcripts_combined" /> 354 <output file="gffcompare_out2.gtf" name="transcripts_combined" />
310 <output_collection name="refmap_output" type="list" count="2"> 355 <output_collection name="refmap_output_collection" type="list" count="2">
311 <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.refmap" ftype="tabular" /> 356 <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.refmap" ftype="tabular" />
312 <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.refmap" ftype="tabular" /> 357 <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.refmap" ftype="tabular" />
313 </output_collection> 358 </output_collection>
314 <output_collection name="tmap_output" type="list" count="2"> 359 <output_collection name="tmap_output_collection" type="list" count="2">
315 <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.tmap" ftype="tabular" /> 360 <element name="gffcompare_in1_gtf" file="gffcompare_out2-1.tmap" ftype="tabular" />
316 <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.tmap" ftype="tabular" /> 361 <element name="gffcompare_in2_gtf" file="gffcompare_out2-2.tmap" ftype="tabular" />
317 </output_collection> 362 </output_collection>
318 </test> 363 </test>
319 <!-- 2 inputs and reference (cached), non default options, only refmap output --> 364 <!-- Test 05: 2 inputs and reference (cached), non default options -->
320 <test expect_num_outputs="5"> 365 <test expect_num_outputs="6">
321 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" /> 366 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" dbkey="hg17" />
322 <conditional name="annotation"> 367 <conditional name="conditional_annotation">
323 <param name="use_ref_annotation" value="Yes" /> 368 <param name="selector" value="yes"/>
324 <conditional name="ref_source"> 369 <conditional name="ref_source">
325 <param name="ref_source_sel" value="cached"/> 370 <param name="ref_source_sel" value="cached"/>
326 <param name="index" value="test_buildid"/> 371 <param name="index" value="test_buildid"/>
327 </conditional> 372 </conditional>
328 <param name="ignore_nonoverlapping_reference" value="Yes" /> 373 <param name="R" value="true"/>
329 <param name="ignore_nonoverlapping_transfrags" value="Yes" /> 374 <param name="Q" value="true"/>
330 <param name="strict_match" value="--strict-match" /> 375 <param name="discard_single_exon" value="-M"/>
331 <param name="refmap_tmap" value="refmap" /> 376 <param name="no_merge" value="true"/>
332 </conditional> 377 <conditional name="conditional_strict">
333 <conditional name="seq_data"> 378 <param name="selector" value="--strict-match"/>
334 <param name="use_seq_data" value="No" /> 379 <param name="e" value="101"/>
335 </conditional> 380 </conditional>
336 <param name="discard_single_exon" value="-M"/> 381 <conditional name="conditional_duplication">
337 <param name="discard_duplicates" value="-D"/> 382 <param name="selector" value="-D"/>
338 <param name="no_merge" value="--no-merge" /> 383 <param name="S" value="false"/>
339 <param name="max_dist_exon" value="101" /> 384 </conditional>
385 </conditional>
340 <param name="max_dist_group" value="99" /> 386 <param name="max_dist_group" value="99" />
341 <param name="chr_stats" value="--chr-stats" /> 387 <param name="chr_stats" value="true" />
342 <assert_command> 388 <assert_command>
343 <has_text text="-R " /> 389 <has_text text="-R " />
344 <has_text text="-Q " /> 390 <has_text text="-Q " />
345 <has_text text="--strict-match " /> 391 <has_text text="--strict-match " />
346 <not_has_text text="-T " /> 392 <not_has_text text="-T " />
359 </assert_command> 405 </assert_command>
360 <output file="gffcompare_out2.stats" name="transcripts_stats" compare="sim_size" /> 406 <output file="gffcompare_out2.stats" name="transcripts_stats" compare="sim_size" />
361 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" /> 407 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
362 <output file="gffcompare_out2.tracking" name="transcripts_tracking" compare="sim_size" /> 408 <output file="gffcompare_out2.tracking" name="transcripts_tracking" compare="sim_size" />
363 <output file="gffcompare_out2.gtf" name="transcripts_combined" compare="sim_size" delta="50000"/> 409 <output file="gffcompare_out2.gtf" name="transcripts_combined" compare="sim_size" delta="50000"/>
364 <output_collection name="refmap_output" type="list" count="0"/> <!-- because of -M no refmaps are created --> 410 <output_collection name="refmap_output_collection" type="list" count="0"/> <!-- because of -M no refmaps are created -->
365 </test> 411 <output_collection name="tmap_output_collection" type="list" count="2"/>
366 <!-- 2 inputs and reference, non default advanced options, only tmap output --> 412 </test>
367 <test expect_num_outputs="5"> 413 <!-- Test 06: 2 inputs and reference, non default advanced options -->
414 <test expect_num_outputs="6">
368 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> 415 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
369 <conditional name="annotation"> 416 <conditional name="conditional_annotation">
370 <param name="use_ref_annotation" value="Yes" /> 417 <param name="selector" value="yes"/>
371 <conditional name="ref_source"> 418 <conditional name="ref_source">
372 <param name="ref_source_sel" value="history"/> 419 <param name="ref_source_sel" value="history"/>
373 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" /> 420 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
374 </conditional> 421 </conditional>
375 <param name="refmap_tmap" value="tmap" />
376 </conditional> 422 </conditional>
377 <conditional name="seq_data"> 423 <conditional name="seq_data">
378 <param name="use_seq_data" value="No" /> 424 <param name="selector" value="no" />
379 </conditional> 425 </conditional>
380 <section name="adv_output"> 426 <section name="adv_output">
381 <param name="p" value="OTHER" /> 427 <param name="p" value="OTHER" />
382 <param name="C" value="-C" /> 428 <param name="C" value="true" />
383 <param name="A" value="-A" /> 429 <param name="A" value="true" />
384 <param name="X" value="-X" /> 430 <param name="X" value="true" />
385 <param name="K" value="-K" /> 431 <param name="K" value="true" />
386 </section> 432 </section>
387 <assert_command> 433 <assert_command>
388 <not_has_text text="-R " /> 434 <not_has_text text="-R " />
389 <not_has_text text="-Q " /> 435 <not_has_text text="-Q " />
390 <not_has_text text="--strict-match " /> 436 <not_has_text text="--strict-match " />
391 <not_has_text text="-T " /> 437 <not_has_text text="-T " />
392 <not_has_text text="-M " /> 438 <not_has_text text="-M " />
393 <not_has_text text="-N " /> 439 <not_has_text text="-N " />
394 <has_text text="-e 100 " /> 440 <not_has_text text="-e 100 " />
395 <has_text text="-d 100 " /> 441 <has_text text="-d 100 " />
396 <not_has_text text="-D " /> 442 <not_has_text text="-D " />
397 <not_has_text text="--no-merge " /> 443 <not_has_text text="--no-merge " />
398 <not_has_text text="--chr-stats" /> 444 <not_has_text text="--chr-stats" />
399 <has_text text="-p 'OTHER' " /> 445 <has_text text="-p 'OTHER' " />
404 </assert_command> 450 </assert_command>
405 <output file="gffcompare_out2.stats" name="transcripts_stats" compare="sim_size" /> 451 <output file="gffcompare_out2.stats" name="transcripts_stats" compare="sim_size" />
406 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" /> 452 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
407 <output file="gffcompare_out2.tracking" name="transcripts_tracking" compare="sim_size" /> 453 <output file="gffcompare_out2.tracking" name="transcripts_tracking" compare="sim_size" />
408 <output file="gffcompare_out2.gtf" name="transcripts_combined" compare="sim_size" delta="50000"/> 454 <output file="gffcompare_out2.gtf" name="transcripts_combined" compare="sim_size" delta="50000"/>
409 <output_collection name="tmap_output" type="list" count="2"/> 455 <output_collection name="tmap_output_collection" type="list" count="2"/>
410 </test> 456 <output_collection name="tmap_output_collection" type="list" count="2"/>
411 <!-- 2 inputs and reference, default options, no tmap or refmap output --> 457 </test>
458 <!-- Test 07: 2 inputs and reference, default options, no tmap or refmap output -->
412 <test expect_num_outputs="4"> 459 <test expect_num_outputs="4">
413 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" /> 460 <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
414 <conditional name="annotation"> 461 <param name="refmap_tmap" value="false"/>
415 <param name="use_ref_annotation" value="Yes" /> 462 <conditional name="conditional_annotation">
463 <param name="selector" value="yes"/>
416 <conditional name="ref_source"> 464 <conditional name="ref_source">
417 <param name="ref_source_sel" value="history"/> 465 <param name="ref_source_sel" value="history"/>
418 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" /> 466 <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
419 </conditional> 467 </conditional>
420 <param name="refmap_tmap" value="" />
421 </conditional> 468 </conditional>
422 <conditional name="seq_data"> 469 <conditional name="seq_data">
423 <param name="use_seq_data" value="No" /> 470 <param name="selector" value="No" />
424 </conditional> 471 </conditional>
425 <assert_command> 472 <assert_command>
426 <not_has_text text="-R " /> 473 <not_has_text text="-R " />
427 <not_has_text text="-Q " /> 474 <not_has_text text="-Q " />
428 <not_has_text text="--strict-match " /> 475 <not_has_text text="--strict-match " />
429 <has_text text="-T " /> 476 <has_text text="-T " />
430 <not_has_text text="-M " /> 477 <not_has_text text="-M " />
431 <not_has_text text="-N " /> 478 <not_has_text text="-N " />
432 <has_text text="-e 100 " />
433 <has_text text="-d 100 " /> 479 <has_text text="-d 100 " />
434 <not_has_text text="-D " /> 480 <not_has_text text="-D " />
435 <not_has_text text="--no-merge " /> 481 <not_has_text text="--no-merge " />
436 <not_has_text text="--chr-stats" /> 482 <not_has_text text="--chr-stats" />
437 <has_text text="-p 'TCONS' " /> 483 <has_text text="-p 'TCONS' " />
443 <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="2" /> 489 <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="2" />
444 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" /> 490 <output file="gffcompare_out2.loci" name="transcripts_loci" compare="sim_size" />
445 <output file="gffcompare_out2.tracking" name="transcripts_tracking" /> 491 <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
446 <output file="gffcompare_out2.gtf" name="transcripts_combined" /> 492 <output file="gffcompare_out2.gtf" name="transcripts_combined" />
447 </test> 493 </test>
448 494 <!-- Test 08: 1 input and reference, default options, no tmap or refmap output -->
449 <test expect_num_outputs="4"> 495 <test expect_num_outputs="4">
450 <param ftype="gtf" name="gffinputs" value="gffcompare_in4.gtf" /> 496 <param ftype="gtf" name="gffinputs" value="gffcompare_in4.gtf" />
451 <conditional name="annotation"> 497 <param name="refmap_tmap" value="false"/>
452 <param name="use_ref_annotation" value="Yes" /> 498 <conditional name="conditional_annotation">
499 <param name="selector" value="yes"/>
453 <conditional name="ref_source"> 500 <conditional name="ref_source">
454 <param name="ref_source_sel" value="history"/> 501 <param name="ref_source_sel" value="history"/>
455 <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" /> 502 <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" />
456 </conditional> 503 </conditional>
457 <param name="ignore_nonoverlapping_reference" value="Yes" /> 504 <param name="R" value="true"/>
458 <param name="ignore_nonoverlapping_transfrags" value="No" /> 505 <param name="Q" value="false"/>
459 <param name="refmap_tmap" value="" /> 506 <conditional name="conditional_strict">
460 </conditional> 507 <param name="selector" value="--strict-match"/>
461 <param name="use_seq_data" value="No" /> 508 <param name="e" value="100"/>
462 <param name="discard_single_exon" value="" /> 509 </conditional>
463 <param name="max_dist_exon" value="100" /> 510 <param name="discard_single_exon" value=""/>
511 </conditional>
464 <param name="max_dist_group" value="100" /> 512 <param name="max_dist_group" value="100" />
465 <output file="gffcompare_out3.stats" name="transcripts_stats"/> 513 <output file="gffcompare_out3.stats" name="transcripts_stats"/>
466 <output file="gffcompare_out3.loci" name="transcripts_loci" compare="sim_size" /> 514 <output file="gffcompare_out3.loci" name="transcripts_loci" compare="sim_size" />
467 <output file="gffcompare_out3.tracking" name="transcripts_tracking" /> 515 <output file="gffcompare_out3.tracking" name="transcripts_tracking" />
468 <output file="gffcompare_out3.gtf" name="transcripts_annotated" /> 516 <output file="gffcompare_out3.gtf" name="transcripts_annotated" />
469 </test> 517 </test>
470 </tests> 518 </tests>
471 <help> 519 <help>
472 <![CDATA[ 520 <![CDATA[
521
522 .. class:: infomark
523
473 **GffCompare Overview** 524 **GffCompare Overview**
474 525
475 ## GffCompare 526 GffCompare is designed to systematically compare one or more sets of transcript predictions to a reference annotation at different levels of granularity (base level, exon level,
476 * compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, Stringtie). 527 transcript level etc.), and in the process to provide a way to "annotate" such transcript predictions based on their overlaps or proximity to reference annotation transcripts.
477 * collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulted from assembly of different samples) 528 When multiple transcript files (samples) are provided, GffCompare generates a non-redundant combined set of transcripts, tracking structurally equivalent transcripts across multiple
478 * classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in a 529 samples and classifying them according to their relationship to reference transcripts. GffCompare has the following main functions:
479 annotation file (also in GTF/GFF3 format) 530
480 531 - Merge structurally equivalent transcripts and transcript fragments (transfrags) across multiple samples
481 More information can be found here: https://ccb.jhu.edu/software/stringtie/gffcompare.shtml. 532 - Assess the accuracy of the assembled transcripts from an RNA-Seq sample by comparing it to known annotation
482 533 - Track, annotate, and report all structurally distinct transfrags across multiple samples
483 The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare" 534
484 (see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare 535 The last two purposes require the user to provide a known reference annotation file that GffCompare then uses to classify all the transcripts in the input samples according to the
485 are supported by GffCompare, while new features will likely be added to GffCompare in the future. 536 reference transcript that they most closely overlap.
486 537
487 A notable difference of GffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option), 538 To assess the accuracy of transcriptome assemblies, GffCompare reports several accuracy metrics previously employed for gene prediction evaluation. These metrics include sensitivity
488 gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the 539 and precision as well as the number of novel or missed features, and the metrics are computed at various levels (base, exon, intron chain, transcript, or locus).
489 same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per 540
490 their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can thus 541 ----
491 be used as a simple way of annotating a set of transcripts. 542
492 543 .. class:: infomark
493 Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e. 544
494 contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into 545 **Annotation mode**
495 their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed 546
496 and is simply ignored when given). 547 When a single query GTF/GFF file is given as input for analysis, along with a reference annotation (-r option), GffCompare switches into annotation mode and it generates an *annotated
548 transcripts* file, allowing transcripts to be annotated using a reference annotation. It should be noted that this file is not generated when options to remove "duplicated"/redundant transfrags are given (-D, -S, -C, -A, -X).
549
550 ----
551
552 .. class:: infomark
553
554 **Merging structurally equivalent transcripts**
555
556 When multiple input GTF/GFF files are provided, GffCompare reports a GTF file named *combined transcripts* containing the union of all transfrags in each sample. If a transfrag with the same
557 exact intron chain is present in both samples, it is thus reported only once in the output file.
558
559 **The "super-locus" concept**
560
561 A super-locus is a region of the genome where predicted transcripts and reference transcripts get clustered together by exon overlaps. When multiple GFF files are provided as input to GffCompare,
562 this clustering is performed across all the input files. Due to the transitive nature of this clustering, these super-loci can occasionally get very large, sometimes merging a few distinct reference
563 gene regions together, especially if there is a lot of transcription or alignment noise around the individual gene regions. For each super-locus, GffCompare assigns a unique identifier with the XLOC prefix.
564
565 ----
566
567 .. class:: infomark
568
569 **Transcript accuracy estimation**
570
571 GffCompare can be used to assess the accuracy of transcriptome assemblies produced by programs like StringTie with respect to a known reference annotation. To this end, GffCompare
572 reports various statistics related to the accuracy of the input transcripts compared to the reference annotation in the *accuracy stats* file.
573
574 Among these statistics are sensitivity and precision values computed at various levels (base, exon, intron chain, transcript, locus), which are calculated as:
575
576 * Sensitivity = TP/(TP+FN)
577 * Precision = TP/(TP+FP)
578
579 where TP stands for "true positives", or query features (bases, exons, introns, transcripts, etc.) that agree with the corresponding reference annotation features; FN means "false negatives",
580 i.e. features that are found in the reference annotation but are not present in the input data; FP ("false positives") are features present in the input data but not confirmed by any reference
581 annotation data. Notice that FP + TP amounts to the whole input set of query features in the input file. If multiple query GTF/GFF files are given as input, these metrics are computed separately
582 for each sample.
583
584 Sensitivity and Precision values are estimated at various levels, which are largely an increasingly stringent way of evaluating the accuracy/correctness of a set of predicted transcripts (transfrags),
585 when compared to the reference annotation. The six different levels that GffCompare uses are described below:
586
587 * **Base level**: At the base level, TP represents the number of exon bases that are reported at the same coordinate on both the query transcripts and any reference transcript, FN is the number of bases in reference data exons that are not covered at all by any of the query exons, and FP is the number of bases which are covered by predicted transcripts' exons but not covered by any reference transcript exons.
588 * **Exon level**: We define the TP, FN, and FP values at the exon level similar to the base level, but now the unit of comparison is the exon interval on the genome, i.e. if an exon of the predicted transcript overlaps and matches the boundaries of a reference transcript exon, then it is counted as a TP.
589 * **Intron Level**: Intron intervals are the units that are matched at the intron level, therefore each intron of the predicted transcript is checked against any introns of the reference transcripts in the same region and if there is one with the same exact start-end coordinates, it is counted as a TP.
590 * **Intron chain level**: At this level we count as a TP any query transcript for which all of its introns can be found, with the same exact intron coordinates as in a reference transcript that has the same number of introns. Matching all the introns at this level implies that all the internal exons also match, but this might not be true for the external boundaries of the terminal exons.
591 * **Transcript level**: Note that intron chain level values are calculated only by looking at multi-exon transcripts, so it completely ignores the single-exon transcripts, which can be quite numerous in an RNA-Seq experiment (possibly due to a lot of transcriptional and alignment noise). The transcript level considers single-exons as well. A TP at this level is defined as a full exon chain match between the predicted transcript and a reference transcript, where all internal exons match and the outer boundaries of the terminal query exons can only slightly differ from the reference exons (with at most 100 bases by default). Also GffCompare considers single-exon transcripts as matching an overlapping single-exon reference transcript if there is a significant overlap between the two (more than 80% of the longer transcript by default).
592 * **Locus level**: At this level GffCompare considers that an observed locus, defined as a cluster of exon-overlapping transcripts, matches a similarly built reference locus if at least one predicted transcript has a transcript level match with a reference transcript in the corresponding reference locus.
593
594 ----
595
596 .. class:: infomark
597
598 **Tracking transcripts**
599
600 GffCompare can also be used to track all transcripts that are structurally equivalent among the different input files. GffCompare considers transcripts matching (or structurally equivalent) if all
601 their introns are identical. Note that matching transcripts are allowed to differ on the length of the first and last exons, since these lengths can usually vary across samples for the same biological transcript.
602
603 A list of all matching transcripts is reported in a file called *tracking file* in which each row represents a transcript. The first column in this file represents a unique id assigned to that transcript.
604 The second column represents the super-locus that contains that transcript. If a reference annotation is provided, the 3rd and 4th columns contain the reference annotation transcript that was found to be
605 closest to the transcript and the classification code that specifies the relationship between these two transcripts, respectively. The rest of the columns show the corresponding
606 transcript from each input file in order.
607
608 **RefMap and TMAP files**
609
610 In order to quickly see which reference transcripts match which transcripts from a sample file, two other files, called *RefMap* and *TMAP*, are also created for each query. The RefMap file is a tab-delimited file
611 that has a row for each reference transcript that either fully or partially matches a transcript from the given input file. Conversely, the TMAP file has a row for each input transcript, while the columns in this
612 file describe the most closely matching reference transcript for that transcript.
613
497 ]]> 614 ]]>
498 </help> 615 </help>
499 <citations> 616 <expand macro="citations" />
500 <citation type="doi">10.1038/nbt.1621</citation>
501 </citations>
502 </tool> 617 </tool>