comparison gffread.xml @ 8:154d00cbbf2d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffread commit f40643d8b80299ebb84faebe92579321ac459746"
author iuc
date Sat, 25 Sep 2021 15:38:31 +0000
parents 4dea02886337
children 3e436657dcd0
comparison
equal deleted inserted replaced
7:4dea02886337 8:154d00cbbf2d
1 <tool id="gffread" name="gffread" version="@VERSION@.0"> 1 <tool id="gffread" name="gffread" version="@GALAXY_TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>Filters and/or converts GFF3/GTF2 records</description> 2 <description>Filters and/or converts GFF3/GTF2 records</description>
3 <xrefs>
4 <xref type="bio.tools">gffread</xref>
5 </xrefs>
3 <macros> 6 <macros>
4 <token name="@VERSION@">0.11.6</token> 7 <!-- the version of this tool must not be lowered since in the past 2.x was used
8 let's use small increments and hope that gffread catches up one day -->
9 <token name="@GALAXY_TOOL_VERSION@">2.2.1.3</token>
10 <token name="@TOOL_VERSION@">0.12.7</token>
11 <token name="@VERSION_SUFFIX@">0</token>
5 <xml name="fasta_output_select"> 12 <xml name="fasta_output_select">
6 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs"> 13 <param name="fa_outputs" type="select" display="checkboxes" multiple="true" label="Select fasta outputs">
7 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w exons.fa)</option> 14 <option value="-w exons.fa">fasta file with spliced exons for each GFF transcript (-w)</option>
8 <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x cds.fa)</option> 15 <option value="-x cds.fa">fasta file with spliced CDS for each GFF transcript (-x)</option>
9 <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y pep.fa)</option> 16 <option value="-y pep.fa">protein fasta file with the translation of CDS for each record (-y)</option>
10 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option> 17 <option value="-W">for each fasta: record the exon coordinates projected onto the spliced sequence (-W)</option>
18 <option value="-S">for protein fasta: use '*' instead of '.' as stop codon translation (-S)</option>
11 </param> 19 </param>
12 </xml> 20 </xml>
13 <xml name="ref_filtering_select"> 21 <xml name="ref_filtering_select">
14 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters"> 22 <param name="ref_filtering" type="select" display="checkboxes" multiple="true" label="reference based filters">
15 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option> 23 <option value="-N">discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus, i.e. not GT-AG, GC-AG or AT-AC (-N)</option>
20 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option> 28 <option value="-B">single-exon transcripts are also checked on the opposite strand (-B with -V)</option>
21 --> 29 -->
22 </param> 30 </param>
23 </xml> 31 </xml>
24 <xml name="trackname"> 32 <xml name="trackname">
25 <param name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="(-t track_name}"> 33 <param argument="-t" name="tname" type="text" value="" optional="true" label="Trackname to use in the second column of each GFF output line" help="">
26 <validator type="regex">\w+</validator> 34 <validator type="regex">\w+</validator>
27 </param> 35 </param>
28 </xml> 36 </xml>
29 <xml name="merge_opts"> 37 <xml name="merge_opts">
30 <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option> 38 <option value="-K">also collapse shorter, fully contained transcripts with fewer introns than the container (-K)</option>
31 <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option> 39 <option value="-Q">remove the containment restriction: multi-exon transcripts will be collapsed if just their introns match, while single-exon transcripts can partially overlap 80% (-Q)</option>
32 <option value="-d dupinfo">output collapsing info (-d dupinfo)</option> 40 <option value="-d dupinfo">output collapsing info (-d)</option>
33 </xml> 41 </xml>
34 <xml name="cluster_opts"> 42 <xml name="cluster_opts">
35 <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option> 43 <option value="--force-exons"> make sure that the lowest level GFF features are printed as 'exon' features (--force-exons)</option>
36 <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option> 44 <option value="-Z">merge close exons into a single exon (for intron size &lt; 4) (-Z)</option>
37 </xml> 45 </xml>
46 <expand macro="cluster_opts" /> 54 <expand macro="cluster_opts" />
47 </param> 55 </param>
48 </xml> 56 </xml>
49 </macros> 57 </macros>
50 <requirements> 58 <requirements>
51 <requirement type="package" version="@VERSION@">gffread</requirement> 59 <requirement type="package" version="@TOOL_VERSION@">gffread</requirement>
52 </requirements> 60 </requirements>
61 <version_command>gffread --version</version_command>
53 <command detect_errors="aggressive"> 62 <command detect_errors="aggressive">
54 <![CDATA[ 63 <![CDATA[
55 #if $reference_genome.source == 'history': 64 #if $reference_genome.source == 'history':
56 ln -s '$reference_genome.genome_fasta' genomeref.fa && 65 ln -s '$reference_genome.genome_fasta' genomeref.fa &&
57 #end if 66 #end if
67
58 gffread '$input' 68 gffread '$input'
69 #if $input.ext.startswith("bed")
70 --in-bed
71 #end if
59 #if $reference_genome.source == 'cached': 72 #if $reference_genome.source == 'cached':
60 -g '${reference_genome.fasta_indexes.fields.path}' 73 -g '${reference_genome.fasta_indexes.fields.path}'
61 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '': 74 #if $reference_genome.ref_filtering and str($reference_genome.ref_filtering) != '':
62 #echo ' '.join(str($reference_genome.ref_filtering).split(',')) 75 #echo ' '.join(str($reference_genome.ref_filtering).split(','))
63 #end if 76 #end if
102 #if $reference_genome.fa_outputs and str($reference_genome.fa_outputs) != '': 115 #if $reference_genome.fa_outputs and str($reference_genome.fa_outputs) != '':
103 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(',')) 116 #echo ' ' + ' '.join(str($reference_genome.fa_outputs).split(','))
104 #end if 117 #end if
105 #end if 118 #end if
106 #if $gffs.gff_fmt != 'none': 119 #if $gffs.gff_fmt != 'none':
107 #if $gffs.tname: 120 #if $gffs.gff_fmt != 'bed' and $gffs.tname:
108 -t '$gffs.tname' 121 -t '$gffs.tname'
109 #end if 122 #end if
110 #if $gffs.gff_fmt == 'gff': 123 #if $gffs.gff_fmt == 'gff':
124 ## TODO: bug - the file_ext check below compares against 'gft'; it should be 'gtf'
111 #if $input.datatype.file_ext == 'gft': 125 #if $input.datatype.file_ext == 'gft':
112 $gffs.ensembl 126 $gffs.ensembl
113 #end if 127 #end if
114 $gffs.output_cmd
115 #elif $gffs.gff_fmt == 'gtf':
116 $gffs.output_cmd
117 #end if 128 #end if
118 #end if 129 #if $gffs.gff_fmt == 'gtf'
130 -T
131 #elif $gffs.gff_fmt == 'bed'
132 --bed
133 #end if
134 -o output.$gffs.gff_fmt
135 #end if
136
137 ## Missing options
138 ##
139 ## --ids
140 ## --nids
141 ## -l
142 ## --jmatch
143 ## --nc
144 ## --ignore-locus
145 ## -A -s (see above)
146 ## --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
147 ## --sort-by : sort the reference sequences by the order in which their
148 ## names are given in the <refseq.lst> file
149 ## Misc
150 ## --keep-exon-attrs : for -F option, do not attempt to reduce redundant
151 ## --attrs
152 ## --keep-genes : in transcript-only mode (default), also preserve gene records
153 ## --keep-comments: for GFF3 input/output, try to preserve comments
154 ## -B (see above)
155 ## -P
156 ## --add-hasCDS : add a "hasCDS" attribute with value "true" for transcripts
157 ## that have CDS features
158 ## --adj-stop stop codon adjustment: enables -P and performs automatic
159 ## adjustment of the CDS stop coordinate if premature or downstream
160
161 ## --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
162 ## features (see --tlf option below); automatic if the input
163 ## filename ends with .tlf)
164 ## --stream: fast processing of input GFF/BED transcripts as they are received
165 ## (no sorting, exons must be grouped by transcript in the input data)
166
167 ## Clustering
168
169 ## -Y
170
171 ## Output
172
173 ## --gene2exon
174 ## --t-adopt
175 ## -j
176 ## --w-add
177 ## --w-nocds
119 ]]> 178 ]]>
120 </command> 179 </command>
121 <inputs> 180 <inputs>
122 <param name="input" type="data" format="gff3,gtf" label="Input GFF3 or GTF feature file"/> 181 <param name="input" type="data" format="bed,gff3,gtf" label="Input BED, GFF3 or GTF feature file"/>
123 <!-- filtering --> 182 <!-- filtering -->
124 <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters"> 183 <param name="filtering" type="select" display="checkboxes" multiple="true" label="filters">
125 <option value="-U">discard single-exon transcripts (-U)</option> 184 <option value="-U">discard single-exon transcripts (-U)</option>
126 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option> 185 <option value="-C">coding only: discard mRNAs that have no CDS feature (-C)</option>
127 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option> 186 <option value="-G">only parse additional exon attributes from the first exon and move them to the mRNA level (useful for GTF input) (-G)</option>
133 <option value="none">No</option> 192 <option value="none">No</option>
134 <option value="filter">Yes</option> 193 <option value="filter">Yes</option>
135 </param> 194 </param>
136 <when value="none"/> 195 <when value="none"/>
137 <when value="filter"> 196 <when value="filter">
138 <param name="range" type="text" value="" label="Only show transcripts overlapping coordinate range"> 197 <param argument="-r" name="range" type="text" value="" label="Only show transcripts overlapping coordinate range">
139 <help><![CDATA[ 198 <help><![CDATA[
140 (-r [['strand']'chr':]'start'..'end') <br> 199 [['strand']'chr':]'start'..'end' <br>
141 examples: <br> 200 examples: <br>
142 1000..500000 <br> 201 1000..500000 <br>
143 chr1:1000..500000 <br> 202 chr1:1000..500000 <br>
144 +chr1:1000..500000 <br> 203 +chr1:1000..500000 <br>
145 -chr1:1000..500000 204 -chr1:1000..500000
146 ]]> 205 ]]>
147 </help> 206 </help>
148 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator> 207 <validator type="regex">(([+-])?(\w+:))?\d+\.\.\d+</validator>
149 </param> 208 </param>
150 <param name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false" 209 <param argument="-R" name="discard_partial" type="boolean" truevalue="-R" falsevalue="" checked="false"
151 label="Discard all transcripts that are not fully contained within the given range" help="(-R)"/> 210 label="Discard all transcripts that are not fully contained within the given range" help=""/>
152 </when> 211 </when>
153 </conditional> 212 </conditional>
154 <param name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns" 213 <param argument="-i" name="maxintron" type="integer" value="" optional="true" min="0" label="Filter out transcripts with large introns"
155 help="If set, discard transcripts having an intron larger (-i max_intron)"/> 214 help="If set, discard transcripts having an intron larger"/>
156 <param name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" > 215 <param argument="-m" name="chr_replace" type="data" format="tabular" optional="true" label="Replace reference sequence names" >
157 <help><![CDATA[(-m chr_replace) <br> 216 <help><![CDATA[
158 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br> 217 chr_replace is a reference sequence replacement table consisting of 2 columns: "original_ref_ID" "new_ref_ID"<br>
159 It is useful for switching between Ensembl and UCSC naming conventions <br> 218 It is useful for switching between Ensembl and UCSC naming conventions <br>
160 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out 219 NOTE: GFF records on reference sequences that are not found among the "original_ref_ID" entries in this file will be filtered out
161 ]]> 220 ]]>
162 </help> 221 </help>
172 </param> 231 </param>
173 --> 232 -->
174 233
175 <!-- merging --> 234 <!-- merging -->
176 <conditional name="merging"> 235 <conditional name="merging">
177 <param name="merge_sel" type="select" label="Transcript merging" help="(-M/--merge or --cluster-only)"> 236 <param name="merge_sel" type="select" label="Transcript merging" help="">
178 <option value="none">none</option> 237 <option value="none">none</option>
179 <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts</option> 238 <option value="merge">merge: cluster the input transcripts into loci, collapsing matching transcripts (--merge)</option>
180 <option value="cluster">cluster-only: merge but without collapsing matching transcripts</option> 239 <option value="cluster">cluster-only: merge but without collapsing matching transcripts (--cluster-only)</option>
181 </param> 240 </param>
182 <when value="none"/> 241 <when value="none"/>
183 <when value="merge"> 242 <when value="merge">
184 <param name="merge_cmd" type="hidden" value="--merge"/> 243 <param name="merge_cmd" type="hidden" value="--merge"/>
185 <expand macro="merge_opt_sel" /> 244 <expand macro="merge_opt_sel" />
190 </when> 249 </when>
191 </conditional> 250 </conditional>
192 <!-- reference sequence file --> 251 <!-- reference sequence file -->
193 <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M --> 252 <!-- Error: -g option is required for options -w, -x, -y, -V, -N, -M -->
194 <conditional name="reference_genome"> 253 <conditional name="reference_genome">
195 <param name="source" type="select" label="Reference Genome" help="(-g genome.fasta) NOTE: Required for fasta outputs"> 254 <param name="source" type="select" label="Reference Genome" help="NOTE: Required for fasta outputs">
196 <option value="none">none</option> 255 <option value="none">none</option>
197 <option value="cached"></option> 256 <option value="cached"></option>
198 <option value="history">From your history</option> 257 <option value="history">From your history</option>
199 </param> 258 </param>
200 <when value="none"> 259 <when value="none">
201 </when> 260 </when>
202 <when value="cached"> 261 <when value="cached">
203 <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> 262 <param argument="-g" name="fasta_indexes" type="select" label="Source FASTA Sequence">
204 <options from_data_table="all_fasta"/> 263 <options from_data_table="all_fasta"/>
205 </param> 264 </param>
206 <expand macro="ref_filtering_select" /> 265 <expand macro="ref_filtering_select" />
207 <expand macro="fasta_output_select" /> 266 <expand macro="fasta_output_select" />
208 </when> 267 </when>
209 <when value="history"> 268 <when value="history">
210 <param name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/> 269 <param argument="-g" name="genome_fasta" type="data" format="fasta" label="Genome Reference Fasta"/>
211 <expand macro="ref_filtering_select" /> 270 <expand macro="ref_filtering_select" />
212 <expand macro="fasta_output_select" /> 271 <expand macro="fasta_output_select" />
213 </when> 272 </when>
214 </conditional> 273 </conditional>
215 274
217 <conditional name="gffs"> 276 <conditional name="gffs">
218 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff3|output.gtf)"> 277 <param name="gff_fmt" type="select" label="Feature File Output" help="(-o output.gff3|output.gtf)">
219 <option value="none">none</option> 278 <option value="none">none</option>
220 <option value="gff">GFF</option> 279 <option value="gff">GFF</option>
221 <option value="gtf">GTF</option> 280 <option value="gtf">GTF</option>
281 <option value="bed">BED</option>
222 </param> 282 </param>
223 <when value="none"> 283 <when value="none">
224 </when> 284 </when>
225 <when value="gff"> 285 <when value="gff">
226 <param name="output_cmd" type="hidden" value="-o output.gff3"/> 286 <param argument="-L" name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help=""/>
227 <param name="ensembl" type="boolean" truevalue="-L" falsevalue="" checked="false" label="Ensembl GTF to GFF3 conversion" help="(-L)"/>
228 <expand macro="trackname" /> 287 <expand macro="trackname" />
229 </when> 288 </when>
230 <when value="gtf"> 289 <when value="gtf">
231 <param name="output_cmd" type="hidden" value="-T -o output.gtf"/>
232 <expand macro="trackname" /> 290 <expand macro="trackname" />
233 </when> 291 </when>
292 <when value="bed">
293 </when>
234 </conditional> 294 </conditional>
235 295
236 <param name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false" 296 <param argument="-F" name="full_gff_attribute_preservation" type="boolean" truevalue="-F" falsevalue="" checked="false"
237 label="full GFF attribute preservation (all attributes are shown)" help="(-F)"/> 297 label="full GFF attribute preservation (all attributes are shown)" help=""/>
238 <param name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false" 298 <param argument="-D" name="decode_url" type="boolean" truevalue="-D" falsevalue="" checked="false"
239 label="decode url encoded characters within attributes" help="(-D)"/> 299 label="decode url encoded characters within attributes" help=""/>
240 <param name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false" 300 <param argument="-E" name="expose" type="boolean" truevalue="-E" falsevalue="" checked="false"
241 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help="(-E)"/> 301 label="warn about duplicate transcript IDs and other potential problems with the given GFF/GTF records" help=""/>
242 302
243 </inputs> 303 </inputs>
244 <outputs> 304 <outputs>
245 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff3"> 305 <data name="output_gff" format="gff3" metadata_source="input" label="${tool.name} on ${on_string}: gff3" from_work_dir="output.gff">
246 <filter>gffs['gff_fmt'] == 'gff'</filter> 306 <filter>gffs['gff_fmt'] == 'gff'</filter>
247 </data> 307 </data>
248 <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf"> 308 <data name="output_gtf" format="gtf" metadata_source="input" label="${tool.name} on ${on_string}: gtf" from_work_dir="output.gtf">
249 <filter>gffs['gff_fmt'] == 'gtf'</filter> 309 <filter>gffs['gff_fmt'] == 'gtf'</filter>
310 </data>
311 <data name="output_bed" format="bed" metadata_source="input" label="${tool.name} on ${on_string}: bed" from_work_dir="output.bed">
312 <filter>gffs['gff_fmt'] == 'bed'</filter>
250 </data> 313 </data>
251 <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa"> 314 <data name="output_exons" format="fasta" label="${tool.name} on ${on_string}: exons.fa" from_work_dir="exons.fa">
252 <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter> 315 <filter>'fa_outputs' in reference_genome and str(reference_genome['fa_outputs']).find('exons.fa') > 0 </filter>
253 </data> 316 </data>
254 <data name="output_cds" format="fasta" label="${tool.name} on ${on_string}: cds.fa" from_work_dir="cds.fa"> 317 <data name="output_cds" format="fasta" label="${tool.name} on ${on_string}: cds.fa" from_work_dir="cds.fa">
260 <data name="output_dupinfo" format="txt" label="${tool.name} on ${on_string}: dupinfo" from_work_dir="dupinfo"> 323 <data name="output_dupinfo" format="txt" label="${tool.name} on ${on_string}: dupinfo" from_work_dir="dupinfo">
261 <filter>'merge_options' in merging and merging['merge_options'].find('dupinfo') > 0</filter> 324 <filter>'merge_options' in merging and merging['merge_options'].find('dupinfo') > 0</filter>
262 </data> 325 </data>
263 </outputs> 326 </outputs>
264 <tests> 327 <tests>
265 <test> 328 <test expect_num_outputs="1">
266 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 329 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
267 <param name="gff_fmt" value="gff"/> 330 <param name="gff_fmt" value="gff"/>
268 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="2" /> 331 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
269 </test> 332 </test>
270 <test> 333 <test expect_num_outputs="1">
334 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
335 <param name="gff_fmt" value="gff"/>
336 <output name="output_gff" file="Homo_sapiens.GRCh37_19.71.gff3" ftype="gff3" lines_diff="4" />
337 </test>
338 <test expect_num_outputs="1">
271 <param name="input" ftype="gtf" value="ecoli-k12.gff3"/> 339 <param name="input" ftype="gtf" value="ecoli-k12.gff3"/>
272 <param name="gff_fmt" value="gff"/> 340 <param name="gff_fmt" value="gff"/>
273 <param name="full_gff_attribute_preservation" value="-F"/> 341 <param name="full_gff_attribute_preservation" value="-F"/>
274 <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="2" /> 342 <output name="output_gff" file="ecoli-k12.processed.gff3" ftype="gff3" lines_diff="4" />
275 </test> 343 </test>
276 <test> 344 <!-- bed output -->
277 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 345 <test expect_num_outputs="1">
278 <param name="filtering" value="--no-pseudo"/> 346 <param name="input" ftype="gff3" value="Homo_sapiens.GRCh37_19.71.gff3"/>
279 <param name="gff_fmt" value="gtf"/> 347 <param name="gff_fmt" value="bed"/>
280 <output name="output_gtf"> 348 <output name="output_bed" ftype="bed">
281 <assert_contents> 349 <assert_contents>
282 <not_has_text text="pseudo" /> 350 <has_n_lines n="42"/>
283 </assert_contents> 351 <has_n_columns n="13"/>
284 </output> 352 </assert_contents>
285 </test> 353 </output>
286 <test> 354 </test>
355 <!-- bed input and test tname -->
356 <test expect_num_outputs="1">
357 <param name="input" ftype="bed" value="Homo_sapiens.GRCh37_19.71.bed"/>
358 <param name="gff_fmt" value="gff"/>
359 <param name="tname" value="track name"/>
360 <output name="output_bed" ftype="gff3">
361 <assert_contents>
362 <has_n_lines n="388"/>
363 <!-- this will work with https://github.com/galaxyproject/galaxy/pull/12528 -->
364 <!-- <has_n_columns n="9" comment="#"/> -->
365 <has_text text="track name"/>
366 </assert_contents>
367 </output>
368 </test>
369 <test expect_num_outputs="1">
287 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 370 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
288 <param name="region_filter" value="filter"/> 371 <param name="region_filter" value="filter"/>
289 <param name="range" value="19:496500..504965"/> 372 <param name="range" value="19:496500..504965"/>
290 <param name="gff_fmt" value="gtf"/> 373 <param name="gff_fmt" value="gtf"/>
291 <output name="output_gtf"> 374 <output name="output_gtf">
293 <has_text text="ENST00000587541" /> 376 <has_text text="ENST00000587541" />
294 <has_text text="ENST00000382683" /> 377 <has_text text="ENST00000382683" />
295 </assert_contents> 378 </assert_contents>
296 </output> 379 </output>
297 </test> 380 </test>
298 <test> 381 <test expect_num_outputs="1">
299 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 382 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
300 <param name="region_filter" value="filter"/> 383 <param name="region_filter" value="filter"/>
301 <param name="range" value="19:496500..504965"/> 384 <param name="range" value="19:496500..504965"/>
302 <param name="discard_partial" value="true"/> 385 <param name="discard_partial" value="true"/>
303 <param name="gff_fmt" value="gtf"/> 386 <param name="gff_fmt" value="gtf"/>
306 <not_has_text text="ENST00000587541" /> 389 <not_has_text text="ENST00000587541" />
307 <has_text text="ENST00000382683" /> 390 <has_text text="ENST00000382683" />
308 </assert_contents> 391 </assert_contents>
309 </output> 392 </output>
310 </test> 393 </test>
311 <test> 394 <test expect_num_outputs="1">
312 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 395 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
313 <param name="filtering" value="-C"/> 396 <param name="filtering" value="-C"/>
314 <param name="region_filter" value="filter"/> 397 <param name="region_filter" value="filter"/>
315 <param name="range" value="19:496500..504965"/> 398 <param name="range" value="19:496500..504965"/>
316 <param name="gff_fmt" value="gtf"/> 399 <param name="gff_fmt" value="gtf"/>
319 <not_has_text text="ENST00000587541" /> 402 <not_has_text text="ENST00000587541" />
320 <has_text text="ENST00000382683" /> 403 <has_text text="ENST00000382683" />
321 </assert_contents> 404 </assert_contents>
322 </output> 405 </output>
323 </test> 406 </test>
324 <test> 407 <test expect_num_outputs="4">
325 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/> 408 <param name="input" ftype="gtf" value="Homo_sapiens.GRCh37_19.71.gtf"/>
326 <param name="source" value="history"/> 409 <param name="source" value="history"/>
327 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/> 410 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
328 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/> 411 <param name="fa_outputs" value="-w exons.fa,-x cds.fa,-y pep.fa"/>
329 <param name="region_filter" value="filter"/> 412 <param name="region_filter" value="filter"/>
352 <has_text text="ENST00000346144" /> 435 <has_text text="ENST00000346144" />
353 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" /> 436 <has_text text="MDFGLALLLAGLLGLLLGQSLQVKPLQVEPPEPVVAVALGASRQLTCRLACADRGASVQWRGLDTSLGAV" />
354 </assert_contents> 437 </assert_contents>
355 </output> 438 </output>
356 </test> 439 </test>
357 440 <test expect_num_outputs="1">
441 <param name="input" ftype="gtf" value="stop_codons.gtf"/>
442 <param name="source" value="history"/>
443 <param name="genome_fasta" ftype="fasta" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa"/>
444 <param name="fa_outputs" value="-y pep.fa,-S"/>
445 <output name="output_pep">
446 <assert_contents>
447 <has_text text="ENST00000269812" />
448 <has_text text="PLRGLHPRV*LQTPLERCPCWPPAGGTGGCPHCLLHLRLLQSPTPTALSEGGGAGTEAQPVTDVDPGRG*" />
449 </assert_contents>
450 </output>
451 </test>
358 </tests> 452 </tests>
359 <help> 453 <help>
360 <![CDATA[ 454 <![CDATA[
361 **gffread Filters and/or converts GFF3/GTF2 records** 455 **gffread Filters and/or converts GFF3/GTF2 records**
362 456
363 The gffread command is documented with the stringtie_ package. 457 The gffread command is documented with the stringtie_ package.
364 458
365 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread 459 .. _stringtie: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
366 460
367 461 gffread v0.12.7. Usage: ::
368 gffread v0.11.4. Usage: :: 462
369 463 gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>]
370 gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] 464 [-o <outfile>] [-t <trackname>] [-r [<strand>]<chr>:<start>-<end> [-R]]
371 [-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]] 465 [--jmatch <chr>:<start>-<end>] [--no-pseudo]
372 [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>] 466 [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]
373 [-i <maxintron>] [--bed] [--table <attrlist>] [--sort-by <refseq_list.txt>] 467 [-j ][--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]
374 468 [--stream] [--bed | --gtf | --tlf] [--table <attrlist>] [--sort-by <ref.lst>]
469 [<input_gff>]
470
375 Filter, convert or cluster GFF/GTF/BED records, extract the sequence of 471 Filter, convert or cluster GFF/GTF/BED records, extract the sequence of
376 transcripts (exon or CDS) and more. 472 transcripts (exon or CDS) and more.
377 By default (i.e. without -O) only transcripts are processed, discarding any 473 By default (i.e. without -O) only transcripts are processed, discarding any
378 other non-transcript features. Default output is a simplified GFF3 with only 474 other non-transcript features. Default output is a simplified GFF3 with only
379 the basic attributes. 475 the basic attributes.
380 476
381 <input_gff> is a GFF file, use '-' for stdin
382
383 Options: 477 Options:
384 478 --ids discard records/transcripts if their IDs are not listed in <IDs.lst>
479 --nids discard records/transcripts if their IDs are listed in <IDs.lst>
385 -i discard transcripts having an intron larger than <maxintron> 480 -i discard transcripts having an intron larger than <maxintron>
386 -l discard transcripts shorter than <minlen> bases 481 -l discard transcripts shorter than <minlen> bases
387 -r only show transcripts overlapping coordinate range <start>..<end> 482 -r only show transcripts overlapping coordinate range <start>..<end>
388 (on chromosome/contig <chr>, strand <strand> if provided) 483 (on chromosome/contig <chr>, strand <strand> if provided)
389 -R for -r option, discard all transcripts that are not fully 484 -R for -r option, discard all transcripts that are not fully
390 contained within the given range 485 contained within the given range
486 --jmatch only output transcripts matching the given junction
391 -U discard single-exon transcripts 487 -U discard single-exon transcripts
392 -C coding only: discard mRNAs that have no CDS features 488 -C coding only: discard mRNAs that have no CDS features
393 --nc non-coding only: discard mRNAs that have CDS features 489 --nc non-coding only: discard mRNAs that have CDS features
394 --ignore-locus : discard locus features and attributes found in the input 490 --ignore-locus : discard locus features and attributes found in the input
395 -A use the description field from <seq_info.fsize> and add it 491 -A use the description field from <seq_info.fsize> and add it
396 as the value for a 'descr' attribute to the GFF record 492 as the value for a 'descr' attribute to the GFF record
397 -s <seq_info.fsize> is a tab-delimited file providing this info 493 -s <seq_info.fsize> is a tab-delimited file providing this info
398 for each of the mapped sequences: 494 for each of the mapped sequences:
399 <seq-name> <seq-length> <seq-description> 495 <seq-name> <seq-length> <seq-description>
400 (useful for -A option with mRNA/EST/protein mappings) 496 (useful for -A option with mRNA/EST/protein mappings)
401 497 Sorting: (by default, chromosomes are kept in the order they were found)
402 Sorting: (by default, chromosomes are kept in the order they were found)
403 --sort-alpha : chromosomes (reference sequences) are sorted alphabetically 498 --sort-alpha : chromosomes (reference sequences) are sorted alphabetically
404 --sort-by : sort the reference sequences by the order in which their 499 --sort-by : sort the reference sequences by the order in which their
405 names are given in the <refseq.lst> file 500 names are given in the <refseq.lst> file
406
407 Misc options: 501 Misc options:
408 -F preserve all GFF attributes (for non-exon features) 502 -F keep all GFF attributes (for non-exon features)
409 --keep-exon-attrs : for -F option, do not attempt to reduce redundant 503 --keep-exon-attrs : for -F option, do not attempt to reduce redundant
410 exon/CDS attributes 504 exon/CDS attributes
411 -G do not keep exon attributes, move them to the transcript feature 505 -G do not keep exon attributes, move them to the transcript feature
412 (for GFF3 output) 506 (for GFF3 output)
507 --attrs <attr-list> only output the GTF/GFF attributes listed in <attr-list>
508 which is a comma delimited list of attribute names to keep
413 --keep-genes : in transcript-only mode (default), also preserve gene records 509 --keep-genes : in transcript-only mode (default), also preserve gene records
414 --keep-comments: for GFF3 input/output, try to preserve comments 510 --keep-comments: for GFF3 input/output, try to preserve comments
415 -O process other non-transcript GFF records (by default non-transcript 511 -O process other non-transcript GFF records (by default non-transcript
416 records are ignored) 512 records are ignored)
417 -V discard any mRNAs with CDS having in-frame stop codons (requires -g) 513 -V discard any mRNAs with CDS having in-frame stop codons (requires -g)
435 --in-bed: input should be parsed as BED format (automatic if the input 531 --in-bed: input should be parsed as BED format (automatic if the input
436 filename ends with .bed*) 532 filename ends with .bed*)
437 --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS 533 --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS
438 features (see --tlf option below); automatic if the input 534 features (see --tlf option below); automatic if the input
439 filename ends with .tlf 535 filename ends with .tlf
440 536 --stream: fast processing of input GFF/BED transcripts as they are received
537 (no sorting, exons must be grouped by transcript in the input data)
441 Clustering: 538 Clustering:
442 -M/--merge : cluster the input transcripts into loci, discarding 539 -M/--merge : cluster the input transcripts into loci, discarding
443 "duplicated" transcripts (those with the same exact introns 540 "redundant" transcripts (those with the same exact introns
444 and fully contained or equal boundaries) 541 and fully contained or equal boundaries)
445 -d <dupinfo> : for -M option, write duplication info to file <dupinfo> 542 -d <dupinfo> : for -M option, write duplication info to file <dupinfo>
446 --cluster-only: same as -M/--merge but without discarding any of the 543 --cluster-only: same as -M/--merge but without discarding any of the
447 "duplicate" transcripts, only create "locus" features 544 "duplicate" transcripts, only create "locus" features
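448 -K for -M option: also discard as redundant the shorter, fully contained 545 -K for -M option: also discard as redundant the shorter, fully contained
449 transcripts (intron chains matching a part of the container) 546 transcripts (intron chains matching a part of the container)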
450 -Q for -M option, no longer require boundary containment when assessing 547 -Q for -M option, no longer require boundary containment when assessing
451 redundancy (can be combined with -K); only introns have to match for 548 redundancy (can be combined with -K); only introns have to match for
452 multi-exon transcripts, and >=80% overlap for single-exon transcripts 549 multi-exon transcripts, and >=80% overlap for single-exon transcripts
453 -Y for -M option, enforce -Q but also discard overlapping single-exon 550 -Y for -M option, enforce -Q but also discard overlapping single-exon
454 transcripts, even on the opposite strand (can be combined with -K) 551 transcripts, even on the opposite strand (can be combined with -K)
455
456 Output options: 552 Output options:
457 --force-exons: make sure that the lowest level GFF features are considered 553 --force-exons: make sure that the lowest level GFF features are considered
458 "exon" features 554 "exon" features
459 --gene2exon: for single-line genes not parenting any transcripts, add an 555 --gene2exon: for single-line genes not parenting any transcripts, add an
460 exon feature spanning the entire gene (treat it as a transcript) 556 exon feature spanning the entire gene (treat it as a transcript)
463 -D decode url encoded characters within attributes 559 -D decode url encoded characters within attributes
464 -Z merge very close exons into a single exon (when intron size<4) 560 -Z merge very close exons into a single exon (when intron size<4)
465 -g full path to a multi-fasta file with the genomic sequences 561 -g full path to a multi-fasta file with the genomic sequences
466 for all input mappings, OR a directory with single-fasta files 562 for all input mappings, OR a directory with single-fasta files
467 (one per genomic sequence, with file names matching sequence names) 563 (one per genomic sequence, with file names matching sequence names)
468 -w write a fasta file with spliced exons for each GFF transcript 564 -j output the junctions and the corresponding transcripts
565 -w write a fasta file with spliced exons for each transcript
566 --w-add <N> for the -w option, extract additional <N> bases
567 both upstream and downstream of the transcript boundaries
568 --w-nocds for -w, disable the output of CDS info in the FASTA file
469 -x write a fasta file with spliced CDS for each GFF transcript 569 -x write a fasta file with spliced CDS for each GFF transcript
470 -y write a protein fasta file with the translation of CDS for each record 570 -y write a protein fasta file with the translation of CDS for each record
471 -W for -w and -x options, write in the FASTA defline the exon 571 -W for -w, -x and -y options, write in the FASTA defline all the exon
472 coordinates projected onto the spliced sequence; 572 coordinates projected onto the spliced sequence;
473 for -y option, write transcript attributes in the FASTA defline
474 -S for -y option, use '*' instead of '.' as stop codon translation 573 -S for -y option, use '*' instead of '.' as stop codon translation
475 -L Ensembl GTF to GFF3 conversion (implies -F; should be used with -m) 574 -L Ensembl GTF to GFF3 conversion, adds version to IDs
476 -m <chr_replace> is a name mapping table for converting reference 575 -m <chr_replace> is a name mapping table for converting reference
477 sequence names, having this 2-column format: 576 sequence names, having this 2-column format:
478 <original_ref_ID> <new_ref_ID> 577 <original_ref_ID> <new_ref_ID>
479 WARNING: all GFF records on reference sequences whose original IDs
480 are not found in the 1st column of this table will be discarded!
481 -t use <trackname> in the 2nd column of each GFF/GTF output line 578 -t use <trackname> in the 2nd column of each GFF/GTF output line
482 -o write the records into <outfile> instead of stdout 579 -o write the output records into <outfile> instead of stdout
483 -T main output will be GTF instead of GFF3 580 -T main output will be GTF instead of GFF3
484 --bed output records in BED format instead of default GFF3 581 --bed output records in BED format instead of default GFF3
485 --tlf output "transcript line format" which is like GFF 582 --tlf output "transcript line format" which is like GFF
486 but exons, CDS features and related data are stored as GFF 583 but with exons and CDS related features stored as GFF
487 attributes in the transcript feature line, like this: 584 attributes in the transcript feature line, like this:
488 exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords> 585 exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>
489 <exons> is a comma-delimited list of exon_start-exon_end coordinates; 586 <exons> is a comma-delimited list of exon_start-exon_end coordinates;
490 <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons> 587 <CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>
491 --table output a simple tab delimited format instead of GFF, with columns 588 --table output a simple tab delimited format instead of GFF, with columns
492 having the values of GFF attributes given in <attrlist>; special 589 having the values of GFF attributes given in <attrlist>; special
493 pseudo-attributes (prefixed by @) are recognized: 590 pseudo-attributes (prefixed by @) are recognized:
494 @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen 591 @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons,
592 @cds, @covlen, @cdslen
593 If any of -w/-y/-x FASTA output files are enabled, the same fields
594 (excluding @id) are appended to the definition line of corresponding
595 FASTA records
495 -v,-E expose (warn about) duplicate transcript IDs and other potential 596 -v,-E expose (warn about) duplicate transcript IDs and other potential
496 problems with the given GFF/GTF records 597 problems with the given GFF/GTF records
598
497 ]]> 599 ]]>
498 </help> 600 </help>
499 <citations> 601 <citations>
500 <citation type="doi">10.1038/nbt.1621</citation> 602 <citation type="doi">10.1038/nbt.1621</citation>
501 </citations> 603 </citations>