comparison featurecounts.xml @ 0:9e7a369eec58 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit 03f64004f90ac0a7be67ecfc355a7b361f3c3314
author iuc
date Wed, 21 Sep 2016 07:24:39 -0400
parents
children c7bd0cc53524
comparison
equal deleted inserted replaced
-1:000000000000 0:9e7a369eec58
1 <tool id="featurecounts" name="featureCounts" version="1.4.6.p5" profile="16.04">
2 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description>
3 <requirements>
4 <requirement type="package" version="1.4.6p5">subread</requirement>
5 </requirements>
6
7 <version_command>featureCounts -v 2&gt;&amp;1 | grep .</version_command>
8 <command><![CDATA[
9 ## Run featureCounts on the one selected alignment file; the raw table goes to "output" and is reshaped below
10 featureCounts
11 -a "$reference_gene_sets"
12 -o "output"
13 -T \${GALAXY_SLOTS:-2}
14 ## Advanced options: boolean and select values already contain their flag text (or are empty)
15 -t "$extended_parameters.gff_feature_type"
16 -g "$extended_parameters.gff_feature_attribute"
17 $extended_parameters.summarization_level
18 $extended_parameters.contribute_to_multiple_features
19 -s $extended_parameters.strand_specificity
20 $extended_parameters.multimapping_enabled.multimapping_counts
21
22 #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M"
23 $extended_parameters.multimapping_enabled.fraction
24 #end if
25
26 -Q $extended_parameters.mapping_quality
27 $extended_parameters.largest_overlap
28 --minOverlap $extended_parameters.min_overlap
29 $extended_parameters.read_reduction
30 $extended_parameters.primary
31 $extended_parameters.ignore_dup
32 $extended_parameters.count_split_alignments_only
33 #if str($extended_parameters.read_extension_5p) != "0"
34 --readExtension5 $extended_parameters.read_extension_5p
35 #end if
36
37 #if str($extended_parameters.read_extension_3p) != "0"
38 --readExtension3 $extended_parameters.read_extension_3p
39 #end if
40 ## Paired-end options; -P, -d and -D are only emitted when fragment counting (-p) is enabled
41 $pe_parameters.fragment_counting_enabled.fragment_counting
42 #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p"
43 $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance
44 #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P"
45 -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length
46 -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length
47 #end if
48 #end if
49
50 $pe_parameters.only_both_ends
51 -S $pe_parameters.orientation
52 $pe_parameters.exclude_chimerics
53
54 "${alignment}"
55
56 ## Removal of comment and column-header line
57 && grep -v "^#" "output" | tail -n+2 > body.txt
58
59 ## Set the right columns for the tabular formats
60 #if $format.value == "tabdel_medium"
61 && cut -f 1,7 body.txt > expression_matrix.txt
62
63 ## cut always emits columns in file order: -f 1,7,6 would return columns 1, 6 and 7
64 ## Thus the gene length column (last column of the result) has to be added separately
65 && cut -f 6 body.txt > gene_lengths.txt
66 && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak
67 && mv -f expression_matrix.txt.bak "${output_medium}"
68 #elif $format.value == "tabdel_short"
69 && cut -f 1,7 body.txt > "${output_short}"
70 #else
71 && cp body.txt "${output_full}"
72 #end if
73
74
75 #if str($include_feature_length_file) == "true"
76 && cut -f 1,6 body.txt > "${output_feature_lengths}"
77 #end if
78
79 && tail -n+2 "output.summary" > "${output_summary}"
80
81 ]]></command>
82 <inputs>
83 <param name="alignment"
84 type="data"
85 multiple="false"
86 format="bam,sam"
87 label="Alignment file"
88 help="The input alignment file where the gene expression has to be counted. The file can be in SAM or BAM format." />
89
90 <param name="reference_gene_sets"
91 format="gff,gtf,gff3"
92 type="data"
93 label="Gene annotation file"
94 help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" />
95
96 <param name="format"
97 type="select"
98 label="Output format"
99 help="The output format will be tabular, select the preferred columns here">
100 <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option>
101 <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option>
102 <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option>
103 </param>
104
105 <param name="include_feature_length_file"
106 type="boolean"
107 truevalue="true"
108 falsevalue="false"
109 checked="false"
110 label="Create gene-length file"
111 help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" />
112
113
114 <section name="pe_parameters" title="Options for paired-end reads">
115 <conditional name="fragment_counting_enabled">
116
117 <param name="fragment_counting"
118 type="select"
119 argument="-p"
120 label="Count fragments instead of reads"
121 help="If specified, fragments (or templates) will be counted instead of reads.">
122 <!-- Option values are spliced verbatim into the command line; the command template compares against " -p", so the leading space is part of the value. -->
123 <option value="" selected="true">Disabled; all reads/mates will be counted individually</option>
124 <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option>
125 </param>
126
127 <when value=" -p">
128 <conditional name="check_distance_enabled">
129 <param name="check_distance"
130 type="boolean"
131 truevalue=" -P"
132 falsevalue=""
133 argument="-P"
134 label="Check paired-end distance"
135 help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." />
136 <when value=" -P">
137 <param name="minimum_fragment_length"
138 type="integer"
139 value="50"
140 argument="-d"
141 label="Minimum fragment/template length." />
142 <param name="maximum_fragment_length"
143 type="integer"
144 value="600"
145 argument="-D"
146 label="Maximum fragment/template length." />
147 </when>
148 <when value="" />
149 </conditional>
150 </when>
151 <when value="" />
152 </conditional>
153
154 <param name="only_both_ends"
155 type="boolean"
156 truevalue=" -B"
157 falsevalue=""
158 argument="-B"
159 label="Only allow fragments with both reads aligned"
160 help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." />
161
162 <param name="orientation"
163 type="select"
164 label="Orientation of the two reads from the same pair"
165 argument="-S"
166 help="Default is 'fr'">
167 <option value="fr" selected="true">Forward, Reverse (fr)</option>
168 <option value="ff">Forward, Forward (ff)</option>
169 <option value="rf">Reverse, Forward (rf)</option>
170 </param>
171
172 <param name="exclude_chimerics"
173 type="boolean"
174 truevalue=" -C"
175 falsevalue=""
176 argument="-C"
177 checked="true"
178 label="Exclude chimeric fragments"
179 help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." />
180 </section>
181
182 <section name="extended_parameters" title="Advanced options">
183 <param name="gff_feature_type"
184 type="text"
185 value="exon"
186 argument="-t"
187 label="GFF feature type filter"
188 help="Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." />
189
190 <param name="gff_feature_attribute"
191 type="text"
192 value="gene_id"
193 argument="-g"
194 label="GFF gene identifier"
195 help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." />
196
197 <param name="summarization_level"
198 type="boolean"
199 truevalue=" -f"
200 falsevalue=""
201 argument="-f"
202 label="On feature level"
203 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." />
204
205 <param name="contribute_to_multiple_features"
206 type="boolean"
207 truevalue=" -O"
208 falsevalue=""
209 argument="-O"
210 label="Allow read to contribute to multiple features"
211 help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta-feature (or matched feature if -f is specified)" />
212
213 <param name="strand_specificity"
214 type="select"
215 label="Strand specificity of the protocol"
216 argument="-s"
217 help="Indicate if strand-specific read counting should be performed.">
218 <option value="0" selected="true">Unstranded</option>
219 <option value="1">Stranded (forwards)</option>
220 <option value="2">Stranded (reverse)</option>
221 </param>
222
223 <conditional name="multimapping_enabled">
224 <param name="multimapping_counts"
225 type="select"
226 argument="-M"
227 label="Count multi-mapping reads/fragments"
228 help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads.">
229 <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option>
230 <option value=" -M">Enabled; multi-mapping reads are included</option>
231 </param>
232 <when value=" -M">
233 <param name="fraction"
234 type="boolean"
235 truevalue="--fraction"
236 falsevalue=""
237 argument="--fraction"
238 label="Assign fractions to multimapping reads"
239 help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by 'NH' tag) reported for the read. This option must be used together with the '-M' option." />
240 </when>
241 <when value="" />
242 </conditional>
243
244 <param name="mapping_quality"
245 type="integer"
246 value="12"
247 argument="-Q"
248 label="Minimum mapping quality per read"
249 help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." />
250
251 <param name="largest_overlap"
252 type="boolean"
253 truevalue=" --largestOverlap"
254 falsevalue=""
255 argument="--largestOverlap"
256 label="Largest overlap"
257 help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" />
258
259 <param name="min_overlap"
260 type="integer"
261 value="1"
262 argument="--minOverlap"
263 label="Minimum overlap"
264 help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." />
265
266 <param name="read_extension_5p"
267 type="integer"
268 value="0"
269 argument="--readExtension5"
270 label="Read 5' extension"
271 help="Reads are extended upstream by ... bases from their 5' end" />
272
273 <param name="read_extension_3p"
274 type="integer"
275 value="0"
276 argument="--readExtension3"
277 label="Read 3' extension"
278 help="Reads are extended downstream by ... bases from their 3' end" />
279
280 <param name="read_reduction"
281 type="select"
282 label="Reduce read to single position"
283 argument="--read2pos"
284 help="The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base which the read is reduced to.">
285 <option value="" selected="true">Leave the read as it is</option>
286 <option value="--read2pos 5">Reduce it to the 5' end</option>
287 <option value="--read2pos 3">Reduce it to the 3' end</option>
288 </param>
289
290 <param name="primary"
291 type="boolean"
292 truevalue=" --primary"
293 falsevalue=""
294 argument="--primary"
295 label="Only count primary alignments"
296 help="If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter whether they are from multi-mapping reads or not ('-M' is ignored)." />
297
298 <param name="ignore_dup"
299 type="boolean"
300 truevalue=" --ignoreDup"
301 falsevalue=""
302 argument="--ignoreDup"
303 label="Ignore reads marked as duplicate"
304 help="If specified, reads that were marked as duplicates will be ignored. Bit 0x400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read." />
305
306 <param name="count_split_alignments_only"
307 type="boolean"
308 truevalue=" --countSplitAlignmentsOnly"
309 falsevalue=""
310 argument="--countSplitAlignmentsOnly"
311 label="Count split alignments only"
312 help="If specified, only split alignments (CIGAR strings containing letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data." />
313 </section>
314 </inputs>
315 <outputs> <!-- The three count tables below carry mutually exclusive filters on the "format" parameter, so one of them is produced per run. -->
316 <data format="tabular"
317 name="output_medium"
318 label="${tool.name} on ${on_string}">
319 <filter>format == "tabdel_medium"</filter>
320 <actions>
321 <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" />
322 </actions>
323 </data>
324 <!-- Two-column table: feature identifier and read count. -->
325 <data format="tabular"
326 name="output_short"
327 label="${tool.name} on ${on_string}">
328 <filter>format == "tabdel_short"</filter>
329 <actions>
330 <action name="column_names" type="metadata" default="Geneid,${alignment.name}" />
331 </actions>
332 </data>
333 <!-- Full featureCounts table with its comment and column-header lines stripped by the command. -->
334 <data format="tabular"
335 name="output_full"
336 label="${tool.name} on ${on_string}: count table">
337 <filter>format == "tabdel_full"</filter>
338 <actions>
339 <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" />
340 </actions>
341 </data>
342 <!-- Assignment summary taken from "output.summary" without its first line; has no filter and is marked hidden. -->
343 <data format="tabular"
344 name="output_summary"
345 hidden="true"
346 label="${tool.name} on ${on_string}: summary">
347 <actions>
348 <action name="column_names" type="metadata" default="Status,${alignment.name}" />
349 </actions>
350 </data>
351 <!-- Optional feature-length table, filtered on the include_feature_length_file parameter. -->
352 <data format="tabular"
353 name="output_feature_lengths"
354 label="${tool.name} on ${on_string}: feature lengths">
355 <filter>include_feature_length_file</filter>
356 <actions>
357 <action name="column_names" type="metadata" default="Feature,Length" />
358 </actions>
359 </data>
360 </outputs>
361 <tests>
362 <test>
363 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
364 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
365 <param name="format" value="tabdel_short" />
366 <param name="include_feature_length_file" value="true"/>
367 <output name="output_short" file="output_1_short.tab"/>
368 <output name="output_summary" file="output_1_summary.tab"/>
369 </test>
370 <test>
371 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
372 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
373 <param name="format" value="tabdel_medium" />
374 <param name="include_feature_length_file" value="true"/>
375 <output name="output_medium" file="output_1_medium.tab"/>
376 <output name="output_summary" file="output_1_summary.tab"/>
377 </test>
378 <test>
379 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
380 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
381 <param name="format" value="tabdel_full" />
382 <param name="include_feature_length_file" value="true"/>
383 <output name="output_full" file="output_1_full.tab"/>
384 <output name="output_summary" file="output_1_summary.tab"/>
385 <output name="output_feature_lengths" file="output_feature_lengths.tab"/>
386 </test>
387 <!-- NOTE(review): the three tests below repeat the parameters of the three tests above but compare against output_2_* files; confirm which input or option is meant to differ. -->
388 <test>
389 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
390 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
391 <param name="format" value="tabdel_short" />
392 <param name="include_feature_length_file" value="true"/>
393 <output name="output_short" file="output_2_short.tab"/>
394 <output name="output_summary" file="output_2_summary.tab"/>
395 </test>
396 <test>
397 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
398 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
399 <param name="format" value="tabdel_medium" />
400 <param name="include_feature_length_file" value="true"/>
401 <output name="output_medium" file="output_2_medium.tab"/>
402 <output name="output_summary" file="output_2_summary.tab"/>
403 </test>
404 <test>
405 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
406 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
407 <param name="format" value="tabdel_full" />
408 <param name="include_feature_length_file" value="true"/>
409 <output name="output_full" file="output_2_full.tab"/>
410 <output name="output_summary" file="output_2_summary.tab"/>
411 <output name="output_feature_lengths" file="output_feature_lengths.tab"/>
412 </test>
413 </tests>
414
415 <help><![CDATA[
416 featureCounts
417 #############
418
419 Overview
420 --------
421 FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files.
422
423 Input formats
424 -------------
425 Alignments should be provided in either:
426
427 - SAM format, http://samtools.sourceforge.net/samtools.shtml#5
428 - BAM format
429
430 Gene regions should be provided in the GFF/GTF format:
431
432 - http://genome.ucsc.edu/FAQ/FAQformat.html#format3
433 - http://www.ensembl.org/info/website/upload/gff.html
434
435 Output format
436 -------------
437 FeatureCounts produces a table containing the counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC.
438 ]]></help>
439 <citations>
440 <citation type="doi">10.1093/bioinformatics/btt656</citation>
441 </citations>
442 </tool>