comparison featurecounts.xml @ 3:e04fbcc4e91a draft default tip

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/featurecounts_galaxy_wrapper commit 597fa3df643b54ea93a17448c722f657e3d68b60
author yhoogstrate
date Wed, 07 Oct 2015 11:38:41 -0400
parents b5c93611d2c5
children
comparison
equal deleted inserted replaced
2:b5c93611d2c5 3:e04fbcc4e91a
1 <?xml version="1.0" encoding="UTF-8"?> 1 <tool id="featurecounts" name="featureCounts" version="1.4.6.p5">
2 <tool id="featurecounts" name="featureCounts" version="1.4.6.p1">
3 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> 2 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description>
4 <requirements> 3 <requirements>
5 <requirement type="package" version="1.4.6.p1">featurecounts</requirement> 4 <requirement type="package" version="1.4.6.p5">featurecounts</requirement>
6 <requirement type="package" version="1.0.0">featurecounts2bed</requirement>
7 </requirements> 5 </requirements>
8 <version_command>featureCounts -v</version_command> 6 <version_command>featureCounts -v 2&gt;&amp;1 | grep .</version_command>
9 <command><![CDATA[ 7 <command><![CDATA[
8
10 ## Check 01: do the alignments have a dbkey and is the option set to use it? 9 ## Check 01: do the alignments have a dbkey and is the option set to use it?
11
12 #if $reference_gene_sets_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 10 #if $reference_gene_sets_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
13 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/gene_sets.loc' is configured properly!" >&2 11 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/gene_sets.loc' is configured properly!" >&2
14 #else 12 #else
15 ## Check 02: are all alignments of the same type (bam || sam) 13 ## Check 02: are all alignments of the same type (bam || sam)
16 #if len({ alignment.extension:True for alignment in $alignments }.keys()) != 1 14 #if len({ alignment.extension:True for alignment in $alignments }.keys()) != 1
17 echo "Either all files must be SAM or all files must be BAM, no mixture is allowed." >&2 15 echo "Either all files must be SAM or all files must be BAM, no mixture is allowed." >&2
18 #else 16 #else
19 featureCounts 17 featureCounts
20 -a 18 -a
21 #if $reference_gene_sets_source.source_select == "indexed_filtered" 19 #if $reference_gene_sets_source.source_select == "indexed_filtered"
22 "$reference_gene_sets_source.reference_gene_sets" 20 "$reference_gene_sets_source.reference_gene_sets"
23 #else if $reference_gene_sets_source.source_select == "indexed_all" 21 #else if $reference_gene_sets_source.source_select == "indexed_all"
24 "$reference_gene_sets_source.reference_gene_sets" 22 "$reference_gene_sets_source.reference_gene_sets"
25 #else if $reference_gene_sets_source.source_select == "history" 23 #else if $reference_gene_sets_source.source_select == "history"
31 Because this file is "calculated" during run-time, it can 29 Because this file is "calculated" during run-time, it can
32 be used in a workflow. 30 be used in a workflow.
33 *# 31 *#
34 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'gene_sets' ].get_fields() )[0][2] }" 32 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'gene_sets' ].get_fields() )[0][2] }"
35 #end if 33 #end if
36 34
37 -o "$output" 35 -o "${output}"
38 -T \${GALAXY_SLOTS:-2} 36 -T \${GALAXY_SLOTS:-2}
39 37
40 #if $extended_parameters.parameters == "extended" 38 #if $extended_parameters.parameters == "extended"
41 -t $extended_parameters.gff_feature_type 39 -t $extended_parameters.gff_feature_type
42 -g $extended_parameters.gff_feature_attribute 40 -g $extended_parameters.gff_feature_attribute
43 $extended_parameters.summarization_level 41 $extended_parameters.summarization_level
44 $extended_parameters.contribute_to_multiple_features 42 $extended_parameters.contribute_to_multiple_features
49 $extended_parameters.check_distance 47 $extended_parameters.check_distance
50 -d $extended_parameters.minimum_fragment_length 48 -d $extended_parameters.minimum_fragment_length
51 -D $extended_parameters.maximum_fragment_length 49 -D $extended_parameters.maximum_fragment_length
52 $extended_parameters.only_both_ends 50 $extended_parameters.only_both_ends
53 $extended_parameters.exclude_chimerics 51 $extended_parameters.exclude_chimerics
54 $extended_parameters.namesort
55 #end if 52 #end if
56 53
57 #for $alignment in $alignments 54 #for $alignment in $alignments
58 ${alignment} 55 ${alignment}
59 #end for 56 #end for
60 57
61 2>&1 58 2>&1
62 59
63 #set $columns = [str(i+7) for i, alignment in enumerate($alignments)] 60 #set $columns = [str(i+7) for i, alignment in enumerate($alignments)]
64 #set $columns=",".join($columns) 61 #set $columns=",".join($columns)
65 62
66 #if $format == "tabdel_default" or $format.value == "tabdel_default" 63 #if $format == "tabdel_default" or $format.value == "tabdel_default"
67 ; cp $output tmp.txt 64 && cp $output tmp.txt
68 ; egrep -v "^#" tmp.txt > tmp2.txt 65 && egrep -v "^#" tmp.txt > tmp2.txt
69 ; cut -f 1,$columns tmp2.txt > tmp_left.txt 66 && cut -f 1,$columns tmp2.txt > tmp_left.txt
70 ; cut -f 6 tmp2.txt > tmp_right.txt 67 && cut -f 6 tmp2.txt > tmp_right.txt
71 ; paste tmp_left.txt tmp_right.txt > $output 68 && paste tmp_left.txt tmp_right.txt > $output
72 #elif $format == "tabdel_short" or $format.value == "tabdel_short" 69 #elif $format == "tabdel_short" or $format.value == "tabdel_short"
73 ; cp $output tmp.txt 70 && cp $output tmp.txt
74 ; egrep -v "^#" tmp.txt | cut -f 1,$columns > $output 71 && egrep -v "^#" tmp.txt | cut -f 1,$columns > $output
75 #end if 72 #end if
76 73
77 ## For every alignment, replace its filename with: "hid: sample name" 74 ## For every alignment, replace its filename with: "hid: sample name"
78 #for $alignment in $alignments 75 #for $alignment in $alignments
79 #set $alignment_escaped = str($alignment).replace('/', '\/').replace('.', '\.') 76 #set $alignment_escaped = str($alignment).replace('/', '\/').replace('.', '\.')
80 #set $alignment_name_escaped = str(alignment.hid)+": "+str($alignment.name).replace('\t',' ').replace('\\','\\\\').replace("'","\\'").replace('/','\/') 77 #set $alignment_name_escaped = str(alignment.hid)+": "+str($alignment.name).replace('\t',' ').replace('\\','\\\\').replace("'","\\'").replace('/','\/')
81 78
82 #if $format.value == "tabdel_default" or $format.value == "tabdel_short" 79 #if $format.value == "tabdel_default" or $format.value == "tabdel_short"
83 ; sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt 80 && sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt
84 #elif $format.value == "bed": 81 #elif $format.value == "bed":
85 ; featurecounts2bed.sh -f "$output" > tmp.txt 82 && $__tool_directory__/featurecounts2bed.sh -f "$output" > tmp.txt
86 #else 83 #else
87 ; sed -e '1,2 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt 84 && sed -e '1,2 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt
88 #end if 85 #end if
89 86
90 ; mv tmp.txt $output 87 && mv tmp.txt "${output}"
91 88
92 ; sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output".summary" > tmp.txt 89 && sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output".summary" > tmp.txt
93 ; mv tmp.txt $output".summary" 90 && mv tmp.txt ${output}".summary"
94 #end for 91 #end for
95 ; mv $output".summary" $output_summary 92 && mv ${output}".summary" "${output_summary}"
96 #end if 93 #end if
97 #end if 94 #end if
98 ]]></command> 95 ]]></command>
99
100 <inputs> 96 <inputs>
101 <param name="alignments" type="data" format="bam,sam" label="Alignment file" help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files in the series must be in THE SAME format." multiple="true" /> 97 <param name="alignments" type="data" format="bam,sam" label="Alignment file" help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files in the series must be in THE SAME format." multiple="true" />
102 98
103 <!-- Find out how to access the GTF/GFF file(s) --> 99 <!-- Find out how to access the GTF/GFF file(s) -->
104 <conditional name="reference_gene_sets_source"> 100 <conditional name="reference_gene_sets_source">
105 <param name="source_select" type="select" label="GFF/GTF Source"> 101 <param name="source_select" type="select" label="GFF/GTF Source">
106 <option value="indexed_filtered">Use a built-in index (which fits your reference)</option> 102 <option value="indexed_filtered">Use a built-in index (which fits your reference)</option>
107 <option value="history">Use reference from the history</option> 103 <option value="history">Use reference from the history</option>
134 </when> 130 </when>
135 <when value="attribute"> 131 <when value="attribute">
136 <!-- Do nothing, determine GTF/GFF file at runtime --> 132 <!-- Do nothing, determine GTF/GFF file at runtime -->
137 </when> 133 </when>
138 </conditional> 134 </conditional>
139
140 <param name="format" type="select" label="Output format"> 135 <param name="format" type="select" label="Output format">
136 <option value="tabdel_default">Gene-name "\t" gene-count "\t" gene-length</option>
137 <option value="tabdel_short" selected="true">Gene-name "\t" gene-count</option>
138 <option value="bed">BED format (line per exon): chr "\t" start "\t" stop "\t" description "\t" readcount</option>
141 <option value="complex">featureCounts 1.4.0+ default (extensive; complex)</option> 139 <option value="complex">featureCounts 1.4.0+ default (extensive; complex)</option>
142 <option value="tabdel_default" selected="true">Gene-name "\t" gene-count "\t" gene-length (tab-delimited)</option>
143 <option value="tabdel_short">Gene-name "\t" gene-count (tab-delimited)</option>
144 <option value="bed">BED format (line per exon): chr "\t" start "\t" stop "\t" description "\t" readcount (tab-delimited)</option>
145 </param> 140 </param>
146
147 <conditional name="extended_parameters"> 141 <conditional name="extended_parameters">
148 <param name="parameters" type="select" label="featureCounts parameters" help="For more advanced featureCounts settings."> 142 <param name="parameters" type="select" label="featureCounts parameters" help="For more advanced featureCounts settings.">
149 <option value="default">Default settings</option> 143 <option value="default">Default settings</option>
150 <option value="extended">Extended settings</option> 144 <option value="extended">Extended settings</option>
151 </param> 145 </param>
152 <when value="default"> 146 <when value="default">
153 </when> 147 </when>
154 <when value="extended"> 148 <when value="extended">
155 <param name="gff_feature_type" type="text" value="exon" label="GFF feature type filter" help="Specify the feature type. Only rows which have the matched matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> 149 <param name="gff_feature_type" type="text" value="exon"
156 150 label="GFF feature type filter"
157 <param name="gff_feature_attribute" type="text" value="gene_id" label="GFF gene identifier" help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." /> 151 help="Specify the feature type. Only rows which have the matched matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." />
158 152
159 <param name ="summarization_level" type="boolean" truevalue=" -f" falsevalue="" label="On feature level" help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> 153 <param name="gff_feature_attribute" type="text" value="gene_id"
160 154 label="GFF gene identifier"
161 <param name ="contribute_to_multiple_features" type="boolean" truevalue=" -O" falsevalue="" label="Allow read to contribute to multiple features" help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta- feature (or matched feature if -f is specified)" /> 155 help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." />
162 156
163 <param name="protocol" type="select" label="Strand specific protocol" help="Indicate if strand-specific read counting should be performed. It has three possible values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default."> 157 <param name ="contribute_to_multiple_features" type="boolean" truevalue=" -O" falsevalue=""
158 label="Allow read to contribute to multiple features"
159 help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta- feature (or matched feature if -f is specified)" />
160
161 <param name="protocol" type="select" label="Strand specific protocol"
162 help="Indicate if strand-specific read counting should be performed.">
164 <option value=" -s 0" selected="true">Unstranded</option> 163 <option value=" -s 0" selected="true">Unstranded</option>
165 <option value=" -s 1">Stranded (forwards)</option> 164 <option value=" -s 1">Stranded (forwards)</option>
166 <option value=" -s 2">Stranded (reverse)</option> 165 <option value=" -s 2">Stranded (reverse)</option>
167 </param> 166 </param>
168 167
169 <param name="multimapping_counts" type="boolean" truevalue=" -M" falsevalue="" label="Count multi-mapping reads/fragments" help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads." /> 168 <param name="multimapping_counts" type="boolean" truevalue=" -M" falsevalue=""
170 169 label="Count multi-mapping reads/fragments"
171 <param name="mapping_quality" type="integer" value="0" label="Minimum read quality" help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default." /> 170 help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads." />
172 171
173 <param name="fragment_counting" type="boolean" truevalue=" -p" falsevalue="" label="PE: Count fragments instead of reads" help="Paired-end specific: If specified, fragments (or templates) will be counted instead of reads. The two reads from the same fragment must be adjacent to each other in the provided SAM/BAM file. If SAM/BAM input does not meet this requirement, the -S (sorting) option should be provided as well." /> 172 <param name="mapping_quality" type="integer" value="12" label="Minimum read quality"
174 173 help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." />
175 <param name="check_distance" type="boolean" truevalue=" -P" falsevalue="" label="PE: Check paired-end distance" help="Paired-end specific: If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> 174
176 175 <param name="fragment_counting" type="boolean" truevalue=" -p" falsevalue="" checked="true"
176 label="PE: Count fragments instead of reads"
177 help="Paired-end specific: If specified, fragments (or templates) will be counted instead of reads." />
178
179 <param name="check_distance" type="boolean" truevalue=" -P" falsevalue=""
180 label="PE: Check paired-end distance"
181 help="Paired-end specific: If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." />
182
177 <param name="minimum_fragment_length" type="integer" value="50" label="PE: Minimum fragment/template length." /> 183 <param name="minimum_fragment_length" type="integer" value="50" label="PE: Minimum fragment/template length." />
178 <param name="maximum_fragment_length" type="integer" value="600" label="PE: Maximum fragment/template length." /> 184 <param name="maximum_fragment_length" type="integer" value="600" label="PE: Maximum fragment/template length." />
179 185
180 <param name="only_both_ends" type="boolean" truevalue=" -B" falsevalue="" label="PE: only allow fragments with both reads aligned" help="Paired-end specific: If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> 186 <param name="only_both_ends" type="boolean" truevalue=" -B" falsevalue=""
181 187 label="PE: only allow fragments with both reads aligned"
182 <param name="exclude_chimerics" type="boolean" truevalue=" -C" falsevalue="" label="PE: Exclude chimeric fragments" help="Paired-end specific: If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." /> 188 help="Paired-end specific: If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." />
183 189
184 <param name="namesort" type="boolean" truevalue=" -S" falsevalue="" label="PE: Name-sort reads (slow!)" help="Paired-end specific: If specified, the program will reorder input reads according to their names and make reads from the same pair be adjacent to each other. This option should be provided when reads from the same pair are not adjacent to each other in input SAM/BAM files (for instance sorting reads by chromosomal locations could decouple reads from the same pair)." /> 190 <param name="exclude_chimerics" type="boolean" truevalue=" -C" falsevalue="" checked="true"
191 label="PE: Exclude chimeric fragments"
192 help="Paired-end specific: If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." />
193
194 <param name ="summarization_level" type="boolean" truevalue=" -f" falsevalue=""
195 label="On feature level"
196 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." />
197
185 </when> 198 </when>
186 </conditional> 199 </conditional>
187 </inputs> 200 </inputs>
188
189 <outputs> 201 <outputs>
190 <data format="tabular" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" /> 202 <data format="tabular" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" />
191 <data format="tabular" name="output_summary" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])} summary" /> 203 <data format="tabular" name="output_summary" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])} summary" />
192 </outputs> 204 </outputs>
193
194 <tests> 205 <tests>
195 <test> 206 <test>
196 <param name="alignments" value="featureCounts_input1.bam,featureCounts_input2.bam" ftype="bam" /> 207 <param name="alignments" value="featureCounts_input1.bam,featureCounts_input2.bam" ftype="bam" />
197 <param name="source_select" value="history" /> 208 <param name="source_select" value="history" />
209 <param name="format" value="tabdel_default" />
198 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> 210 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
199 <output name="output" file="output.tab"/> 211 <output name="output" file="output.tab"/>
200 <output name="output_summary" file="output_summary.tab"/> 212 <output name="output_summary" file="output_summary.tab"/>
201 </test> 213 </test>
202 </tests> 214 </tests>