comparison snpfreqplot.xml @ 0:1062d6ad6503 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
author iuc
date Wed, 02 Dec 2020 21:23:06 +0000
parents
children e362b3143cde
comparison
equal deleted inserted replaced
-1:000000000000 0:1062d6ad6503
1 <tool id="snpfreqplot" name="Variant Frequency Plot" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.09"
2 license="GPL-3.0-or-later" >
3 <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
4 <macros>
5 <token name="@VERSION@">1.0</token>
6 <token name="@GALAXY_VERSION@">0</token>
7 </macros>
8 <requirements> <!-- Packages the tool needs at run time: R, the plotting/data libraries, and an X11 toolkit library -->
9 <requirement type="package" version="4.0">r-base</requirement>
10 <requirement type="package" version="1.0.12">r-pheatmap</requirement>
11 <requirement type="package" version="1.3.0">r-tidyverse</requirement>
12 <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
13 <requirement type="package" version="">xorg-libxt</requirement> <!-- NOTE(review): the version is empty, so this package is unpinned; consider pinning it to a tested release for reproducibility -->
14 </requirements>
15 <edam_topics>
16 <edam_topic>topic_0797</edam_topic>
17 <edam_topic>topic_0092</edam_topic>
18 </edam_topics>
19 <edam_operations>
20 <edam_operation>operation_3436</edam_operation>
21 </edam_operations>
22 <command detect_errors="exit_code"><![CDATA[
23 #set $outfile = "tmp_output." + str($advanced.output_type)
24 ## Each helper script is read and its content discarded, then Rscript runs the generated config script. NOTE(review): presumably done so that the scripts, otherwise referenced only from the configfile, show up in the command line - confirm the reason.
25 cat '$__tool_directory__/helperFunctions.R' > /dev/null
26 && cat '$__tool_directory__/snpEffExtract.R' > /dev/null
27 && cat '$__tool_directory__/heatmap_for_variants.R' > /dev/null
28 && echo "output file: $outfile"
29 && Rscript '$configscript'
30 ]]>
31 </command>
32 <configfiles>
33 <configfile name="configscript"><![CDATA[
34 ## 1. Set Sample Inputs
35 ## ------------------
36 ## Build a dataframe of sample ids, filetypes, and filenames from the
37 ## input datasets; the list may mix vcf and tabular inputs.
38 ## NOTE(review): each value is pasted into a single-quoted R string, so an
39 ## identifier containing a single quote would break the script - confirm.
40 samples = list(ids = c(), exts= c(), files = c())
41 #for $file in $sinputs:
42 samples\$ids = c(samples\$ids, '${file.element_identifier}')
43 samples\$exts = c(samples\$exts, '${file.extension}')
44 samples\$files = c(samples\$files, '${file}')
45 #end for
46 samples = data.frame(samples, stringsAsFactors=FALSE)
47
48 ## 2. Input Conversion (external script)
49 ## ----------------------------------
50 ## We source the input conversion script *after* the samples
51 ## have been populated, so that it performs an inplace replacement
52 ## of the vcf inputs with their converted tabular counterparts.
53 ##
54 ## All samples are tabular after this point
55 source('$__tool_directory__/helperFunctions.R')
56 source('$__tool_directory__/snpEffExtract.R')
57
58 ## 3. Galaxy Params
59 ## --------------
60 ## Set the general script parameters from the UI
61 variant_frequency <- as.numeric( '$varfreq' )
62 brewer_color_gene_annotation <- as.character( '$advanced.color' )
63
64 #if str($clustering.do) == "TRUE":
65 pheat_clustering <- TRUE
66 pheat_clustering_method <- as.character( '$clustering.method' )
67 pheat_number_of_clusters <- as.integer( '$clustering.nclust' )
68 #else:
69 pheat_clustering <- FALSE
70 pheat_clustering_method <- "ward.D2"
71 pheat_number_of_clusters <- 5
72 #end if
73
74 ratio = as.numeric('$advanced.ratio')
75 out_ext = '$advanced.output_type'
76 out_file = paste0("tmp_output.", out_ext)
77
78 ## 4. Generate Heatmap (external script)
79 ## ----------------------------------
80 source('$__tool_directory__/heatmap_for_variants.R')
81
82 ]]>
83 </configfile>
84 </configfiles>
85 <inputs>
86 <param name="sinputs" format="tabular,vcf" type="data" multiple="true"
87 collection_type="list" label="Variant lists data"
88 help="Select at least two datasets (or a dataset collection) with variant lists (see the tool help below for format details). Datasets are expected to represent individual samples and dataset names will be used as sample identifiers." /> <!-- Per-sample variant lists; the configscript iterates over these. NOTE(review): collection_type is documented for data_collection params; confirm it has any effect on this type="data" param -->
89 <param name="varfreq" type="float" min="0" max="1" value="0.1"
90 label="Variant Frequency Threshold"
91 help="Only plot variants with an intrasample frequency above this threshold in at least one sample." />
92 <section name="advanced" title="Image Properties" expanded="true">
93 <param name="output_type" type="select" label="Plot output format" >
94 <option value="pdf" selected="true" >PDF</option>
95 <option value="png" >PNG</option>
96 <option value="svg">SVG</option>
97 <option value="tiff" >TIFF</option>
98 <option value="bmp" >BMP</option>
99 <option value="jpeg" >JPEG</option>
100 </param>
101 <param name="ratio" label="Cell Ratio" type="float"
102 min="0.05" value="0.67" max="20"
103 help="Width:Height ratio of individual heatmap cells" />
104 <param name="color" type="select" label="Color palette used for the gene annotations" >
105 <option value="Set1" />
106 <option value="Set2" />
107 <option value="Set3" selected="true" />
108 <option value="Pastel2" />
109 <option value="Pastel1" />
110 <option value="Paired" />
111 <option value="Dark2" />
112 <option value="Accent" />
113 <option value="YlOrRd" />
114 <option value="YlOrBr" />
115 <option value="YlGnBu" />
116 <option value="YlGn" />
117 <option value="Reds" />
118 <option value="RdPu" />
119 <option value="Purples" />
120 <option value="PuRd" />
121 <option value="PuBuGn" />
122 <option value="PuBu" />
123 <option value="OrRd" />
124 <option value="Oranges" />
125 <option value="Greys" />
126 <option value="Greens" />
127 <option value="GnBu" />
128 <option value="BuPu" />
129 <option value="BuGn" />
130 <option value="Blues" />
131 <option value="Spectral" />
132 <option value="RdYlGn" />
133 <option value="RdYlBu" />
134 <option value="RdGy" />
135 <option value="RdBu" />
136 <option value="PuOr" />
137 <option value="PRGn" />
138 <option value="PiYG" />
139 <option value="BrBG" />
140 </param>
141 </section>
142 <conditional name="clustering"> <!-- Optional clustering settings; the configscript reads do, nclust and method from here -->
143 <param name="do" type="select" label="Perform Clustering?" >
144 <option value="TRUE">Yes</option>
145 <option value="FALSE" selected="true">No</option>
146 </param>
147 <when value="TRUE" > <!-- Extra inputs shown only when clustering is requested -->
148 <param name="nclust" type="integer"
149 min="1" value="1" label="Number of clusters" />
150 <param name="method" type="select" label="Clustering method" >
151 <option value="ward.D" />
152 <option value="ward.D2" selected="true" />
153 <option value="single" />
154 <option value="complete" />
155 <option value="average" >average (UPGMA)</option>
156 <option value="mcquitty" >mcquitty (WPGMA)</option>
157 <option value="median" >median (WPGMC)</option>
158 <option value="centroid" >centroid (UPGMC)</option>
159 </param>
160 </when>
161 <when value="FALSE" /> <!-- No extra inputs: the configscript then sets pheat_clustering to FALSE with method ward.D2 and 5 clusters -->
162 </conditional>
163 </inputs>
164 <outputs> <!-- One plot dataset collected from the working directory; its format defaults to pdf -->
165 <data name="outfile" format="pdf" from_work_dir="tmp_output.*"
166 label="Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
167 <change_format> <!-- Switches the datatype to match advanced.output_type. NOTE(review): from_work_dir above uses a wildcard (tmp_output.*); confirm Galaxy expands it, otherwise name the file explicitly -->
168 <when input="advanced.output_type" value="svg" format="svg" />
169 <when input="advanced.output_type" value="png" format="png" />
170 <when input="advanced.output_type" value="tiff" format="tiff" />
171 <when input="advanced.output_type" value="bmp" format="bmp" />
172 <when input="advanced.output_type" value="jpeg" format="jpg" /> <!-- the jpeg option maps to the jpg format name -->
173 </change_format>
174 </data>
175 </outputs>
176 <tests>
177 <test expect_num_outputs="1">
178 <!-- PDF, tabular inputs -->
179 <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
180 <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
181 </test>
182 <test expect_num_outputs="1">
183 <!-- PNG, multiple inputs, non-numeric IDs -->
184 <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input443.tabular,input444.tabular" />
185 <param name="varfreq" value="0.5" />
186 <section name="advanced" >
187 <param name="color" value="Spectral" />
188 <param name="output_type" value="png" />
189 </section>
190 <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
191 </test>
192 <test expect_num_outputs="1">
193 <!-- SVG, clustering defaults -->
194 <param name="sinputs" ftype="tabular" value="input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular" />
195 <conditional name="clustering">
196 <param name="do" value="TRUE" />
197 </conditional>
198 <section name="advanced" >
199 <param name="color" value="Greys" />
200 <param name="ratio" value="0.8" />
201 <param name="output_type" value="svg" />
202 </section>
203 <output name="outfile" ftype="svg">
204 <assert_contents>
205 <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
206 </assert_contents>
207 </output>
208 </test>
209 <test expect_num_outputs="1">
210 <!-- JPEG, clustering extras, mixed alphanumeric labels -->
211 <param name="sinputs" ftype="tabular" value="input436.tabular,input443.tabular,input438.tabular,input444.tabular" />
212 <conditional name="clustering">
213 <param name="do" value="TRUE" />
214 <param name="nclust" value="2" />
215 <param name="method" value="centroid" />
216 </conditional>
217 <section name="advanced" >
218 <param name="color" value="Purples" />
219 <param name="ratio" value="1.2" />
220 <param name="output_type" value="jpeg" />
221 </section>
222 <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
223 </test>
224 <test expect_num_outputs="1">
225 <!-- PDF, vcf test -->
226 <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
227 <section name="advanced" >
228 <param name="color" value="PuBuGn" />
229 <param name="output_type" value="pdf" />
230 </section>
231 <output name="outfile" ftype="pdf" value="heatmap.from_vcf.pdf" compare="sim_size" delta="250" />
232 </test>
233 <test expect_num_outputs="1">
234 <!-- SVG, problematic vcf test -->
235 <param name="sinputs" ftype="vcf" value="1084592.vcf,1085080.vcf,1085445.vcf,1085841.vcf,1085990.vcf" />
236 <section name="advanced" >
237 <param name="output_type" value="svg" />
238 </section>
239 <output name="outfile" ftype="svg">
240 <assert_contents>
241 <has_text text="viewBox=&quot;0 0 754 271&quot;" />
242 </assert_contents>
243 </output>
244 </test>
245 <test expect_num_outputs="1">
246 <!-- SVG, VCF test with problematic splice+syn in snpeff.789.vcf for threshold = 0.0222 -->
247 <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
248 <param name="varfreq" value="0.0222" />
249 <section name="advanced" >
250 <param name="output_type" value="svg" />
251 </section>
252 <output name="outfile" ftype="svg">
253 <assert_contents>
254 <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
255 </assert_contents>
256 </output>
257 </test>
258 </tests>
259 <help><![CDATA[
260 **What it does**
261
262 This tool generates multi-sample variant-frequency plots from SnpEff-annotated
263 viral variant lists with optional hierarchical clustering of the samples.
264
265 .. class:: warningmark
266
267 Currently, this tool has been tested only on SARS-CoV-2 variant data.
268 While the intention is to have it work for viral variant data in general,
269 be prepared for unexpected behavior with other input data at the current
270 development stage.
271
272 ----
273
274 The tool expects input variant lists in one of the following two formats:
275
276 1. VCF datasets as produced by standard variant callers with
277
278 - variant allele frequencies encoded in an ``AF`` INFO field
279 - variant functional genomic effects annotated using SnpEff's EFF format (SnpEff's ANN format is not currently supported!)
280
281 2. tabular datasets with columns listing, at least, the following variant properties:
282
283 - ``CHROM``
284 - ``POS``
285 - ``REF``
286 - ``ALT``
287 - ``AF``
288 - ``EFF[*].AA``
289 - ``EFF[*].GENE``
290 - ``EFF[*].EFFECT``
291
292 Such files can be produced with SnpSift Extract Fields and can be useful if
293 preprocessing of the lists with standard text processing tools is required.
294
295 ----
296
297 Example output:
298
299 .. image:: /static/images/example_output.png
300
301 ]]></help>
302 <citations>
303 <citation type="bibtex">@unpublished{Fuchs2020,
304 author = {Fuchs, Jonas},
305 title = {},
306 year = {2020},
307 note = {Multi-sample annotated viral variant-frequency plots based on the R pheatmap package.},
308 address = {Institute for Virology, University of Freiburg}
309 }</citation>
310 </citations>
311 </tool>