comparison ggplot2_pca.xml @ 0:c21099566418 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ggplot2 commit f9fb73a88ab8b52ce11c25a966d4fe99e67c9fbf
author iuc
date Mon, 11 Jun 2018 16:05:29 -0400
parents
children 7e616a1bdb07
comparison
equal deleted inserted replaced
-1:000000000000 0:c21099566418
1 <tool id="ggplot2_pca" name="PCA plot w ggplot2" version="@VERSION@">
2 <macros>
3 <import>macros.xml</import>
4 </macros>
5 <requirements>
6 <requirement type="package" version="0.4.3">r-ggfortify</requirement>
7 <requirement type="package" version="1.2.1">r-svglite</requirement>
8 </requirements>
9 <command detect_errors="exit_code"><![CDATA[
10 cat '$script' &&
11 Rscript '$script'
12 ]]></command>
13 <configfiles>
14 <configfile name="script"><![CDATA[
15 @R_INIT@
16
17 ## Import libraries
18 library(ggfortify)
19 library(scales)
20
21 ## static VARs
22 groups <- TRUE
23 circles_boolean <- TRUE
24 group_name <- list()
25 group_cols <- list()
26 group_colors <- list()
27 group_feature <- ""
28 group_colors_vector <- ""
29
30 input <- '$input1'
31 header <- ${inputdata.header}
32 rowname_index <- as.integer('$inputdata.row_names_index')
33
34 horizontal <- ${inputdata.sample_name_orientation}
35 ## title <- '$title'
36 transform <- '$adv.transform'
37 ## background <- '$adv.theme'
38 ## scaling <- '$adv.scaling.plot_scaling'
39 ## legend <- '$adv.legend'
40 group_type <- '$groups.group_type'
41
42 ##if(group_type == "define_groups"){
43 #if str($groups.group_type) == "define_groups":
44 #for $q in $groups.group_names:
45 group_name <- append(group_name, '$q.groupName')
46 group_cols <- append(group_cols, '$q.groupCols')
47 group_colors <- append(group_colors, '$q.color_cond.plot_color')
48 #end for
49 #end if
50 group_name <- unlist(group_name)
51 group_cols <- unlist(group_cols)
52 group_colors <- unlist(group_colors)
53
54 circles <- '$groups.circle'
55 plot_param <- '$plotoptions'
56
57 ## split plotoptions
58 plot_param_list <- strsplit(x = plot_param, split = ",")
59 plot_options <- c("shape", "label") %in% plot_param_list[[1]]
60
61 ## read table with or without header and row names
62 if(rowname_index > 0){
63 df <- read.table(input, header = header, row.names = rowname_index, sep = "\t")
64 }else{
65 df <- read.table(input, header = header, sep = "\t")
66 }
67
68 ## validate grouping indices: each must be an integer between 1 and the number of data columns (NA, zero and negative values are rejected too)
69 num_cols <- ncol(df)
70 if(group_type == "define_groups"){
71 check_col_indices <- as.integer(unlist(strsplit(group_cols, split = ",")))
72 if(anyNA(check_col_indices) || any(check_col_indices < 1 | check_col_indices > num_cols)){
73 stop("Error: column indices for grouping are out of range! Check help!")
74 }
75 }
76
77 ## every column must be numeric; vapply always returns a logical vector, whatever the shape of the table
78 if(!all(vapply(df, is.numeric, logical(1)))){
79 stop("Error: table contains not only numbers!")
80 }
81
82 ## group names must be unique and must not collide with the reserved label "no_group"
83 if(anyDuplicated(c(group_name, "no_group")) > 0){
84 stop("Error: group_names must be unique: ", paste(c(group_name, "no_group"), collapse = ","), " is not unique!")
85 }
86
87 ## prepare group_feature for grouping of samples according to orientation
88 if(horizontal){
89 num_cols <- length(names(df))
90 group_feature <- rep("no_group", num_cols)
91 }else{
92 num_rows <- nrow(df)
93 group_feature <- rep("no_group", num_rows)
94 }
95
96 default_ggplot_colors <- ""
97 default_ggplot_colors_autoplot <- ""
98 ## for user-defined groups: build one colour per group label (plus "no_group") and write each group's name into group_feature at its indices
99 if(group_type == "define_groups"){
100 ## start from the hue_pal() palette named by label, then overwrite the entries of groups whose colour is not "none"
101 color_names <- c(group_name, "no_group")
102 cat("\ncolor_names: ", color_names)
103 default_ggplot_colors <- hue_pal()(length(color_names))
104 names(default_ggplot_colors) <- color_names
105 cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
106 names(group_colors) <- group_name
107 group_colors <- group_colors[group_colors != "none"]
108 default_ggplot_colors[names(group_colors)] <- group_colors
109 cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
110 group_string <- lapply(seq_along(group_name), function(k){
111 gname <- group_name[k]
112 gindex <- as.integer(strsplit(group_cols[k], split = ",")[[1]])
113 gnames <- rep(gname, length(gindex))
114 names(gindex) <- gnames
115 gindex
116 })
117 group_string <- do.call(c, group_string)
118 group_feature[group_string] <- names(group_string)
119 ## keep only the colours of labels that occur in group_feature ("no_group" stays only if some sample is left ungrouped)
120 default_ggplot_colors <- default_ggplot_colors[unique(group_feature)]
121 default_ggplot_colors_autoplot <- default_ggplot_colors[group_feature]
122 cat("\ndefault_ggplot_colors: ", default_ggplot_colors)
123 }
124
125 @LEGEND@
126
127 @XY_SCALING@
128
129 @THEME@
130
131 ## transpose data.frame for plotting if sample names are horizontal
132 if(horizontal){
133 df <- as.data.frame(t(df))
134 }
135
136 plot_df <- df
137 ## set group column if wanted
138 if(group_type %in% "no_groups"){
139 groups <- FALSE
140 }else{
141 plot_df\$group <- group_feature
142 group_name <- "group"
143 }
144
145 ## set circles_boolean to FALSE when no ellipses are requested
146 if(circles %in% "no"){
147 circles_boolean <- FALSE
148 }
149
150 plot_mat <- df
151
152 ## transform dataset
153 ## pick the transformation matching the selected option;
154 ## any option not listed below leaves the values untouched
155 transform_fun <- switch(transform,
156 log2 = log2,
157 log2plus1 = function(m){ log2(m+1) },
158 log10 = log10,
159 log10plus1 = function(m){ log10(m+1) },
160 NULL
161 )
162
163 ## apply it and report the transformation on stdout
164 if(!is.null(transform_fun)){
165 plot_mat <- transform_fun(plot_mat)
166 cat("\n ", transform, " transformed")
167 }
168
169 ## draw the PCA with autoplot: grouped plots are coloured by group and may get a frame of the selected type
170 if(groups){
171 if(circles_boolean){
172 plot_out <- autoplot(prcomp(plot_mat), data = plot_df, colour = group_name,
173 frame = TRUE, frame.type=circles, shape = plot_options[1],
174 label = plot_options[2])
175 }else{
176 plot_out <- autoplot(prcomp(plot_mat), data = plot_df, colour = group_name,
177 frame = FALSE, shape = plot_options[1], label = plot_options[2])
178 }
179 }else{
180 ## ungrouped plot: never framed, so the ellipse setting needs no separate branch
181 plot_out <- autoplot(prcomp(plot_mat), data = plot_df,
182 frame = FALSE,
183 shape = plot_options[1],
184 label = plot_options[2])
185 }
186
187 ## add advanced plotting options for final plot
188 plot_out <- plot_out +
189 scale_color_manual(values=default_ggplot_colors) +
190 scale_fill_manual(values=default_ggplot_colors) +
191 gg_theme +
192 gg_legend +
193 ggtitle('$title') +
194 theme(plot.title = element_text(hjust = 0.5))
195
196 @SAVE_OUTPUT@
197 ]]></configfile>
198 </configfiles>
199 <inputs>
200 <param name="input1" type="data" format="tabular" label="Select table"/>
201 <conditional name="inputdata"> <!-- controls how the table is read: header flag, row-name column and sample orientation -->
202 <param name="input_type" type="select" label="Select input dataset options" help="specific dataset input for reading">
203 <option value="with_header" selected="true">Dataset with header</option>
204 <option value="with_rownames">Dataset with row names</option>
205 <option value="with_header_rownames">Dataset with header and row names</option>
206 <option value="no_header_rownames">Dataset without header or row names</option>
207 </param>
208 <when value="with_header">
209 <param name="header" type="hidden" value="TRUE"/>
210 <param name="row_names_index" type="hidden" value="0"/>
211 <param name="sample_name_orientation" type="select" display="radio" multiple="false" label="Sample names orientation" help="Default horizontal: header names are interpreted as sample names">
212 <option value="TRUE" selected="true">horizontal</option>
213 <option value="FALSE">vertical</option>
214 </param>
215 </when>
216 <when value="with_rownames">
217 <param name="header" type="hidden" value="FALSE"/>
218 <param name="row_names_index" type="data_column" data_ref="input1" label="Select column, for row names" help="WARNING: please consider that using row names might shift the grouping columns"/>
219 <param name="sample_name_orientation" type="select" display="radio" label="Sample names orientation" help="Default vertical: row names are interpreted as sample names">
220 <option value="TRUE">horizontal</option>
221 <option value="FALSE" selected="true">vertical</option>
222 </param>
223 </when>
224 <when value="with_header_rownames">
225 <param name="header" type="hidden" value="TRUE"/>
226 <param name="row_names_index" label="Select column, for row names" type="data_column" data_ref="input1" help="WARNING: please consider that using row names might shift the grouping columns"/>
227 <param name="sample_name_orientation" type="select" label="Sample names orientation" display="radio" multiple="false" help="Decide whether header names or row names are interpreted as sample names">
228 <option value="TRUE" selected="true">horizontal</option>
229 <option value="FALSE">vertical</option>
230 </param>
231 </when>
232 <when value="no_header_rownames">
233 <param name="header" type="hidden" value="FALSE"/>
234 <param name="row_names_index" type="hidden" value="0"/>
235 <param name="sample_name_orientation" type="hidden" value="TRUE"/>
236 </when>
237 </conditional>
238 <expand macro="title"/>
239 <conditional name="groups">
240 <param name="group_type" type="select" label="Select groups">
241 <option value="no_groups" selected="true">No groups</option>
242 <option value="define_groups">Define groups</option>
243 </param>
244 <when value="no_groups">
245 <param name="circle" type="hidden" value="no"/>
246 </when>
247 <when value="define_groups">
248 <repeat name="group_names" min="2" title="Group" help="Select the samples for this group">
249 <param name="groupName" type="text" optional="false" label="Group name"
250 help="Use short names, avoid special characters and numbers at the beginning of the name (The names might be changed by the program to make them conform to processing in R)">
251 <validator type="no_options" message="Please choose a name for this group!"/>
252 </param>
253 <param name="groupCols" type="data_column" data_ref="input1" multiple="true" min="1" label="Columns for group"
254 help="WARNING: please use columns indices after removing the row name column if selected in second section!">
255 <validator type="no_options" message="Select at least one column for each group!"/>
256 <!--<filter type="remove_value" meta_ref="groupCols" />-->
257 </param>
258 <conditional name="color_cond">
259 <param name="color_bool" type="select" label="Change colors">
260 <option value="default" selected="true">Default ggplot colors</option>
261 <option value="selected_colors">Select colors</option>
262 </param>
263 <when value="selected_colors">
264 <param name="plot_color" type="color" value="" label="Pick color">
265 <sanitizer sanitize="false"/>
266 </param>
267 </when>
268 <when value="default">
269 <param name="plot_color" type="hidden" value="none"/>
270 </when>
271 </conditional>
272 </repeat>
273 <param name="circle" type="select" display="radio" multiple="false" label="Select type of ellipses"> <!-- any value other than "no" is handed to autoplot as frame.type -->
274 <option value="no">no ellipses</option>
275 <option value="convex">convex</option>
276 <option value="t" selected="true">t-distribution</option>
277 <option value="norm">normal distribution</option>
278 <option value="euclid">euclidean distance</option>
279 </param>
280 </when>
281 </conditional>
282 <param name="plotoptions" type="select" label="Select plot layout" display="radio" >
283 <option value="shape" >show shapes</option>
284 <option value="label" selected="true">show labels (group elements)</option>
285 <option value="shape,label" >show shapes and labels</option>
286 </param>
287 <!-- Advanced Options -->
288 <section name="adv" title="Advanced Options" expanded="false">
289 <expand macro="transform"/>
290 <expand macro="xy_scaling" />
291 <expand macro="theme"/>
292 <expand macro="legend"/>
293 </section>
294 <!-- Output Options -->
295 <section name="out" title="Output Options" expanded="true">
296 <expand macro="dimensions" />
297 </section>
298 </inputs>
299 <outputs>
300 <expand macro="additional_output" />
301 </outputs>
302 <tests>
303 <test>
304 <param name="input1" value="mtcars.txt" ftype="tabular"/>
305 <conditional name="inputdata">
306 <param name="input_type" value="with_header_rownames"/>
307 <param name="header" value="TRUE"/>
308 <param name="row_names_index" value="1"/>
309 <param name="sample_name_orientation" value="TRUE"/>
310 </conditional>
311 <conditional name="groups">
312 <param name="group_type" value="define_groups"/>
313 <repeat name="group_names">
314 <param name="groupName" value="group1"/>
315 <param name="groupCols" value="3,4"/>
316 <conditional name="color_cond">
317 <param name="color_bool" value="default"/>
318 <param name="plot_color" value="none"/>
319 </conditional>
320 </repeat>
321 <repeat name="group_names">
322 <param name="groupName" value="group2"/>
323 <param name="groupCols" value="2,5,6,8,9,10,11"/>
324 <conditional name="color_cond">
325 <param name="color_bool" value="default"/>
326 <param name="plot_color" value="none"/>
327 </conditional>
328 </repeat>
329 <param name="circle" value="convex"/>
330 </conditional>
331 <param name="additional_output_format" value="pdf"/>
332 <output name="output2" file="ggplot_pca_result1.pdf" compare="sim_size"/>
333 </test>
334 </tests>
335 <help><![CDATA[
336 **What it does**
337
338 This tool generates a principal component analysis (PCA) plot for a given table using a combination of ggplot2 and ggfortify.
339
340 -----
341
342 **Example**
343
344 **WARNING:** Be careful when selecting row names in the second option, because the grouping elements do not update automatically before the script is executed. This means that columns have to be chosen as if the row name column had already been removed.
345
346 **Example for row names in table**
347
348 +--------+-----------+-----------+---------------+---------------+
349 | name   | control 1 | control 2 | treatment 1   | treatment 2   |
350 +========+===========+===========+===============+===============+
351 | gene 1 | 10        | 12        | 3455          | 232           |
352 +--------+-----------+-----------+---------------+---------------+
353 | gene 2 | 20        | 2         | 345           | 334           |
354 +--------+-----------+-----------+---------------+---------------+
355 | gene 3 | 200       | 210       | 20            | 2             |
356 +--------+-----------+-----------+---------------+---------------+
357 |        |           |           |               |               |
358 +--------+-----------+-----------+---------------+---------------+
359 | 1      | 2         | 3         | 4             | 5             |
360 +--------+-----------+-----------+---------------+---------------+
361
362 The new index after reading the table will be:
363
364 +--------+-----------+-----------+--------------+---------------+
365 | name   | control 1 | control 2 | treatment 1  | treatment 2   |
366 +========+===========+===========+==============+===============+
367 | *      | 1         | 2         | 3            | 4             |
368 +--------+-----------+-----------+--------------+---------------+
369
370 -----
371
372 Pictures coming soon.
373 ]]></help>
374 <expand macro="citations">
375 <citation type="bibtex">@article{tang2016ggfortify,
376 title={ggfortify: unified interface to visualize statistical results of popular R packages},
377 author={Tang, Yuan and Horikoshi, Masaaki and Li, Wenxuan},
378 journal={The R Journal},
379 volume={8},
380 number={2},
381 pages={478-489},
382 year={2016},
383 url = {https://journal.r-project.org/archive/2016/RJ-2016-060/RJ-2016-060.pdf}
384 }
385 </citation>
386 <citation type="bibtex">@manual{gu2016getoptlong,
387 title = {GetoptLong: Parsing Command-Line Arguments and Variable Interpolation},
388 author = {Zuguang Gu},
389 year = {2016},
390 note = {R package version 0.1.5},
391 url = {https://CRAN.R-project.org/package=GetoptLong},
392 }
393 </citation>
394 </expand>
395 </tool>