comparison w4mcorcov_calc.R @ 6:7bd523ca1f9a draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit cafda5095a79ce2376325b57337302f95137195d
author eschen42
date Wed, 18 Jul 2018 12:35:55 -0400
parents 50f60f94c034
children 066b1f409e9f
comparison
equal deleted inserted replaced
5:50f60f94c034 6:7bd523ca1f9a
5 } 5 }
6 6
7 #### OPLS-DA 7 #### OPLS-DA
8 algoC <- "nipals" 8 algoC <- "nipals"
9 9
10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) { 10 do_detail_plot <- function(
11 x_dataMatrix
12 , x_predictor
13 , x_is_match
14 , x_algorithm
15 , x_prefix
16 , x_show_labels
17 , x_progress = print
18 , x_env
19 , x_crossval_i
20 ) {
11 off <- function(x) if (x_show_labels == "0") 0 else x 21 off <- function(x) if (x_show_labels == "0") 0 else x
12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { 22 if ( x_is_match
23 && ncol(x_dataMatrix) > 0
24 && length(unique(x_predictor))> 1
25 && x_crossval_i < nrow(x_dataMatrix)
26 ) {
13 my_oplsda <- opls( 27 my_oplsda <- opls(
14 x = x_dataMatrix 28 x = x_dataMatrix
15 , y = x_predictor 29 , y = x_predictor
16 , algoC = x_algorithm 30 , algoC = x_algorithm
17 , predI = 1 31 , predI = 1
18 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
19 , printL = FALSE 33 , printL = FALSE
20 , plotL = FALSE 34 , plotL = FALSE
21 , crossvalI = x_crossval_i 35 , crossvalI = x_crossval_i
36 , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2.
22 ) 37 )
23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 38 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 39 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 40 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
26 do_s_plot <- function(parallel_x) { 41 do_s_plot <- function(
27 my_cor_vs_cov <- cor_vs_cov( 42 x_env
28 matrix_x = x_dataMatrix 43 , predictor_projection_x = TRUE
29 , ropls_x = my_oplsda 44 , cplot_x = FALSE
30 , parallel_x = parallel_x 45 , cor_vs_cov_x = NULL
31 ) 46 )
47 {
48 #print(ls(x_env)) # "cplot_y" etc
49 #print(str(x_env$cplot_y)) # chr "covariance"
50 if (cplot_x) {
51 #print(x_env$cplot_y) # "covariance"
52 cplot_y_correlation <- (x_env$cplot_y == "correlation")
53 #print(cplot_y_correlation) # FALSE
54 }
55 if (is.null(cor_vs_cov_x)) {
56 my_cor_vs_cov <- cor_vs_cov(
57 matrix_x = x_dataMatrix
58 , ropls_x = my_oplsda
59 , predictor_projection_x = predictor_projection_x
60 )
61 } else {
62 my_cor_vs_cov <- cor_vs_cov_x
63 }
64 # print("str(my_cor_vs_cov)")
65 # str(my_cor_vs_cov)
66 if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
67 x_progress("No cor_vs_cov data produced")
68 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
69 text(x=1, y=1, labels="too few covariance data")
70 return(my_cor_vs_cov)
71 }
32 with( 72 with(
33 my_cor_vs_cov 73 my_cor_vs_cov
34 , { 74 , {
35 min_x <- min(covariance) 75 min_x <- min(covariance, na.rm = TRUE)
36 max_x <- max(covariance) 76 max_x <- max(covariance, na.rm = TRUE)
37 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) 77 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
38 covariance <- covariance / lim_x 78 covariance <- covariance / lim_x
39 lim_x <- 1.2 79 lim_x <- 1.2
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29], 80 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." 81 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) 82 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
43 plus_cor <- correlation 83 plus_cor <- correlation
44 plus_cov <- covariance 84 plus_cov <- covariance
45 cex <- 0.75 85 cex <- 0.65
46 which_projection <- if (projection == 1) "t1" else "o1" 86 which_projection <- if (projection == 1) "t1" else "o1"
47 which_loading <- if (projection == 1) "parallel" else "orthogonal" 87 which_loading <- if (projection == 1) "parallel" else "orthogonal"
48 if (projection == 1) { 88 if (projection == 1) { # predictor-projection
49 my_xlab <- "relative covariance(feature,t1)" 89 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
50 my_x <- plus_cov 90 if (!cplot_x) { # S-plot predictor-projection
51 my_ylab <- "correlation(feature,t1) [~ parallel loading]" 91 my_xlab <- "relative covariance(feature,t1)"
52 my_y <- plus_cor 92 my_x <- plus_cov
53 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) 93 my_ylab <- "correlation(feature,t1)"
94 my_y <- plus_cor
95 } else { # C-plot predictor-projection
96 my_xlab <- "variable importance in predictor-projection"
97 my_x <- vip4p
98 if (cplot_y_correlation) {
99 my_ylab <- "correlation(feature,t1)"
100 my_y <- plus_cor
101 } else {
102 my_ylab <- "relative covariance(feature,t1)"
103 my_y <- plus_cov
104 }
105 }
106 if (cplot_x) {
107 lim_x <- max(my_x, na.rm = TRUE) * 1.1
108 my_xlim <- c( 0, lim_x + off(0.2) )
109 } else {
110 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
111 }
54 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 112 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
55 my_load_distal <- loadp 113 my_load_distal <- loadp
56 my_load_proximal <- loado 114 my_load_proximal <- loado
57 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
58 red <- as.numeric(correlation > 0) * vipcp 115 red <- as.numeric(correlation > 0) * vipcp
59 blue <- as.numeric(correlation < 0) * vipcp 116 blue <- as.numeric(correlation < 0) * vipcp
60 alpha <- 0.1 + 0.4 * vipcp 117 alpha <- 0.1 + 0.4 * vipcp
61 my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha) 118 red[is.na(red)] <- 0
62 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) 119 blue[is.na(blue)] <- 0
63 } else { 120 alpha[is.na(alpha)] <- 0
64 my_xlab <- "relative covariance(feature,to1)" 121 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
65 my_x <- -plus_cov 122 main_label <- sprintf("%s for level %s versus %s"
66 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) 123 , x_prefix, fctr_lvl_1, fctr_lvl_2)
67 my_ylab <- "correlation(feature,to1) [~ orthogonal loading]" 124 } else { # orthogonal projection
68 my_y <- plus_cor 125 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
126 if (!cplot_x) {
127 my_xlab <- "relative covariance(feature,to1)"
128 my_x <- -plus_cov
129 } else {
130 my_xlab <- "variable importance in orthogonal projection"
131 my_x <- vip4o
132 }
133 if (!cplot_x) { # S-plot orthogonal projection
134 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
135 my_ylab <- "correlation(feature,to1)"
136 my_y <- plus_cor
137 } else { # C-plot orthogonal projection
138 lim_x <- max(my_x, na.rm = TRUE) * 1.1
139 my_xlim <- c( 0, lim_x + off(0.2) )
140 if (cplot_y_correlation) {
141 my_ylab <- "correlation(feature,to1)"
142 my_y <- plus_cor
143 } else {
144 my_ylab <- "relative covariance(feature,to1)"
145 my_y <- plus_cov
146 }
147 }
69 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 148 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
70 my_load_distal <- loado 149 my_load_distal <- loado
71 my_load_proximal <- loadp 150 my_load_proximal <- loadp
72 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
73 alpha <- 0.1 + 0.4 * vipco 151 alpha <- 0.1 + 0.4 * vipco
74 my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha) 152 alpha[is.na(alpha)] <- 0
75 main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2) 153 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
154 main_label <- sprintf(
155 "Features influencing orthogonal projection for %s versus %s"
156 , fctr_lvl_1, fctr_lvl_2)
76 } 157 }
77 main_cex <- min(1.0, 46.0/nchar(main_label)) 158 main_cex <- min(1.0, 46.0/nchar(main_label))
78 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward 159 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
79 plot( 160 plot(
80 y = my_y 161 y = my_y
90 , pch = 16 171 , pch = 16
91 , col = my_col 172 , col = my_col
92 ) 173 )
93 low_x <- -0.7 * lim_x 174 low_x <- -0.7 * lim_x
94 high_x <- 0.7 * lim_x 175 high_x <- 0.7 * lim_x
95 if (projection == 1) { 176 if (projection == 1 && !cplot_x) {
96 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") 177 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
97 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") 178 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
98 } 179 }
99 if ( x_show_labels != "0" ) { 180 if ( x_show_labels != "0" ) {
100 names(my_load_distal) <- tsv1$featureID 181 names(my_load_distal) <- tsv1$featureID
107 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) 188 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
108 labels_to_show <- c( 189 labels_to_show <- c(
109 names(head(sort(my_load_distal),n = n_labels)) 190 names(head(sort(my_load_distal),n = n_labels))
110 , names(tail(sort(my_load_distal),n = n_labels)) 191 , names(tail(sort(my_load_distal),n = n_labels))
111 ) 192 )
112 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) 193 labels <- unname(
194 sapply(
195 X = tsv1$featureID
196 , FUN = function(x) if( x %in% labels_to_show ) x else ""
197 )
198 )
113 x_text_offset <- 0.024 199 x_text_offset <- 0.024
114 y_text_offset <- (if (projection == 1) 1 else -1) * -0.017 200 y_text_off <- 0.017
201 if (!cplot_x) { # S-plot
202 y_text_offset <- if (projection == 1) -y_text_off else y_text_off
203 } else { # C-plot
204 y_text_offset <-
205 sapply(
206 X = (my_y > 0)
207 , FUN = function(x) { if (x) y_text_off else -y_text_off }
208 )
209 }
115 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { 210 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
116 print("str(x_arg)") 211 # print("str(x_arg)")
117 print(str(x_arg)) 212 # print(str(x_arg))
118 print("str(y_arg)") 213 # print("str(y_arg)")
119 print(str(y_arg)) 214 # print(str(y_arg))
120 print("str(labels_arg)") 215 # print("str(labels_arg)")
121 print(str(labels_arg)) 216 # print(str(labels_arg))
122 text( 217 if (length(labels_arg) > 0) {
123 y = y_arg 218 unique_slant <- unique(slant_arg)
124 , x = x_arg + x_text_offset 219 if (length(unique_slant) == 1) {
125 , cex = 0.4 220 text(
126 , labels = labels_arg 221 y = y_arg
127 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels 222 , x = x_arg + x_text_offset
128 , srt = slant_arg 223 , cex = 0.4
129 , adj = 0 # left-justified 224 , labels = labels_arg
130 ) 225 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
131 } 226 , srt = slant_arg
132 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant 227 , adj = 0 # left-justified
228 )
229 } else {
230 for (slant in unique_slant) {
231 text(
232 y = y_arg[slant_arg == slant]
233 , x = x_arg[slant_arg == slant] + x_text_offset
234 , cex = 0.4
235 , labels = labels_arg[slant_arg == slant]
236 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
237 , srt = slant
238 , adj = 0 # left-justified
239 )
240 }
241 }
242 }
243 }
244 if (!cplot_x) {
245 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
246 } else {
247 my_slant <- sapply(
248 X = (my_y > 0)
249 , FUN = function(x) if (x) 2 else -2
250 ) * my_feature_label_slant
251 }
133 if (length(my_x) > 1) { 252 if (length(my_x) > 1) {
134 label_features( 253 label_features(
135 x_arg = my_x [my_x > 0] 254 x_arg = my_x [my_x > 0]
136 , y_arg = my_y [my_x > 0] - y_text_offset 255 , y_arg = my_y [my_x > 0] - y_text_offset
137 , labels_arg = labels[my_x > 0] 256 , labels_arg = labels[my_x > 0]
138 , slant_arg = -my_slant 257 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
139 ) 258 )
140 label_features( 259 if (!cplot_x) {
141 x_arg = my_x [my_x < 0] 260 label_features(
142 , y_arg = my_y [my_x < 0] + y_text_offset 261 x_arg = my_x [my_x < 0]
143 , labels_arg = labels[my_x < 0] 262 , y_arg = my_y [my_x < 0] + y_text_offset
263 , labels_arg = labels[my_x < 0]
264 , slant_arg = my_slant
265 )
266 }
267 } else {
268 if (!cplot_x) {
269 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
270 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
271 } else {
272 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
273 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
274 }
275 label_features(
276 x_arg = my_x
277 , y_arg = my_y_arg
278 , labels_arg = labels
144 , slant_arg = my_slant 279 , slant_arg = my_slant
145 )
146 } else {
147 label_features(
148 x_arg = my_x
149 , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
150 , labels_arg = labels
151 , slant_arg = (if (my_x > 1) -1 else 1) * my_slant
152 ) 280 )
153 } 281 }
154 } 282 }
155 } 283 }
156 ) 284 )
157 return (my_cor_vs_cov) 285 return (my_cor_vs_cov)
158 } 286 }
159 my_cor_vs_cov <- do_s_plot( parallel_x = TRUE ) 287 my_cor_vs_cov <- do_s_plot(
288 x_env = x_env
289 , predictor_projection_x = TRUE
290 , cplot_x = FALSE
291 )
160 typeVc <- c("correlation", # 1 292 typeVc <- c("correlation", # 1
161 "outlier", # 2 293 "outlier", # 2
162 "overview", # 3 294 "overview", # 3
163 "permutation", # 4 295 "permutation", # 4
164 "predict-train", # 5 296 "predict-train", # 5
173 if ( length(my_oplsda@orthoVipVn) > 0 ) { 305 if ( length(my_oplsda@orthoVipVn) > 0 ) {
174 my_typevc <- typeVc[c(9,3,8)] 306 my_typevc <- typeVc[c(9,3,8)]
175 } else { 307 } else {
176 my_typevc <- c("(dummy)","overview","(dummy)") 308 my_typevc <- c("(dummy)","overview","(dummy)")
177 } 309 }
310 my_ortho_cor_vs_cov_exists <- FALSE
178 for (my_type in my_typevc) { 311 for (my_type in my_typevc) {
179 if (my_type %in% typeVc) { 312 if (my_type %in% typeVc) {
180 tryCatch({ 313 tryCatch({
181 if ( my_type != "x-loading" ) { 314 if ( my_type != "x-loading" ) {
182 plot( 315 plot(
185 , parCexN = 0.4 318 , parCexN = 0.4
186 , parDevNewL = FALSE 319 , parDevNewL = FALSE
187 , parLayL = TRUE 320 , parLayL = TRUE
188 , parEllipsesL = TRUE 321 , parEllipsesL = TRUE
189 ) 322 )
323 if (my_type == "overview") {
324 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
325 title(sub = sub_label)
326 }
190 } else { 327 } else {
191 do_s_plot( parallel_x = FALSE ) 328 my_ortho_cor_vs_cov <- do_s_plot(
329 x_env = x_env
330 , predictor_projection_x = FALSE
331 , cplot_x = FALSE
332 )
333 my_ortho_cor_vs_cov_exists <- TRUE
192 } 334 }
193 }, error = function(e) { 335 }, error = function(e) {
194 x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) ) 336 x_progress(
337 sprintf(
338 "factor level %s or %s may have only one sample - %s"
339 , fctr_lvl_1
340 , fctr_lvl_2
341 , e$message
342 )
343 )
195 }) 344 })
196 } else { 345 } else {
197 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 346 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
198 text(x=1, y=1, labels="no orthogonal projection is possible") 347 text(x=1, y=1, labels="no orthogonal projection is possible")
199 } 348 }
200 } 349 }
350 cplot_p <- x_env$cplot_p
351 cplot_o <- x_env$cplot_o
352 if (cplot_p || cplot_o) {
353 if (cplot_p) {
354 do_s_plot(
355 x_env = x_env
356 , predictor_projection_x = TRUE
357 , cplot_x = TRUE
358 , cor_vs_cov_x = my_cor_vs_cov
359 )
360 did_plots <- 1
361 } else {
362 did_plots <- 0
363 }
364 if (cplot_o) {
365 if (my_ortho_cor_vs_cov_exists) {
366 do_s_plot(
367 x_env = x_env
368 , predictor_projection_x = FALSE
369 , cplot_x = TRUE
370 , cor_vs_cov_x = my_ortho_cor_vs_cov
371 )
372 } else {
373 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
374 text(x=1, y=1, labels="no orthogonal projection is possible")
375 }
376 did_plots <- 1 + did_plots
377 }
378 if (did_plots == 1) {
379 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white")
380 }
381 }
201 return (my_cor_vs_cov) 382 return (my_cor_vs_cov)
202 } else { 383 } else {
203 return (NULL) 384 return (NULL)
204 } 385 }
205 } 386 }
206 387
207 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 388 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
208 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { 389 corcov_calc <- function(
390 calc_env
391 , failure_action = stop
392 , progress_action = function(x) { }
393 , corcov_tsv_action = function(t) { }
394 , salience_tsv_action = function(t) { }
395 , extra_plots = c()
396 ) {
209 if ( ! is.environment(calc_env) ) { 397 if ( ! is.environment(calc_env) ) {
210 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") 398 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
211 return ( FALSE ) 399 return ( FALSE )
212 } 400 }
213 if ( ! is.function(corcov_tsv_action) ) { 401 if ( ! is.function(corcov_tsv_action) ) {
233 matchingC <- calc_env$matchingC 421 matchingC <- calc_env$matchingC
234 labelFeatures <- calc_env$labelFeatures 422 labelFeatures <- calc_env$labelFeatures
235 423
236 # arg/env checking 424 # arg/env checking
237 if (!(facC %in% names(smpl_metadata))) { 425 if (!(facC %in% names(smpl_metadata))) {
238 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) 426 failure_action(
427 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata"
428 , facC))
239 return ( FALSE ) 429 return ( FALSE )
240 } 430 }
241 431
242 mz <- vrbl_metadata$mz 432 mz <- vrbl_metadata$mz
243 names(mz) <- vrbl_metadata$variableMetadata 433 names(mz) <- vrbl_metadata$variableMetadata
244 mz_lookup <- function(feature) unname(mz[feature]) 434 mz_lookup <- function(feature) unname(mz[feature])
245 435
246 rt <- vrbl_metadata$rt 436 rt <- vrbl_metadata$rt
247 names(rt) <- vrbl_metadata$variableMetadata 437 names(rt) <- vrbl_metadata$variableMetadata
248 rt_lookup <- function(feature) unname(rt[feature]) 438 rt_lookup <- function(feature) unname(rt[feature])
249 439
250 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) 440 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
251 salience_df <- calc_env$salience_df <- w4msalience( 441 salience_df <- calc_env$salience_df <- w4msalience(
252 data_matrix = data_matrix 442 data_matrix = data_matrix
253 , sample_class = smpl_metadata[,facC] 443 , sample_class = smpl_metadata[,facC]
254 , failure_action = failure_action 444 , failure_action = failure_action
320 did_plot <- FALSE 510 did_plot <- FALSE
321 if (tesC != "none") { 511 if (tesC != "none") {
322 # for each column name, extract the parts of the name matched by 'col_pattern', if any 512 # for each column name, extract the parts of the name matched by 'col_pattern', if any
323 the_colnames <- colnames(vrbl_metadata) 513 the_colnames <- colnames(vrbl_metadata)
324 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { 514 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
325 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) 515 failure_action(
516 sprintf(
517 "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'."
518 , tesC))
326 return ( FALSE ) 519 return ( FALSE )
327 } 520 }
328 col_matches <- lapply( 521 col_matches <- lapply(
329 X = the_colnames, 522 X = the_colnames,
330 FUN = function(x) { 523 FUN = function(x) {
347 level_union <- c(level_union, col_match[2], col_match[3]) 540 level_union <- c(level_union, col_match[2], col_match[3])
348 } 541 }
349 } 542 }
350 } 543 }
351 level_union <- unique(sort(level_union)) 544 level_union <- unique(sort(level_union))
352 overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) 545 overall_significant <- 1 == (
546 if (intersample_sig_col %in% colnames(vrbl_metadata)) {
547 vrbl_metadata[,intersample_sig_col]
548 } else {
549 1
550 }
551 )
353 if ( length(level_union) > 2 ) { 552 if ( length(level_union) > 2 ) {
354 chosen_samples <- smpl_metadata_facC %in% level_union 553 chosen_samples <- smpl_metadata_facC %in% level_union
355 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 554 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
356 col_selector <- vrbl_metadata_names[ overall_significant ] 555 col_selector <- vrbl_metadata_names[ overall_significant ]
357 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 556 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
358 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { 557 plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
359 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 558 progress_action(
360 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) 559 sprintf("calculating/plotting contrast of %s vs. %s"
560 , fctr_lvl_1, fctr_lvl_2))
561 predictor <- sapply(
562 X = chosen_facC
563 , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
564 )
361 my_cor_cov <- do_detail_plot( 565 my_cor_cov <- do_detail_plot(
362 x_dataMatrix = my_matrix 566 x_dataMatrix = my_matrix
363 , x_predictor = predictor 567 , x_predictor = predictor
364 , x_is_match = is_match 568 , x_is_match = is_match
365 , x_algorithm = algoC 569 , x_algorithm = algoC
366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 570 , x_prefix = if (pairSigFeatOnly) {
571 "Significantly contrasting features"
572 } else {
573 "Significant features"
574 }
367 , x_show_labels = labelFeatures 575 , x_show_labels = labelFeatures
368 , x_progress = progress_action 576 , x_progress = progress_action
369 , x_crossval_i = min(7, length(chosen_samples)) 577 , x_crossval_i = min(7, length(chosen_samples))
370 , x_env = calc_env 578 , x_env = calc_env
371 ) 579 )
373 progress_action("NOTHING TO PLOT.") 581 progress_action("NOTHING TO PLOT.")
374 } else { 582 } else {
375 my_tsv <- my_cor_cov$tsv1 583 my_tsv <- my_cor_cov$tsv1
376 my_tsv$mz <- mz_lookup(my_tsv$featureID) 584 my_tsv$mz <- mz_lookup(my_tsv$featureID)
377 my_tsv$rt <- rt_lookup(my_tsv$featureID) 585 my_tsv$rt <- rt_lookup(my_tsv$featureID)
378 my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 586 my_tsv["level1Level2Sig"] <- vrbl_metadata[
587 match(my_tsv$featureID, vrbl_metadata_names)
588 , vrbl_metadata_col
589 ]
379 tsv <<- my_tsv 590 tsv <<- my_tsv
380 corcov_tsv_action(tsv) 591 corcov_tsv_action(tsv)
381 did_plot <<- TRUE 592 did_plot <<- TRUE
382 } 593 }
383 } 594 }
402 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name 613 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name
403 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 614 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1
404 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 615 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2
405 # only process this column if both factors are members of lvlCSV 616 # only process this column if both factors are members of lvlCSV
406 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) 617 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
407 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 618 progress_action(
619 sprintf("calculating/plotting contrast of %s vs. %s"
620 , fctr_lvl_1, fctr_lvl_2))
408 # choose only samples with one of the two factors for this column 621 # choose only samples with one of the two factors for this column
409 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 622 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
410 predictor <- smpl_metadata_facC[chosen_samples] 623 predictor <- smpl_metadata_facC[chosen_samples]
411 # extract only the significantly-varying features and the chosen samples 624 # extract only the significantly-varying features and the chosen samples
412 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) 625 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] *
413 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] 626 ( if (intersample_sig_col %in% colnames(vrbl_metadata)) {
627 vrbl_metadata[,intersample_sig_col]
628 } else {
629 1
630 }
631 )
632 col_selector <- vrbl_metadata_names[
633 if ( pairSigFeatOnly ) fully_significant else overall_significant
634 ]
414 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 635 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
415 my_cor_cov <- do_detail_plot( 636 my_cor_cov <- do_detail_plot(
416 x_dataMatrix = my_matrix 637 x_dataMatrix = my_matrix
417 , x_predictor = predictor 638 , x_predictor = predictor
418 , x_is_match = is_match 639 , x_is_match = is_match
419 , x_algorithm = algoC 640 , x_algorithm = algoC
420 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 641 , x_prefix = if (pairSigFeatOnly) {
642 "Significantly contrasting features"
643 } else {
644 "Significant features"
645 }
421 , x_show_labels = labelFeatures 646 , x_show_labels = labelFeatures
422 , x_progress = progress_action 647 , x_progress = progress_action
423 , x_crossval_i = min(7, length(chosen_samples)) 648 , x_crossval_i = min(7, length(chosen_samples))
424 , x_env = calc_env 649 , x_env = calc_env
425 ) 650 )
427 progress_action("NOTHING TO PLOT.") 652 progress_action("NOTHING TO PLOT.")
428 } else { 653 } else {
429 tsv <- my_cor_cov$tsv1 654 tsv <- my_cor_cov$tsv1
430 tsv$mz <- mz_lookup(tsv$featureID) 655 tsv$mz <- mz_lookup(tsv$featureID)
431 tsv$rt <- rt_lookup(tsv$featureID) 656 tsv$rt <- rt_lookup(tsv$featureID)
432 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 657 tsv["level1Level2Sig"] <- vrbl_metadata[
658 match(tsv$featureID, vrbl_metadata_names)
659 , vrbl_metadata_col
660 ]
433 corcov_tsv_action(tsv) 661 corcov_tsv_action(tsv)
434 did_plot <- TRUE 662 did_plot <- TRUE
435 } 663 }
436 } 664 }
437 } 665 }
442 if ( length(level_union) > 2 ) { 670 if ( length(level_union) > 2 ) {
443 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## 671 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
444 completed <- c() 672 completed <- c()
445 lapply( 673 lapply(
446 X = level_union 674 X = level_union
447 , FUN = function(x) { 675 , FUN = function(x) {
448 fctr_lvl_1 <- x[1] 676 fctr_lvl_1 <- x[1]
449 fctr_lvl_2 <- { 677 fctr_lvl_2 <- {
450 if ( fctr_lvl_1 %in% completed ) 678 if ( fctr_lvl_1 %in% completed )
451 return("DUMMY") 679 return("DUMMY")
452 # strF(completed) 680 # strF(completed)
453 completed <<- c(completed, fctr_lvl_1) 681 completed <<- c(completed, fctr_lvl_1)
454 setdiff(level_union, fctr_lvl_1) 682 setdiff(level_union, fctr_lvl_1)
455 } 683 }
456 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 684 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
457 fctr_lvl_2 <- "other" 685 fctr_lvl_2 <- "other"
458 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 686 progress_action(
687 sprintf("calculating/plotting contrast of %s vs. %s"
688 , fctr_lvl_1, fctr_lvl_2)
689 )
459 if (length(unique(chosen_samples)) < 1) { 690 if (length(unique(chosen_samples)) < 1) {
460 progress_action("NOTHING TO PLOT...") 691 progress_action("NOTHING TO PLOT...")
461 } else { 692 } else {
462 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 693 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
463 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) 694 predictor <- sapply(
695 X = chosen_facC
696 , FUN = function(fac) {
697 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
698 }
699 )
464 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] 700 my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
465 # only process this column if both factors are members of lvlCSV 701 # only process this column if both factors are members of lvlCSV
466 is_match <- isLevelSelected(fctr_lvl_1) 702 is_match <- isLevelSelected(fctr_lvl_1)
467 my_cor_cov <- do_detail_plot( 703 my_cor_cov <- do_detail_plot(
468 x_dataMatrix = my_matrix 704 x_dataMatrix = my_matrix
492 ## pass 2 - contrast each selected level with each of the other levels individually ## 728 ## pass 2 - contrast each selected level with each of the other levels individually ##
493 completed <- c() 729 completed <- c()
494 utils::combn( 730 utils::combn(
495 x = level_union 731 x = level_union
496 , m = 2 732 , m = 2
497 , FUN = function(x) { 733 , FUN = function(x) {
498 fctr_lvl_1 <- x[1] 734 fctr_lvl_1 <- x[1]
499 fctr_lvl_2 <- x[2] 735 fctr_lvl_2 <- x[2]
500 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 736 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
501 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 737 progress_action(
738 sprintf("calculating/plotting contrast of %s vs. %s"
739 , fctr_lvl_1, fctr_lvl_2))
502 if (length(unique(chosen_samples)) < 1) { 740 if (length(unique(chosen_samples)) < 1) {
503 progress_action("NOTHING TO PLOT...") 741 progress_action("NOTHING TO PLOT...")
504 } else { 742 } else {
505 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 743 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
506 predictor <- chosen_facC 744 predictor <- chosen_facC
534 } else { 772 } else {
535 progress_action("NOTHING TO PLOT....") 773 progress_action("NOTHING TO PLOT....")
536 } 774 }
537 } 775 }
538 if (!did_plot) { 776 if (!did_plot) {
539 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) 777 failure_action(
778 sprintf(
779 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'"
780 , facC, originalLevCSV))
540 return ( FALSE ) 781 return ( FALSE )
541 } 782 }
542 return ( TRUE ) 783 return ( TRUE )
543 } 784 }
544 785
545 # Calculate data for correlation-versus-covariance plot 786 # Calculate data for correlation-versus-covariance plot
546 # Adapted from: 787 # Adapted from:
547 # Wiklund_2008 doi:10.1021/ac0713510 788 # Wiklund_2008 doi:10.1021/ac0713510
548 # Galindo_Prieto_2014 doi:10.1002/cem.2627 789 # Galindo_Prieto_2014 doi:10.1002/cem.2627
549 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R 790 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
550 cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) { 791 cor_vs_cov <- function(
792 matrix_x, ropls_x
793 , predictor_projection_x = TRUE
794 ) {
551 x_class <- class(ropls_x) 795 x_class <- class(ropls_x)
552 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 796 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
553 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) 797 stop(
798 paste(
799 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
800 , as.character(x_class)
801 )
802 )
554 } 803 }
555 result <- list() 804 result <- list()
556 result$projection <- projection <- if (parallel_x) 1 else 2 805 result$projection <- projection <- if (predictor_projection_x) 1 else 2
557 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS 806 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
558 if ( ropls_x@suppLs$algoC == "nipals") { 807 if ( ropls_x@suppLs$algoC == "nipals") {
559 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 808 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
560 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) 809 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
561 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) 810 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
562 if (parallel_x) 811 if (predictor_projection_x)
563 score_matrix <- ropls_x@scoreMN 812 score_matrix <- ropls_x@scoreMN
564 else 813 else
565 score_matrix <- ropls_x@orthoScoreMN 814 score_matrix <- ropls_x@orthoScoreMN
566 score_matrix_transposed <- t(score_matrix) 815 score_matrix_transposed <- t(score_matrix)
567 score_matrix_magnitude <- mag(score_matrix) 816 score_matrix_magnitude <- mag(score_matrix)
568 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) 817 result$covariance <-
569 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) 818 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
819 result$correlation <-
820 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
570 } else { 821 } else {
571 # WARNING - untested code - I don't have test data to exercise this branch 822 # WARNING - untested code - I don't have test data to exercise this branch
572 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 823 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
573 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F 824 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
574 if (parallel_x) 825 if (predictor_projection_x)
575 score_matrix <- ropls_x@scoreMN 826 score_matrix <- ropls_x@scoreMN
576 else 827 else
577 score_matrix <- ropls_x@orthoScoreMN 828 score_matrix <- ropls_x@orthoScoreMN
578 score_matrix_transposed <- t(score_matrix) 829 score_matrix_transposed <- t(score_matrix)
579 cov_divisor <- nrow(matrix_x) - 1 830 cov_divisor <- nrow(matrix_x) - 1
610 feature_count <- length(ropls_x@vipVn) 861 feature_count <- length(ropls_x@vipVn)
611 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) 862 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count)
612 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) 863 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
613 superresult <- list() 864 superresult <- list()
614 if (length(result$vip4o) == 0) result$vip4o <- NA 865 if (length(result$vip4o) == 0) result$vip4o <- NA
615 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) 866 greaterLevel <- sapply(
616 superresult$tsv1 <- data.frame( 867 X = result$correlation
617 featureID = names(ropls_x@vipVn) 868 , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
869 )
870
871 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
872 featureID <- names(ropls_x@vipVn)
873 greaterLevel <- greaterLevel[featureID]
874 result$correlation <- result$correlation[featureID]
875 result$covariance <- result$covariance[featureID]
876 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
877
878 tsv1 <- data.frame(
879 featureID = featureID
618 , factorLevel1 = result$level1 880 , factorLevel1 = result$level1
619 , factorLevel2 = result$level2 881 , factorLevel2 = result$level2
620 , greaterLevel = greaterLevel 882 , greaterLevel = greaterLevel
621 , projection = result$projection 883 , projection = result$projection
622 , correlation = result$correlation 884 , correlation = result$correlation
625 , vip4o = result$vip4o 887 , vip4o = result$vip4o
626 , loadp = result$loadp 888 , loadp = result$loadp
627 , loado = result$loado 889 , loado = result$loado
628 , row.names = NULL 890 , row.names = NULL
629 ) 891 )
630 rownames(superresult$tsv1) <- superresult$tsv1$featureID 892 tsv1 <- tsv1[!is.na(tsv1$correlation),]
893 tsv1 <- tsv1[!is.na(tsv1$covariance),]
894 superresult$tsv1 <- tsv1
895 rownames(superresult$tsv1) <- tsv1$featureID
631 superresult$projection <- result$projection 896 superresult$projection <- result$projection
632 superresult$covariance <- result$covariance 897 superresult$covariance <- result$covariance
633 superresult$correlation <- result$correlation 898 superresult$correlation <- result$correlation
634 superresult$vip4p <- result$vip4p 899 superresult$vip4p <- result$vip4p
635 superresult$vip4o <- result$vip4o 900 superresult$vip4o <- result$vip4o
636 superresult$loadp <- result$loadp 901 superresult$loadp <- result$loadp
637 superresult$loado <- result$loado 902 superresult$loado <- result$loado
638 superresult$details <- result 903 superresult$details <- result
639 result$superresult <- superresult 904 result$superresult <- superresult
640 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways 905 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
641 result$oplsda <- ropls_x 906 result$oplsda <- ropls_x
642 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways 907 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways
643 return (superresult) 908 return (superresult)
644 } 909 }
645 910
646 # vim: sw=2 ts=2 et : 911 # vim: sw=2 ts=2 et :