comparison w4mcorcov_calc.R @ 5:50f60f94c034 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit aff1790e25523d038a1e9528de748191c096132f
author eschen42
date Fri, 30 Mar 2018 14:59:19 -0400
parents 8bba31f628da
children 7bd523ca1f9a
comparison
equal deleted inserted replaced
4:8bba31f628da 5:50f60f94c034
5 } 5 }
6 6
7 #### OPLS-DA 7 #### OPLS-DA
8 algoC <- "nipals" 8 algoC <- "nipals"
9 9
10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { 10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) {
11 off <- function(x) if (x_show_labels == "0") 0 else x 11 off <- function(x) if (x_show_labels == "0") 0 else x
12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { 12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) {
13 my_oplsda <- opls( 13 my_oplsda <- opls(
14 x = x_dataMatrix 14 x = x_dataMatrix
15 , y = x_predictor 15 , y = x_predictor
21 , crossvalI = x_crossval_i 21 , crossvalI = x_crossval_i
22 ) 22 )
23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
26 my_cor_vs_cov <- cor_vs_cov( 26 do_s_plot <- function(parallel_x) {
27 matrix_x = x_dataMatrix 27 my_cor_vs_cov <- cor_vs_cov(
28 , ropls_x = my_oplsda 28 matrix_x = x_dataMatrix
29 , ropls_x = my_oplsda
30 , parallel_x = parallel_x
31 )
32 with(
33 my_cor_vs_cov
34 , {
35 min_x <- min(covariance)
36 max_x <- max(covariance)
37 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
38 covariance <- covariance / lim_x
39 lim_x <- 1.2
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
43 plus_cor <- correlation
44 plus_cov <- covariance
45 cex <- 0.75
46 which_projection <- if (projection == 1) "t1" else "o1"
47 which_loading <- if (projection == 1) "parallel" else "orthogonal"
48 if (projection == 1) {
49 my_xlab <- "relative covariance(feature,t1)"
50 my_x <- plus_cov
51 my_ylab <- "correlation(feature,t1) [~ parallel loading]"
52 my_y <- plus_cor
53 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
54 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
55 my_load_distal <- loadp
56 my_load_proximal <- loado
57 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
58 red <- as.numeric(correlation > 0) * vipcp
59 blue <- as.numeric(correlation < 0) * vipcp
60 alpha <- 0.1 + 0.4 * vipcp
61 my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
62 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
63 } else {
64 my_xlab <- "relative covariance(feature,to1)"
65 my_x <- -plus_cov
66 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
67 my_ylab <- "correlation(feature,to1) [~ orthogonal loading]"
68 my_y <- plus_cor
69 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
70 my_load_distal <- loado
71 my_load_proximal <- loadp
72 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
73 alpha <- 0.1 + 0.4 * vipco
74 my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha)
75 main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
76 }
77 main_cex <- min(1.0, 46.0/nchar(main_label))
78 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
79 plot(
80 y = my_y
81 , x = my_x
82 , type = "p"
83 , xlim = my_xlim
84 , ylim = my_ylim
85 , xlab = my_xlab
86 , ylab = my_ylab
87 , main = main_label
88 , cex.main = main_cex
89 , cex = cex
90 , pch = 16
91 , col = my_col
92 )
93 low_x <- -0.7 * lim_x
94 high_x <- 0.7 * lim_x
95 if (projection == 1) {
96 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
97 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
98 }
99 if ( x_show_labels != "0" ) {
100 names(my_load_distal) <- tsv1$featureID
101 names(my_load_proximal) <- tsv1$featureID
102 if ( x_show_labels == "ALL" ) {
103 n_labels <- length(my_load_distal)
104 } else {
105 n_labels <- as.numeric(x_show_labels)
106 }
107 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
108 labels_to_show <- c(
109 names(head(sort(my_load_distal),n = n_labels))
110 , names(tail(sort(my_load_distal),n = n_labels))
111 )
112 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
113 x_text_offset <- 0.024
114 y_text_offset <- (if (projection == 1) 1 else -1) * -0.017
115 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
116 print("str(x_arg)")
117 print(str(x_arg))
118 print("str(y_arg)")
119 print(str(y_arg))
120 print("str(labels_arg)")
121 print(str(labels_arg))
122 text(
123 y = y_arg
124 , x = x_arg + x_text_offset
125 , cex = 0.4
126 , labels = labels_arg
127 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
128 , srt = slant_arg
129 , adj = 0 # left-justified
130 )
131 }
132 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
133 if (length(my_x) > 1) {
134 label_features(
135 x_arg = my_x [my_x > 0]
136 , y_arg = my_y [my_x > 0] - y_text_offset
137 , labels_arg = labels[my_x > 0]
138 , slant_arg = -my_slant
139 )
140 label_features(
141 x_arg = my_x [my_x < 0]
142 , y_arg = my_y [my_x < 0] + y_text_offset
143 , labels_arg = labels[my_x < 0]
144 , slant_arg = my_slant
145 )
146 } else {
147 label_features(
148 x_arg = my_x
149 , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
150 , labels_arg = labels
151 , slant_arg = (if (my_x > 1) -1 else 1) * my_slant
152 )
153 }
154 }
155 }
29 ) 156 )
30 with( 157 return (my_cor_vs_cov)
31 my_cor_vs_cov 158 }
32 , { 159 my_cor_vs_cov <- do_s_plot( parallel_x = TRUE )
33 min_x <- min(covariance)
34 max_x <- max(covariance)
35 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
36 covariance <- covariance / lim_x
37 lim_x <- 1.2
38 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
39 main_cex <- min(1.0, 46.0/nchar(main_label))
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
43 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
44 alpha <- 0.1 + 0.4 * vipco
45 red <- as.numeric(correlation > 0) * vipco
46 blue <- as.numeric(correlation < 0) * vipco
47 plus_cor <- correlation
48 plus_cov <- covariance
49 cex <- 0.75
50 plot(
51 y = plus_cor
52 , x = plus_cov
53 , type="p"
54 , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) )
55 , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) )
56 , xlab = sprintf("relative covariance(feature,t1)")
57 , ylab = sprintf("correlation(feature,t1)")
58 , main = main_label
59 , cex.main = main_cex
60 , cex = cex
61 , pch = 16
62 , col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
63 )
64 low_x <- -0.7 * lim_x
65 high_x <- 0.7 * lim_x
66 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
67 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
68 if ( x_show_labels != "0" ) {
69 my_loadp <- loadp
70 my_loado <- loado
71 names(my_loadp) <- tsv1$featureID
72 names(my_loado) <- tsv1$featureID
73 if ( x_show_labels == "ALL" ) {
74 n_labels <- length(loadp)
75 } else {
76 n_labels <- as.numeric(x_show_labels)
77 }
78 n_labels <- min( n_labels, (1 + length(loadp)) / 2 )
79 labels_to_show <- c(
80 names(head(sort(my_loadp),n = n_labels))
81 , names(tail(sort(my_loadp),n = n_labels))
82 )
83 if ( x_show_loado_labels ) {
84 labels_to_show <- c(
85 labels_to_show
86 , names(head(sort(my_loado),n = n_labels))
87 , names(tail(sort(my_loado),n = n_labels))
88 )
89 }
90 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
91 text(
92 y = plus_cor - 0.013
93 , x = plus_cov + 0.020
94 , cex = 0.4
95 , labels = labels
96 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha)
97 , srt = -30 # slant 30 degrees downward
98 , adj = 0 # left-justified
99 )
100 }
101 }
102 )
103 typeVc <- c("correlation", # 1 160 typeVc <- c("correlation", # 1
104 "outlier", # 2 161 "outlier", # 2
105 "overview", # 3 162 "overview", # 3
106 "permutation", # 4 163 "permutation", # 4
107 "predict-train", # 5 164 "predict-train", # 5
118 } else { 175 } else {
119 my_typevc <- c("(dummy)","overview","(dummy)") 176 my_typevc <- c("(dummy)","overview","(dummy)")
120 } 177 }
121 for (my_type in my_typevc) { 178 for (my_type in my_typevc) {
122 if (my_type %in% typeVc) { 179 if (my_type %in% typeVc) {
123 # print(sprintf("plotting type %s", my_type))
124 tryCatch({ 180 tryCatch({
125 plot( 181 if ( my_type != "x-loading" ) {
126 x = my_oplsda 182 plot(
127 , typeVc = my_type 183 x = my_oplsda
128 , parCexN = 0.4 184 , typeVc = my_type
129 , parDevNewL = FALSE 185 , parCexN = 0.4
130 , parLayL = TRUE 186 , parDevNewL = FALSE
131 , parEllipsesL = TRUE 187 , parLayL = TRUE
132 ) 188 , parEllipsesL = TRUE
189 )
190 } else {
191 do_s_plot( parallel_x = FALSE )
192 }
133 }, error = function(e) { 193 }, error = function(e) {
134 x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) 194 x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) )
135 }) 195 })
136 } else { 196 } else {
137 # print("plotting dummy graph")
138 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 197 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
139 text(x=1, y=1, labels="no orthogonal projection is possible") 198 text(x=1, y=1, labels="no orthogonal projection is possible")
140 } 199 }
141 } 200 }
142 return (my_cor_vs_cov) 201 return (my_cor_vs_cov)
143 } else { 202 } else {
144 # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor))))
145 return (NULL) 203 return (NULL)
146 } 204 }
147 } 205 }
148 206
149 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 207 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
172 # extract the levels from the environment 230 # extract the levels from the environment
173 originalLevCSV <- levCSV <- calc_env$levCSV 231 originalLevCSV <- levCSV <- calc_env$levCSV
174 # matchingC is one of { "none", "wildcard", "regex" } 232 # matchingC is one of { "none", "wildcard", "regex" }
175 matchingC <- calc_env$matchingC 233 matchingC <- calc_env$matchingC
176 labelFeatures <- calc_env$labelFeatures 234 labelFeatures <- calc_env$labelFeatures
177 labelOrthoFeatures <- calc_env$labelOrthoFeatures
178 235
179 # arg/env checking 236 # arg/env checking
180 if (!(facC %in% names(smpl_metadata))) { 237 if (!(facC %in% names(smpl_metadata))) {
181 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) 238 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC))
182 return ( FALSE ) 239 return ( FALSE )
306 , x_predictor = predictor 363 , x_predictor = predictor
307 , x_is_match = is_match 364 , x_is_match = is_match
308 , x_algorithm = algoC 365 , x_algorithm = algoC
309 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
310 , x_show_labels = labelFeatures 367 , x_show_labels = labelFeatures
311 , x_show_loado_labels = labelOrthoFeatures
312 , x_progress = progress_action 368 , x_progress = progress_action
313 , x_crossval_i = min(7, length(chosen_samples)) 369 , x_crossval_i = min(7, length(chosen_samples))
314 , x_env = calc_env 370 , x_env = calc_env
315 ) 371 )
316 if ( is.null(my_cor_cov) ) { 372 if ( is.null(my_cor_cov) ) {
347 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 403 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1
348 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 404 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2
349 # only process this column if both factors are members of lvlCSV 405 # only process this column if both factors are members of lvlCSV
350 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) 406 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
351 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 407 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
352 # TODO delete next line displaying currently-processed column
353 # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match))
354 # choose only samples with one of the two factors for this column 408 # choose only samples with one of the two factors for this column
355 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 409 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
356 predictor <- smpl_metadata_facC[chosen_samples] 410 predictor <- smpl_metadata_facC[chosen_samples]
357 # extract only the significantly-varying features and the chosen samples 411 # extract only the significantly-varying features and the chosen samples
358 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) 412 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
363 , x_predictor = predictor 417 , x_predictor = predictor
364 , x_is_match = is_match 418 , x_is_match = is_match
365 , x_algorithm = algoC 419 , x_algorithm = algoC
366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 420 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
367 , x_show_labels = labelFeatures 421 , x_show_labels = labelFeatures
368 , x_show_loado_labels = labelOrthoFeatures
369 , x_progress = progress_action 422 , x_progress = progress_action
370 , x_crossval_i = min(7, length(chosen_samples)) 423 , x_crossval_i = min(7, length(chosen_samples))
371 , x_env = calc_env 424 , x_env = calc_env
372 ) 425 )
373 if ( is.null(my_cor_cov) ) { 426 if ( is.null(my_cor_cov) ) {
400 completed <<- c(completed, fctr_lvl_1) 453 completed <<- c(completed, fctr_lvl_1)
401 setdiff(level_union, fctr_lvl_1) 454 setdiff(level_union, fctr_lvl_1)
402 } 455 }
403 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 456 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
404 fctr_lvl_2 <- "other" 457 fctr_lvl_2 <- "other"
405 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
406 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 458 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
407 if (length(unique(chosen_samples)) < 1) { 459 if (length(unique(chosen_samples)) < 1) {
408 progress_action("NOTHING TO PLOT...") 460 progress_action("NOTHING TO PLOT...")
409 } else { 461 } else {
410 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 462 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
417 , x_predictor = predictor 469 , x_predictor = predictor
418 , x_is_match = is_match 470 , x_is_match = is_match
419 , x_algorithm = algoC 471 , x_algorithm = algoC
420 , x_prefix = "Features" 472 , x_prefix = "Features"
421 , x_show_labels = labelFeatures 473 , x_show_labels = labelFeatures
422 , x_show_loado_labels = labelOrthoFeatures
423 , x_progress = progress_action 474 , x_progress = progress_action
424 , x_crossval_i = min(7, length(chosen_samples)) 475 , x_crossval_i = min(7, length(chosen_samples))
425 , x_env = calc_env 476 , x_env = calc_env
426 ) 477 )
427 if ( is.null(my_cor_cov) ) { 478 if ( is.null(my_cor_cov) ) {
432 tsv$rt <- rt_lookup(tsv$featureID) 483 tsv$rt <- rt_lookup(tsv$featureID)
433 corcov_tsv_action(tsv) 484 corcov_tsv_action(tsv)
434 did_plot <<- TRUE 485 did_plot <<- TRUE
435 } 486 }
436 } 487 }
437 #print("baz")
438 "dummy" # need to return a value; otherwise combn fails with an error 488 "dummy" # need to return a value; otherwise combn fails with an error
439 } 489 }
440 ) 490 )
441 } 491 }
442 ## pass 2 - contrast each selected level with each of the other levels individually ## 492 ## pass 2 - contrast each selected level with each of the other levels individually ##
446 , m = 2 496 , m = 2
447 , FUN = function(x) { 497 , FUN = function(x) {
448 fctr_lvl_1 <- x[1] 498 fctr_lvl_1 <- x[1]
449 fctr_lvl_2 <- x[2] 499 fctr_lvl_2 <- x[2]
450 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 500 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
451 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
452 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 501 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
453 if (length(unique(chosen_samples)) < 1) { 502 if (length(unique(chosen_samples)) < 1) {
454 progress_action("NOTHING TO PLOT...") 503 progress_action("NOTHING TO PLOT...")
455 } else { 504 } else {
456 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 505 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
463 , x_predictor = predictor 512 , x_predictor = predictor
464 , x_is_match = is_match 513 , x_is_match = is_match
465 , x_algorithm = algoC 514 , x_algorithm = algoC
466 , x_prefix = "Features" 515 , x_prefix = "Features"
467 , x_show_labels = labelFeatures 516 , x_show_labels = labelFeatures
468 , x_show_loado_labels = labelOrthoFeatures
469 , x_progress = progress_action 517 , x_progress = progress_action
470 , x_crossval_i = min(7, length(chosen_samples)) 518 , x_crossval_i = min(7, length(chosen_samples))
471 , x_env = calc_env 519 , x_env = calc_env
472 ) 520 )
473 if ( is.null(my_cor_cov) ) { 521 if ( is.null(my_cor_cov) ) {
478 tsv$rt <- rt_lookup(tsv$featureID) 526 tsv$rt <- rt_lookup(tsv$featureID)
479 corcov_tsv_action(tsv) 527 corcov_tsv_action(tsv)
480 did_plot <<- TRUE 528 did_plot <<- TRUE
481 } 529 }
482 } 530 }
483 #print("baz")
484 "dummy" # need to return a value; otherwise combn fails with an error 531 "dummy" # need to return a value; otherwise combn fails with an error
485 } 532 }
486 ) 533 )
487 } else { 534 } else {
488 progress_action("NOTHING TO PLOT....") 535 progress_action("NOTHING TO PLOT....")
498 # Calculate data for correlation-versus-covariance plot 545 # Calculate data for correlation-versus-covariance plot
499 # Adapted from: 546 # Adapted from:
500 # Wiklund_2008 doi:10.1021/ac0713510 547 # Wiklund_2008 doi:10.1021/ac0713510
501 # Galindo_Prieto_2014 doi:10.1002/cem.2627 548 # Galindo_Prieto_2014 doi:10.1002/cem.2627
502 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R 549 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
503 cor_vs_cov <- function(matrix_x, ropls_x) { 550 cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) {
504 x_class <- class(ropls_x) 551 x_class <- class(ropls_x)
505 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 552 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
506 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) 553 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) )
507 } 554 }
508 result <- list() 555 result <- list()
556 result$projection <- projection <- if (parallel_x) 1 else 2
509 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS 557 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
510 if ( ropls_x@suppLs$algoC == "nipals") { 558 if ( ropls_x@suppLs$algoC == "nipals") {
511 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 559 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
512 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) 560 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
513 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) 561 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
514 score_matrix <- ropls_x@scoreMN 562 if (parallel_x)
563 score_matrix <- ropls_x@scoreMN
564 else
565 score_matrix <- ropls_x@orthoScoreMN
515 score_matrix_transposed <- t(score_matrix) 566 score_matrix_transposed <- t(score_matrix)
516 score_matrix_magnitude <- mag(score_matrix) 567 score_matrix_magnitude <- mag(score_matrix)
517 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) 568 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
518 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) 569 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
519 } else { 570 } else {
520 # WARNING - untested code - I don't have test data to exercise this branch 571 # WARNING - untested code - I don't have test data to exercise this branch
521 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 572 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
522 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F 573 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
523 score_matrix <- ropls_x@scoreMN 574 if (parallel_x)
575 score_matrix <- ropls_x@scoreMN
576 else
577 score_matrix <- ropls_x@orthoScoreMN
524 score_matrix_transposed <- t(score_matrix) 578 score_matrix_transposed <- t(score_matrix)
525 cov_divisor <- nrow(matrix_x) - 1 579 cov_divisor <- nrow(matrix_x) - 1
526 result$covariance <- sapply( 580 result$covariance <- sapply(
527 X = 1:ncol(matrix_x) 581 X = 1:ncol(matrix_x)
528 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor 582 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
538 , FUN = function(x) { 592 , FUN = function(x) {
539 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) 593 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
540 } 594 }
541 ) 595 )
542 } 596 }
543 result$correlation <- result$correlation[1,,drop = TRUE] 597 result$correlation <- result$correlation[ 1, , drop = TRUE ]
544 result$covariance <- result$covariance[1,,drop = TRUE] 598 result$covariance <- result$covariance [ 1, , drop = TRUE ]
545 599
546 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 600 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
547 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) 601 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.)
548 result$vip4p <- as.numeric(ropls_x@vipVn) 602 result$vip4p <- as.numeric(ropls_x@vipVn)
549 result$vip4o <- as.numeric(ropls_x@orthoVipVn) 603 result$vip4o <- as.numeric(ropls_x@orthoVipVn)
554 fctr_lvl_1 <- level_names[1] 608 fctr_lvl_1 <- level_names[1]
555 fctr_lvl_2 <- level_names[2] 609 fctr_lvl_2 <- level_names[2]
556 feature_count <- length(ropls_x@vipVn) 610 feature_count <- length(ropls_x@vipVn)
557 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) 611 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count)
558 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) 612 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
559 # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation)))
560 superresult <- list() 613 superresult <- list()
561 if (length(result$vip4o) == 0) result$vip4o <- NA 614 if (length(result$vip4o) == 0) result$vip4o <- NA
562 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) 615 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
563 superresult$tsv1 <- data.frame( 616 superresult$tsv1 <- data.frame(
564 featureID = names(ropls_x@vipVn) 617 featureID = names(ropls_x@vipVn)
565 , factorLevel1 = result$level1 618 , factorLevel1 = result$level1
566 , factorLevel2 = result$level2 619 , factorLevel2 = result$level2
567 , greaterLevel = greaterLevel 620 , greaterLevel = greaterLevel
621 , projection = result$projection
568 , correlation = result$correlation 622 , correlation = result$correlation
569 , covariance = result$covariance 623 , covariance = result$covariance
570 , vip4p = result$vip4p 624 , vip4p = result$vip4p
571 , vip4o = result$vip4o 625 , vip4o = result$vip4o
572 , loadp = result$loadp 626 , loadp = result$loadp
573 , loado = result$loado 627 , loado = result$loado
574 , row.names = NULL 628 , row.names = NULL
575 ) 629 )
576 rownames(superresult$tsv1) <- superresult$tsv1$featureID 630 rownames(superresult$tsv1) <- superresult$tsv1$featureID
631 superresult$projection <- result$projection
577 superresult$covariance <- result$covariance 632 superresult$covariance <- result$covariance
578 superresult$correlation <- result$correlation 633 superresult$correlation <- result$correlation
579 superresult$vip4p <- result$vip4p 634 superresult$vip4p <- result$vip4p
580 superresult$vip4o <- result$vip4o 635 superresult$vip4o <- result$vip4o
581 superresult$loadp <- result$loadp 636 superresult$loadp <- result$loadp
582 superresult$loado <- result$loado 637 superresult$loado <- result$loado
583 superresult$details <- result 638 superresult$details <- result
584 # #print(superresult$tsv1)
585 result$superresult <- superresult 639 result$superresult <- superresult
586 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways 640 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
587 result$oplsda <- ropls_x 641 result$oplsda <- ropls_x
588 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways 642 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways
589 return (superresult) 643 return (superresult)
590 } 644 }
591 645
592 646 # vim: sw=2 ts=2 et :