comparison w4mcorcov_calc.R @ 13:2ae2d26e3270 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e89c652c0849eb1d5a1e6c9100c72c64a8d388b4
author eschen42
date Wed, 12 Dec 2018 09:20:02 -0500
parents ddaf84e15d06
children 90708fdbc22d
comparison
equal deleted inserted replaced
12:ddaf84e15d06 13:2ae2d26e3270
1 # center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ 1 # compute and output detail plots
2 center_colmeans <- function(x) {
3 xcenter = colMeans(x)
4 x - rep(xcenter, rep.int(nrow(x), ncol(x)))
5 }
6
7 #### OPLS-DA
8 algoC <- "nipals"
9
10 do_detail_plot <- function( 2 do_detail_plot <- function(
11 x_dataMatrix 3 x_dataMatrix
12 , x_predictor 4 , x_predictor
13 , x_is_match 5 , x_is_match
14 , x_algorithm 6 , x_algorithm
19 , x_crossval_i 11 , x_crossval_i
20 ) { 12 ) {
21 off <- function(x) if (x_show_labels == "0") 0 else x 13 off <- function(x) if (x_show_labels == "0") 0 else x
22 if ( x_is_match 14 if ( x_is_match
23 && ncol(x_dataMatrix) > 0 15 && ncol(x_dataMatrix) > 0
24 && length(unique(x_predictor))> 1 16 && length(unique(x_predictor)) > 1
25 && x_crossval_i < nrow(x_dataMatrix) 17 && x_crossval_i < nrow(x_dataMatrix)
26 ) { 18 ) {
27 my_oplsda <- opls( 19 my_oplsda <- opls(
28 x = x_dataMatrix 20 x = x_dataMatrix
29 , y = x_predictor 21 , y = x_predictor
34 , plotL = FALSE 26 , plotL = FALSE
35 , crossvalI = x_crossval_i 27 , crossvalI = x_crossval_i
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. 28 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
37 ) 29 )
38 # strip out variables having negligible variance 30 # strip out variables having negligible variance
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] 31 x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE]
40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 32 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
41 33
42 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 34 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
43 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 35 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
44 do_s_plot <- function( 36 do_s_plot <- function(
45 x_env 37 x_env
46 , predictor_projection_x = TRUE 38 , predictor_projection_x = TRUE
47 , cplot_x = FALSE 39 , cplot_x = FALSE
48 , cor_vs_cov_x = NULL 40 , cor_vs_cov_x = NULL
49 ) 41 ) {
50 {
51 if (cplot_x) { 42 if (cplot_x) {
52 cplot_y_correlation <- (x_env$cplot_y == "correlation") 43 cplot_y_correlation <- (x_env$cplot_y == "correlation")
44 default_lim_x <- 10
45 } else {
46 default_lim_x <- 1.2
53 } 47 }
54 if (is.null(cor_vs_cov_x)) { 48 if (is.null(cor_vs_cov_x)) {
55 my_cor_vs_cov <- cor_vs_cov( 49 my_cor_vs_cov <- cor_vs_cov(
56 matrix_x = x_dataMatrix 50 matrix_x = x_dataMatrix
57 , ropls_x = my_oplsda 51 , ropls_x = my_oplsda
58 , predictor_projection_x = predictor_projection_x 52 , predictor_projection_x = predictor_projection_x
59 , x_progress 53 , x_progress
54 , x_env
60 ) 55 )
61 } else { 56 } else {
62 my_cor_vs_cov <- cor_vs_cov_x 57 my_cor_vs_cov <- cor_vs_cov_x
63 } 58 }
64 # str(my_cor_vs_cov) 59
65 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { 60 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
66 if (is.null(cor_vs_cov_x)) { 61 if (is.null(cor_vs_cov_x)) {
67 x_progress("No cor_vs_cov data produced") 62 x_progress(
63 sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
64 )
68 } 65 }
69 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 66 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
70 text(x=1, y=1, labels="too few covariance data") 67 text(x=1, y=1, labels="too few covariance data")
71 return(my_cor_vs_cov) 68 return(my_cor_vs_cov)
72 } 69 }
74 my_cor_vs_cov 71 my_cor_vs_cov
75 , { 72 , {
76 min_x <- min(covariance, na.rm = TRUE) 73 min_x <- min(covariance, na.rm = TRUE)
77 max_x <- max(covariance, na.rm = TRUE) 74 max_x <- max(covariance, na.rm = TRUE)
78 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) 75 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
79 covariance <- covariance / lim_x 76
80 lim_x <- 1.2 77 # Regarding using VIP as a guide to selecting a biomarker:
81 # "It is generally accepted that a variable should be selected if vj>1, [27–29], 78 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
82 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." 79 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
83 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) 80 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
84 plus_cor <- correlation 81 plus_cor <- correlation
85 plus_cov <- covariance 82 plus_cov <- covariance
86 cex <- 0.65 83 if (length(plus_cor) != 0 && length(plus_cor) != 0) {
87 which_projection <- if (projection == 1) "t1" else "o1" 84 cex <- 0.65
88 which_loading <- if (projection == 1) "parallel" else "orthogonal" 85 if (projection == 1) {
89 if (projection == 1) { # predictor-projection 86 # predictor-projection
90 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) 87 vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83)))
91 if (!cplot_x) { # S-plot predictor-projection 88 if (!cplot_x) {
92 my_xlab <- "relative covariance(feature,t1)" 89 # S-plot predictor-projection
93 my_x <- plus_cov 90 my_xlab <- "covariance(feature,t1)"
94 my_ylab <- "correlation(feature,t1)" 91 my_x <- plus_cov
95 my_y <- plus_cor
96 } else { # C-plot predictor-projection
97 my_xlab <- "variable importance in predictor-projection"
98 my_x <- vip4p
99 if (cplot_y_correlation) {
100 my_ylab <- "correlation(feature,t1)" 92 my_ylab <- "correlation(feature,t1)"
101 my_y <- plus_cor 93 my_y <- plus_cor
94 # X,Y limits for S-PLOT
95 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
96 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
102 } else { 97 } else {
103 my_ylab <- "relative covariance(feature,t1)" 98 # C-plot predictor-projection
104 my_y <- plus_cov 99 my_xlab <- "variable importance in predictor-projection"
100 my_x <- vip4p
101 if (cplot_y_correlation) {
102 my_ylab <- "correlation(feature,t1)"
103 my_y <- plus_cor
104 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
105 } else {
106 my_ylab <- "covariance(feature,t1)"
107 my_y <- plus_cov
108 my_ylim <- max(abs(plus_cov))
109 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
110 }
111 # X,Y limits for C-plot
112 lim_x <- max(my_x, na.rm = TRUE) * 1.1
113 lim_x <- min(lim_x, default_lim_x)
114 my_xlim <- c( 0, lim_x ) # + off(0.2) )
105 } 115 }
106 } 116 my_load_distal <- loadp
107 if (cplot_x) { 117 my_load_proximal <- loado
108 lim_x <- max(my_x, na.rm = TRUE) * 1.1 118 red <- as.numeric(correlation > 0) * vipcp
109 my_xlim <- c( 0, lim_x + off(0.2) ) 119 blue <- as.numeric(correlation < 0) * vipcp
120 alpha <- 0.1 + 0.4 * vipcp
121 red[is.na(red)] <- 0
122 blue[is.na(blue)] <- 0
123 alpha[is.na(alpha)] <- 0
124 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
125 main_label <- sprintf("%s for level %s versus %s"
126 , x_prefix, fctr_lvl_1, fctr_lvl_2)
110 } else { 127 } else {
111 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) 128 # orthogonal projection
112 } 129 vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83)))
113 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 130 if (!cplot_x) {
114 my_load_distal <- loadp 131 # S-PLOT orthogonal-projection
115 my_load_proximal <- loado 132 my_xlab <- "covariance(feature,to1)"
116 red <- as.numeric(correlation > 0) * vipcp 133 my_x <- -plus_cov
117 blue <- as.numeric(correlation < 0) * vipcp 134 # X,Y limits for S-PLOT
118 alpha <- 0.1 + 0.4 * vipcp 135 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
119 red[is.na(red)] <- 0
120 blue[is.na(blue)] <- 0
121 alpha[is.na(alpha)] <- 0
122 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
123 main_label <- sprintf("%s for level %s versus %s"
124 , x_prefix, fctr_lvl_1, fctr_lvl_2)
125 } else { # orthogonal projection
126 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
127 if (!cplot_x) {
128 my_xlab <- "relative covariance(feature,to1)"
129 my_x <- -plus_cov
130 } else {
131 my_xlab <- "variable importance in orthogonal projection"
132 my_x <- vip4o
133 }
134 if (!cplot_x) { # S-plot orthogonal projection
135 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
136 my_ylab <- "correlation(feature,to1)"
137 my_y <- plus_cor
138 } else { # C-plot orthogonal projection
139 lim_x <- max(my_x, na.rm = TRUE) * 1.1
140 my_xlim <- c( 0, lim_x + off(0.2) )
141 if (cplot_y_correlation) {
142 my_ylab <- "correlation(feature,to1)" 136 my_ylab <- "correlation(feature,to1)"
143 my_y <- plus_cor 137 my_y <- plus_cor
138 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
144 } else { 139 } else {
145 my_ylab <- "relative covariance(feature,to1)" 140 # C-plot orthogonal-projection
146 my_y <- plus_cov 141 my_xlab <- "variable importance in orthogonal projection"
142 my_x <- vip4o
143 # C-plot orthogonal projection
144 # X,Y limits for C-plot
145 lim_x <- max(my_x, na.rm = TRUE) * 1.1
146 my_xlim <- c( 0, lim_x ) # + off(0.2) )
147 if (cplot_y_correlation) {
148 my_ylab <- "correlation(feature,to1)"
149 my_y <- plus_cor
150 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
151 } else {
152 my_ylab <- "covariance(feature,to1)"
153 my_y <- plus_cov
154 my_ylim <- max(abs(plus_cov))
155 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
156 }
147 } 157 }
158 my_load_distal <- loado
159 my_load_proximal <- loadp
160 alpha <- 0.1 + 0.4 * vipco
161 alpha[is.na(alpha)] <- 0
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
163 main_label <- sprintf(
164 "Features influencing orthogonal projection for %s versus %s"
165 , fctr_lvl_1, fctr_lvl_2)
148 } 166 }
149 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 167 main_cex <- min(1.0, 46.0/nchar(main_label))
150 my_load_distal <- loado 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
151 my_load_proximal <- loadp 169 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18)
152 alpha <- 0.1 + 0.4 * vipco 170 if ( sum(is.infinite(my_xlim)) == 0 ) {
153 alpha[is.na(alpha)] <- 0 171 plot(
154 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) 172 y = my_y
155 main_label <- sprintf( 173 , x = my_x
156 "Features influencing orthogonal projection for %s versus %s" 174 , type = "p"
157 , fctr_lvl_1, fctr_lvl_2) 175 , xlim = my_xlim
158 } 176 , ylim = my_ylim
159 main_cex <- min(1.0, 46.0/nchar(main_label)) 177 , xlab = my_xlab
160 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward 178 , ylab = my_ylab
161 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) 179 , main = main_label
162 plot( 180 , cex.main = main_cex
163 y = my_y 181 , cex = cex
164 , x = my_x 182 , pch = my_pch
165 , type = "p" 183 , col = my_col
166 , xlim = my_xlim
167 , ylim = my_ylim
168 , xlab = my_xlab
169 , ylab = my_ylab
170 , main = main_label
171 , cex.main = main_cex
172 , cex = cex
173 , pch = my_pch
174 , col = my_col
175 )
176 low_x <- -0.7 * lim_x
177 high_x <- 0.7 * lim_x
178 if (projection == 1 && !cplot_x) {
179 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
180 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
181 }
182 if ( x_show_labels != "0" ) {
183 names(my_load_distal) <- tsv1$featureID
184 names(my_load_proximal) <- tsv1$featureID
185 if ( x_show_labels == "ALL" ) {
186 n_labels <- length(my_load_distal)
187 } else {
188 n_labels <- as.numeric(x_show_labels)
189 }
190 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
191 labels_to_show <- c(
192 names(head(sort(my_load_distal),n = n_labels))
193 , names(tail(sort(my_load_distal),n = n_labels))
194 )
195 labels <- unname(
196 sapply(
197 X = tsv1$featureID
198 , FUN = function(x) if( x %in% labels_to_show ) x else ""
199 ) 184 )
200 ) 185 low_x <- -0.7 * lim_x
201 x_text_offset <- 0.024 186 high_x <- 0.7 * lim_x
202 y_text_off <- 0.017 187 if (projection == 1 && !cplot_x) {
203 if (!cplot_x) { # S-plot 188 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
204 y_text_offset <- if (projection == 1) -y_text_off else y_text_off 189 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
205 } else { # C-plot 190 }
206 y_text_offset <- 191 if ( x_show_labels != "0" ) {
207 sapply( 192 names(my_load_distal) <- tsv1$featureID
208 X = (my_y > 0) 193 names(my_load_proximal) <- tsv1$featureID
209 , FUN = function(x) { if (x) y_text_off else -y_text_off } 194 if ( x_show_labels == "ALL" ) {
195 n_labels <- length(my_load_distal)
196 } else {
197 n_labels <- as.numeric(x_show_labels)
198 }
199 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
200 labels_to_show <- c(
201 names(head(sort(my_load_distal), n = n_labels))
202 , names(tail(sort(my_load_distal), n = n_labels))
210 ) 203 )
211 } 204 labels <- unname(
212 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { 205 sapply(
213 if (length(labels_arg) > 0) { 206 X = tsv1$featureID
214 unique_slant <- unique(slant_arg) 207 , FUN = function(x) if ( x %in% labels_to_show ) x else ""
215 if (length(unique_slant) == 1) {
216 text(
217 y = y_arg
218 , x = x_arg + x_text_offset
219 , cex = 0.4
220 , labels = labels_arg
221 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
222 , srt = slant_arg
223 , adj = 0 # left-justified
224 ) 208 )
209 )
210 x_text_offset <- 0.024
211 y_text_off <- 0.017
212 if (!cplot_x) {
213 # S-plot
214 y_text_offset <- if (projection == 1) -y_text_off else y_text_off
225 } else { 215 } else {
226 for (slant in unique_slant) { 216 # C-plot
227 text( 217 y_text_offset <-
228 y = y_arg[slant_arg == slant] 218 sapply(
229 , x = x_arg[slant_arg == slant] + x_text_offset 219 X = (my_y > 0)
230 , cex = 0.4 220 , FUN = function(x) {
231 , labels = labels_arg[slant_arg == slant] 221 if (x) y_text_off else -y_text_off
232 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels 222 }
233 , srt = slant 223 )
234 , adj = 0 # left-justified 224 }
225 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
226 if (length(labels_arg) > 0) {
227 unique_slant <- unique(slant_arg)
228 if (length(unique_slant) == 1) {
229 text(
230 y = y_arg
231 , x = x_arg + x_text_offset
232 , cex = 0.4
233 , labels = labels_arg
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
235 , srt = slant_arg
236 , adj = 0 # left-justified
237 )
238 } else {
239 for (slant in unique_slant) {
240 text(
241 y = y_arg[slant_arg == slant]
242 , x = x_arg[slant_arg == slant] + x_text_offset
243 , cex = 0.4
244 , labels = labels_arg[slant_arg == slant]
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
246 , srt = slant
247 , adj = 0 # left-justified
248 )
249 }
250 }
251 }
252 }
253 if (!cplot_x) {
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
255 } else {
256 my_slant <- sapply(
257 X = (my_y > 0)
258 , FUN = function(x) if (x) 2 else -2
259 ) * my_feature_label_slant
260 }
261 if (length(my_x) > 1) {
262 label_features(
263 x_arg = my_x [my_x > 0]
264 , y_arg = my_y [my_x > 0] - y_text_offset
265 , labels_arg = labels[my_x > 0]
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
267 )
268 if (!cplot_x) {
269 label_features(
270 x_arg = my_x [my_x < 0]
271 , y_arg = my_y [my_x < 0] + y_text_offset
272 , labels_arg = labels[my_x < 0]
273 , slant_arg = my_slant
235 ) 274 )
236 } 275 }
237 } 276 } else {
238 } 277 if (!cplot_x) {
239 } 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
240 if (!cplot_x) { 279 my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset
241 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant 280 } else {
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
282 my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset
283 }
284 label_features(
285 x_arg = my_x
286 , y_arg = my_y_arg
287 , labels_arg = labels
288 , slant_arg = my_slant
289 )
290 } # end if (length(my_x) > 1)
291 } # end if ( x_show_labels != "0" )
242 } else { 292 } else {
243 my_slant <- sapply( 293 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
244 X = (my_y > 0) 294 text(x=1, y=1, labels="no S-plot is possible")
245 , FUN = function(x) if (x) 2 else -2 295 } # end if (sum(is.infinte(my_xlim))==0)
246 ) * my_feature_label_slant 296 } else {
247 } 297 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
248 if (length(my_x) > 1) { 298 text(x=1, y=1, labels="no S-plot is possible")
249 label_features( 299 } # end if (length(plus_cor) != 0 && length(plus_cor) != 0)
250 x_arg = my_x [my_x > 0] 300 } # end action
251 , y_arg = my_y [my_x > 0] - y_text_offset 301 ) # end with( my_cor_vs_cov, action )
252 , labels_arg = labels[my_x > 0]
253 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
254 )
255 if (!cplot_x) {
256 label_features(
257 x_arg = my_x [my_x < 0]
258 , y_arg = my_y [my_x < 0] + y_text_offset
259 , labels_arg = labels[my_x < 0]
260 , slant_arg = my_slant
261 )
262 }
263 } else {
264 if (!cplot_x) {
265 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
266 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
267 } else {
268 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
269 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
270 }
271 label_features(
272 x_arg = my_x
273 , y_arg = my_y_arg
274 , labels_arg = labels
275 , slant_arg = my_slant
276 )
277 }
278 }
279 }
280 )
281 return (my_cor_vs_cov) 302 return (my_cor_vs_cov)
282 } 303 } # end function do_s_plot
283 my_cor_vs_cov <- do_s_plot( 304 my_cor_vs_cov <- do_s_plot(
284 x_env = x_env 305 x_env = x_env
285 , predictor_projection_x = TRUE 306 , predictor_projection_x = TRUE
286 , cplot_x = FALSE 307 , cplot_x = FALSE
287 ) 308 )
288 typeVc <- c("correlation", # 1 309 typevc <- c("correlation", # 1
289 "outlier", # 2 310 "outlier", # 2
290 "overview", # 3 311 "overview", # 3
291 "permutation", # 4 312 "permutation", # 4
292 "predict-train", # 5 313 "predict-train", # 5
293 "predict-test", # 6 314 "predict-test", # 6
297 "x-variance", # 10 318 "x-variance", # 10
298 "xy-score", # 11 319 "xy-score", # 11
299 "xy-weight" # 12 320 "xy-weight" # 12
300 ) # [c(3,8,9)] # [c(4,3,8,9)] 321 ) # [c(3,8,9)] # [c(4,3,8,9)]
301 if ( length(my_oplsda@orthoVipVn) > 0 ) { 322 if ( length(my_oplsda@orthoVipVn) > 0 ) {
302 my_typevc <- typeVc[c(9,3,8)] 323 my_typevc <- typevc[c(9,3,8)]
303 } else { 324 } else {
304 my_typevc <- c("(dummy)","overview","(dummy)") 325 my_typevc <- c("(dummy)","overview","(dummy)")
305 } 326 }
306 my_ortho_cor_vs_cov_exists <- FALSE 327 my_ortho_cor_vs_cov_exists <- FALSE
307 for (my_type in my_typevc) { 328 for (my_type in my_typevc) {
308 if (my_type %in% typeVc) { 329 if (my_type %in% typevc) {
309 tryCatch({ 330 tryCatch({
310 if ( my_type != "x-loading" ) { 331 if ( my_type != "x-loading" ) {
311 plot( 332 plot(
312 x = my_oplsda 333 x = my_oplsda
313 , typeVc = my_type 334 , typeVc = my_type
314 , parCexN = 0.4 335 , parCexN = 0.4
315 , parDevNewL = FALSE 336 , parDevNewL = FALSE
316 , parLayL = TRUE 337 , parLayL = TRUE
317 , parEllipsesL = TRUE 338 , parEllipsesL = TRUE
318 ) 339 )
319 if (my_type == "overview") { 340 if (my_type == "overview") {
320 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) 341 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
321 title(sub = sub_label) 342 title(sub = sub_label)
322 } 343 }
323 } else { 344 } else {
324 my_ortho_cor_vs_cov <- do_s_plot( 345 my_ortho_cor_vs_cov <- do_s_plot(
325 x_env = x_env 346 x_env = x_env
326 , predictor_projection_x = FALSE 347 , predictor_projection_x = FALSE
327 , cplot_x = FALSE 348 , cplot_x = FALSE
328 ) 349 )
329 my_ortho_cor_vs_cov_exists <- TRUE 350 my_ortho_cor_vs_cov_exists <- TRUE
351 }
330 } 352 }
331 }, error = function(e) { 353 , error = function(e) {
332 x_progress( 354 x_progress(
333 sprintf( 355 sprintf(
334 "factor level %s or %s may have only one sample - %s" 356 "factor level %s or %s may have only one sample - %s"
335 , fctr_lvl_1 357 , fctr_lvl_1
336 , fctr_lvl_2 358 , fctr_lvl_2
345 } 367 }
346 cplot_p <- x_env$cplot_p 368 cplot_p <- x_env$cplot_p
347 cplot_o <- x_env$cplot_o 369 cplot_o <- x_env$cplot_o
348 if (cplot_p || cplot_o) { 370 if (cplot_p || cplot_o) {
349 if (cplot_p) { 371 if (cplot_p) {
350 do_s_plot( 372 if (!is.null(my_cor_vs_cov)) {
351 x_env = x_env 373 do_s_plot(
352 , predictor_projection_x = TRUE 374 x_env = x_env
353 , cplot_x = TRUE 375 , predictor_projection_x = TRUE
354 , cor_vs_cov_x = my_cor_vs_cov 376 , cplot_x = TRUE
355 ) 377 , cor_vs_cov_x = my_cor_vs_cov
378 )
379 } else {
380 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
381 text(x=1, y=1, labels="no predictor projection is possible")
382 }
356 did_plots <- 1 383 did_plots <- 1
357 } else { 384 } else {
358 did_plots <- 0 385 did_plots <- 0
359 } 386 }
360 if (cplot_o) { 387 if (cplot_o) {
403 return ( FALSE ) 430 return ( FALSE )
404 } 431 }
405 432
406 # extract parameters from the environment 433 # extract parameters from the environment
407 vrbl_metadata <- calc_env$vrbl_metadata 434 vrbl_metadata <- calc_env$vrbl_metadata
408 vrbl_metadata_names <- vrbl_metadata[,1] 435 vrbl_metadata_names <- vrbl_metadata[, 1]
409 smpl_metadata <- calc_env$smpl_metadata 436 smpl_metadata <- calc_env$smpl_metadata
410 data_matrix <- calc_env$data_matrix 437 data_matrix <- calc_env$data_matrix
411 pairSigFeatOnly <- calc_env$pairSigFeatOnly 438 pair_significant_features_only <- calc_env$pairSigFeatOnly
412 facC <- calc_env$facC 439 facC <- calc_env$facC
413 tesC <- calc_env$tesC 440 tesC <- calc_env$tesC
414 # extract the levels from the environment 441 # extract the levels from the environment
415 originalLevCSV <- levCSV <- calc_env$levCSV 442 originalLevCSV <- levCSV <- calc_env$levCSV
416 # matchingC is one of { "none", "wildcard", "regex" } 443 # matchingC is one of { "none", "wildcard", "regex" }
431 458
432 rt <- vrbl_metadata$rt 459 rt <- vrbl_metadata$rt
433 names(rt) <- vrbl_metadata$variableMetadata 460 names(rt) <- vrbl_metadata$variableMetadata
434 rt_lookup <- function(feature) unname(rt[feature]) 461 rt_lookup <- function(feature) unname(rt[feature])
435 462
436 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) 463 # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv)
437 salience_df <- calc_env$salience_df <- w4msalience( 464 salience_df <- calc_env$salience_df <- w4msalience(
438 data_matrix = data_matrix 465 data_matrix = data_matrix
439 , sample_class = smpl_metadata[,facC] 466 , sample_class = smpl_metadata[,facC]
440 , failure_action = failure_action 467 , failure_action = failure_action
441 ) 468 )
442 salience_tsv_action({ 469 salience_tsv_action({
443 my_df <- data.frame( 470 with (
444 featureID = salience_df$feature 471 salience_df
445 , salientLevel = salience_df$max_level 472 , {
446 , salientRCV = salience_df$salient_rcv 473 my_df <<- data.frame(
447 , salience = salience_df$salience 474 featureID = feature
448 , mz = mz_lookup(salience_df$feature) 475 , salientLevel = max_level
449 , rt = rt_lookup(salience_df$feature) 476 , salienceRCV = salience_rcv
450 ) 477 , relativeSalientDistance = relative_salient_distance
451 my_df[order(-my_df$salience),] 478 , salience = salience
479 , mz = mz_lookup(feature)
480 , rt = rt_lookup(feature)
481 )
482 })
483 my_df[order(-my_df$relativeSalientDistance),]
452 }) 484 })
453 485
454 # transform wildcards to regexen 486 # transform wildcards to regexen
455 if (matchingC == "wildcard") { 487 if (matchingC == "wildcard") {
456 # strsplit(x = "hello,wild,world", split = ",") 488 # strsplit(x = "hello,wild,world", split = ",")
558 ) 590 )
559 my_cor_cov <- do_detail_plot( 591 my_cor_cov <- do_detail_plot(
560 x_dataMatrix = my_matrix 592 x_dataMatrix = my_matrix
561 , x_predictor = predictor 593 , x_predictor = predictor
562 , x_is_match = TRUE 594 , x_is_match = TRUE
563 , x_algorithm = algoC 595 , x_algorithm = "nipals"
564 , x_prefix = if (pairSigFeatOnly) { 596 , x_prefix = if (pair_significant_features_only) {
565 "Significantly contrasting features" 597 "Significantly contrasting features"
566 } else { 598 } else {
567 "Significant features" 599 "Significant features"
568 } 600 }
569 , x_show_labels = labelFeatures 601 , x_show_labels = labelFeatures
625 } else { 657 } else {
626 1 658 1
627 } 659 }
628 ) 660 )
629 col_selector <- vrbl_metadata_names[ 661 col_selector <- vrbl_metadata_names[
630 if ( pairSigFeatOnly ) fully_significant else overall_significant 662 if (pair_significant_features_only) fully_significant else overall_significant
631 ] 663 ]
632 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] 664 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
633 my_cor_cov <- do_detail_plot( 665 my_cor_cov <- do_detail_plot(
634 x_dataMatrix = my_matrix 666 x_dataMatrix = my_matrix
635 , x_predictor = predictor 667 , x_predictor = predictor
636 , x_is_match = is_match 668 , x_is_match = is_match
637 , x_algorithm = algoC 669 , x_algorithm = "nipals"
638 , x_prefix = if (pairSigFeatOnly) { 670 , x_prefix = if (pair_significant_features_only) {
639 "Significantly contrasting features" 671 "Significantly contrasting features"
640 } else { 672 } else {
641 "Significant features" 673 "Significant features"
642 } 674 }
643 , x_show_labels = labelFeatures 675 , x_show_labels = labelFeatures
666 ) 698 )
667 } 699 }
668 } 700 }
669 } 701 }
670 } 702 }
671 } else { # tesC == "none" 703 } else {
704 # tesC == "none"
672 # find all the levels for factor facC in sampleMetadata 705 # find all the levels for factor facC in sampleMetadata
673 level_union <- unique(sort(smpl_metadata_facC)) 706 level_union <- unique(sort(smpl_metadata_facC))
674 # identify the selected levels for factor facC from sampleMetadata 707 # identify the selected levels for factor facC from sampleMetadata
675 level_include <- sapply(X = level_union, FUN = isLevelSelected) 708 level_include <- sapply(X = level_union, FUN = isLevelSelected)
676 # discard the non-selected levels for factor facC 709 # discard the non-selected levels for factor facC
684 , FUN = function(x) { 717 , FUN = function(x) {
685 fctr_lvl_1 <- x[1] 718 fctr_lvl_1 <- x[1]
686 fctr_lvl_2 <- { 719 fctr_lvl_2 <- {
687 if ( fctr_lvl_1 %in% completed ) 720 if ( fctr_lvl_1 %in% completed )
688 return("DUMMY") 721 return("DUMMY")
689 # strF(completed)
690 completed <<- c(completed, fctr_lvl_1) 722 completed <<- c(completed, fctr_lvl_1)
691 setdiff(level_union, fctr_lvl_1) 723 setdiff(level_union, fctr_lvl_1)
692 } 724 }
693 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 725 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
694 fctr_lvl_2 <- "other" 726 fctr_lvl_2 <- "other"
715 ) 747 )
716 my_cor_cov <- do_detail_plot( 748 my_cor_cov <- do_detail_plot(
717 x_dataMatrix = my_matrix 749 x_dataMatrix = my_matrix
718 , x_predictor = predictor 750 , x_predictor = predictor
719 , x_is_match = is_match 751 , x_is_match = is_match
720 , x_algorithm = algoC 752 , x_algorithm = "nipals"
721 , x_prefix = "Features" 753 , x_prefix = "Features"
722 , x_show_labels = labelFeatures 754 , x_show_labels = labelFeatures
723 , x_progress = progress_action 755 , x_progress = progress_action
724 , x_crossval_i = min(7, length(chosen_samples)) 756 , x_crossval_i = min(7, length(chosen_samples))
725 , x_env = calc_env 757 , x_env = calc_env
768 ) 800 )
769 my_cor_cov <- do_detail_plot( 801 my_cor_cov <- do_detail_plot(
770 x_dataMatrix = my_matrix 802 x_dataMatrix = my_matrix
771 , x_predictor = predictor 803 , x_predictor = predictor
772 , x_is_match = is_match 804 , x_is_match = is_match
773 , x_algorithm = algoC 805 , x_algorithm = "nipals"
774 , x_prefix = "Features" 806 , x_prefix = "Features"
775 , x_show_labels = labelFeatures 807 , x_show_labels = labelFeatures
776 , x_progress = progress_action 808 , x_progress = progress_action
777 , x_crossval_i = min(7, length(chosen_samples)) 809 , x_crossval_i = min(7, length(chosen_samples))
778 , x_env = calc_env 810 , x_env = calc_env
802 } 834 }
803 } 835 }
804 if (!did_plot) { 836 if (!did_plot) {
805 failure_action( 837 failure_action(
806 sprintf( 838 sprintf(
807 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" 839 "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?"
808 , facC, originalLevCSV)) 840 , facC, originalLevCSV))
809 return ( FALSE ) 841 return ( FALSE )
810 } 842 }
811 return ( TRUE ) 843 return ( TRUE )
812 } 844 }
819 cor_vs_cov <- function( 851 cor_vs_cov <- function(
820 matrix_x 852 matrix_x
821 , ropls_x 853 , ropls_x
822 , predictor_projection_x = TRUE 854 , predictor_projection_x = TRUE
823 , x_progress = print 855 , x_progress = print
856 , x_env
824 ) { 857 ) {
825 tryCatch({ 858 tryCatch({
826 return( 859 return(
827 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) 860 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env)
828 ) 861 )
829 }, error = function(e) { 862 }
863 , error = function(e) {
830 x_progress( 864 x_progress(
831 sprintf( 865 sprintf(
832 "cor_vs_cov fatal error - %s" 866 "cor_vs_cov fatal error - %s"
833 , as.character(e) # e$message 867 , as.character(e) # e$message
834 ) 868 )
840 cor_vs_cov_try <- function( 874 cor_vs_cov_try <- function(
841 matrix_x # rows are samples; columns, features 875 matrix_x # rows are samples; columns, features
842 , ropls_x # an instance of ropls::opls 876 , ropls_x # an instance of ropls::opls
843 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection 877 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection
844 , x_progress = print # function to produce progress and error messages 878 , x_progress = print # function to produce progress and error messages
879 , x_env
845 ) { 880 ) {
881 my_matrix_x <- matrix_x
882 my_matrix_x[my_matrix_x==0] <- NA
883 fdr_features <- x_env$fdr_features
884
846 x_class <- class(ropls_x) 885 x_class <- class(ropls_x)
847 if ( !( as.character(x_class) == "opls" ) ) { 886 if ( !( as.character(x_class) == "opls" ) ) {
848 stop( 887 stop(
849 paste( 888 paste(
850 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " 889 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
864 result <- list() 903 result <- list()
865 result$projection <- projection <- if (predictor_projection_x) 1 else 2 904 result$projection <- projection <- if (predictor_projection_x) 1 else 2
866 905
867 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 906 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
868 # (and not from the supplement despite the statement that, for the NIPALS algorithm, 907 # (and not from the supplement despite the statement that, for the NIPALS algorithm,
869 # the equations from the supplement should be used) because of the definition of the 908 # the equations from the supplement should be used) because of the definition of the
870 # Pearson/Galton coefficient of correlation is defined as 909 # Pearson/Galton coefficient of correlation is defined as
871 # $$ 910 # $$
872 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} 911 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}
873 # $$ 912 # $$
874 # as described (among other places) on Wikipedia at 913 # as described (among other places) on Wikipedia at
875 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population 914 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population
876 # The equations in the supplement said to use, for the predictive component t1, 915 # The equations in the supplement said to use, for the predictive component t1,
877 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} 916 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))}
878 # but the results that I got were dramatically different from published results for S-PLOTs; 917 # but the results that I got were dramatically different from published results for S-PLOTs;
879 # perhaps my data are not centered exactly the same way that theirs were. 918 # perhaps my data are not centered exactly the same way that theirs were.
880 # The correlations calculated here are in agreement with those calculated with the code from 919 # The correlations calculated here are in agreement with those calculated with the code from
881 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf 920 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf
882 # I did transform covariance to "relative covariance" (relative to the maximum value) 921
883 # to keep the figures consistent with one another. 922
884 923 # count the features/variables (one column for each sample)
885 # count the features (one column for each sample) 924 # count the features/variables (one column for each sample)
886 Nfeatures <- ncol(matrix_x) 925 n_features <- ncol(my_matrix_x)
887 # count the samples (one row for each sample) 926 all_n_features <- x_env$fdr_features
888 Nobservations <- nrow(matrix_x) 927 if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) {
889 # a one-dimensional magnitude function (i.e., take the vector norm) 928 all_n_features <- as.integer(all_n_features)
890 vector_norm <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) 929 } else {
891 # calculate the standard deviation for each feature 930 all_n_features <- n_features
892 sd_xi <- sapply(X = 1:Nfeatures, FUN = function(x) sd(matrix_x[,x])) 931 }
932 fdr_n_features <- max(n_features, all_n_features)
933 # print("n_features")
934 # print(n_features)
935
936 # count the samples/observations (one row for each sample)
937 n_observations <- nrow(my_matrix_x)
938
893 # choose whether to plot the predictive score vector or orthogonal score vector 939 # choose whether to plot the predictive score vector or orthogonal score vector
894 if (predictor_projection_x) 940 if (predictor_projection_x)
895 score_matrix <- ropls_x@scoreMN 941 score_vector <- ropls_x@scoreMN
896 else 942 else
897 score_matrix <- ropls_x@orthoScoreMN 943 score_vector <- ropls_x@orthoScoreMN
898 # transpose the score (or orthoscore) vector for use as a premultiplier in covariance calculation 944
899 score_matrix_transposed <- t(score_matrix) 945 # compute the covariance of each feature with the score vector
900 # compute the norm of the vector (i.e., the magnitude)
901 score_matrix_magnitude <- vector_norm(score_matrix)
902 # compute the standard deviation of the vector
903 score_matrix_sd <- sd(score_matrix)
904 # compute the relative covariance of each feature with the score vector
905 result$covariance <- 946 result$covariance <-
906 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) 947 sapply(
948 X = 1:n_features
949 , FUN = function(i) {
950 cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
951 }
952 )
953 # access covariance by feature name
954 names(result$covariance) <- colnames(my_matrix_x)
955
907 # compute the correlation of each feature with the score vector 956 # compute the correlation of each feature with the score vector
908 result$correlation <- 957 result$correlation <-
909 score_matrix_transposed %*% matrix_x / ( (Nobservations - 1) * ( score_matrix_sd * sd_xi ) ) 958 sapply(
910 959 X = 1:n_features
911 # convert covariance and correlation from one-dimensional matrices to arrays of values, 960 , FUN = function(i) {
912 # which are accessed by feature name below 961 cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
913 p1 <- result$covariance <- result$covariance [ 1, , drop = TRUE ] 962 }
914 # x_progress("strF(p1)") 963 )
915 # x_progress(strF(p1)) 964 # access correlation by feature name
916 965 names(result$correlation) <- colnames(my_matrix_x)
917 pcorr1 <- result$correlation <- result$correlation[ 1, , drop = TRUE ] 966
918 # x_progress("pearson strF(pcorr1)") 967 # eliminate NAs in either correlation or covariance
919 # x_progress(strF(pcorr1)) 968 nas <- is.na(result$correlation) | is.na(result$covariance)
920 # x_progress(typeof(pcorr1)) 969 featureID <- names(ropls_x@vipVn)
921 # x_progress(str(pcorr1)) 970 featureID <- featureID[!nas]
922 971 result$correlation <- result$correlation[!nas]
923 # # this is how to use Spearman correlation instead of pearson 972 result$covariance <- result$covariance[!nas]
924 # result$spearcor <- sapply( 973 n_features <- length(featureID)
925 # X = 1:Nfeatures 974
926 # , FUN = function(i) {
927 # stats::cor(
928 # x = as.vector(score_matrix)
929 # , y = as.vector(matrix_x[,i])
930 # # , method = "spearman"
931 # , method = "pearson"
932 # )
933 # }
934 # )
935 # names(result$spearcor) <- names(p1)
936 # pcorr1 <- result$spearcor
937 # x_progress("spearman strF(pcorr1)")
938 # x_progress(strF(pcorr1))
939 # x_progress(typeof(pcorr1))
940 # x_progress(str(pcorr1))
941 # pcorr1 <- result$correlation <- result$spearcor
942
943 # correl.ci(r, n, a = 0.05, rho = 0)
944 correl_pci <- lapply( 975 correl_pci <- lapply(
945 X = 1:Nfeatures 976 X = 1:n_features
946 , FUN = function(i) correl.ci(r = pcorr1[i], n = Nobservations) 977 , FUN = function(i) {
978 correl.ci(
979 r = result$correlation[i]
980 , n_obs = n_observations
981 , n_vars = fdr_n_features
982 )
983 }
947 ) 984 )
948 result$p_value_raw <- sapply( 985 result$p_value_raw <- sapply(
949 X = 1:Nfeatures 986 X = 1:n_features
987 , FUN = function(i) correl_pci[[i]]$p.value.raw
988 )
989 result$p_value_raw[is.na(result$p_value_raw)] <- 1.0
990 result$p_value <- sapply(
991 X = 1:n_features
950 , FUN = function(i) correl_pci[[i]]$p.value 992 , FUN = function(i) correl_pci[[i]]$p.value
951 ) 993 )
952 result$p_value_raw[is.na(result$p_value_raw)] <- 0.0 994 result$p_value[is.na(result$p_value)] <- 1.0
953 result$ci_lower <- sapply( 995 result$ci_lower <- sapply(
954 X = 1:Nfeatures 996 X = 1:n_features
955 , FUN = function(i) correl_pci[[i]]$CI['lower'] 997 , FUN = function(i) correl_pci[[i]]$CI["lower"]
956 ) 998 )
957 result$ci_upper <- sapply( 999 result$ci_upper <- sapply(
958 X = 1:Nfeatures 1000 X = 1:n_features
959 , FUN = function(i) correl_pci[[i]]$CI['upper'] 1001 , FUN = function(i) correl_pci[[i]]$CI["upper"]
960 ) 1002 )
961 1003
962 1004
963 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) 1005 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627)
964 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) 1006 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.)
965 result$vip4p <- as.numeric(ropls_x@vipVn) 1007 result$vip4p <- as.numeric(ropls_x@vipVn)[!nas]
966 result$vip4o <- as.numeric(ropls_x@orthoVipVn) 1008 result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas]
967 if (length(result$vip4o) == 0) result$vip4o <- NA 1009 if (length(result$vip4o) == 0) result$vip4o <- NA
968 # extract the loadings 1010 # extract the loadings
969 result$loadp <- as.numeric(ropls_x@loadingMN) 1011 result$loadp <- as.numeric(ropls_x@loadingMN)[!nas]
970 result$loado <- as.numeric(ropls_x@orthoLoadingMN) 1012 result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas]
971 # get the level names 1013 # get the level names
972 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) 1014 level_names <- sort(levels(as.factor(ropls_x@suppLs$y)))
973 fctr_lvl_1 <- level_names[1] 1015 fctr_lvl_1 <- level_names[1]
974 fctr_lvl_2 <- level_names[2] 1016 fctr_lvl_2 <- level_names[2]
975 feature_count <- length(ropls_x@vipVn) 1017 result$level1 <- rep.int(x = fctr_lvl_1, times = n_features)
976 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) 1018 result$level2 <- rep.int(x = fctr_lvl_2, times = n_features)
977 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
978 greaterLevel <- sapply( 1019 greaterLevel <- sapply(
979 X = result$correlation 1020 X = result$correlation
980 , FUN = function(my_corr) 1021 , FUN = function(my_corr) {
981 tryCatch({ 1022 tryCatch({
982 if ( is.nan( my_corr ) ) { 1023 if ( is.na(my_corr) || ! is.numeric( my_corr ) ) {
983 NA 1024 NA
984 } else { 1025 } else {
985 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 1026 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
986 } 1027 }
987 }, error = function(e) { 1028 }
1029 , error = function(e) {
1030 print(my_corr)
988 x_progress( 1031 x_progress(
989 sprintf( 1032 sprintf(
990 "cor_vs_cov -> sapply: error - substituting NA - %s" 1033 "cor_vs_cov -> sapply: error - substituting NA - %s"
991 , as.character(e) 1034 , as.character(e)
992 ) 1035 )
993 ) 1036 )
994 NA 1037 NA
995 }) 1038 }
1039 )
1040 }
996 ) 1041 )
997
998 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
999 featureID <- names(ropls_x@vipVn)
1000 greaterLevel <- greaterLevel[featureID]
1001 result$correlation <- result$correlation[featureID]
1002 result$covariance <- result$covariance[featureID]
1003 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
1004 1042
1005 # build a data frame to hold the content for the tab-separated values file 1043 # build a data frame to hold the content for the tab-separated values file
1006 tsv1 <- data.frame( 1044 tsv1 <- data.frame(
1007 featureID = featureID 1045 featureID = featureID
1008 , factorLevel1 = result$level1 1046 , factorLevel1 = result$level1
1014 , vip4p = result$vip4p 1052 , vip4p = result$vip4p
1015 , vip4o = result$vip4o 1053 , vip4o = result$vip4o
1016 , loadp = result$loadp 1054 , loadp = result$loadp
1017 , loado = result$loado 1055 , loado = result$loado
1018 , cor_p_val_raw = result$p_value_raw 1056 , cor_p_val_raw = result$p_value_raw
1019 , cor_p_value = p.adjust(p = result$p_value_raw, method = "BY") 1057 , cor_p_value = result$p_value
1020 , cor_ci_lower = result$ci_lower 1058 , cor_ci_lower = result$ci_lower
1021 , cor_ci_upper = result$ci_upper 1059 , cor_ci_upper = result$ci_upper
1022 ) 1060 )
1023 rownames(tsv1) <- tsv1$featureID 1061 rownames(tsv1) <- tsv1$featureID
1024 1062
1025 # build the superresult, i.e., the result returned by this function 1063 # build the superresult, i.e., the result returned by this function
1037 # remove any rows having NA for covariance or correlation 1075 # remove any rows having NA for covariance or correlation
1038 tsv1 <- tsv1[!is.na(tsv1$correlation),] 1076 tsv1 <- tsv1[!is.na(tsv1$correlation),]
1039 tsv1 <- tsv1[!is.na(tsv1$covariance),] 1077 tsv1 <- tsv1[!is.na(tsv1$covariance),]
1040 superresult$tsv1 <- tsv1 1078 superresult$tsv1 <- tsv1
1041 1079
1042 # # I did not include these but left them commentd out in case future 1080 # # I did not include these but left them commentd out in case future
1043 # # consumers of this routine want to use it in currently unanticipated ways 1081 # # consumers of this routine want to use it in currently unanticipated ways
1044 # result$superresult <- superresult 1082 # result$superresult <- superresult
1045 # result$oplsda <- ropls_x 1083 # result$oplsda <- ropls_x
1046 # result$predictor <- ropls_x@suppLs$y 1084 # result$predictor <- ropls_x@suppLs$y
1047 1085
1057 # title = {Multivariate data analysis in R} 1095 # title = {Multivariate data analysis in R}
1058 # } 1096 # }
1059 # which follows 1097 # which follows
1060 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition 1098 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition
1061 1099
1062 correl.ci <- function(r, n, a = 0.05, rho = 0) { 1100 correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) {
1063 ## r is the calculated correlation coefficient for n pairs 1101 ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable
1064 ## a is the significance level 1102 ## a is the significance level
1065 ## rho is the hypothesised correlation 1103 ## rho is the hypothesised correlation
1066 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho 1104 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho
1067 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 1105 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1
1068 se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher's z-transformation of Ho 1106 se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho
1069 test <- (zh1 - zh0)/se ### test statistic 1107 test <- (zh1 - zh0)/se ### test statistic
1070 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value 1108 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value
1071 zL <- zh1 - qnorm(1 - a/2)*se 1109 pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars)
1072 zH <- zh1 + qnorm(1 - a/2)*se 1110 z_L <- zh1 - qnorm(1 - a/2)*se
1073 fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit 1111 z_h <- zh1 + qnorm(1 - a/2)*se
1074 fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit 1112 fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit
1075 CI <- c(fishL, fishH) 1113 fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit
1076 names(CI) <- c('lower', 'upper') 1114 ci <- c(fish_l, fish_h)
1077 list(correlation = r, p.value = pvalue, CI = CI) 1115 names(ci) <- c("lower", "upper")
1116 list(
1117 correlation = r
1118 , p.value.raw = pvalue
1119 , p.value = pvalue.adj
1120 , CI = ci
1121 )
1078 } 1122 }
1079 1123
1080 # vim: sw=2 ts=2 et : 1124 # vim: sw=2 ts=2 et ai :