# HG changeset patch # User eschen42 # Date 1544624402 18000 # Node ID 2ae2d26e327009b8a0beab3d9b8062b0a1acc8cb # Parent ddaf84e15d06010eccc474548b82f7823ff8e7e5 planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e89c652c0849eb1d5a1e6c9100c72c64a8d388b4 diff -r ddaf84e15d06 -r 2ae2d26e3270 w4mcorcov.xml --- a/w4mcorcov.xml Thu Nov 08 23:06:09 2018 -0500 +++ b/w4mcorcov.xml Wed Dec 12 09:20:02 2018 -0500 @@ -1,10 +1,10 @@ - + OPLS-DA Contrasts of Univariate Results + help="When this option is set to 'Yes', analysis will be performed including only features scored by the univariate test as differing significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude only features not scored by the univariate test as significantly varying when all levels are considered). See examples below." /> @@ -17,14 +17,12 @@ + - r-base r-batch bioconductor-ropls @@ -35,9 +33,9 @@ sampleMetadata_in '$sampleMetadata_in' variableMetadata_in '$variableMetadata_in' facC '$facC' - #if str( $signif_test.tesC ) == "none": - tesC "none" - pairSigFeatOnly "FALSE" + #if str( $signif_test.tesC ) == 'none': + tesC 'none' + pairSigFeatOnly 'FALSE' #else: tesC '$signif_test.tesC' pairSigFeatOnly '$signif_test.pairSigFeatOnly' @@ -45,20 +43,21 @@ levCSV '$levCSV' matchingC '$matchingC' labelFeatures '$labelFeatures' - #if str( $xplots.expPlot ) == "none": - cplot_p "FALSE" - cplot_o "FALSE" - cplot_y "correlation" - #else if str( $xplots.expPlot ) == "cplot": - cplot_p '$xplots.cplot_p' - cplot_o '$xplots.cplot_o' - cplot_y '$xplots.cplot_y' + #if str( $advanced.advancedFeatures ) == 'none': + fdr_features 'ALL' + cplot_p 'FALSE' + cplot_o 'FALSE' + cplot_y 'correlation' + #else if str( $advanced.advancedFeatures ) == 'advanced': + fdr_features '$advanced.fdr_features' + cplot_p '$advanced.cplot_p' + cplot_o '$advanced.cplot_o' + cplot_y '$advanced.cplot_y' #end if contrast_detail '$contrast_detail' contrast_corcov '$contrast_corcov' contrast_salience '$contrast_salience' - ]]> - + ]]> @@ -79,7 +78,7 @@ - + @@ -101,7 +100,7 @@ + help="Comma-separated level-names (or comma-separated regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - when 'a,b' is correct, 'a, b' is not equivalent and likely will fail or give different results."> @@ -134,15 +133,17 @@ - - - - + + + + - + @@ -151,8 +152,9 @@ - - + + + + + + + - + + + + + + + @@ -218,20 +232,12 @@ - + - - - - - - - - - - - - + + + + @@ -244,6 +250,7 @@ + @@ -259,8 +266,8 @@ - - + + @@ -270,20 +277,12 @@ - + - - - - - - - - - - - - + + + + @@ -295,6 +294,7 @@ + @@ -309,38 +309,29 @@ - - + + - + - - + - - - - - - - - - - - - + + + + @@ -352,6 +343,7 @@ + @@ -369,7 +361,7 @@ - + @@ -383,17 +375,14 @@ - + - - - - - - + + + @@ -405,6 +394,7 @@ + @@ -420,13 +410,13 @@ - + - + @@ -440,6 +430,7 @@ + @@ -454,10 +445,15 @@ - - - - + + + + + + + + + @@ -469,6 +465,7 @@ + @@ -484,14 +481,25 @@ + + + + + + + + + + + ééééé 10.5281/zenodo.1034784 + + - + 10.1021/ac0713510 10.1214/aos/1013699998 diff -r ddaf84e15d06 -r 2ae2d26e3270 w4mcorcov_calc.R --- a/w4mcorcov_calc.R Thu Nov 08 23:06:09 2018 -0500 +++ b/w4mcorcov_calc.R Wed Dec 12 09:20:02 2018 -0500 @@ -1,12 +1,4 @@ -# center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ -center_colmeans <- function(x) { - xcenter = colMeans(x) - x - rep(xcenter, rep.int(nrow(x), ncol(x))) -} - -#### OPLS-DA -algoC <- "nipals" - +# compute and output detail plots do_detail_plot <- function( x_dataMatrix , x_predictor @@ -21,7 +13,7 @@ off <- function(x) if (x_show_labels == "0") 0 else x if ( x_is_match && ncol(x_dataMatrix) > 0 - && length(unique(x_predictor))> 1 + && length(unique(x_predictor)) > 1 && x_crossval_i < nrow(x_dataMatrix) ) { my_oplsda <- opls( @@ -36,20 +28,22 @@ , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. ) # strip out variables having negligible variance - x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] + x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE] my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] do_s_plot <- function( - x_env - , predictor_projection_x = TRUE - , cplot_x = FALSE - , cor_vs_cov_x = NULL - ) - { + x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE + , cor_vs_cov_x = NULL + ) { if (cplot_x) { cplot_y_correlation <- (x_env$cplot_y == "correlation") + default_lim_x <- 10 + } else { + default_lim_x <- 1.2 } if (is.null(cor_vs_cov_x)) { my_cor_vs_cov <- cor_vs_cov( @@ -57,14 +51,17 @@ , ropls_x = my_oplsda , predictor_projection_x = predictor_projection_x , x_progress + , x_env ) } else { my_cor_vs_cov <- cor_vs_cov_x } - # str(my_cor_vs_cov) + if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { if (is.null(cor_vs_cov_x)) { - x_progress("No cor_vs_cov data produced") + x_progress( + sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2) + ) } plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="too few covariance data") @@ -76,216 +73,240 @@ min_x <- min(covariance, na.rm = TRUE) max_x <- max(covariance, na.rm = TRUE) lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) - covariance <- covariance / lim_x - lim_x <- 1.2 - # "It is generally accepted that a variable should be selected if vj>1, [27–29], + + # Regarding using VIP as a guide to selecting a biomarker: + # "It is generally accepted that a variable should be selected if vj>1, [27–29], # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." # (Mehmood 2012 doi:10.1186/1748-7188-6-27) plus_cor <- correlation plus_cov <- covariance - cex <- 0.65 - which_projection <- if (projection == 1) "t1" else "o1" - which_loading <- if (projection == 1) "parallel" else "orthogonal" - if (projection == 1) { # predictor-projection - vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) - if (!cplot_x) { # S-plot predictor-projection - my_xlab <- "relative covariance(feature,t1)" - my_x <- plus_cov - my_ylab <- "correlation(feature,t1)" - my_y <- plus_cor - } else { # C-plot predictor-projection - my_xlab <- "variable importance in predictor-projection" - my_x <- vip4p - if (cplot_y_correlation) { + if (length(plus_cor) != 0 && length(plus_cor) != 0) { + cex <- 0.65 + if (projection == 1) { + # predictor-projection + vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83))) + if (!cplot_x) { + # S-plot predictor-projection + my_xlab <- "covariance(feature,t1)" + my_x <- plus_cov my_ylab <- "correlation(feature,t1)" my_y <- plus_cor + # X,Y limits for S-PLOT + my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) + my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) } else { - my_ylab <- "relative covariance(feature,t1)" - my_y <- plus_cov + # C-plot predictor-projection + my_xlab <- "variable importance in predictor-projection" + my_x <- vip4p + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) + } else { + my_ylab <- "covariance(feature,t1)" + my_y <- plus_cov + my_ylim <- max(abs(plus_cov)) + my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) + } + # X,Y limits for C-plot + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + lim_x <- min(lim_x, default_lim_x) + my_xlim <- c( 0, lim_x ) # + off(0.2) ) } - } - if (cplot_x) { - lim_x <- max(my_x, na.rm = TRUE) * 1.1 - my_xlim <- c( 0, lim_x + off(0.2) ) + my_load_distal <- loadp + my_load_proximal <- loado + red <- as.numeric(correlation > 0) * vipcp + blue <- as.numeric(correlation < 0) * vipcp + alpha <- 0.1 + 0.4 * vipcp + red[is.na(red)] <- 0 + blue[is.na(blue)] <- 0 + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s" + , x_prefix, fctr_lvl_1, fctr_lvl_2) } else { - my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) - } - my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) - my_load_distal <- loadp - my_load_proximal <- loado - red <- as.numeric(correlation > 0) * vipcp - blue <- as.numeric(correlation < 0) * vipcp - alpha <- 0.1 + 0.4 * vipcp - red[is.na(red)] <- 0 - blue[is.na(blue)] <- 0 - alpha[is.na(alpha)] <- 0 - my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) - main_label <- sprintf("%s for level %s versus %s" - , x_prefix, fctr_lvl_1, fctr_lvl_2) - } else { # orthogonal projection - vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) - if (!cplot_x) { - my_xlab <- "relative covariance(feature,to1)" - my_x <- -plus_cov - } else { - my_xlab <- "variable importance in orthogonal projection" - my_x <- vip4o - } - if (!cplot_x) { # S-plot orthogonal projection - my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) - my_ylab <- "correlation(feature,to1)" - my_y <- plus_cor - } else { # C-plot orthogonal projection - lim_x <- max(my_x, na.rm = TRUE) * 1.1 - my_xlim <- c( 0, lim_x + off(0.2) ) - if (cplot_y_correlation) { + # orthogonal projection + vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83))) + if (!cplot_x) { + # S-PLOT orthogonal-projection + my_xlab <- "covariance(feature,to1)" + my_x <- -plus_cov + # X,Y limits for S-PLOT + my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) my_ylab <- "correlation(feature,to1)" my_y <- plus_cor + my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) } else { - my_ylab <- "relative covariance(feature,to1)" - my_y <- plus_cov + # C-plot orthogonal-projection + my_xlab <- "variable importance in orthogonal projection" + my_x <- vip4o + # C-plot orthogonal projection + # X,Y limits for C-plot + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x ) # + off(0.2) ) + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) + } else { + my_ylab <- "covariance(feature,to1)" + my_y <- plus_cov + my_ylim <- max(abs(plus_cov)) + my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) + } } - } - my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) - my_load_distal <- loado - my_load_proximal <- loadp - alpha <- 0.1 + 0.4 * vipco - alpha[is.na(alpha)] <- 0 - my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) - main_label <- sprintf( - "Features influencing orthogonal projection for %s versus %s" - , fctr_lvl_1, fctr_lvl_2) - } - main_cex <- min(1.0, 46.0/nchar(main_label)) - my_feature_label_slant <- -30 # slant feature labels 30 degrees downward - my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) - plot( - y = my_y - , x = my_x - , type = "p" - , xlim = my_xlim - , ylim = my_ylim - , xlab = my_xlab - , ylab = my_ylab - , main = main_label - , cex.main = main_cex - , cex = cex - , pch = my_pch - , col = my_col - ) - low_x <- -0.7 * lim_x - high_x <- 0.7 * lim_x - if (projection == 1 && !cplot_x) { - text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") - text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") - } - if ( x_show_labels != "0" ) { - names(my_load_distal) <- tsv1$featureID - names(my_load_proximal) <- tsv1$featureID - if ( x_show_labels == "ALL" ) { - n_labels <- length(my_load_distal) - } else { - n_labels <- as.numeric(x_show_labels) + my_load_distal <- loado + my_load_proximal <- loadp + alpha <- 0.1 + 0.4 * vipco + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf( + "Features influencing orthogonal projection for %s versus %s" + , fctr_lvl_1, fctr_lvl_2) } - n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) - labels_to_show <- c( - names(head(sort(my_load_distal),n = n_labels)) - , names(tail(sort(my_load_distal),n = n_labels)) - ) - labels <- unname( - sapply( - X = tsv1$featureID - , FUN = function(x) if( x %in% labels_to_show ) x else "" + main_cex <- min(1.0, 46.0/nchar(main_label)) + my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) + if ( sum(is.infinite(my_xlim)) == 0 ) { + plot( + y = my_y + , x = my_x + , type = "p" + , xlim = my_xlim + , ylim = my_ylim + , xlab = my_xlab + , ylab = my_ylab + , main = main_label + , cex.main = main_cex + , cex = cex + , pch = my_pch + , col = my_col ) - ) - x_text_offset <- 0.024 - y_text_off <- 0.017 - if (!cplot_x) { # S-plot - y_text_offset <- if (projection == 1) -y_text_off else y_text_off - } else { # C-plot - y_text_offset <- - sapply( - X = (my_y > 0) - , FUN = function(x) { if (x) y_text_off else -y_text_off } + low_x <- -0.7 * lim_x + high_x <- 0.7 * lim_x + if (projection == 1 && !cplot_x) { + text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") + text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") + } + if ( x_show_labels != "0" ) { + names(my_load_distal) <- tsv1$featureID + names(my_load_proximal) <- tsv1$featureID + if ( x_show_labels == "ALL" ) { + n_labels <- length(my_load_distal) + } else { + n_labels <- as.numeric(x_show_labels) + } + n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) + labels_to_show <- c( + names(head(sort(my_load_distal), n = n_labels)) + , names(tail(sort(my_load_distal), n = n_labels)) + ) + labels <- unname( + sapply( + X = tsv1$featureID + , FUN = function(x) if ( x %in% labels_to_show ) x else "" + ) ) - } - label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { - if (length(labels_arg) > 0) { - unique_slant <- unique(slant_arg) - if (length(unique_slant) == 1) { - text( - y = y_arg - , x = x_arg + x_text_offset - , cex = 0.4 - , labels = labels_arg - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels - , srt = slant_arg - , adj = 0 # left-justified + x_text_offset <- 0.024 + y_text_off <- 0.017 + if (!cplot_x) { + # S-plot + y_text_offset <- if (projection == 1) -y_text_off else y_text_off + } else { + # C-plot + y_text_offset <- + sapply( + X = (my_y > 0) + , FUN = function(x) { + if (x) y_text_off else -y_text_off + } + ) + } + label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { + if (length(labels_arg) > 0) { + unique_slant <- unique(slant_arg) + if (length(unique_slant) == 1) { + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } else { + for (slant in unique_slant) { + text( + y = y_arg[slant_arg == slant] + , x = x_arg[slant_arg == slant] + x_text_offset + , cex = 0.4 + , labels = labels_arg[slant_arg == slant] + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant + , adj = 0 # left-justified + ) + } + } + } + } + if (!cplot_x) { + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + } else { + my_slant <- sapply( + X = (my_y > 0) + , FUN = function(x) if (x) 2 else -2 + ) * my_feature_label_slant + } + if (length(my_x) > 1) { + label_features( + x_arg = my_x [my_x > 0] + , y_arg = my_y [my_x > 0] - y_text_offset + , labels_arg = labels[my_x > 0] + , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) ) - } else { - for (slant in unique_slant) { - text( - y = y_arg[slant_arg == slant] - , x = x_arg[slant_arg == slant] + x_text_offset - , cex = 0.4 - , labels = labels_arg[slant_arg == slant] - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels - , srt = slant - , adj = 0 # left-justified + if (!cplot_x) { + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant ) } - } - } - } - if (!cplot_x) { - my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + } else { + if (!cplot_x) { + my_slant <- (if (my_x > 1) -1 else 1) * my_slant + my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset + } else { + my_slant <- (if (my_y > 1) -1 else 1) * my_slant + my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset + } + label_features( + x_arg = my_x + , y_arg = my_y_arg + , labels_arg = labels + , slant_arg = my_slant + ) + } # end if (length(my_x) > 1) + } # end if ( x_show_labels != "0" ) } else { - my_slant <- sapply( - X = (my_y > 0) - , FUN = function(x) if (x) 2 else -2 - ) * my_feature_label_slant - } - if (length(my_x) > 1) { - label_features( - x_arg = my_x [my_x > 0] - , y_arg = my_y [my_x > 0] - y_text_offset - , labels_arg = labels[my_x > 0] - , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) - ) - if (!cplot_x) { - label_features( - x_arg = my_x [my_x < 0] - , y_arg = my_y [my_x < 0] + y_text_offset - , labels_arg = labels[my_x < 0] - , slant_arg = my_slant - ) - } - } else { - if (!cplot_x) { - my_slant <- (if (my_x > 1) -1 else 1) * my_slant - my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset - } else { - my_slant <- (if (my_y > 1) -1 else 1) * my_slant - my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset - } - label_features( - x_arg = my_x - , y_arg = my_y_arg - , labels_arg = labels - , slant_arg = my_slant - ) - } - } - } - ) + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no S-plot is possible") + } # end if (sum(is.infinte(my_xlim))==0) + } else { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no S-plot is possible") + } # end if (length(plus_cor) != 0 && length(plus_cor) != 0) + } # end action + ) # end with( my_cor_vs_cov, action ) return (my_cor_vs_cov) - } + } # end function do_s_plot my_cor_vs_cov <- do_s_plot( x_env = x_env , predictor_projection_x = TRUE , cplot_x = FALSE ) - typeVc <- c("correlation", # 1 + typevc <- c("correlation", # 1 "outlier", # 2 "overview", # 3 "permutation", # 4 @@ -299,36 +320,37 @@ "xy-weight" # 12 ) # [c(3,8,9)] # [c(4,3,8,9)] if ( length(my_oplsda@orthoVipVn) > 0 ) { - my_typevc <- typeVc[c(9,3,8)] + my_typevc <- typevc[c(9,3,8)] } else { my_typevc <- c("(dummy)","overview","(dummy)") } my_ortho_cor_vs_cov_exists <- FALSE for (my_type in my_typevc) { - if (my_type %in% typeVc) { + if (my_type %in% typevc) { tryCatch({ - if ( my_type != "x-loading" ) { - plot( - x = my_oplsda - , typeVc = my_type - , parCexN = 0.4 - , parDevNewL = FALSE - , parLayL = TRUE - , parEllipsesL = TRUE - ) - if (my_type == "overview") { - sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) - title(sub = sub_label) - } - } else { - my_ortho_cor_vs_cov <- do_s_plot( - x_env = x_env - , predictor_projection_x = FALSE - , cplot_x = FALSE - ) - my_ortho_cor_vs_cov_exists <- TRUE + if ( my_type != "x-loading" ) { + plot( + x = my_oplsda + , typeVc = my_type + , parCexN = 0.4 + , parDevNewL = FALSE + , parLayL = TRUE + , parEllipsesL = TRUE + ) + if (my_type == "overview") { + sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) + title(sub = sub_label) + } + } else { + my_ortho_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = FALSE + ) + my_ortho_cor_vs_cov_exists <- TRUE + } } - }, error = function(e) { + , error = function(e) { x_progress( sprintf( "factor level %s or %s may have only one sample - %s" @@ -347,12 +369,17 @@ cplot_o <- x_env$cplot_o if (cplot_p || cplot_o) { if (cplot_p) { - do_s_plot( - x_env = x_env - , predictor_projection_x = TRUE - , cplot_x = TRUE - , cor_vs_cov_x = my_cor_vs_cov - ) + if (!is.null(my_cor_vs_cov)) { + do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = TRUE + , cor_vs_cov_x = my_cor_vs_cov + ) + } else { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no predictor projection is possible") + } did_plots <- 1 } else { did_plots <- 0 @@ -405,10 +432,10 @@ # extract parameters from the environment vrbl_metadata <- calc_env$vrbl_metadata - vrbl_metadata_names <- vrbl_metadata[,1] + vrbl_metadata_names <- vrbl_metadata[, 1] smpl_metadata <- calc_env$smpl_metadata data_matrix <- calc_env$data_matrix - pairSigFeatOnly <- calc_env$pairSigFeatOnly + pair_significant_features_only <- calc_env$pairSigFeatOnly facC <- calc_env$facC tesC <- calc_env$tesC # extract the levels from the environment @@ -433,22 +460,27 @@ names(rt) <- vrbl_metadata$variableMetadata rt_lookup <- function(feature) unname(rt[feature]) - # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) + # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv) salience_df <- calc_env$salience_df <- w4msalience( data_matrix = data_matrix , sample_class = smpl_metadata[,facC] , failure_action = failure_action ) salience_tsv_action({ - my_df <- data.frame( - featureID = salience_df$feature - , salientLevel = salience_df$max_level - , salientRCV = salience_df$salient_rcv - , salience = salience_df$salience - , mz = mz_lookup(salience_df$feature) - , rt = rt_lookup(salience_df$feature) - ) - my_df[order(-my_df$salience),] + with ( + salience_df + , { + my_df <<- data.frame( + featureID = feature + , salientLevel = max_level + , salienceRCV = salience_rcv + , relativeSalientDistance = relative_salient_distance + , salience = salience + , mz = mz_lookup(feature) + , rt = rt_lookup(feature) + ) + }) + my_df[order(-my_df$relativeSalientDistance),] }) # transform wildcards to regexen @@ -560,8 +592,8 @@ x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = TRUE - , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) { + , x_algorithm = "nipals" + , x_prefix = if (pair_significant_features_only) { "Significantly contrasting features" } else { "Significant features" @@ -627,15 +659,15 @@ } ) col_selector <- vrbl_metadata_names[ - if ( pairSigFeatOnly ) fully_significant else overall_significant + if (pair_significant_features_only) fully_significant else overall_significant ] my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) { + , x_algorithm = "nipals" + , x_prefix = if (pair_significant_features_only) { "Significantly contrasting features" } else { "Significant features" @@ -668,7 +700,8 @@ } } } - } else { # tesC == "none" + } else { + # tesC == "none" # find all the levels for factor facC in sampleMetadata level_union <- unique(sort(smpl_metadata_facC)) # identify the selected levels for factor facC from sampleMetadata @@ -686,7 +719,6 @@ fctr_lvl_2 <- { if ( fctr_lvl_1 %in% completed ) return("DUMMY") - # strF(completed) completed <<- c(completed, fctr_lvl_1) setdiff(level_union, fctr_lvl_1) } @@ -717,7 +749,7 @@ x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = is_match - , x_algorithm = algoC + , x_algorithm = "nipals" , x_prefix = "Features" , x_show_labels = labelFeatures , x_progress = progress_action @@ -770,7 +802,7 @@ x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = is_match - , x_algorithm = algoC + , x_algorithm = "nipals" , x_prefix = "Features" , x_show_labels = labelFeatures , x_progress = progress_action @@ -804,7 +836,7 @@ if (!did_plot) { failure_action( sprintf( - "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" + "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?" , facC, originalLevCSV)) return ( FALSE ) } @@ -821,12 +853,14 @@ , ropls_x , predictor_projection_x = TRUE , x_progress = print +, x_env ) { tryCatch({ - return( - cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) - ) - }, error = function(e) { + return( + cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env) + ) + } + , error = function(e) { x_progress( sprintf( "cor_vs_cov fatal error - %s" @@ -842,7 +876,12 @@ , ropls_x # an instance of ropls::opls , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection , x_progress = print # function to produce progress and error messages +, x_env ) { + my_matrix_x <- matrix_x + my_matrix_x[my_matrix_x==0] <- NA + fdr_features <- x_env$fdr_features + x_class <- class(ropls_x) if ( !( as.character(x_class) == "opls" ) ) { stop( @@ -866,12 +905,12 @@ # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # (and not from the supplement despite the statement that, for the NIPALS algorithm, - # the equations from the supplement should be used) because of the definition of the + # the equations from the supplement should be used) because of the definition of the # Pearson/Galton coefficient of correlation is defined as # $$ # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} # $$ - # as described (among other places) on Wikipedia at + # as described (among other places) on Wikipedia at # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population # The equations in the supplement said to use, for the predictive component t1, # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} @@ -879,112 +918,116 @@ # perhaps my data are not centered exactly the same way that theirs were. # The correlations calculated here are in agreement with those calculated with the code from # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf - # I did transform covariance to "relative covariance" (relative to the maximum value) - # to keep the figures consistent with one another. + - # count the features (one column for each sample) - Nfeatures <- ncol(matrix_x) - # count the samples (one row for each sample) - Nobservations <- nrow(matrix_x) - # a one-dimensional magnitude function (i.e., take the vector norm) - vector_norm <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) - # calculate the standard deviation for each feature - sd_xi <- sapply(X = 1:Nfeatures, FUN = function(x) sd(matrix_x[,x])) + # count the features/variables (one column for each sample) + # count the features/variables (one column for each sample) + n_features <- ncol(my_matrix_x) + all_n_features <- x_env$fdr_features + if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) { + all_n_features <- as.integer(all_n_features) + } else { + all_n_features <- n_features + } + fdr_n_features <- max(n_features, all_n_features) + # print("n_features") + # print(n_features) + + # count the samples/observations (one row for each sample) + n_observations <- nrow(my_matrix_x) + # choose whether to plot the predictive score vector or orthogonal score vector if (predictor_projection_x) - score_matrix <- ropls_x@scoreMN + score_vector <- ropls_x@scoreMN else - score_matrix <- ropls_x@orthoScoreMN - # transpose the score (or orthoscore) vector for use as a premultiplier in covariance calculation - score_matrix_transposed <- t(score_matrix) - # compute the norm of the vector (i.e., the magnitude) - score_matrix_magnitude <- vector_norm(score_matrix) - # compute the standard deviation of the vector - score_matrix_sd <- sd(score_matrix) - # compute the relative covariance of each feature with the score vector + score_vector <- ropls_x@orthoScoreMN + + # compute the covariance of each feature with the score vector result$covariance <- - score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) + sapply( + X = 1:n_features + , FUN = function(i) { + cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") + } + ) + # access covariance by feature name + names(result$covariance) <- colnames(my_matrix_x) + # compute the correlation of each feature with the score vector result$correlation <- - score_matrix_transposed %*% matrix_x / ( (Nobservations - 1) * ( score_matrix_sd * sd_xi ) ) - - # convert covariance and correlation from one-dimensional matrices to arrays of values, - # which are accessed by feature name below - p1 <- result$covariance <- result$covariance [ 1, , drop = TRUE ] - # x_progress("strF(p1)") - # x_progress(strF(p1)) + sapply( + X = 1:n_features + , FUN = function(i) { + cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") + } + ) + # access correlation by feature name + names(result$correlation) <- colnames(my_matrix_x) - pcorr1 <- result$correlation <- result$correlation[ 1, , drop = TRUE ] - # x_progress("pearson strF(pcorr1)") - # x_progress(strF(pcorr1)) - # x_progress(typeof(pcorr1)) - # x_progress(str(pcorr1)) - - # # this is how to use Spearman correlation instead of pearson - # result$spearcor <- sapply( - # X = 1:Nfeatures - # , FUN = function(i) { - # stats::cor( - # x = as.vector(score_matrix) - # , y = as.vector(matrix_x[,i]) - # # , method = "spearman" - # , method = "pearson" - # ) - # } - # ) - # names(result$spearcor) <- names(p1) - # pcorr1 <- result$spearcor - # x_progress("spearman strF(pcorr1)") - # x_progress(strF(pcorr1)) - # x_progress(typeof(pcorr1)) - # x_progress(str(pcorr1)) - # pcorr1 <- result$correlation <- result$spearcor + # eliminate NAs in either correlation or covariance + nas <- is.na(result$correlation) | is.na(result$covariance) + featureID <- names(ropls_x@vipVn) + featureID <- featureID[!nas] + result$correlation <- result$correlation[!nas] + result$covariance <- result$covariance[!nas] + n_features <- length(featureID) - # correl.ci(r, n, a = 0.05, rho = 0) correl_pci <- lapply( - X = 1:Nfeatures - , FUN = function(i) correl.ci(r = pcorr1[i], n = Nobservations) + X = 1:n_features + , FUN = function(i) { + correl.ci( + r = result$correlation[i] + , n_obs = n_observations + , n_vars = fdr_n_features + ) + } ) result$p_value_raw <- sapply( - X = 1:Nfeatures + X = 1:n_features + , FUN = function(i) correl_pci[[i]]$p.value.raw + ) + result$p_value_raw[is.na(result$p_value_raw)] <- 1.0 + result$p_value <- sapply( + X = 1:n_features , FUN = function(i) correl_pci[[i]]$p.value ) - result$p_value_raw[is.na(result$p_value_raw)] <- 0.0 + result$p_value[is.na(result$p_value)] <- 1.0 result$ci_lower <- sapply( - X = 1:Nfeatures - , FUN = function(i) correl_pci[[i]]$CI['lower'] + X = 1:n_features + , FUN = function(i) correl_pci[[i]]$CI["lower"] ) result$ci_upper <- sapply( - X = 1:Nfeatures - , FUN = function(i) correl_pci[[i]]$CI['upper'] + X = 1:n_features + , FUN = function(i) correl_pci[[i]]$CI["upper"] ) # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) - result$vip4p <- as.numeric(ropls_x@vipVn) - result$vip4o <- as.numeric(ropls_x@orthoVipVn) + result$vip4p <- as.numeric(ropls_x@vipVn)[!nas] + result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas] if (length(result$vip4o) == 0) result$vip4o <- NA # extract the loadings - result$loadp <- as.numeric(ropls_x@loadingMN) - result$loado <- as.numeric(ropls_x@orthoLoadingMN) + result$loadp <- as.numeric(ropls_x@loadingMN)[!nas] + result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas] # get the level names level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) fctr_lvl_1 <- level_names[1] fctr_lvl_2 <- level_names[2] - feature_count <- length(ropls_x@vipVn) - result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) - result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) + result$level1 <- rep.int(x = fctr_lvl_1, times = n_features) + result$level2 <- rep.int(x = fctr_lvl_2, times = n_features) greaterLevel <- sapply( X = result$correlation - , FUN = function(my_corr) + , FUN = function(my_corr) { tryCatch({ - if ( is.nan( my_corr ) ) { - NA + if ( is.na(my_corr) || ! is.numeric( my_corr ) ) { + NA } else { if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 } - }, error = function(e) { + } + , error = function(e) { + print(my_corr) x_progress( sprintf( "cor_vs_cov -> sapply: error - substituting NA - %s" @@ -992,16 +1035,11 @@ ) ) NA - }) + } + ) + } ) - # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 - featureID <- names(ropls_x@vipVn) - greaterLevel <- greaterLevel[featureID] - result$correlation <- result$correlation[featureID] - result$covariance <- result$covariance[featureID] - # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 - # build a data frame to hold the content for the tab-separated values file tsv1 <- data.frame( featureID = featureID @@ -1016,8 +1054,8 @@ , loadp = result$loadp , loado = result$loado , cor_p_val_raw = result$p_value_raw - , cor_p_value = p.adjust(p = result$p_value_raw, method = "BY") - , cor_ci_lower = result$ci_lower + , cor_p_value = result$p_value + , cor_ci_lower = result$ci_lower , cor_ci_upper = result$ci_upper ) rownames(tsv1) <- tsv1$featureID @@ -1039,7 +1077,7 @@ tsv1 <- tsv1[!is.na(tsv1$covariance),] superresult$tsv1 <- tsv1 - # # I did not include these but left them commentd out in case future + # # I did not include these but left them commentd out in case future # # consumers of this routine want to use it in currently unanticipated ways # result$superresult <- superresult # result$oplsda <- ropls_x @@ -1059,22 +1097,28 @@ # which follows # https://en.wikipedia.org/wiki/Fisher_transformation#Definition -correl.ci <- function(r, n, a = 0.05, rho = 0) { - ## r is the calculated correlation coefficient for n pairs +correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) { + ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable ## a is the significance level ## rho is the hypothesised correlation zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 - se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher's z-transformation of Ho + se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho test <- (zh1 - zh0)/se ### test statistic pvalue <- 2*(1 - pnorm(abs(test))) ## p-value - zL <- zh1 - qnorm(1 - a/2)*se - zH <- zh1 + qnorm(1 - a/2)*se - fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit - fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit - CI <- c(fishL, fishH) - names(CI) <- c('lower', 'upper') - list(correlation = r, p.value = pvalue, CI = CI) + pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars) + z_L <- zh1 - qnorm(1 - a/2)*se + z_h <- zh1 + qnorm(1 - a/2)*se + fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit + fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit + ci <- c(fish_l, fish_h) + names(ci) <- c("lower", "upper") + list( + correlation = r + , p.value.raw = pvalue + , p.value = pvalue.adj + , CI = ci + ) } -# vim: sw=2 ts=2 et : +# vim: sw=2 ts=2 et ai : diff -r ddaf84e15d06 -r 2ae2d26e3270 w4mcorcov_lib.R --- a/w4mcorcov_lib.R Thu Nov 08 23:06:09 2018 -0500 +++ b/w4mcorcov_lib.R Wed Dec 12 09:20:02 2018 -0500 @@ -1,3 +1,3 @@ suppressMessages(library(batch)) suppressMessages(library(ropls)) -suppressMessages(library(methods)) +#suppressMessages(library(methods)) diff -r ddaf84e15d06 -r 2ae2d26e3270 w4mcorcov_salience.R --- a/w4mcorcov_salience.R Thu Nov 08 23:06:09 2018 -0500 +++ b/w4mcorcov_salience.R Wed Dec 12 09:20:02 2018 -0500 @@ -19,14 +19,21 @@ failure_action("w4msalience: Expected data_matrix to be a matrix (or data.frame) of numeric") return (NULL) } + + feature_names <- colnames(t_data_matrix) + n_features <- ncol(t_data_matrix) - n_features_plus_1 <- 1 + n_features - features <- colnames(t_data_matrix) n_samples <- nrow(t_data_matrix) if ( length(sample_class) != n_samples ) { strF(data_matrix) strF(sample_class) - failure_action(sprintf("w4msalience: The data_matrix has %d samples but sample_class has %d", n_samples, length(sample_class))) + failure_action( + sprintf( + "w4msalience: The data_matrix has %d samples but sample_class has %d" + , n_samples + , length(sample_class) + ) + ) return (NULL) } # end sanity checks @@ -39,13 +46,6 @@ , FUN = "median" ) - # "For each feature, 'select sample_class, max(intensity) from feature group by sample_class'." - maxOfFeatureBySampleClassLevel <- aggregate( - x = as.data.frame(t_data_matrix) - , by = list(sample_class) - , FUN = "max" - ) - # "For each feature, 'select sample_class, rcv(intensity) from feature group by sample_class'." # cv is less robust; deviation from normality degrades performance # cv(x) == sd(x) / mean(x) @@ -56,61 +56,72 @@ , by = list(sample_class) , FUN = "mad" ) - rcvOfFeatureBySampleClassLevel <- as.matrix( - madOfFeatureBySampleClassLevel[,2:n_features_plus_1] / medianOfFeatureBySampleClassLevel[,2:n_features_plus_1] - ) - rcvOfFeatureBySampleClassLevel[is.nan(rcvOfFeatureBySampleClassLevel)] <- max(9999,max(rcvOfFeatureBySampleClassLevel, na.rm = TRUE)) - # "For each feature, 'select max(max_feature_intensity) from feature'." - maxApplyMaxOfFeatureBySampleClassLevel <- sapply( - X = 1:n_features - , FUN = function(i) { - match( - max(maxOfFeatureBySampleClassLevel[, i + 1]) - , maxOfFeatureBySampleClassLevel[, i + 1] - ) - } - ) - - # "For each feature, 'select mean(median_feature_intensity) from feature'." - meanApplyMedianOfFeatureBySampleClassLevel <- sapply( - X = 1:n_features - , FUN = function(i) mean(medianOfFeatureBySampleClassLevel[, i + 1]) - ) + # Note that `apply(X=array(1:10), MARGIN = 1, FUN = function(x) return(c(x,x^2)))` + # produces a matrix with two rows and ten columns - # Compute the 'salience' for each feature, i.e., how salient the intensity of a feature - # is for one particular class-level relative to the intensity across all class-levels. - salience_df <- data.frame( - # the feature name - feature = features - # the name (or factor-level) of the class-level with the highest median intensity for the feature - , max_level = medianOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel,1] - # the median intensity for the feature and the level max_level - , max_median = sapply( - X = 1:n_features - , FUN = function(i) { - maxOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], 1 + i] - } + my_list <- apply( + X = array(1:n_features) + , MARGIN = 1 + , FUN = function(x) { + my_df <- data.frame( + median = medianOfFeatureBySampleClassLevel[ , 1 + x] + , mad = madOfFeatureBySampleClassLevel[ , 1 + x] + ) + my_df$salient_level <- medianOfFeatureBySampleClassLevel[ , 1] + my_df <- my_df[ order(my_df$median, decreasing = TRUE), ] + my_dist_df <- my_df[ 1:2, ] + # "robust coefficient of variation", i.e., + # mad(feature-intensity for class-level max_level) / median(feature-intensity for class-level max_level) + rcv_result <- my_dist_df$mad[1] / my_dist_df$median[1] + dist_result <- + ( my_dist_df$median[1] - my_dist_df$median[2] ) / + sqrt( my_dist_df$mad[1] * my_dist_df$mad[2] ) + if (is.infinite(dist_result) || is.nan(dist_result)) + dist_result <- 0 + mean_median <- mean(my_df$median) + salience_result <- if (mean_median > 0) my_df$median[1] / mean_median else 0 + return ( + data.frame( + dist_result = dist_result + , max_median = my_df$median[1] + , mean_median = mean_median + , salience_result = salience_result + , salient_level = my_df$salient_level[1] + , rcv_result = rcv_result + ) + ) + } + ) + results_matrix <- sapply(X = 1:n_features, FUN = function(i) my_list[[i]]) + results_df <- as.data.frame(t(results_matrix)) + + relative_salient_distance <- unlist(results_df$dist_result) + salience <- unlist(results_df$salience_result) + salient_level <- unlist(results_df$salient_level) + max_median <- unlist(results_df$max_median) + mean_median <- unlist(results_df$mean_median) + rcv_result <- unlist(results_df$rcv_result) + + salience_df <- + data.frame( + # the feature name + feature = feature_names + # the name (or factor-level) of the class-level with the highest median intensity for the feature + , max_level = salient_level + # the median intensity for the feature and the level max_level + , max_median = max_median + # the distance between the maximum intensities for the feature at the two highest levels + , relative_salient_distance = relative_salient_distance + # the coefficient of variation (expressed as a proportion) for the intensity for the feature and the level max_level + , salience_rcv = rcv_result + # the mean of the medians of intensity for all class-levels for the feature + , mean_median = mean_median + # raw salience is the ratio of the most-prominent level to the mean of all levels for the feature + , salience = salience + # don't coerce strings to factors (this is a parameter for the data.frame constructor, not a column of the data.frame) + , stringsAsFactors = FALSE ) - # the coefficient of variation (expressed as a proportion) for the intensity for the feature and the level max_level - , max_rcv = sapply( - X = 1:n_features - , FUN = function(i) { - rcvOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], i] - } - ) - # the mean of the medians of intensity for all class-levels for the feature - , mean_median = meanApplyMedianOfFeatureBySampleClassLevel - # don't coerce strings to factors (this is a parameter for the data.frame constructor, not a column of the data.frame) - , stringsAsFactors = FALSE - ) - # raw salience is the ratio of the most-prominent level to the mean of all levels for the feature - salience_df$salience <- sapply( - X = 1:nrow(salience_df) - , FUN = function(i) with(salience_df[i,], if (mean_median > 0) { max_median / mean_median } else { 0 } ) - ) - # "robust coefficient of variation, i.e., mad(feature-intensity for class-level max_level) / median(feature-intensity for class-level max_level) - salience_df$salient_rcv <- salience_df$max_rcv return (salience_df) } diff -r ddaf84e15d06 -r 2ae2d26e3270 w4mcorcov_wrapper.R --- a/w4mcorcov_wrapper.R Thu Nov 08 23:06:09 2018 -0500 +++ b/w4mcorcov_wrapper.R Wed Dec 12 09:20:02 2018 -0500 @@ -73,6 +73,10 @@ ######## errorPrint(sessionInfo()) +errorCat("\nsearch path:") +errorPrint(search()) +# errorCat("\nCurrently loaded namespaces:\n") +# errorPrint(loadedNamespaces()) argVc <- unlist(parseCommandArgs(evaluate=FALSE)) errorCat("\n\n---\n\nArguments that were passed to R are as follows:\n") @@ -104,6 +108,7 @@ my_env$levCSV <- as.character(argVc["levCSV"]) my_env$matchingC <- as.character(argVc["matchingC"]) my_env$labelFeatures <- as.character(argVc["labelFeatures"]) # number of features to label at each extreme of the loadings or 'ALL' +my_env$fdr_features <- as.character(argVc["fdr_features"]) # number of features to consider when adjusting p-value, or 'ALL' my_env$cplot_o <- as.logical(argVc["cplot_o"]) # TRUE if orthogonal C-plot is requested my_env$cplot_p <- as.logical(argVc["cplot_p"]) # TRUE if parallel C-plot is requested my_env$cplot_y <- as.character(argVc["cplot_y"]) # Choice of covariance/correlation for Y-axis on C-plot