Mercurial > repos > eschen42 > w4mcorcov
changeset 12:ddaf84e15d06 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 6775c83b89d9d903c81a2229cdc200fc93538dfe-dirty
author | eschen42 |
---|---|
date | Thu, 08 Nov 2018 23:06:09 -0500 |
parents | ddcc33ff3205 |
children | 2ae2d26e3270 |
files | w4mcorcov.xml w4mcorcov_calc.R |
diffstat | 2 files changed, 238 insertions(+), 107 deletions(-) [+] |
line wrap: on
line diff
--- a/w4mcorcov.xml Wed Sep 05 22:31:21 2018 -0400 +++ b/w4mcorcov.xml Thu Nov 08 23:06:09 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.15"> +<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.16"> <description>OPLS-DA Contrasts of Univariate Results</description> <macros> <xml name="paramPairSigFeatOnly"> @@ -201,13 +201,13 @@ <has_text text="level1Level2Sig" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.3704185" /> + <has_text text="-0.462909875" /> <has_text text="-36.6668927" /> <has_text text="0.4914638" /> <has_text text="0.01302117" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="0.3235022" /> + <has_text text="0.504885262" /> <has_text text="5.97529097" /> <has_text text="0.207196379" /> <has_text text="0.04438632" /> @@ -259,7 +259,7 @@ <has_text text="level1Level2Sig" /> <!-- first matched line --> <has_text text="M200.005T296" /> - <has_text text="-0.24533821" /> + <has_text text="-0.28035717" /> <has_text text="-3.3573953" /> <has_text text="0.1157346" /> <has_text text="0.0647860" /> @@ -309,13 +309,13 @@ <has_text text="vip4o" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.37867079" /> + <has_text text="-0.4732226665" /> <has_text text="-37.71066" /> <has_text text="0.5246766" /> <has_text text="0.0103341" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="0.31570433" /> + <has_text text="0.4927151212" /> <has_text text="5.86655640" /> <has_text text="0.2111623" /> <has_text text="0.0488654" /> @@ -368,7 +368,7 @@ <has_text text="NM516T251" /> <has_text text="flower_yes" /> <has_text text="other" /> - <has_text text="0.03402807" /> + <has_text text="0.3499550705" /> <has_text text="0.03526926" /> <has_text text="0.43664386" /> <has_text text="0.587701897" /> @@ -419,13 +419,13 @@ <has_text text="vip4o" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="0.43361563" /> + <has_text text="0.61594030" /> <has_text text="37.76875778" /> <has_text text="0.54672558" /> <has_text text="0.3920409" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="-0.3365475" /> + <has_text text="-0.89716403" /> <has_text text="-6.337903" /> <has_text text="0.270297" /> <has_text text="0.037661" /> @@ -454,14 +454,14 @@ <has_text text="vip4o" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.0435663" /> - <has_text text="-1.9068114" /> - <has_text text="0.0304592" /> - <has_text text="0.104748883" /> + <has_text text="-0.331230562" /> + <has_text text="-2.47167915" /> + <has_text text="0.0892595" /> + <has_text text="0.0049228872" /> </assert_contents> </output> </test> - <!-- test #6 - issue 8 --> + <!-- test #7 - issue 8 --> <test> <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/> @@ -495,6 +495,7 @@ **Author** - Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu) +**Release Notes** - https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper#release-notes Motivation ---------- @@ -616,9 +617,13 @@ | [OUT] Contrast-detail output PDF - | Several plots for each two-projection OPLS-DA analysis: + | File containing several plots for each two-projection OPLS-DA analysis. + +- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results -- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 and 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers. + - This is a work-alike for the S-PLOT, computed using formula in equations 1 and 2 from Wiklund, (*op. cit.*); + - point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) increases through the range from 0.83 and 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers; + - plot symbols are diamonds when the adjusted p-value of correlation is greater than 0.05, circles when it is less than 0.01, and triangles when it between these limits. - (second row, left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Th]]>é<![CDATA[venot, 2017) - (first row, right) OPLS-DA **scores-plot** for the two projections (Th]]>é<![CDATA[venot *et al.*, 2015) - (second row, right) **correlation-versus-covariance plot** of OPLS-DA results **orthogonal to the predictor** (see section "S-Plot of Orthogonal Component" in Wiklund, *op. cit.*, pp. 120-121; this characterizes features with the greatest variation independent of the predictor). @@ -633,12 +638,18 @@ - **featureID** - feature-identifier - **factorLevel1** - factor-level 1 - **factorLevel2** - factor-level 2 (or "other" when contrasting factor-level 1 with all other levels) -- **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in Supplement to Wiklund, *op. cit.*). Note that, for a given contrast, there is a linear relationship between 'loadp' and 'correlation'. -- **covariance** - covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*) +- **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from equation 2 in Wiklund, *op. cit.*). Note that, for a given contrast, there is a linear relationship between 'loadp' and 'correlation'. +- **covariance** - relative covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*, but scaled so that the greatest value has a magnitude of 1) - **vip4p** - "variable importance in projection" to the predictive projection, VIP\ :subscript:`4,p` (Galindo-Prieto *op. cit.*) - **vip4o** - "variable importance in projection" to the orthogonal projection, VIP\ :subscript:`4,o` (*ibid.*) - **loadp** - variable loading for the predictive projection (Wiklund *op. cit.*) - **loado** - variable loading for the orthogonal projection (*ibid.*) +- **cor_p_val_raw** - p-value for Fisher-transformed correlation (Fisher, 1921; Snedecor, 1980; see also https://en.wikipedia.org/wiki/Fisher_transformation), with no family-wise error-rate correction. +- **cor_p_value** - p-value for Fisher-transformed correlation, adjusted for family-wise error rate (Yekutieli *et al.*, 2001). Caveat: any previous selection for features that vary notably by factor level may result in too little adjustment. +- **cor_ci_lower** - lower limit of 95% confidence interval for correlation (see e.g. https://en.wikipedia.org/wiki/Fisher_transformation) +- **cor_ci_upper** - upper limit of 95% confidence interval for correlation (*ibid.*) +- **mz** - *m/z* ratio for feature, copied from input variableMetadata +- **rt** - retention time for feature, copied from input variableMetadata - **level1Level2Sig** - (Only present when a test other than "none" is chosen) '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise [OUT] Feature "Salience" data TABULAR @@ -819,6 +830,19 @@ <citations> <!-- this tool --> <citation type="doi">10.5281/zenodo.1034784</citation> + <!-- Fisher_1921: Fisher z-transformation of correlation coefficient --> + <citation type="bibtex"><![CDATA[ + @article{Fisher_1921, + author = {Fisher, R. A.}, + title = {{On the probable error of a coefficient of correlation deduced from a small sample}}, + journal = {Metron}, + year = {1921}, + volume = {1}, + pages = {3--32}, + note = {Defines the Fisher z-transformation of a coefficient of correlation. Citation adapted from http://www.citeulike.org/group/894/article/2344770}, + url = {https://digital.library.adelaide.edu.au/dspace/bitstream/2440/15169/1/14.pdf}, + } + ]]></citation> <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS --> <citation type="doi">10.1002/cem.2627</citation> <!-- Giacomoni_2014 W4M 2.5 --> @@ -833,6 +857,22 @@ <citation type="doi">10.3389/fmolb.2016.00026</citation> <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis --> <citation type="doi">10.1038/srep22274</citation> + <!-- Snedecor_1980: Fisher z-transformation of correlation coefficient --> + <citation type="bibtex"><![CDATA[ + @book{Snedecor_1980, + author = {Snedecor, George W. and Cochran, William G.}, + title = {Statistical methods}, + publisher = {Iowa State University Press}, + year = {1980}, + pages = {186}, + isbn = {0813815606}, + language = {eng}, + keyword = {Statistics, Statistics as Topic -- methods}, + lccn = {80014582}, + edition = {7th ed..}, + address = {Ames, Iowa}, + } + ]]></citation> <!-- Thevenot_2015 Urinary metabolome statistics --> <citation type="doi">10.1021/acs.jproteome.5b00354</citation> <!-- ropls package --> @@ -849,6 +889,8 @@ ]]></citation> <!-- Wiklund_2008 OPLS PLS-DA and S-PLOT --> <citation type="doi">10.1021/ac0713510</citation> + <!-- Yekutieli_2001 The control of the false discovery rate in multiple testing under dependency --> + <citation type="doi">10.1214/aos/1013699998</citation> </citations> <!-- vim:et:sw=4:ts=4
--- a/w4mcorcov_calc.R Wed Sep 05 22:31:21 2018 -0400 +++ b/w4mcorcov_calc.R Thu Nov 08 23:06:09 2018 -0500 @@ -38,10 +38,7 @@ # strip out variables having negligible variance x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) - # x_progress(strF(x_dataMatrix)) - # x_progress(strF(my_oplsda)) - #x_progress(head(my_oplsda_suppLs_y_levels)) - #x_progress(unique(my_oplsda_suppLs_y_levels)) + fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] do_s_plot <- function( @@ -51,12 +48,8 @@ , cor_vs_cov_x = NULL ) { - #print(ls(x_env)) # "cplot_y" etc - #print(str(x_env$cplot_y)) # chr "covariance" if (cplot_x) { - #print(x_env$cplot_y) # "covariance" cplot_y_correlation <- (x_env$cplot_y == "correlation") - #print(cplot_y_correlation) # FALSE } if (is.null(cor_vs_cov_x)) { my_cor_vs_cov <- cor_vs_cov( @@ -68,7 +61,6 @@ } else { my_cor_vs_cov <- cor_vs_cov_x } - # print("str(my_cor_vs_cov)") # str(my_cor_vs_cov) if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { if (is.null(cor_vs_cov_x)) { @@ -166,6 +158,7 @@ } main_cex <- min(1.0, 46.0/nchar(main_label)) my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) plot( y = my_y , x = my_x @@ -177,7 +170,7 @@ , main = main_label , cex.main = main_cex , cex = cex - , pch = 16 + , pch = my_pch , col = my_col ) low_x <- -0.7 * lim_x @@ -217,12 +210,6 @@ ) } label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { - # print("str(x_arg)") - # print(str(x_arg)) - # print("str(y_arg)") - # print(str(y_arg)) - # print("str(labels_arg)") - # print(str(labels_arg)) if (length(labels_arg) > 0) { unique_slant <- unique(slant_arg) if (length(unique_slant) == 1) { @@ -851,13 +838,13 @@ } cor_vs_cov_try <- function( - matrix_x -, ropls_x -, predictor_projection_x = TRUE -, x_progress = print + matrix_x # rows are samples; columns, features +, ropls_x # an instance of ropls::opls +, predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection +, x_progress = print # function to produce progress and error messages ) { x_class <- class(ropls_x) - if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) + if ( !( as.character(x_class) == "opls" ) ) { stop( paste( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " @@ -865,57 +852,120 @@ ) ) } + if ( !ropls_x@suppLs$algoC == "nipals" ) { + # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS + stop( + paste( + "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than " + , ropls_x@suppLs$algoC + ) + ) + } result <- list() result$projection <- projection <- if (predictor_projection_x) 1 else 2 - # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS - if ( ropls_x@suppLs$algoC == "nipals") { - # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 - mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) - mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - if (predictor_projection_x) - score_matrix <- ropls_x@scoreMN - else - score_matrix <- ropls_x@orthoScoreMN - score_matrix_transposed <- t(score_matrix) - score_matrix_magnitude <- mag(score_matrix) - result$covariance <- - score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) - result$correlation <- - score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) - } else { - # WARNING - untested code - I don't have test data to exercise this branch - # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 - # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - if (predictor_projection_x) - score_matrix <- ropls_x@scoreMN - else - score_matrix <- ropls_x@orthoScoreMN - score_matrix_transposed <- t(score_matrix) - cov_divisor <- nrow(matrix_x) - 1 - result$covariance <- sapply( - X = 1:ncol(matrix_x) - , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor - ) - score_sd <- sapply( - X = 1:ncol(score_matrix) - , FUN = function(x) sd(score_matrix[,x]) - ) - # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix - xSdVn <- ropls_x@xSdVn - result$correlation <- sapply( - X = 1:ncol(matrix_x) - , FUN = function(x) { - ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) - } - ) - } - result$correlation <- result$correlation[ 1, , drop = TRUE ] - result$covariance <- result$covariance [ 1, , drop = TRUE ] + + # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 + # (and not from the supplement despite the statement that, for the NIPALS algorithm, + # the equations from the supplement should be used) because of the definition of the + # Pearson/Galton coefficient of correlation is defined as + # $$ + # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} + # $$ + # as described (among other places) on Wikipedia at + # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population + # The equations in the supplement said to use, for the predictive component t1, + # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} + # but the results that I got were dramatically different from published results for S-PLOTs; + # perhaps my data are not centered exactly the same way that theirs were. + # The correlations calculated here are in agreement with those calculated with the code from + # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf + # I did transform covariance to "relative covariance" (relative to the maximum value) + # to keep the figures consistent with one another. + + # count the features (one column for each sample) + Nfeatures <- ncol(matrix_x) + # count the samples (one row for each sample) + Nobservations <- nrow(matrix_x) + # a one-dimensional magnitude function (i.e., take the vector norm) + vector_norm <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) + # calculate the standard deviation for each feature + sd_xi <- sapply(X = 1:Nfeatures, FUN = function(x) sd(matrix_x[,x])) + # choose whether to plot the predictive score vector or orthogonal score vector + if (predictor_projection_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN + # transpose the score (or orthoscore) vector for use as a premultiplier in covariance calculation + score_matrix_transposed <- t(score_matrix) + # compute the norm of the vector (i.e., the magnitude) + score_matrix_magnitude <- vector_norm(score_matrix) + # compute the standard deviation of the vector + score_matrix_sd <- sd(score_matrix) + # compute the relative covariance of each feature with the score vector + result$covariance <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) + # compute the correlation of each feature with the score vector + result$correlation <- + score_matrix_transposed %*% matrix_x / ( (Nobservations - 1) * ( score_matrix_sd * sd_xi ) ) - # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 + # convert covariance and correlation from one-dimensional matrices to arrays of values, + # which are accessed by feature name below + p1 <- result$covariance <- result$covariance [ 1, , drop = TRUE ] + # x_progress("strF(p1)") + # x_progress(strF(p1)) + + pcorr1 <- result$correlation <- result$correlation[ 1, , drop = TRUE ] + # x_progress("pearson strF(pcorr1)") + # x_progress(strF(pcorr1)) + # x_progress(typeof(pcorr1)) + # x_progress(str(pcorr1)) + + # # this is how to use Spearman correlation instead of pearson + # result$spearcor <- sapply( + # X = 1:Nfeatures + # , FUN = function(i) { + # stats::cor( + # x = as.vector(score_matrix) + # , y = as.vector(matrix_x[,i]) + # # , method = "spearman" + # , method = "pearson" + # ) + # } + # ) + # names(result$spearcor) <- names(p1) + # pcorr1 <- result$spearcor + # x_progress("spearman strF(pcorr1)") + # x_progress(strF(pcorr1)) + # x_progress(typeof(pcorr1)) + # x_progress(str(pcorr1)) + # pcorr1 <- result$correlation <- result$spearcor + + # correl.ci(r, n, a = 0.05, rho = 0) + correl_pci <- lapply( + X = 1:Nfeatures + , FUN = function(i) correl.ci(r = pcorr1[i], n = Nobservations) + ) + result$p_value_raw <- sapply( + X = 1:Nfeatures + , FUN = function(i) correl_pci[[i]]$p.value + ) + result$p_value_raw[is.na(result$p_value_raw)] <- 0.0 + result$ci_lower <- sapply( + X = 1:Nfeatures + , FUN = function(i) correl_pci[[i]]$CI['lower'] + ) + result$ci_upper <- sapply( + X = 1:Nfeatures + , FUN = function(i) correl_pci[[i]]$CI['upper'] + ) + + + # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) result$vip4p <- as.numeric(ropls_x@vipVn) result$vip4o <- as.numeric(ropls_x@orthoVipVn) + if (length(result$vip4o) == 0) result$vip4o <- NA + # extract the loadings result$loadp <- as.numeric(ropls_x@loadingMN) result$loado <- as.numeric(ropls_x@orthoLoadingMN) # get the level names @@ -925,14 +975,11 @@ feature_count <- length(ropls_x@vipVn) result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) - superresult <- list() - if (length(result$vip4o) == 0) result$vip4o <- NA greaterLevel <- sapply( X = result$correlation , FUN = function(my_corr) tryCatch({ if ( is.nan( my_corr ) ) { - print("my_corr is NaN") NA } else { if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 @@ -955,24 +1002,28 @@ result$covariance <- result$covariance[featureID] # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + # build a data frame to hold the content for the tab-separated values file tsv1 <- data.frame( - featureID = featureID - , factorLevel1 = result$level1 - , factorLevel2 = result$level2 - , greaterLevel = greaterLevel - , projection = result$projection - , correlation = result$correlation - , covariance = result$covariance - , vip4p = result$vip4p - , vip4o = result$vip4o - , loadp = result$loadp - , loado = result$loado - , row.names = NULL + featureID = featureID + , factorLevel1 = result$level1 + , factorLevel2 = result$level2 + , greaterLevel = greaterLevel + , projection = result$projection + , correlation = result$correlation + , covariance = result$covariance + , vip4p = result$vip4p + , vip4o = result$vip4o + , loadp = result$loadp + , loado = result$loado + , cor_p_val_raw = result$p_value_raw + , cor_p_value = p.adjust(p = result$p_value_raw, method = "BY") + , cor_ci_lower = result$ci_lower + , cor_ci_upper = result$ci_upper ) - tsv1 <- tsv1[!is.na(tsv1$correlation),] - tsv1 <- tsv1[!is.na(tsv1$covariance),] - superresult$tsv1 <- tsv1 - rownames(superresult$tsv1) <- tsv1$featureID + rownames(tsv1) <- tsv1$featureID + + # build the superresult, i.e., the result returned by this function + superresult <- list() superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation @@ -980,12 +1031,50 @@ superresult$vip4o <- result$vip4o superresult$loadp <- result$loadp superresult$loado <- result$loado + superresult$cor_p_value <- tsv1$cor_p_value superresult$details <- result - result$superresult <- superresult - # Include thise in case future consumers of this routine want to use it in currently unanticipated ways - result$oplsda <- ropls_x - result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways + + # remove any rows having NA for covariance or correlation + tsv1 <- tsv1[!is.na(tsv1$correlation),] + tsv1 <- tsv1[!is.na(tsv1$covariance),] + superresult$tsv1 <- tsv1 + + # # I did not include these but left them commentd out in case future + # # consumers of this routine want to use it in currently unanticipated ways + # result$superresult <- superresult + # result$oplsda <- ropls_x + # result$predictor <- ropls_x@suppLs$y + return (superresult) } +# Code for correl.ci was adapted from correl function from: +# @book{ +# Tsagris_2018, +# author = {Tsagris, Michail}, +# year = {2018}, +# link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R}, +# title = {Multivariate data analysis in R} +# } +# which follows +# https://en.wikipedia.org/wiki/Fisher_transformation#Definition + +correl.ci <- function(r, n, a = 0.05, rho = 0) { + ## r is the calculated correlation coefficient for n pairs + ## a is the significance level + ## rho is the hypothesised correlation + zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho + zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 + se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher's z-transformation of Ho + test <- (zh1 - zh0)/se ### test statistic + pvalue <- 2*(1 - pnorm(abs(test))) ## p-value + zL <- zh1 - qnorm(1 - a/2)*se + zH <- zh1 + qnorm(1 - a/2)*se + fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit + fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit + CI <- c(fishL, fishH) + names(CI) <- c('lower', 'upper') + list(correlation = r, p.value = pvalue, CI = CI) +} + # vim: sw=2 ts=2 et :