changeset 12:ddaf84e15d06 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 6775c83b89d9d903c81a2229cdc200fc93538dfe-dirty
author eschen42
date Thu, 08 Nov 2018 23:06:09 -0500
parents ddcc33ff3205
children 2ae2d26e3270
files w4mcorcov.xml w4mcorcov_calc.R
diffstat 2 files changed, 238 insertions(+), 107 deletions(-) [+]
line wrap: on
line diff
--- a/w4mcorcov.xml	Wed Sep 05 22:31:21 2018 -0400
+++ b/w4mcorcov.xml	Thu Nov 08 23:06:09 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.15">
+<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.16">
     <description>OPLS-DA Contrasts of Univariate Results</description>
     <macros>
         <xml name="paramPairSigFeatOnly">
@@ -201,13 +201,13 @@
           <has_text text="level1Level2Sig" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="-0.3704185" />
+          <has_text text="-0.462909875" />
           <has_text text="-36.6668927" />
           <has_text text="0.4914638" />
           <has_text text="0.01302117" />
           <!-- second matched line -->
           <has_text text="M207.9308T206" />
-          <has_text text="0.3235022" />
+          <has_text text="0.504885262" />
           <has_text text="5.97529097" />
           <has_text text="0.207196379" />
           <has_text text="0.04438632" />
@@ -259,7 +259,7 @@
           <has_text text="level1Level2Sig" />
           <!-- first matched line -->
           <has_text text="M200.005T296" />
-          <has_text text="-0.24533821" />
+          <has_text text="-0.28035717" />
           <has_text text="-3.3573953" />
           <has_text text="0.1157346" />
           <has_text text="0.0647860" />
@@ -309,13 +309,13 @@
           <has_text text="vip4o" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="-0.37867079" />
+          <has_text text="-0.4732226665" />
           <has_text text="-37.71066" />
           <has_text text="0.5246766" />
           <has_text text="0.0103341" />
           <!-- second matched line -->
           <has_text text="M207.9308T206" />
-          <has_text text="0.31570433" />
+          <has_text text="0.4927151212" />
           <has_text text="5.86655640" />
           <has_text text="0.2111623" />
           <has_text text="0.0488654" />
@@ -368,7 +368,7 @@
           <has_text text="NM516T251" />
           <has_text text="flower_yes" />
           <has_text text="other" />
-          <has_text text="0.03402807" />
+          <has_text text="0.3499550705" />
           <has_text text="0.03526926" />
           <has_text text="0.43664386" />
           <has_text text="0.587701897" />
@@ -419,13 +419,13 @@
           <has_text text="vip4o" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="0.43361563" />
+          <has_text text="0.61594030" />
           <has_text text="37.76875778" />
           <has_text text="0.54672558" />
           <has_text text="0.3920409" />
           <!-- second matched line -->
           <has_text text="M207.9308T206" />
-          <has_text text="-0.3365475" />
+          <has_text text="-0.89716403" />
           <has_text text="-6.337903" />
           <has_text text="0.270297" />
           <has_text text="0.037661" />
@@ -454,14 +454,14 @@
           <has_text text="vip4o" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="-0.0435663" />
-          <has_text text="-1.9068114" />
-          <has_text text="0.0304592" />
-          <has_text text="0.104748883" />
+          <has_text text="-0.331230562" />
+          <has_text text="-2.47167915" />
+          <has_text text="0.0892595" />
+          <has_text text="0.0049228872" />
         </assert_contents>
       </output>
     </test>
-    <!-- test #6 - issue 8 -->
+    <!-- test #7 - issue 8 -->
     <test>
       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/>
@@ -495,6 +495,7 @@
 
 **Author** - Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)
 
+**Release Notes** - https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper#release-notes
 
 Motivation
 ----------
@@ -616,9 +617,13 @@
   |
 
 [OUT] Contrast-detail output PDF
-  | Several plots for each two-projection OPLS-DA analysis:
+  | File containing several plots for each two-projection OPLS-DA analysis.
+
+- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results
 
-- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 and 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers.
+    - This is a work-alike for the S-PLOT, computed using formula in equations 1 and 2 from Wiklund, (*op. cit.*);
+    - point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) increases through the range from 0.83 and 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers;
+    - plot symbols are diamonds when the adjusted p-value of correlation is greater than 0.05, circles when it is less than 0.01, and triangles when it between these limits.
 - (second row, left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Th]]>&#233;<![CDATA[venot, 2017)
 - (first row, right) OPLS-DA **scores-plot** for the two projections (Th]]>&#233;<![CDATA[venot *et al.*, 2015)
 - (second row, right) **correlation-versus-covariance plot** of OPLS-DA results **orthogonal to the predictor** (see section "S-Plot of Orthogonal Component" in Wiklund, *op. cit.*, pp. 120-121; this characterizes features with the greatest variation independent of the predictor).
@@ -633,12 +638,18 @@
 - **featureID** - feature-identifier
 - **factorLevel1** - factor-level 1
 - **factorLevel2** - factor-level 2 (or "other" when contrasting factor-level 1 with all other levels)
-- **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in Supplement to Wiklund, *op. cit.*).  Note that, for a given contrast, there is a linear relationship between 'loadp' and 'correlation'.
-- **covariance** - covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*)
+- **correlation** - correlation of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from equation 2 in Wiklund, *op. cit.*).  Note that, for a given contrast, there is a linear relationship between 'loadp' and 'correlation'.
+- **covariance** - relative covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*, but scaled so that the greatest value has a magnitude of 1)
 - **vip4p** - "variable importance in projection" to the predictive projection, VIP\ :subscript:`4,p` (Galindo-Prieto *op. cit.*)
 - **vip4o** - "variable importance in projection" to the orthogonal projection, VIP\ :subscript:`4,o` (*ibid.*)
 - **loadp** - variable loading for the predictive projection (Wiklund *op. cit.*)
 - **loado** - variable loading for the orthogonal projection (*ibid.*)
+- **cor_p_val_raw** - p-value for Fisher-transformed correlation (Fisher, 1921; Snedecor, 1980; see also https://en.wikipedia.org/wiki/Fisher_transformation), with no family-wise error-rate correction.
+- **cor_p_value** - p-value for Fisher-transformed correlation, adjusted for family-wise error rate (Yekutieli *et al.*, 2001).  Caveat: any previous selection for features that vary notably by factor level may result in too little adjustment.
+- **cor_ci_lower** - lower limit of 95% confidence interval for correlation (see e.g. https://en.wikipedia.org/wiki/Fisher_transformation)
+- **cor_ci_upper** - upper limit of 95% confidence interval for correlation (*ibid.*)
+- **mz** - *m/z* ratio for feature, copied from input variableMetadata
+- **rt** - retention time for feature, copied from input variableMetadata
 - **level1Level2Sig** - (Only present when a test other than "none" is chosen) '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise
 
 [OUT] Feature "Salience" data TABULAR
@@ -819,6 +830,19 @@
   <citations>
     <!-- this tool -->
     <citation type="doi">10.5281/zenodo.1034784</citation>
+    <!-- Fisher_1921: Fisher z-transformation of correlation coefficient -->
+    <citation type="bibtex"><![CDATA[
+    @article{Fisher_1921,
+      author = {Fisher, R. A.},
+      title = {{On the probable error of a coefficient of correlation deduced from a small sample}},
+      journal = {Metron},
+      year = {1921},
+      volume = {1},
+      pages = {3--32},
+      note = {Defines the Fisher z-transformation of a coefficient of correlation.  Citation adapted from http://www.citeulike.org/group/894/article/2344770},
+      url = {https://digital.library.adelaide.edu.au/dspace/bitstream/2440/15169/1/14.pdf},
+    }
+    ]]></citation>
     <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS -->
     <citation type="doi">10.1002/cem.2627</citation>
     <!-- Giacomoni_2014 W4M 2.5 -->
@@ -833,6 +857,22 @@
     <citation type="doi">10.3389/fmolb.2016.00026</citation>
     <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis -->
     <citation type="doi">10.1038/srep22274</citation>
+    <!-- Snedecor_1980: Fisher z-transformation of correlation coefficient -->
+    <citation type="bibtex"><![CDATA[
+    @book{Snedecor_1980,
+      author = {Snedecor, George W. and Cochran, William G.},
+      title = {Statistical methods},
+      publisher = {Iowa State University Press},
+      year = {1980},
+      pages = {186},
+      isbn = {0813815606},
+      language = {eng},
+      keyword = {Statistics, Statistics as Topic -- methods},
+      lccn = {80014582},
+      edition = {7th ed..},
+      address = {Ames, Iowa},
+    }
+    ]]></citation>
     <!-- Thevenot_2015 Urinary metabolome statistics -->
     <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
     <!-- ropls package -->
@@ -849,6 +889,8 @@
     ]]></citation>
     <!-- Wiklund_2008 OPLS PLS-DA and S-PLOT -->
     <citation type="doi">10.1021/ac0713510</citation>
+    <!-- Yekutieli_2001 The control of the false discovery rate in multiple testing under dependency -->
+    <citation type="doi">10.1214/aos/1013699998</citation>
   </citations>
   <!--
      vim:et:sw=4:ts=4
--- a/w4mcorcov_calc.R	Wed Sep 05 22:31:21 2018 -0400
+++ b/w4mcorcov_calc.R	Thu Nov 08 23:06:09 2018 -0500
@@ -38,10 +38,7 @@
     # strip out variables having negligible variance
     x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
     my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
-    # x_progress(strF(x_dataMatrix))
-    # x_progress(strF(my_oplsda))
-    #x_progress(head(my_oplsda_suppLs_y_levels))
-    #x_progress(unique(my_oplsda_suppLs_y_levels))
+
     fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
     fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
     do_s_plot <- function(
@@ -51,12 +48,8 @@
       , cor_vs_cov_x = NULL
       )
     {
-      #print(ls(x_env))               # "cplot_y" etc
-      #print(str(x_env$cplot_y))      # chr "covariance"
       if (cplot_x) {
-        #print(x_env$cplot_y)         # "covariance"
         cplot_y_correlation <- (x_env$cplot_y == "correlation")
-        #print(cplot_y_correlation)   # FALSE
       }
       if (is.null(cor_vs_cov_x)) {
         my_cor_vs_cov <- cor_vs_cov(
@@ -68,7 +61,6 @@
       } else {
         my_cor_vs_cov <- cor_vs_cov_x
       }
-      # print("str(my_cor_vs_cov)")
       # str(my_cor_vs_cov)
       if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
         if (is.null(cor_vs_cov_x)) {
@@ -166,6 +158,7 @@
           }
           main_cex <- min(1.0, 46.0/nchar(main_label))
           my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
+          my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18)
           plot(
             y = my_y
           , x = my_x
@@ -177,7 +170,7 @@
           , main = main_label
           , cex.main = main_cex
           , cex = cex
-          , pch = 16
+          , pch = my_pch
           , col = my_col
           )
           low_x <- -0.7 * lim_x
@@ -217,12 +210,6 @@
                 )
             }
             label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
-              # print("str(x_arg)")
-              # print(str(x_arg))
-              # print("str(y_arg)")
-              # print(str(y_arg))
-              # print("str(labels_arg)")
-              # print(str(labels_arg))
               if (length(labels_arg) > 0) {
                 unique_slant <- unique(slant_arg)
                 if (length(unique_slant) == 1) {
@@ -851,13 +838,13 @@
 }
 
 cor_vs_cov_try <- function(
-  matrix_x
-, ropls_x
-, predictor_projection_x = TRUE
-, x_progress = print
+  matrix_x                      # rows are samples; columns, features
+, ropls_x                       # an instance of ropls::opls
+, predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection
+, x_progress = print            # function to produce progress and error messages
 ) {
   x_class <- class(ropls_x)
-  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
+  if ( !( as.character(x_class) == "opls" ) ) {
     stop(
       paste(
         "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
@@ -865,57 +852,120 @@
       )
     )
   }
+  if ( !ropls_x@suppLs$algoC == "nipals" ) {
+    # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
+    stop(
+      paste(
+        "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than "
+      , ropls_x@suppLs$algoC
+      )
+    )
+  }
   result <- list()
   result$projection <- projection <- if (predictor_projection_x) 1 else 2
-  # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
-  if ( ropls_x@suppLs$algoC == "nipals") {
-    # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
-    mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
-    mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
-    if (predictor_projection_x)
-       score_matrix <- ropls_x@scoreMN
-    else
-       score_matrix <- ropls_x@orthoScoreMN
-    score_matrix_transposed <- t(score_matrix)
-    score_matrix_magnitude <- mag(score_matrix)
-    result$covariance <-
-      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
-    result$correlation <-
-      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
-  } else {
-    # WARNING - untested code - I don't have test data to exercise this branch
-    # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
-    # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
-    if (predictor_projection_x)
-       score_matrix <- ropls_x@scoreMN
-    else
-       score_matrix <- ropls_x@orthoScoreMN
-    score_matrix_transposed <- t(score_matrix)
-    cov_divisor <- nrow(matrix_x) - 1
-    result$covariance <- sapply(
-      X = 1:ncol(matrix_x)
-    , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
-    )
-    score_sd <- sapply(
-      X = 1:ncol(score_matrix)
-      , FUN = function(x) sd(score_matrix[,x])
-    )
-    # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix
-    xSdVn <- ropls_x@xSdVn
-    result$correlation <- sapply(
-      X = 1:ncol(matrix_x)
-    , FUN = function(x) {
-        ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
-      }
-    )
-  }
-  result$correlation <- result$correlation[ 1, , drop = TRUE ]
-  result$covariance  <- result$covariance [ 1, , drop = TRUE ]
+
+  # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
+  #   (and not from the supplement despite the statement that, for the NIPALS algorithm,
+  #   the equations from the supplement should be used) because of the definition of the 
+  #   Pearson/Galton coefficient of correlation is defined as
+  #   $$
+  #      \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}
+  #   $$
+  #   as described (among other places) on Wikipedia at 
+  #     https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population
+  # The equations in the supplement said to use, for the predictive component t1,
+  #      \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))}
+  # but the results that I got were dramatically different from published results for S-PLOTs;
+  # perhaps my data are not centered exactly the same way that theirs were.
+  # The correlations calculated here are in agreement with those calculated with the code from
+  #   page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf
+  # I did transform covariance to "relative covariance" (relative to the maximum value)
+  #   to keep the figures consistent with one another.
+
+  # count the features (one column for each sample)
+  Nfeatures <- ncol(matrix_x)
+  # count the samples (one row for each sample)
+  Nobservations <- nrow(matrix_x)
+  # a one-dimensional magnitude function (i.e., take the vector norm)
+  vector_norm <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
+  # calculate the standard deviation for each feature 
+  sd_xi <- sapply(X = 1:Nfeatures, FUN = function(x) sd(matrix_x[,x]))
+  # choose whether to plot the predictive score vector or orthogonal score vector
+  if (predictor_projection_x)
+     score_matrix <- ropls_x@scoreMN
+  else
+     score_matrix <- ropls_x@orthoScoreMN
+  # transpose the score (or orthoscore) vector for use as a premultiplier in covariance calculation
+  score_matrix_transposed <- t(score_matrix)
+  # compute the norm of the vector (i.e., the magnitude)
+  score_matrix_magnitude  <- vector_norm(score_matrix)
+  # compute the standard deviation of the vector
+  score_matrix_sd         <- sd(score_matrix)
+  # compute the relative covariance of each feature with the score vector
+  result$covariance <-
+    score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+  # compute the correlation of each feature with the score vector
+  result$correlation <-
+    score_matrix_transposed %*% matrix_x / ( (Nobservations - 1) * ( score_matrix_sd * sd_xi ) )
 
-  # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
+  # convert covariance and correlation from one-dimensional matrices to arrays of values, 
+  #   which are accessed by feature name below
+  p1     <- result$covariance  <- result$covariance [ 1, , drop = TRUE ]
+  # x_progress("strF(p1)")
+  # x_progress(strF(p1))
+
+  pcorr1 <- result$correlation <- result$correlation[ 1, , drop = TRUE ]
+  # x_progress("pearson strF(pcorr1)")
+  # x_progress(strF(pcorr1))
+  # x_progress(typeof(pcorr1))
+  # x_progress(str(pcorr1))
+  
+  # # this is how to use Spearman correlation instead of pearson
+  # result$spearcor <- sapply(
+  #   X = 1:Nfeatures
+  # , FUN = function(i) {
+  #     stats::cor(
+  #       x = as.vector(score_matrix)
+  #     , y = as.vector(matrix_x[,i])
+  #     # , method = "spearman"
+  #     , method = "pearson"
+  #     )
+  #   }
+  # )
+  # names(result$spearcor) <- names(p1)
+  # pcorr1 <- result$spearcor
+  # x_progress("spearman strF(pcorr1)")
+  # x_progress(strF(pcorr1))
+  # x_progress(typeof(pcorr1))
+  # x_progress(str(pcorr1))
+  # pcorr1 <- result$correlation <- result$spearcor
+
+  # correl.ci(r, n, a = 0.05, rho = 0)
+  correl_pci <- lapply(
+    X = 1:Nfeatures
+  , FUN = function(i) correl.ci(r = pcorr1[i], n = Nobservations)
+  )
+  result$p_value_raw <- sapply(
+    X = 1:Nfeatures
+  , FUN = function(i) correl_pci[[i]]$p.value
+  )
+  result$p_value_raw[is.na(result$p_value_raw)] <- 0.0
+  result$ci_lower <- sapply(
+    X = 1:Nfeatures
+  , FUN = function(i) correl_pci[[i]]$CI['lower']
+  )
+  result$ci_upper <- sapply(
+    X = 1:Nfeatures
+  , FUN = function(i) correl_pci[[i]]$CI['upper']
+  )
+
+
+  # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627)
   #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
   result$vip4p     <- as.numeric(ropls_x@vipVn)
   result$vip4o     <- as.numeric(ropls_x@orthoVipVn)
+  if (length(result$vip4o) == 0) result$vip4o <- NA
+  # extract the loadings
   result$loadp     <- as.numeric(ropls_x@loadingMN)
   result$loado     <- as.numeric(ropls_x@orthoLoadingMN)
   # get the level names
@@ -925,14 +975,11 @@
   feature_count    <- length(ropls_x@vipVn)
   result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
   result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
-  superresult <- list()
-  if (length(result$vip4o) == 0) result$vip4o <- NA
   greaterLevel <- sapply(
     X = result$correlation
   , FUN = function(my_corr)
       tryCatch({
           if ( is.nan( my_corr ) ) {
-            print("my_corr is NaN")
             NA 
           } else {
             if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
@@ -955,24 +1002,28 @@
   result$covariance  <- result$covariance[featureID]
   # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
 
+  # build a data frame to hold the content for the tab-separated values file
   tsv1 <- data.frame(
-    featureID           = featureID
-  , factorLevel1        = result$level1
-  , factorLevel2        = result$level2
-  , greaterLevel        = greaterLevel
-  , projection          = result$projection
-  , correlation         = result$correlation
-  , covariance          = result$covariance
-  , vip4p               = result$vip4p
-  , vip4o               = result$vip4o
-  , loadp               = result$loadp
-  , loado               = result$loado
-  , row.names           = NULL
+    featureID     = featureID
+  , factorLevel1  = result$level1
+  , factorLevel2  = result$level2
+  , greaterLevel  = greaterLevel
+  , projection    = result$projection
+  , correlation   = result$correlation
+  , covariance    = result$covariance
+  , vip4p         = result$vip4p
+  , vip4o         = result$vip4o
+  , loadp         = result$loadp
+  , loado         = result$loado
+  , cor_p_val_raw = result$p_value_raw
+  , cor_p_value   = p.adjust(p = result$p_value_raw, method = "BY")
+  , cor_ci_lower  = result$ci_lower 
+  , cor_ci_upper  = result$ci_upper
   )
-  tsv1 <- tsv1[!is.na(tsv1$correlation),]
-  tsv1 <- tsv1[!is.na(tsv1$covariance),]
-  superresult$tsv1 <- tsv1
-  rownames(superresult$tsv1) <- tsv1$featureID
+  rownames(tsv1) <- tsv1$featureID
+
+  # build the superresult, i.e., the result returned by this function
+  superresult <- list()
   superresult$projection <- result$projection
   superresult$covariance <- result$covariance
   superresult$correlation <- result$correlation
@@ -980,12 +1031,50 @@
   superresult$vip4o <- result$vip4o
   superresult$loadp <- result$loadp
   superresult$loado <- result$loado
+  superresult$cor_p_value <- tsv1$cor_p_value
   superresult$details <- result
-  result$superresult <- superresult
-  # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
-  result$oplsda    <- ropls_x
-  result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
+
+  # remove any rows having NA for covariance or correlation
+  tsv1 <- tsv1[!is.na(tsv1$correlation),]
+  tsv1 <- tsv1[!is.na(tsv1$covariance),]
+  superresult$tsv1 <- tsv1
+
+  # # I did not include these but left them commentd out in case future 
+  # #   consumers of this routine want to use it in currently unanticipated ways
+  # result$superresult <- superresult
+  # result$oplsda    <- ropls_x
+  # result$predictor <- ropls_x@suppLs$y
+
   return (superresult)
 }
 
+# Code for correl.ci was adapted from correl function from:
+#   @book{
+#     Tsagris_2018,
+#     author = {Tsagris, Michail},
+#     year = {2018},
+#     link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R},
+#     title = {Multivariate data analysis in R}
+#   }
+# which follows
+#   https://en.wikipedia.org/wiki/Fisher_transformation#Definition
+
+correl.ci <- function(r, n, a = 0.05, rho = 0) {
+  ## r is the calculated correlation coefficient for n pairs
+  ## a is the significance level
+  ## rho is the hypothesised correlation
+  zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho
+  zh1 <- atanh(r)   # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1
+  se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher's z-transformation of Ho
+  test <- (zh1 - zh0)/se ### test statistic
+  pvalue <- 2*(1 - pnorm(abs(test))) ## p-value
+  zL <- zh1 - qnorm(1 - a/2)*se
+  zH <- zh1 + qnorm(1 - a/2)*se
+  fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit
+  fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit
+  CI <- c(fishL, fishH)
+  names(CI) <- c('lower', 'upper')
+  list(correlation = r, p.value = pvalue, CI = CI)
+}
+
 # vim: sw=2 ts=2 et :