Repository 'w4mcorcov'
hg clone https://toolshed.g2.bx.psu.edu/repos/eschen42/w4mcorcov

Changeset 12:ddaf84e15d06 (2018-11-08)
Previous changeset 11:ddcc33ff3205 (2018-09-05) Next changeset 13:2ae2d26e3270 (2018-12-12)
Commit message:
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 6775c83b89d9d903c81a2229cdc200fc93538dfe-dirty
modified:
w4mcorcov.xml
w4mcorcov_calc.R
b
diff -r ddcc33ff3205 -r ddaf84e15d06 w4mcorcov.xml
--- a/w4mcorcov.xml Wed Sep 05 22:31:21 2018 -0400
+++ b/w4mcorcov.xml Thu Nov 08 23:06:09 2018 -0500
[
b'@@ -1,4 +1,4 @@\n-\xef\xbb\xbf<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.15">\n+\xef\xbb\xbf<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.16">\n     <description>OPLS-DA Contrasts of Univariate Results</description>\n     <macros>\n         <xml name="paramPairSigFeatOnly">\n@@ -201,13 +201,13 @@\n           <has_text text="level1Level2Sig" />\n           <!-- first matched line -->\n           <has_text text="M349.2383T700" />\n-          <has_text text="-0.3704185" />\n+          <has_text text="-0.462909875" />\n           <has_text text="-36.6668927" />\n           <has_text text="0.4914638" />\n           <has_text text="0.01302117" />\n           <!-- second matched line -->\n           <has_text text="M207.9308T206" />\n-          <has_text text="0.3235022" />\n+          <has_text text="0.504885262" />\n           <has_text text="5.97529097" />\n           <has_text text="0.207196379" />\n           <has_text text="0.04438632" />\n@@ -259,7 +259,7 @@\n           <has_text text="level1Level2Sig" />\n           <!-- first matched line -->\n           <has_text text="M200.005T296" />\n-          <has_text text="-0.24533821" />\n+          <has_text text="-0.28035717" />\n           <has_text text="-3.3573953" />\n           <has_text text="0.1157346" />\n           <has_text text="0.0647860" />\n@@ -309,13 +309,13 @@\n           <has_text text="vip4o" />\n           <!-- first matched line -->\n           <has_text text="M349.2383T700" />\n-          <has_text text="-0.37867079" />\n+          <has_text text="-0.4732226665" />\n           <has_text text="-37.71066" />\n           <has_text text="0.5246766" />\n           <has_text text="0.0103341" />\n           <!-- second matched line -->\n           <has_text text="M207.9308T206" />\n-          <has_text text="0.31570433" />\n+          <has_text text="0.4927151212" />\n           <has_text text="5.86655640" />\n           <has_text text="0.2111623" />\n           <has_text text="0.0488654" />\n@@ -368,7 +368,7 @@\n           <has_text text="NM516T251" />\n           <has_text text="flower_yes" />\n           <has_text text="other" />\n-          <has_text text="0.03402807" />\n+          <has_text text="0.3499550705" />\n           <has_text text="0.03526926" />\n           <has_text text="0.43664386" />\n           <has_text text="0.587701897" />\n@@ -419,13 +419,13 @@\n           <has_text text="vip4o" />\n           <!-- first matched line -->\n           <has_text text="M349.2383T700" />\n-          <has_text text="0.43361563" />\n+          <has_text text="0.61594030" />\n           <has_text text="37.76875778" />\n           <has_text text="0.54672558" />\n           <has_text text="0.3920409" />\n           <!-- second matched line -->\n           <has_text text="M207.9308T206" />\n-          <has_text text="-0.3365475" />\n+          <has_text text="-0.89716403" />\n           <has_text text="-6.337903" />\n           <has_text text="0.270297" />\n           <has_text text="0.037661" />\n@@ -454,14 +454,14 @@\n           <has_text text="vip4o" />\n           <!-- first matched line -->\n           <has_text text="M349.2383T700" />\n-          <has_text text="-0.0435663" />\n-          <has_text text="-1.9068114" />\n-          <has_text text="0.0304592" />\n-          <has_text text="0.104748883" />\n+          <has_text text="-0.331230562" />\n+          <has_text text="-2.47167915" />\n+          <has_text text="0.0892595" />\n+          <has_text text="0.0049228872" />\n         </assert_contents>\n       </output>\n     </test>\n-    <!-- test #6 - issue 8 -->\n+    <!-- test #7 - issue 8 -->\n     <test>\n       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>\n       <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/>\n@@ -495,6 +495,7 @@\n \n **Author** - Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)\n \n+**Release Notes** - https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper#release-notes\n \n Motivation\n ----------\n@@ -616,9 +617,13 @@\n   |\n \n [OUT] Contrast-detail output PDF\n-  | S'..b'n contrast, there is a linear relationship between \'loadp\' and \'correlation\'.\n+- **covariance** - relative covariance of the features projection explaining the difference between the features, < 0 when intensity for level 1 is greater (from formula in *ibid.*, but scaled so that the greatest value has a magnitude of 1)\n - **vip4p** - "variable importance in projection" to the predictive projection, VIP\\ :subscript:`4,p` (Galindo-Prieto *op. cit.*)\n - **vip4o** - "variable importance in projection" to the orthogonal projection, VIP\\ :subscript:`4,o` (*ibid.*)\n - **loadp** - variable loading for the predictive projection (Wiklund *op. cit.*)\n - **loado** - variable loading for the orthogonal projection (*ibid.*)\n+- **cor_p_val_raw** - p-value for Fisher-transformed correlation (Fisher, 1921; Snedecor, 1980; see also https://en.wikipedia.org/wiki/Fisher_transformation), with no family-wise error-rate correction.\n+- **cor_p_value** - p-value for Fisher-transformed correlation, adjusted for family-wise error rate (Yekutieli *et al.*, 2001).  Caveat: any previous selection for features that vary notably by factor level may result in too little adjustment.\n+- **cor_ci_lower** - lower limit of 95% confidence interval for correlation (see e.g. https://en.wikipedia.org/wiki/Fisher_transformation)\n+- **cor_ci_upper** - upper limit of 95% confidence interval for correlation (*ibid.*)\n+- **mz** - *m/z* ratio for feature, copied from input variableMetadata\n+- **rt** - retention time for feature, copied from input variableMetadata\n - **level1Level2Sig** - (Only present when a test other than "none" is chosen) \'1\' when feature varies significantly across all classes (i.e., not pair-wise); \'0\' otherwise\n \n [OUT] Feature "Salience" data TABULAR\n@@ -819,6 +830,19 @@\n   <citations>\n     <!-- this tool -->\n     <citation type="doi">10.5281/zenodo.1034784</citation>\n+    <!-- Fisher_1921: Fisher z-transformation of correlation coefficient -->\n+    <citation type="bibtex"><![CDATA[\n+    @article{Fisher_1921,\n+      author = {Fisher, R. A.},\n+      title = {{On the probable error of a coefficient of correlation deduced from a small sample}},\n+      journal = {Metron},\n+      year = {1921},\n+      volume = {1},\n+      pages = {3--32},\n+      note = {Defines the Fisher z-transformation of a coefficient of correlation.  Citation adapted from http://www.citeulike.org/group/894/article/2344770},\n+      url = {https://digital.library.adelaide.edu.au/dspace/bitstream/2440/15169/1/14.pdf},\n+    }\n+    ]]></citation>\n     <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS -->\n     <citation type="doi">10.1002/cem.2627</citation>\n     <!-- Giacomoni_2014 W4M 2.5 -->\n@@ -833,6 +857,22 @@\n     <citation type="doi">10.3389/fmolb.2016.00026</citation>\n     <!-- Sun_2016 Urinary Biomarkers for adolescent idiopathic scoliosis -->\n     <citation type="doi">10.1038/srep22274</citation>\n+    <!-- Snedecor_1980: Fisher z-transformation of correlation coefficient -->\n+    <citation type="bibtex"><![CDATA[\n+    @book{Snedecor_1980,\n+      author = {Snedecor, George W. and Cochran, William G.},\n+      title = {Statistical methods},\n+      publisher = {Iowa State University Press},\n+      year = {1980},\n+      pages = {186},\n+      isbn = {0813815606},\n+      language = {eng},\n+      keyword = {Statistics, Statistics as Topic -- methods},\n+      lccn = {80014582},\n+      edition = {7th ed..},\n+      address = {Ames, Iowa},\n+    }\n+    ]]></citation>\n     <!-- Thevenot_2015 Urinary metabolome statistics -->\n     <citation type="doi">10.1021/acs.jproteome.5b00354</citation>\n     <!-- ropls package -->\n@@ -849,6 +889,8 @@\n     ]]></citation>\n     <!-- Wiklund_2008 OPLS PLS-DA and S-PLOT -->\n     <citation type="doi">10.1021/ac0713510</citation>\n+    <!-- Yekutieli_2001 The control of the false discovery rate in multiple testing under dependency -->\n+    <citation type="doi">10.1214/aos/1013699998</citation>\n   </citations>\n   <!--\n      vim:et:sw=4:ts=4\n'
b
diff -r ddcc33ff3205 -r ddaf84e15d06 w4mcorcov_calc.R
--- a/w4mcorcov_calc.R Wed Sep 05 22:31:21 2018 -0400
+++ b/w4mcorcov_calc.R Thu Nov 08 23:06:09 2018 -0500
[
b'@@ -38,10 +38,7 @@\n     # strip out variables having negligible variance\n     x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]\n     my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))\n-    # x_progress(strF(x_dataMatrix))\n-    # x_progress(strF(my_oplsda))\n-    #x_progress(head(my_oplsda_suppLs_y_levels))\n-    #x_progress(unique(my_oplsda_suppLs_y_levels))\n+\n     fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]\n     fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]\n     do_s_plot <- function(\n@@ -51,12 +48,8 @@\n       , cor_vs_cov_x = NULL\n       )\n     {\n-      #print(ls(x_env))               # "cplot_y" etc\n-      #print(str(x_env$cplot_y))      # chr "covariance"\n       if (cplot_x) {\n-        #print(x_env$cplot_y)         # "covariance"\n         cplot_y_correlation <- (x_env$cplot_y == "correlation")\n-        #print(cplot_y_correlation)   # FALSE\n       }\n       if (is.null(cor_vs_cov_x)) {\n         my_cor_vs_cov <- cor_vs_cov(\n@@ -68,7 +61,6 @@\n       } else {\n         my_cor_vs_cov <- cor_vs_cov_x\n       }\n-      # print("str(my_cor_vs_cov)")\n       # str(my_cor_vs_cov)\n       if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {\n         if (is.null(cor_vs_cov_x)) {\n@@ -166,6 +158,7 @@\n           }\n           main_cex <- min(1.0, 46.0/nchar(main_label))\n           my_feature_label_slant <- -30 # slant feature labels 30 degrees downward\n+          my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18)\n           plot(\n             y = my_y\n           , x = my_x\n@@ -177,7 +170,7 @@\n           , main = main_label\n           , cex.main = main_cex\n           , cex = cex\n-          , pch = 16\n+          , pch = my_pch\n           , col = my_col\n           )\n           low_x <- -0.7 * lim_x\n@@ -217,12 +210,6 @@\n                 )\n             }\n             label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {\n-              # print("str(x_arg)")\n-              # print(str(x_arg))\n-              # print("str(y_arg)")\n-              # print(str(y_arg))\n-              # print("str(labels_arg)")\n-              # print(str(labels_arg))\n               if (length(labels_arg) > 0) {\n                 unique_slant <- unique(slant_arg)\n                 if (length(unique_slant) == 1) {\n@@ -851,13 +838,13 @@\n }\n \n cor_vs_cov_try <- function(\n-  matrix_x\n-, ropls_x\n-, predictor_projection_x = TRUE\n-, x_progress = print\n+  matrix_x                      # rows are samples; columns, features\n+, ropls_x                       # an instance of ropls::opls\n+, predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection\n+, x_progress = print            # function to produce progress and error messages\n ) {\n   x_class <- class(ropls_x)\n-  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )\n+  if ( !( as.character(x_class) == "opls" ) ) {\n     stop(\n       paste(\n         "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "\n@@ -865,57 +852,120 @@\n       )\n     )\n   }\n+  if ( !ropls_x@suppLs$algoC == "nipals" ) {\n+    # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS\n+    stop(\n+      paste(\n+        "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than "\n+      , ropls_x@suppLs$algoC\n+      )\n+    )\n+  }\n   result <- list()\n   result$projection <- projection <- if (predictor_projection_x) 1 else 2\n-  # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS\n-  if ( ropls_x@suppLs$algoC == "nipals") {\n-    # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510\n-    mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))\n-    mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))\n-    if (predictor_projection_x)\n-       score_m'..b'ureID]\n   # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1\n \n+  # build a data frame to hold the content for the tab-separated values file\n   tsv1 <- data.frame(\n-    featureID           = featureID\n-  , factorLevel1        = result$level1\n-  , factorLevel2        = result$level2\n-  , greaterLevel        = greaterLevel\n-  , projection          = result$projection\n-  , correlation         = result$correlation\n-  , covariance          = result$covariance\n-  , vip4p               = result$vip4p\n-  , vip4o               = result$vip4o\n-  , loadp               = result$loadp\n-  , loado               = result$loado\n-  , row.names           = NULL\n+    featureID     = featureID\n+  , factorLevel1  = result$level1\n+  , factorLevel2  = result$level2\n+  , greaterLevel  = greaterLevel\n+  , projection    = result$projection\n+  , correlation   = result$correlation\n+  , covariance    = result$covariance\n+  , vip4p         = result$vip4p\n+  , vip4o         = result$vip4o\n+  , loadp         = result$loadp\n+  , loado         = result$loado\n+  , cor_p_val_raw = result$p_value_raw\n+  , cor_p_value   = p.adjust(p = result$p_value_raw, method = "BY")\n+  , cor_ci_lower  = result$ci_lower \n+  , cor_ci_upper  = result$ci_upper\n   )\n-  tsv1 <- tsv1[!is.na(tsv1$correlation),]\n-  tsv1 <- tsv1[!is.na(tsv1$covariance),]\n-  superresult$tsv1 <- tsv1\n-  rownames(superresult$tsv1) <- tsv1$featureID\n+  rownames(tsv1) <- tsv1$featureID\n+\n+  # build the superresult, i.e., the result returned by this function\n+  superresult <- list()\n   superresult$projection <- result$projection\n   superresult$covariance <- result$covariance\n   superresult$correlation <- result$correlation\n@@ -980,12 +1031,50 @@\n   superresult$vip4o <- result$vip4o\n   superresult$loadp <- result$loadp\n   superresult$loado <- result$loado\n+  superresult$cor_p_value <- tsv1$cor_p_value\n   superresult$details <- result\n-  result$superresult <- superresult\n-  # Include thise in case future consumers of this routine want to use it in currently unanticipated ways\n-  result$oplsda    <- ropls_x\n-  result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways\n+\n+  # remove any rows having NA for covariance or correlation\n+  tsv1 <- tsv1[!is.na(tsv1$correlation),]\n+  tsv1 <- tsv1[!is.na(tsv1$covariance),]\n+  superresult$tsv1 <- tsv1\n+\n+  # # I did not include these but left them commentd out in case future \n+  # #   consumers of this routine want to use it in currently unanticipated ways\n+  # result$superresult <- superresult\n+  # result$oplsda    <- ropls_x\n+  # result$predictor <- ropls_x@suppLs$y\n+\n   return (superresult)\n }\n \n+# Code for correl.ci was adapted from correl function from:\n+#   @book{\n+#     Tsagris_2018,\n+#     author = {Tsagris, Michail},\n+#     year = {2018},\n+#     link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R},\n+#     title = {Multivariate data analysis in R}\n+#   }\n+# which follows\n+#   https://en.wikipedia.org/wiki/Fisher_transformation#Definition\n+\n+correl.ci <- function(r, n, a = 0.05, rho = 0) {\n+  ## r is the calculated correlation coefficient for n pairs\n+  ## a is the significance level\n+  ## rho is the hypothesised correlation\n+  zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher\'s z-transformation for Ho\n+  zh1 <- atanh(r)   # 0.5*log((1+r)/(1-r)), i.e., Fisher\'s z-transformation for H1\n+  se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher\'s z-transformation of Ho\n+  test <- (zh1 - zh0)/se ### test statistic\n+  pvalue <- 2*(1 - pnorm(abs(test))) ## p-value\n+  zL <- zh1 - qnorm(1 - a/2)*se\n+  zH <- zh1 + qnorm(1 - a/2)*se\n+  fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit\n+  fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit\n+  CI <- c(fishL, fishH)\n+  names(CI) <- c(\'lower\', \'upper\')\n+  list(correlation = r, p.value = pvalue, CI = CI)\n+}\n+\n # vim: sw=2 ts=2 et :\n'