changeset 7:066b1f409e9f draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e73fabe1b3c871abbcb2e89914c181149c8e2066
author eschen42
date Thu, 26 Jul 2018 10:23:34 -0400
parents 7bd523ca1f9a
children 342570ad880c
files w4mcorcov.xml w4mcorcov_calc.R w4mcorcov_input.R
diffstat 3 files changed, 83 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/w4mcorcov.xml	Wed Jul 18 12:35:55 2018 -0400
+++ b/w4mcorcov.xml	Thu Jul 26 10:23:34 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.10">
+<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.11">
 
   <description>OPLS-DA Contrasts of Univariate Results</description>
 
@@ -202,22 +202,16 @@
           <has_text text="level1Level2Sig" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="-0.04051509" />
-          <has_text text="-0.001964912" />
-          <has_text text="0.02106343" />
-          <has_text text="0.2446366813" />
+          <has_text text="-0.3704185" />
+          <has_text text="-36.6668927" />
+          <has_text text="0.4914638" />
+          <has_text text="0.01302117" />
           <!-- second matched line -->
           <has_text text="M207.9308T206" />
-          <has_text text="0.504885262" />
-          <has_text text="0.020749097" />
+          <has_text text="0.3235022" />
+          <has_text text="5.97529097" />
           <has_text text="0.207196379" />
           <has_text text="0.04438632" />
-          <!-- third matched line -->
-          <has_text text="M211.0607T263" />
-          <has_text text="0.0680900" />
-          <has_text text="0.0020163" />
-          <has_text text="0.0201345" />
-          <has_text text="0.0690773" />
         </assert_contents>
       </output>
       <output name="contrast_salience">
@@ -266,8 +260,8 @@
           <has_text text="level1Level2Sig" />
           <!-- first matched line -->
           <has_text text="M200.005T296" />
-          <has_text text="-0.2803571" />
-          <has_text text="-0.0115899" />
+          <has_text text="-0.24533821" />
+          <has_text text="-3.3573953" />
           <has_text text="0.1157346" />
           <has_text text="0.0647860" />
         </assert_contents>
@@ -316,14 +310,14 @@
           <has_text text="vip4o" />
           <!-- first matched line -->
           <has_text text="M349.2383T700" />
-          <has_text text="-0.4732226" />
-          <has_text text="-0.0506172" />
+          <has_text text="-0.37867079" />
+          <has_text text="-37.71066" />
           <has_text text="0.5246766" />
           <has_text text="0.0103341" />
           <!-- second matched line -->
           <has_text text="M207.9308T206" />
-          <has_text text="0.4927151" />
-          <has_text text="0.0203715" />
+          <has_text text="0.31570433" />
+          <has_text text="5.86655640" />
           <has_text text="0.2111623" />
           <has_text text="0.0488654" />
         </assert_contents>
@@ -374,12 +368,12 @@
           <has_text text="NM516T251" />
           <has_text text="flower_yes" />
           <has_text text="other" />
-          <has_text text="0.3490559" />
-          <has_text text="0.0260147" />
-          <has_text text="0.4377872" />
-          <has_text text="0.5916089" />
-          <has_text text="0.0260147" />
-          <has_text text="0.0438942" />
+          <has_text text="0.03402807" />
+          <has_text text="0.03526926" />
+          <has_text text="0.43664386" />
+          <has_text text="0.587701897" />
+          <has_text text="0.026082688" />
+          <has_text text="0.0437742145" />
           <has_text text="516.0845" />
           <has_text text="250.8762" />
         </assert_contents>
@@ -734,6 +728,10 @@
 Release notes
 -------------
 
+0.98.11
+
+- bug fix: Readdress issue 2 - features now are pareto-scaled *and centered* per Wikland *op cit.*.
+
 0.98.10
 
 - new feature: C-plots of VIP versus correlation or relative covariance.
--- a/w4mcorcov_calc.R	Wed Jul 18 12:35:55 2018 -0400
+++ b/w4mcorcov_calc.R	Thu Jul 26 10:23:34 2018 -0400
@@ -33,9 +33,15 @@
       , printL = FALSE
       , plotL  = FALSE
       , crossvalI = x_crossval_i
-      , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2.
+      , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
       )
+    # strip out variables having negligible variance
+    x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
     my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
+    # x_progress(strF(x_dataMatrix))
+    # x_progress(strF(my_oplsda))
+    #x_progress(head(my_oplsda_suppLs_y_levels))
+    #x_progress(unique(my_oplsda_suppLs_y_levels))
     fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
     fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
     do_s_plot <- function(
@@ -57,13 +63,14 @@
             matrix_x   = x_dataMatrix
           , ropls_x    = my_oplsda
           , predictor_projection_x = predictor_projection_x
+          , x_progress
           )
       } else {
         my_cor_vs_cov <- cor_vs_cov_x
       }
       # print("str(my_cor_vs_cov)")
       # str(my_cor_vs_cov)
-      if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
+      if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
         x_progress("No cor_vs_cov data produced")
         plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
         text(x=1, y=1, labels="too few covariance data")
@@ -483,12 +490,9 @@
   }
 
   # transpose matrix because ropls matrix is the transpose of XCMS matrix
+  tdm <- t(data_matrix)
   # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
-  # center
-  cdm <- center_colmeans(t(data_matrix))
-  # pareto-scale
-  my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
-  scdm <- sweep(cdm, 2, my_scale, "/")
+  # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2.
 
   # pattern to match column names like k10_kruskal_k4.k3_sig
   col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
@@ -553,7 +557,7 @@
       chosen_samples <- smpl_metadata_facC %in% level_union
       chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
       col_selector <- vrbl_metadata_names[ overall_significant ]
-      my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+      my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
       plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
         progress_action(
           sprintf("calculating/plotting contrast of %s vs. %s"
@@ -632,7 +636,7 @@
           col_selector <- vrbl_metadata_names[
             if ( pairSigFeatOnly ) fully_significant else overall_significant
           ]
-          my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+          my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
           my_cor_cov <- do_detail_plot(
             x_dataMatrix  = my_matrix
           , x_predictor   = predictor
@@ -697,7 +701,7 @@
                   if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
                 }
               )
-              my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+              my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
               # only process this column if both factors are members of lvlCSV
               is_match <- isLevelSelected(fctr_lvl_1)
               my_cor_cov <- do_detail_plot(
@@ -742,7 +746,7 @@
           } else {
             chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
             predictor <- chosen_facC
-            my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+            my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
             # only process this column if both factors are members of lvlCSV
             is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
             my_cor_cov <- do_detail_plot(
@@ -789,8 +793,31 @@
 #     Galindo_Prieto_2014 doi:10.1002/cem.2627
 #     https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
 cor_vs_cov <- function(
-  matrix_x, ropls_x
+  matrix_x
+, ropls_x
 , predictor_projection_x = TRUE
+, x_progress = print
+) {
+  tryCatch({
+    return(
+      cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
+    )
+  }, error = function(e) {
+    x_progress(
+      sprintf(
+        "cor_vs_cov fatal error - %s"
+      , as.character(e) # e$message
+      )
+    )
+    return ( NULL )
+  })
+}
+
+cor_vs_cov_try <- function(
+  matrix_x
+, ropls_x
+, predictor_projection_x = TRUE
+, x_progress = print
 ) {
   x_class <- class(ropls_x)
   if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
@@ -865,7 +892,23 @@
   if (length(result$vip4o) == 0) result$vip4o <- NA
   greaterLevel <- sapply(
     X = result$correlation
-  , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
+  , FUN = function(my_corr)
+      tryCatch({
+          if ( is.nan( my_corr ) ) {
+            print("my_corr is NaN")
+            NA 
+          } else {
+            if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
+          }
+        }, error = function(e) {
+          x_progress(
+            sprintf(
+              "cor_vs_cov -> sapply:  error - substituting NA - %s"
+            , as.character(e)
+            )
+          )
+          NA
+        })
   )
 
   # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
--- a/w4mcorcov_input.R	Wed Jul 18 12:35:55 2018 -0400
+++ b/w4mcorcov_input.R	Thu Jul 26 10:23:34 2018 -0400
@@ -165,6 +165,10 @@
       data_matrix <- as.matrix(data_matrix)
     }
 
+    # Omit any feature not found in variableMetadata and any sample not found in sampleMetadata
+    #   For something more elaborate, see https://github.com/HegemanLab/w4mclassfilter
+    data_matrix <- data_matrix[rownames(data_matrix) %in% rownames(vrbl_metadata),colnames(data_matrix) %in% rownames(smpl_metadata)]
+
     input_env$data_matrix <- data_matrix
     # ...
   } else {