changeset 14:90708fdbc22d draft default tip

"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"
author eschen42
date Wed, 18 Nov 2020 18:53:37 +0000 (2020-11-18)
parents 2ae2d26e3270
children
files LICENSE README w4mcorcov.xml w4mcorcov_calc.R w4mcorcov_input.R w4mcorcov_lib.R w4mcorcov_output.R w4mcorcov_salience.R w4mcorcov_util.R w4mcorcov_wrapper.R
diffstat 3 files changed, 157 insertions(+), 87 deletions(-) [+]
line wrap: on
line diff
--- a/w4mcorcov.xml	Wed Dec 12 09:20:02 2018 -0500
+++ b/w4mcorcov.xml	Wed Nov 18 18:53:37 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.17">
+<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.18">
     <description>OPLS-DA Contrasts of Univariate Results</description>
     <macros>
         <xml name="paramPairSigFeatOnly">
@@ -25,7 +25,7 @@
     <requirements>
         <requirement type="package">r-base</requirement>
         <requirement type="package">r-batch</requirement>
-        <requirement type="package" version="1.10.0">bioconductor-ropls</requirement>
+        <requirement type="package" version="1.22.0">bioconductor-ropls</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
     Rscript '$__tool_directory__/w4mcorcov_wrapper.R'
@@ -57,6 +57,7 @@
         contrast_detail '$contrast_detail'
         contrast_corcov '$contrast_corcov'
         contrast_salience '$contrast_salience'
+        min_crossval_i '$min_crossval_i'
     ]]></command>
     <inputs>
         <param name="dataMatrix_in" format="tabular" label="Data matrix file" type="data"
@@ -135,6 +136,9 @@
         <param name="labelFeatures" type="text" value="3"
             label="How many features having extreme loadings should be labelled on cov-vs.-cor plot?"
             help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."/>
+        <param name="min_crossval_i" type="text" value="7"
+            label="Minimum number of samples for OPLS-DA cross-validation."
+            help="What is the minimum number of samples to be used by the ropls package for cross-validation of OPLS-DA predictions?  This should be no more than half the number of your samples."/>
         <conditional name="advanced">
             <param name="advancedFeatures" type="select" 
                 label="Advanced (C-plots and customized p-value adjustment)"
@@ -179,7 +183,121 @@
     <data name="contrast_salience" format="tabular" label="${tool.name}_${variableMetadata_in.name}_salience" />
   </outputs>
   <tests>
-    <!-- test #1 -->
+    <!-- test #1 - issue 14 -->
+    <test>
+      <param name="dataMatrix_in" value="issue14_input_dataMatrix.tsv"/>
+      <param name="sampleMetadata_in" value="issue14_input_sampleMetadata.tsv"/>
+      <param name="variableMetadata_in" value="issue14_input_variableMetadata.tsv"/>
+      <param name="tesC" value="none"/>
+      <param name="facC" value="tissue_flowering"/>
+      <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="4"/>
+      <param name="fdr_features" value="ALL"/>
+      <param name="levCSV" value="*"/>
+      <param name="matchingC" value="wildcard"/>
+      <output name="contrast_corcov">
+        <assert_contents>
+          <!-- column-labels line -->
+          <has_text text="featureID" />
+          <has_text text="factorLevel1" />
+          <has_text text="factorLevel2" />
+          <has_text text="correlation" />
+          <has_text text="covariance" />
+          <has_text text="vip4p" />
+          <has_text text="vip4o" />
+          <!-- a matched line -->
+          <has_text text="NM516T283_1" />
+          <has_text text="flower_yes" />
+          <has_text text="leaf_no" />
+          <has_text text="-0.98475578586" />
+          <has_text text="-58.1219648" />
+          <has_text text="2.0103501" />
+          <has_text text="2.872672881" />
+          <has_text text="-0.1208407903" />
+          <has_text text="-0.2032249" />
+          <has_text text="-0.9857575" />
+          <has_text text="-0.983684189899" />
+          <has_text text="516.080116" />
+          <has_text text="282.50076" />
+        </assert_contents>
+      </output>
+    </test>
+    <!-- test #2 - issue 6 -->
+    <test>
+      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
+      <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/>
+      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
+      <param name="tesC" value="none"/>
+      <param name="facC" value="k._10"/>
+      <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
+      <param name="fdr_features" value="ALL"/>
+      <param name="levCSV" value="k_3,k-4"/>
+      <param name="matchingC" value="none"/>
+      <output name="contrast_corcov">
+        <assert_contents>
+          <!-- column-labels line -->
+          <has_text text="featureID" />
+          <has_text text="factorLevel1" />
+          <has_text text="factorLevel2" />
+          <has_text text="correlation" />
+          <has_text text="covariance" />
+          <has_text text="vip4p" />
+          <has_text text="vip4o" />
+          <!-- first matched line -->
+          <has_text text="M349.2383T700" />
+          <has_text text="-0.1221966" />
+          <has_text text="-917311734" />
+          <has_text text="0.0304592" />
+          <has_text text="0.104748883" />
+          <has_text text="-0.002736415" />
+          <has_text text="-0.0113968" />
+          <has_text text="0.387723" />
+          <has_text text="-0.3812168081" />
+          <has_text text="0.154611878" />
+        </assert_contents>
+      </output>
+    </test>
+    <!-- test #3 - issue 8 -->
+    <test>
+      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
+      <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/>
+      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
+      <param name="tesC" value="none"/>
+      <param name="facC" value="k._10"/>
+      <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
+      <param name="fdr_features" value="ALL"/>
+      <param name="levCSV" value="k_3,k-4"/>
+      <param name="matchingC" value="none"/>
+      <output name="contrast_corcov">
+        <assert_contents>
+          <!-- column-labels line -->
+          <has_text text="featureID" />
+          <has_text text="factorLevel1" />
+          <has_text text="factorLevel2" />
+          <has_text text="correlation" />
+          <has_text text="covariance" />
+          <has_text text="vip4p" />
+          <has_text text="vip4o" />
+          <!-- k1 rejected by levCSV, leaving only k_3 and k-4 -->
+          <not_has_text text="k1" />
+          <not_has_text text="other" />
+          <!-- first matched line -->
+          <has_text text="M200.005T296" />
+          <has_text text="-0.1829149760" />
+          <has_text text="-115723402" />
+          <has_text text="0.0892595" />
+          <has_text text="0.00492288" />
+          <has_text text="-0.00801895" />
+          <has_text text="0.0005356178" />
+          <has_text text="0.1848186" />
+          <has_text text="-0.428802311" />
+          <has_text text="0.0882045811" />
+        </assert_contents>
+      </output>
+    </test>
+    <!-- test #4 -->
     <test>
       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
@@ -188,6 +306,7 @@
       <param name="facC" value="k10"/>
       <param name="pairSigFeatOnly" value="FALSE"/>
       <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
       <param name="fdr_features" value="250"/>
       <param name="levCSV" value="k[12],k[3-4]"/>
       <param name="matchingC" value="regex"/>
@@ -241,7 +360,7 @@
         </assert_contents>
       </output>
     </test>
-    <!-- test #2 -->
+    <!-- test #5 -->
     <test>
       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
@@ -250,6 +369,7 @@
       <param name="facC" value="k10"/>
       <param name="pairSigFeatOnly" value="TRUE"/>
       <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
       <param name="fdr_features" value="ALL"/>
       <param name="levCSV" value="k[12],k[3-4]"/>
       <param name="matchingC" value="regex"/>
@@ -286,7 +406,7 @@
         </assert_contents>
       </output>
     </test>
-    <!-- test #3 -->
+    <!-- test #6 -->
     <test>
       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
@@ -294,6 +414,7 @@
       <param name="tesC" value="none"/>
       <param name="facC" value="k10"/>
       <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
       <param name="fdr_features" value="ALL"/>
       <param name="levCSV" value="k[12],k[3-4]"/>
       <param name="matchingC" value="regex"/>
@@ -335,7 +456,7 @@
         </assert_contents>
       </output>
     </test>
-    <!-- test #4 -->
+    <!-- test #7 -->
     <test>
       <param name="dataMatrix_in" value="issue1_input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="issue1_input_sampleMetadata.tsv"/>
@@ -343,6 +464,7 @@
       <param name="tesC" value="none"/>
       <param name="facC" value="tissue_flowering"/>
       <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
       <param name="fdr_features" value="ALL"/>
       <param name="levCSV" value="*"/>
       <param name="matchingC" value="wildcard"/>
@@ -386,7 +508,7 @@
         </assert_contents>
       </output>
     </test>
-    <!-- test #5 -->
+    <!-- test #8 -->
     <test>
       <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
       <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/>
@@ -394,6 +516,7 @@
       <param name="tesC" value="none"/>
       <param name="facC" value="k._10"/>
       <param name="labelFeatures" value="3"/>
+      <param name="min_crossval_i" value="7"/>
       <param name="fdr_features" value="ALL"/>
       <param name="levCSV" value="k1,k.2"/>
       <param name="matchingC" value="none"/>
@@ -422,79 +545,6 @@
         </assert_contents>
       </output>
     </test>
-    <!-- test #6 - issue 6 -->
-    <test>
-      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
-      <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/>
-      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
-      <param name="tesC" value="none"/>
-      <param name="facC" value="k._10"/>
-      <param name="labelFeatures" value="3"/>
-      <param name="fdr_features" value="ALL"/>
-      <param name="levCSV" value="k_3,k-4"/>
-      <param name="matchingC" value="none"/>
-      <output name="contrast_corcov">
-        <assert_contents>
-          <!-- column-labels line -->
-          <has_text text="featureID" />
-          <has_text text="factorLevel1" />
-          <has_text text="factorLevel2" />
-          <has_text text="correlation" />
-          <has_text text="covariance" />
-          <has_text text="vip4p" />
-          <has_text text="vip4o" />
-          <!-- first matched line -->
-          <has_text text="M349.2383T700" />
-          <has_text text="-0.1221966" />
-          <has_text text="-917311734" />
-          <has_text text="0.0304592" />
-          <has_text text="0.104748883" />
-          <has_text text="-0.002736415" />
-          <has_text text="-0.0113968" />
-          <has_text text="0.387723" />
-          <has_text text="-0.3812168081" />
-          <has_text text="0.154611878" />
-        </assert_contents>
-      </output>
-    </test>
-    <!-- test #7 - issue 8 -->
-    <test>
-      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
-      <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/>
-      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
-      <param name="tesC" value="none"/>
-      <param name="facC" value="k._10"/>
-      <param name="labelFeatures" value="3"/>
-      <param name="fdr_features" value="ALL"/>
-      <param name="levCSV" value="k_3,k-4"/>
-      <param name="matchingC" value="none"/>
-      <output name="contrast_corcov">
-        <assert_contents>
-          <!-- column-labels line -->
-          <has_text text="featureID" />
-          <has_text text="factorLevel1" />
-          <has_text text="factorLevel2" />
-          <has_text text="correlation" />
-          <has_text text="covariance" />
-          <has_text text="vip4p" />
-          <has_text text="vip4o" />
-          <!-- k1 rejected by levCSV, leaving only k_3 and k-4 -->
-          <not_has_text text="k1" />
-          <not_has_text text="other" />
-          <!-- first matched line -->
-          <has_text text="M200.005T296" />
-          <has_text text="-0.1829149760" />
-          <has_text text="-115723402" />
-          <has_text text="0.0892595" />
-          <has_text text="0.00492288" />
-          <has_text text="-0.00801895" />
-          <has_text text="0.0005356178" />
-          <has_text text="0.1848186" />
-          <has_text text="-0.428802311" />
-          <has_text text="0.0882045811" />
-        </assert_contents>
-      </output>
-    </test>
   </tests>
   <help><![CDATA[
 
--- a/w4mcorcov_calc.R	Wed Dec 12 09:20:02 2018 -0500
+++ b/w4mcorcov_calc.R	Wed Nov 18 18:53:37 2020 +0000
@@ -22,8 +22,8 @@
       , algoC  = x_algorithm
       , predI  = 1
       , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
-      , printL = FALSE
-      , plotL  = FALSE
+      , fig.pdfC = 'none'
+      , info.txtC  = 'none'
       , crossvalI = x_crossval_i
       , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
       )
@@ -443,6 +443,7 @@
   # matchingC is one of { "none", "wildcard", "regex" }
   matchingC <- calc_env$matchingC
   labelFeatures <- calc_env$labelFeatures
+  minCrossvalI <- as.integer(calc_env$min_crossval_i)
 
   # arg/env checking
   if (!(facC %in% names(smpl_metadata))) {
@@ -600,7 +601,7 @@
                           }
         , x_show_labels = labelFeatures
         , x_progress    = progress_action
-        , x_crossval_i  = min(7, length(chosen_samples))
+        , x_crossval_i  = min(minCrossvalI, length(chosen_samples))
         , x_env         = calc_env
         )
         if ( is.null(my_cor_cov) ) {
@@ -674,7 +675,7 @@
                               }
             , x_show_labels = labelFeatures
             , x_progress    = progress_action
-            , x_crossval_i  = min(7, length(chosen_samples))
+            , x_crossval_i  = min(minCrossvalI, length(chosen_samples))
             , x_env         = calc_env
             )
             if ( is.null(my_cor_cov) ) {
@@ -753,7 +754,7 @@
                 , x_prefix      = "Features"
                 , x_show_labels = labelFeatures
                 , x_progress    = progress_action
-                , x_crossval_i  = min(7, length(chosen_samples))
+                , x_crossval_i  = min(minCrossvalI, length(chosen_samples))
                 , x_env         = calc_env
                 )
                 if ( is.null(my_cor_cov) ) {
@@ -806,7 +807,7 @@
               , x_prefix      = "Features"
               , x_show_labels = labelFeatures
               , x_progress    = progress_action
-              , x_crossval_i  = min(7, length(chosen_samples))
+              , x_crossval_i  = min(minCrossvalI, length(chosen_samples))
               , x_env         = calc_env
               )
               if ( is.null(my_cor_cov) ) {
--- a/w4mcorcov_wrapper.R	Wed Dec 12 09:20:02 2018 -0500
+++ b/w4mcorcov_wrapper.R	Wed Nov 18 18:53:37 2020 +0000
@@ -4,7 +4,7 @@
 #   - w4mcorcov_lib.R
 #   - w4mcorcov_input.R
 #   - w4mcorcov_calc.R
-
+options(warn=1)
 ## constants
 ##----------
 
@@ -108,6 +108,7 @@
 my_env$levCSV             <- as.character(argVc["levCSV"])
 my_env$matchingC          <- as.character(argVc["matchingC"])
 my_env$labelFeatures      <- as.character(argVc["labelFeatures"]) # number of features to label at each extreme of the loadings or 'ALL'
+my_env$min_crossval_i     <- as.character(argVc["min_crossval_i"]) # Minimum number of samples for OPLS-DA cross-validation
 my_env$fdr_features       <- as.character(argVc["fdr_features"]) # number of features to consider when adjusting p-value, or 'ALL'
 my_env$cplot_o            <- as.logical(argVc["cplot_o"]) # TRUE if orthogonal C-plot is requested
 my_env$cplot_p            <- as.logical(argVc["cplot_p"]) # TRUE if parallel C-plot is requested
@@ -131,6 +132,24 @@
   quit(save = "no", status = 10, runLast = TRUE)
 }
 
+# Validate min_crossval_i before any computation: it must be exactly one
+#   finite, non-negative number; otherwise log, echo the value, and exit 10.
+min_crossval_i <- my_env$min_crossval_i
+# Test NULL/length first: is.na(NULL) is logical(0), which `if` cannot evaluate.
+crossval_check <- !is.null(min_crossval_i) && length(min_crossval_i) == 1
+if ( crossval_check ) {
+  # as.numeric() warns when text such as "abc" is coerced to NA; silence that
+  #   expected warning because the NA is handled on the next line.
+  crossval_num <- suppressWarnings(as.numeric(min_crossval_i))
+  # is.finite() is FALSE for NA, NaN, and +/-Inf, so all of them are rejected.
+  crossval_check <- is.finite(crossval_num) && crossval_num >= 0
+}
+if ( !crossval_check ) {
+  my_log("invalid argument: min_crossval_i")
+  print(min_crossval_i)
+  quit(save = "no", status = 10, runLast = TRUE)
+}
+
 corcov_tsv_colnames <- TRUE
 corcov_tsv_append   <- FALSE
 corcov_tsv_action <- function(tsv) {