Repository 'withinvariation'
hg clone https://toolshed.g2.bx.psu.edu/repos/yguitton/withinvariation

Changeset 0:5086ad0c0992 (2017-05-05)
Commit message:
Uploaded v0.4
added:
withinvariation-26603602a823/mixomics_multilevel.r
withinvariation-26603602a823/mixomics_multilevel.xml
withinvariation-26603602a823/test-data/dataMatrix.csv
withinvariation-26603602a823/test-data/dataMatrix_out.tsv
withinvariation-26603602a823/test-data/dataMatrix_out_log10.tsv
withinvariation-26603602a823/test-data/sampleMetadata.csv
withinvariation-26603602a823/test-data/variableMetadata.csv
withinvariation-26603602a823/transformation_script.R
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/mixomics_multilevel.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/mixomics_multilevel.r Fri May 05 05:04:36 2017 -0400
[
@@ -0,0 +1,210 @@
+#!/usr/bin/env Rscript
+
+###############################################################################
+#
+# mixOmics multilevel function
+#
+# This script is written specifically for the mixOmics web-interface
+# using the Galaxy system.
+#
+# R-Package: mixOmics
+#
+# Version: 1.2.3
+#
+# Author (wrapper): Xin-Yi Chua (xinyi.chua@qfab.org)
+# Author (mixOmics.multilevel): Benoit Liquet, Kim-Anh Le Cao
+# Author (wrapper & .r adaptation for workflow4metabolomics.org): Yann GUITTON
+# 
+# Expected parameters from the commandline
+# input files:
+#             dataMatrix
+#             sampleMetadata
+# params:
+#             respL (respL for one level & respL1 & respL2 for 2 levels)
+#             trans (need log2 or log10 transformation made before withinVar)
+#             scaling
+#             centering
+# output files:
+#             dataMatrix_out (after withinVariation correction )
+#             result (Robject) 
+################################################################################
+
+# Redirect everything written to stdout to the log file "multilevel.log" (opened for writing, text mode)
+log_file=file("multilevel.log", open = "wt")
+sink(log_file)
+sink(log_file, type = "output") # NOTE(review): "output" is the default type of sink(), so stdout is diverted to the same connection a second time; confirm whether type = "message" was intended
+
+# Set the rgl.useNULL option before the packages are attached (original intent: remove the rgl warning)
+options(rgl.useNULL = TRUE)
+
+# ----- PACKAGE ----- attach each required package (abort if one cannot be loaded) and log its name and version
+cat("\tPACKAGE INFO\n")
+
+pkgs=c("mixOmics","batch","pcaMethods")
+for(pkg in pkgs) {
+  suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
+  cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
+}
+
+
+# Source an R file located in the same directory as the running script.
+#
+# The script directory is read from the "--file=" argument found in
+# commandArgs(). When no such argument is present (e.g. the code is run from
+# an interactive session), the current working directory is used instead of
+# building a path rooted at "/".
+#
+# fname: name of the file to source, relative to the script directory.
+source_local <- function(fname) {
+    argv <- commandArgs(trailingOnly = FALSE)
+    # anchored match: only a real "--file=<path>" argument is considered
+    file_arg <- grep("^--file=", argv, value = TRUE)
+    base_dir <- if (length(file_arg) > 0) {
+        dirname(sub("^--file=", "", file_arg[1]))
+    } else {
+        "."
+    }
+    source(file.path(base_dir, fname))
+}
+
+
+
+#load transformation function
+source_local("transformation_script.R")
+# source("transformation_script.R")
+print("first loadings OK")
+
+
+listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
+print(listArguments)
+
+## libraries
+##----------
+
+cat('\n\nRunning mixomics_multilevel.r\n');
+
+options(warn=-1);
+##suppressPackageStartupMessages(library(mixOmics)); #not needed?
+
+
+## constants
+##----------
+
+modNamC <- "Multilevel" ## module name
+
+topEnvC <- environment()
+flgC <- "\n"
+
+## functions
+##----------For manual input of function
+##--end function
+
+## Check a condition given as a string of R code and abort when it is not met
+##
+## tesC: character; R expression (as text) expected to evaluate to TRUE
+## envC: environment in which 'tesC' is evaluated
+## txtC: character; error message used on failure (NA: "<tesC> is FALSE")
+flgF <- function(tesC,
+                 envC = topEnvC,
+                 txtC = NA) {
+
+    failedL <- !eval(parse(text = tesC), envir = envC)
+
+    ## nothing to report when the condition holds
+    if(!failedL)
+        return(invisible(NULL))
+
+    ## stop diverting the output before raising the error
+    sink(NULL)
+
+    if(is.na(txtC)) {
+        stpTxtC <- paste0(tesC, " is FALSE")
+    } else {
+        stpTxtC <- txtC
+    }
+
+    stop(stpTxtC, call. = FALSE)
+
+} ## flgF
+
+
+## log file
+##---------
+
+
+## start-of-run banner written to the log
+startTimeC <- format(Sys.time(), "%a %d %b %Y %X")
+cat("\nStart of the '", modNamC, "' Galaxy module call: ", startTimeC, "\n", sep = "")
+
+
+## arguments
+##----------
+
+## data matrix: read from the tabular file, then transposed so that the
+## names of the first row become the row names of xMN
+rawDF <- read.table(listArguments[["dataMatrix_in"]],
+                    sep = "\t",
+                    header = TRUE,
+                    row.names = 1,
+                    check.names = FALSE)
+xMN <- t(as.matrix(rawDF))
+
+## sample metadata: first column used as row names
+samDF <- read.table(listArguments[["sampleMetadata_in"]],
+                    sep = "\t",
+                    header = TRUE,
+                    row.names = 1,
+                    check.names = FALSE)
+
+## both tables must describe the same samples, in the same order
+flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
+
+## Optional pre-processing of the data matrix, run only when transfo == "go":
+## value transformation with transformF(), then scaling/centering with prep()
+if (listArguments[["transfo"]] == "go") {
+
+    cat("\n Start transformation with trans=", listArguments[["trans"]],
+        " scale=", listArguments[["scale"]],
+        " center=", listArguments[["center"]],
+        "\n", sep = "")
+
+    ## value transformation, skipped when the method is "none"
+    if (listArguments[["trans"]] != "none") {
+        metC <- listArguments[["trans"]]
+        xMN <- transformF(datMN = xMN,
+                          metC = metC)
+    }
+
+    ## the centering flag is compared to the string "true" and replaced by a logical
+    listArguments[["center"]] <- if (listArguments[["center"]] == "true") TRUE else FALSE
+
+    xMN <- prep(xMN,
+                scale = listArguments[["scale"]],
+                center = listArguments[["center"]])
+}
+
+
+##end tranformation
+
+## Extraction of the within-subject variation with withinVariation().
+## The design passed to withinVariation() is made of the repeated-measurement
+## column followed by the level column(s). Every column name is checked against
+## the sample metadata first, so that a wrong name gives a clear message.
+flgF("(listArguments[['repmeasure']] %in% colnames(samDF))", txtC = paste("Repeated measurement argument (",listArguments[['repmeasure']],") must be one of the column names (first row) of your sample metadata", sep = ""))
+
+if (listArguments[["respL2"]]!="NULL"){
+  cat("\n\nMultilevel (two levels)\n");
+  ## both level columns are used to build the design, so both must exist
+  ## (a '||' test would pass when only one of the two names is valid)
+  flgF("((listArguments[['respL1']] %in% colnames(samDF)) && (listArguments[['respL2']] %in% colnames(samDF)))", txtC = paste("Level argument (",listArguments[['respL2']]," ,",listArguments[['respL1']], ") must be one of the column names (first row) of your sample metadata", sep = ""))
+
+  tryCatch({
+    result <- withinVariation(xMN, design=samDF[,c(listArguments[["repmeasure"]],listArguments[["respL1"]],listArguments[["respL2"]])]);
+  }, error = function(err) {
+    ## report the message of the condition only (pasting the condition object
+    ## itself also pastes its call and duplicates the prefix)
+    stop(paste("There was an error when trying to run the Multilevel (two levels) function.\n\n",conditionMessage(err)));
+  });
+} else {
+  cat("\n\nMultilevel (one level)\n");
+  flgF("(listArguments[['respL']] %in% colnames(samDF))", txtC = paste("Level argument (",listArguments[['respL']],") must be one of the column names (first row) of your sample metadata", sep = ""))
+
+  tryCatch({
+    result <- withinVariation(xMN, design=samDF[,c(listArguments[["repmeasure"]], listArguments[["respL"]])]);
+  }, error = function(err) {
+    stop(paste("There was an error when trying to run the Multilevel (one level) function.\n\n",conditionMessage(err)));
+  });
+}
+
+
+## saving the results
+##-------------------
+
+if (exists("result")) {
+
+    cat("\n\nWriting output files\n\n")
+
+    ## 'result' is transposed before writing; the column names of xMN are
+    ## stored in a first column named 'dataMatrix'
+    datDF <- cbind.data.frame(dataMatrix = colnames(xMN),
+                              as.data.frame(t(result)))
+
+    write.table(datDF,
+                file = "dataMatrix_out.tsv",
+                sep = "\t",
+                quote = FALSE,
+                row.names = FALSE)
+
+    ## the R object is saved as well; a warning is only printed, an error aborts
+    tryCatch({
+        save(result, file = "multilevel.RData")
+    }, warning = function(w) {
+        print(paste("Warning: ", w))
+    }, error = function(err) {
+        stop(paste("ERROR saving result RData object:", err))
+    })
+}
+
+## ending
+##-------
+
+endTimeC <- format(Sys.time(), "%a %d %b %Y %X")
+cat("\nEnd of the '", modNamC, "' Galaxy module call: ", endTimeC, "\n", sep = "")
+
+## remove the most recent output diversion
+sink()
+
+# options(stringsAsFactors = strAsFacL)
+
+
+## clear every object created by the script
+rm(list = ls())
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/mixomics_multilevel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/mixomics_multilevel.xml Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,155 @@
+<tool id="mixomics_multilevel" name="Multilevel" version="0.5.0">
+
+   <description>Data transformation: Within matrix decomposition for repeated measurements (cross-over design) with mixOmics package</description>
+    
+    <requirements>
+        <requirement type="package" version="6.1.1">r-mixomics</requirement>
+     <requirement type="package" version="1.1_4">r-batch</requirement>
+     <requirement type="package" version="1.64.0">bioconductor-pcamethods</requirement>
+    </requirements>
+    
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+  
+  <command>
+   Rscript $__tool_directory__/mixomics_multilevel.r
+   
+   dataMatrix_in "$dataMatrix_in"
+   sampleMetadata_in "$sampleMetadata_in"
+   repmeasure "$repmeasure"
+   #if $transformation.option_transformation == "false"
+ transfo "none"
+   #end if
+   #if $nblevel.option_level == "onelevel"
+    respL "$nblevel.respL"
+    respL2 "NULL"
+   #end if
+   #if $nblevel.option_level == "twolevel"
+    respL1 "$nblevel.respL1"
+    respL2 "$nblevel.respL2"
+   #end if
+   #if $transformation.option_transformation == "true"
+    transfo "go"
+    trans "$transformation.trans"
+    scale "$transformation.scale"
+    center "$transformation.center"
+   #end if
+   
+  </command>
+  <inputs>
+    <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, sep: tabular" />
+    <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular" />
+    <param name="repmeasure" label="Repeated measurement label (Individual IDs, ...)" type="text" value="none" help="Indicate the column name of the sample table to be used as repeated measurement factor" />
+    
+
+    <conditional name="nblevel">
+      <param name="option_level" type="select" label="Select number of levels1">
+        <option value="onelevel" selected="True">One Level</option>
+        <option value="twolevel">Two levels</option>
+      </param>
+      <when value="onelevel">
+         <param name="respL" label="Level name (as in sampleMetadata)" type="text" value = "none" help="Indicate the column name of the sample table to be used as factor" />
+      </when>
+      <when value="twolevel">
+   <param name="respL1" label="First level name (as in sampleMetadata)" type="text" value = "none" help="Indicate the column name of the sample table to be used as first factor" />
+          <param name="respL2" label="Second level name (as in sampleMetadata)" type="text" value = "none" help="Indicate the column name of the sample table to be used as second factor" />
+   </when>
+    </conditional>
+ <conditional name="transformation">
+      <param name="option_transformation" type="boolean" label="Add transformation to dataMatrix before withinVariation" />
+          <when value="false">
+          </when>
+      <when value="true">
+          <param name="trans" label="Transformation" type="select" help="" >
+      <option value="none">none</option>
+      <option value="log2">log2</option>
+             <option value="log10">log10</option>
+             <option value="sqrt">square root</option>
+   </param>  
+          <param name="scale" label="Scaling" type="select" help="" >
+      <option value="none">none</option>
+      <option value="pareto">pareto</option>
+             <option value="vector">vector</option>
+             <option value="uv">UV</option>
+   </param>
+   <param name="center" label="Centering" type="boolean" />
+      </when>
+    </conditional>
+  
+   
+  </inputs>
+
+  <outputs>
+    <data name="dataMatrix_out" format="tabular" from_work_dir="dataMatrix_out.tsv" label="dataMatrix_out.tsv"  ></data>
+    <data name="multilevelRData" format="rdata" from_work_dir="multilevel.RData" label="multilevel.RData" />
+    <data name="log" format="txt" from_work_dir="multilevel.log" label="multilevel.log.txt" />
+  </outputs>

+  <tests>
+        <test>
+            <param name="dataMatrix_in" value="dataMatrix.csv"  ftype="tabular" />
+            <param name="sampleMetadata_in" value="sampleMetadata.csv" ftype="tabular" />
+ <param name="repmeasure" value="Subject" />
+            <param name="nblevel|option_level" value="onelevel" />
+            <param name="respL" value="Occasion" />
+            <output name="dataMatrix_out" file="dataMatrix_out.tsv" lines_diff="2" />
+        </test>
+     <test>
+            <param name="dataMatrix_in" value="dataMatrix.csv"  ftype="tabular" />
+            <param name="sampleMetadata_in" value="sampleMetadata.csv" ftype="tabular" />
+ <param name="repmeasure" value="Subject" />
+            <param name="nblevel|option_level" value="onelevel" />
+            <param name="respL" value="Occasion" />
+ <param name="transformation|option_transformation" value="true" />
+ <param name="transfo" value="go" />
+ <param name="trans" value="log10" />
+ <param name="scale" value="pareto" />
+ <param name="center" value="false" />
+            <output name="dataMatrix_out" file="dataMatrix_out_log10.tsv" lines_diff="2" />
+        </test>
+     
+  </tests>
+  
+
+  <help>
+
+.. class:: infomark
+
+**Author(s)**  Benoit Liquet, Kim-Anh Le Cao, Benoit Gautier, Ignacio Gonzalez.
+
+.. class:: infomark
+
+**Galaxy wrapper and scripts developers for W4M integration** Guitton Yann LABERCA yann.guitton@oniris-nantes.fr
+
+===========
+Description
+===========
+
+The withinVariation function decomposes the within variation in the dataMatrix (one- or two-factor analyses are available). The resulting matrix can then be used as input to any multivariate analysis.
+
+withinVariation simply returns the Xw within matrix, which can be input in the other multivariate approaches already implemented in mixOmics 
+
+
+This tool makes use of the withinVariation function for cross-over design experiments (repeated measurements) from the mixOmics R package packurl_
+
+.. _packurl: https://CRAN.R-project.org/package=mixOmics
+
+For detailed information about mixOmics, please visit link_
+
+.. _link: http://mixomics.org/
+
+**Please cite:**
+Kim-Anh Le Cao, Florian Rohart, Ignacio Gonzalez, Sebastien Dejean with key contributors Benoit Gautier, Francois Bartolo, 
+contributions from Pierre Monget, Jeff Coquery, FangZou Yao and Benoit Liquet(2016). 
+mixOmics: Omics Data Integration Project. R package version 6.1.1. 

+  </help>
+  
+    <citations>
+ <citation type="doi">10.1186/1471-2105-13-325</citation>
+ <citation type="doi">10.1007/s11306-009-0185-z</citation>
+        <citation type="doi">10.1093/bioinformatics/btu813</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/test-data/dataMatrix.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/test-data/dataMatrix.csv Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,4 @@
+name c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
+A 20 18 16 14 10 9 7 7 3 2 21 21 17 17 11 12 8 10 4 5
+B 10 12 15 16 2 3 7 7 9 9 12 14 17 18 4 5 9 9 11 11
+C 20 17 14 11 8 5 2 8 14 17 20 17 14 11 8 5 2 8 14 17
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/test-data/dataMatrix_out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/test-data/dataMatrix_out.tsv Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,4 @@
+dataMatrix c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
+A -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 0.5 1.5 0.5 1.5 0.5 1.5 0.5 1.5 0.5 1.5
+B -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1
+C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/test-data/dataMatrix_out_log10.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/test-data/dataMatrix_out_log10.tsv Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,4 @@
+dataMatrix c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10
+A -0.020092280840537 -0.0633189420824745 -0.0246870704491013 -0.0787458016166358 -0.0375807488225819 -0.113316739305819 -0.0508712174396608 -0.137542124890243 -0.0963770720961892 -0.299374529985989 0.020092280840537 0.0633189420824749 0.0246870704491013 0.0787458016166362 0.0375807488225823 0.113316739305819 0.0508712174396608 0.137542124890243 0.0963770720961892 0.299374529985989
+B -0.0777778824969806 -0.0666256118450792 -0.0548380192312634 -0.0517849916432631 -0.237832767883695 -0.188778488108502 -0.103892299006456 -0.103892299006456 -0.084886189102046 -0.084886189102046 0.0777778824969806 0.0666256118450788 0.0548380192312639 0.0517849916432631 0.237832767883695 0.188778488108502 0.103892299006457 0.103892299006457 0.084886189102046 0.084886189102046
+C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/test-data/sampleMetadata.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/test-data/sampleMetadata.csv Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,21 @@
+sampleMetadata Occasion Subject
+c1 Control 1
+c2 Control 2
+c3 Control 3
+c4 Control 4
+c5 Control 5
+c6 Control 6
+c7 Control 7
+c8 Control 8
+c9 Control 9
+c10 Control 10
+t1 Treatment 1
+t2 Treatment 2
+t3 Treatment 3
+t4 Treatment 4
+t5 Treatment 5
+t6 Treatment 6
+t7 Treatment 7
+t8 Treatment 8
+t9 Treatment 9
+t10 Treatment 10
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/test-data/variableMetadata.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/test-data/variableMetadata.csv Fri May 05 05:04:36 2017 -0400
b
@@ -0,0 +1,4 @@
+name
+A
+B
+C
b
diff -r 000000000000 -r 5086ad0c0992 withinvariation-26603602a823/transformation_script.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/withinvariation-26603602a823/transformation_script.R Fri May 05 05:04:36 2017 -0400
[
@@ -0,0 +1,73 @@
+## Etienne Thevenot
+## W4M Core Development Team
+## etienne.thevenot@cea.fr
+## 2015-04-25
+
+## Transform the values of a data matrix (log2, log10 or square root)
+##
+## datMN: numeric matrix without negative values (NA allowed)
+## metC: character; transformation method: "log2", "log10" or "sqrt"
+##
+## Returns the transformed matrix: log2(1 + datMN), log10(1 + datMN) or
+## sqrt(datMN). The numbers of missing and zero values are written with cat().
+## An error is raised when 'datMN' contains negative values or when 'metC' is
+## not one of the supported methods.
+transformF <- function(datMN,
+                       metC) {
+
+    ## options (restored when the function exits, including on error)
+
+    optStrAsFacL <- options(stringsAsFactors = FALSE)
+    on.exit(options(optStrAsFacL), add = TRUE)
+
+    ## write the message, remove the output diversion (if any) and abort
+    abortF <- function(msgC) {
+        cat("\n", msgC, "\n", sep = "")
+        if(sink.number() > 0)
+            sink()
+        stop(msgC, call. = FALSE)
+    }
+
+    ## checking
+
+    if(any(datMN < 0, na.rm = TRUE))
+        abortF("The 'dataMatrix' contains negative values")
+
+    ## total number of values, used for the percentages below
+    totN <- prod(dim(datMN))
+
+    ## Number of missing values
+    nasN <- sum(is.na(datMN))
+    cat("\nMissing values in the 'dataMatrix': ",
+        nasN,
+        " (",
+        round(nasN / totN * 100),
+        "%)\n",
+        sep="")
+
+    ## Number of zero values
+    zerN <- sum(datMN == 0, na.rm = TRUE)
+    cat("\nZero values in the 'dataMatrix': ",
+        zerN,
+        " (",
+        round(zerN / totN * 100),
+        "%)\n",
+        sep="")
+
+    ## transformation
+    ## (the last, unnamed alternative is reached for any unsupported method,
+    ## which would otherwise leave the result undefined)
+
+    trfMN <- switch(metC,
+                    log2 = {
+
+                        cat("\n'log2' transformation\n", sep="")
+
+                        log2(1 + datMN)
+
+                    },
+                    log10 = {
+
+                        cat("\n'log10' transformation\n", sep="")
+
+                        log10(1 + datMN)
+
+                    },
+                    sqrt = {
+
+                        cat("\n'Square root' transformation\n", sep="")
+
+                        sqrt(datMN)
+
+                    },
+                    abortF(paste0("Unknown transformation method '", metC,
+                                  "': must be 'log2', 'log10' or 'sqrt'"))) ## end of method
+
+
+    ## returning
+
+    return(trfMN)
+
+} ## end of transformF
\ No newline at end of file