Mercurial > repos > yguitton > withinvariation
changeset 0:5086ad0c0992 draft default tip
Uploaded v0.4
author | yguitton |
---|---|
date | Fri, 05 May 2017 05:04:36 -0400 |
parents | |
children | |
files | withinvariation-26603602a823/mixomics_multilevel.r withinvariation-26603602a823/mixomics_multilevel.xml withinvariation-26603602a823/test-data/dataMatrix.csv withinvariation-26603602a823/test-data/dataMatrix_out.tsv withinvariation-26603602a823/test-data/dataMatrix_out_log10.tsv withinvariation-26603602a823/test-data/sampleMetadata.csv withinvariation-26603602a823/test-data/variableMetadata.csv withinvariation-26603602a823/transformation_script.R |
diffstat | 8 files changed, 475 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env Rscript
## Origin: new file b/withinvariation-26603602a823/mixomics_multilevel.r
## (added Fri May 05 05:04:36 2017 -0400)

###############################################################################
#
# mixOmics multilevel function
#
# This script is written specifically for the mixOmics web-interface
# using the Galaxy system.
#
# R-Package: mixOmics
#
# Version: 1.2.3
#
# Author (wrapper): Xin-Yi Chua (xinyi.chua@qfab.org)
# Author (mixOmics.multilevel): Benoit Liquet, Kim-Anh Le Cao
# Author (wrapper & .r adaptation for workflow4metabolomics.org): Yann GUITTON
#
# Expected parameters from the command line (name/value pairs)
# input files:
#   dataMatrix_in      (variables x samples, tab separated)
#   sampleMetadata_in  (samples x metadata, tab separated)
# params:
#   repmeasure  (sample metadata column holding the repeated-measure IDs)
#   respL       (one level)  or  respL1 & respL2 (two levels);
#               respL2 is passed as "NULL" for a one-level design
#   transfo     ("go" to pre-process the data matrix, anything else to skip)
#   trans       (none, log2, log10 or sqrt; applied before withinVariation)
#   scale       (scaling method forwarded to prep())
#   center      ("true" / "false")
# output files:
#   dataMatrix_out.tsv (after withinVariation correction)
#   multilevel.RData   (R object 'result')
#   multilevel.log
###############################################################################

## Redirect stdout to the log file.
## A single diversion is enough: stacking a second identical one would leave
## stdout still diverted after a single sink() call.
log_file <- file("multilevel.log", open = "wt")
sink(log_file)

## remove rgl warning
options(rgl.useNULL = TRUE)

# ----- PACKAGE -----
cat("\tPACKAGE INFO\n")

pkgs <- c("mixOmics", "batch", "pcaMethods")
for (pkg in pkgs) {
  ## library(logical.return = TRUE) returns FALSE when the package is missing;
  ## stopifnot() turns that into a hard failure.
  suppressPackageStartupMessages(
    stopifnot(library(pkg,
                      quietly = TRUE,
                      logical.return = TRUE,
                      character.only = TRUE)))
  cat(pkg, "\t", as.character(packageVersion(pkg)), "\n", sep = "")
}


## Source a file located in the same directory as this script.
## The directory is recovered from the '--file=' argument set by Rscript.
source_local <- function(fname) {
  argv <- commandArgs(trailingOnly = FALSE)
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  source(file.path(base_dir, fname))
}

## load transformation function (transformF)
source_local("transformation_script.R")
print("first loadings OK")

## interpretation of arguments given in command line as an R list of objects
listArguments <- parseCommandArgs(evaluate = FALSE)
print(listArguments)

cat("\n\nRunning mixomics_multilevel.r\n")

## NOTE(review): this silences every warning for the rest of the run.
options(warn = -1)


## constants
##----------

modNamC <- "Multilevel" ## module name

topEnvC <- environment()


## functions
##----------

## Management of error messages.
## tesC: character string holding an R expression, evaluated in 'envC'.
## txtC: message used when the expression is not TRUE
##       (defaults to "<tesC> is FALSE").
## When the test fails, the stdout diversion is removed and the script stops.
flgF <- function(tesC,
                 envC = topEnvC,
                 txtC = NA) {

  tesL <- eval(parse(text = tesC), envir = envC)

  ## isTRUE() also treats NA and zero-length results (e.g. a missing
  ## argument) as a failed check instead of crashing inside if().
  if (!isTRUE(tesL)) {

    if (sink.number() > 0) {
      sink(NULL)
    }

    stpTxtC <- if (is.na(txtC)) paste0(tesC, " is FALSE") else txtC

    stop(stpTxtC,
         call. = FALSE)

  }

} ## flgF

## Run withinVariation() on 'xMN' with the design columns 'desVc' of 'samDF'.
## lvlTxtC only labels the error message ("one level" / "two levels").
withinF <- function(desVc, lvlTxtC) {
  tryCatch(
    withinVariation(xMN, design = samDF[, desVc]),
    error = function(err) {
      stop(paste0("There was an error when trying to run the Multilevel (",
                  lvlTxtC, ") function.\n\n ", conditionMessage(err)),
           call. = FALSE)
    })
} ## withinF


## log file
##---------

cat("\nStart of the '", modNamC, "' Galaxy module call: ",
    format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "")


## arguments
##----------

## loading files and checks
## the data matrix is read as variables x samples, then transposed
xMN <- t(as.matrix(read.table(listArguments[["dataMatrix_in"]],
                              check.names = FALSE,
                              header = TRUE,
                              row.names = 1,
                              sep = "\t")))

samDF <- read.table(listArguments[["sampleMetadata_in"]],
                    check.names = FALSE,
                    header = TRUE,
                    row.names = 1,
                    sep = "\t")

flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")

## the repeated-measure column is used in every design, so check it up front
flgF("(listArguments[['repmeasure']] %in% colnames(samDF))",
     txtC = paste0("Repeated measurement argument (",
                   listArguments[["repmeasure"]],
                   ") must be one of the column names (first row) of your sample metadata"))


## optional transformation / scaling / centering
##----------------------------------------------

## identical() keeps this safe when 'transfo' is absent from the command line
if (identical(listArguments[["transfo"]], "go")) {

  cat("\n Start transformation with trans=", listArguments[["trans"]],
      " scale=", listArguments[["scale"]],
      " center=", listArguments[["center"]], "\n", sep = "")

  if (listArguments[["trans"]] != "none") {
    xMN <- transformF(datMN = xMN,                      ## dataMatrix
                      metC = listArguments[["trans"]])  ## transformation method
  }

  ## 'center' arrives as the string "true" / "false"
  centerL <- identical(listArguments[["center"]], "true")

  xMN <- prep(xMN,
              scale = listArguments[["scale"]],
              center = centerL)
}


## within-variation decomposition
##-------------------------------

respL2C <- listArguments[["respL2"]]

if (!is.null(respL2C) && respL2C != "NULL") {

  cat("\n\nMultilevel (two levels)\n")

  ## both level columns must exist (the check used to pass when only one did)
  flgF("((listArguments[['respL1']] %in% colnames(samDF)) && (listArguments[['respL2']] %in% colnames(samDF)))",
       txtC = paste0("Level arguments (", listArguments[["respL1"]], ", ",
                     listArguments[["respL2"]],
                     ") must both be column names (first row) of your sample metadata"))

  result <- withinF(c(listArguments[["repmeasure"]],
                      listArguments[["respL1"]],
                      listArguments[["respL2"]]),
                    "two levels")

} else {

  cat("\n\nMultilevel (one level)\n")

  flgF("(listArguments[['respL']] %in% colnames(samDF))",
       txtC = paste0("Level argument (", listArguments[["respL"]],
                     ") must be one of the column names (first row) of your sample metadata"))

  result <- withinF(c(listArguments[["repmeasure"]],
                      listArguments[["respL"]]),
                    "one level")
}


## saving
##-------

cat("\n\nWriting output files\n\n")

## back to variables x samples, with the variable names as first column
datDF <- cbind.data.frame(dataMatrix = colnames(xMN),
                          as.data.frame(t(result)))
write.table(datDF,
            file = "dataMatrix_out.tsv",
            quote = FALSE,
            row.names = FALSE,
            sep = "\t")

## Warnings are logged and muffled so that save() still completes;
## errors abort the run.
withCallingHandlers(
  tryCatch(
    save(result, file = "multilevel.RData"),
    error = function(err) {
      stop(paste("ERROR saving result RData object:", conditionMessage(err)),
           call. = FALSE)
    }),
  warning = function(w) {
    print(paste("Warning: ", conditionMessage(w)))
    invokeRestart("muffleWarning")
  })


## ending
##-------

cat("\nEnd of the '", modNamC, "' Galaxy module call: ",
    format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "")

## restore stdout and release the log connection
if (sink.number() > 0) {
  sink()
}
close(log_file)
<!-- Origin: new file b/withinvariation-26603602a823/mixomics_multilevel.xml (added Fri May 05 05:04:36 2017 -0400) -->
<!-- Galaxy wrapper around mixomics_multilevel.r: every argument is passed to the script as a "name value" pair. -->
<tool id="mixomics_multilevel" name="Multilevel" version="0.5.0">

    <description>Data transformation: Within matrix decomposition for repeated measurements (cross-over design) with mixOmics package</description>

    <requirements>
        <requirement type="package" version="6.1.1">r-mixomics</requirement>
        <requirement type="package" version="1.1_4">r-batch</requirement>
        <requirement type="package" version="1.64.0">bioconductor-pcamethods</requirement>
    </requirements>

    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!-- CDATA keeps the template free of XML escaping; single quotes stop the shell from expanding the values. -->
    <command><![CDATA[
        Rscript '$__tool_directory__/mixomics_multilevel.r'

        dataMatrix_in '$dataMatrix_in'
        sampleMetadata_in '$sampleMetadata_in'
        repmeasure '$repmeasure'
        #if $transformation.option_transformation == "false"
            transfo 'none'
        #end if
        ## respL2 is set to the literal NULL for a one-level design; the R script tests for that value
        #if $nblevel.option_level == "onelevel"
            respL '$nblevel.respL'
            respL2 'NULL'
        #end if
        #if $nblevel.option_level == "twolevel"
            respL1 '$nblevel.respL1'
            respL2 '$nblevel.respL2'
        #end if
        #if $transformation.option_transformation == "true"
            transfo 'go'
            trans '$transformation.trans'
            scale '$transformation.scale'
            center '$transformation.center'
        #end if
    ]]></command>

    <inputs>
        <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, sep: tabular" />
        <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular" />
        <param name="repmeasure" label="Repeated measurement label (Individual IDs, ...)" type="text" value="none" help="Indicate the column name of the sample table to be used as repeated measurement factor" />

        <conditional name="nblevel">
            <param name="option_level" type="select" label="Select number of levels">
                <option value="onelevel" selected="True">One Level</option>
                <option value="twolevel">Two levels</option>
            </param>
            <when value="onelevel">
                <param name="respL" label="Level name (as in sampleMetadata)" type="text" value="none" help="Indicate the column name of the sample table to be used as factor" />
            </when>
            <when value="twolevel">
                <param name="respL1" label="First level name (as in sampleMetadata)" type="text" value="none" help="Indicate the column name of the sample table to be used as first factor" />
                <param name="respL2" label="Second level name (as in sampleMetadata)" type="text" value="none" help="Indicate the column name of the sample table to be used as second factor" />
            </when>
        </conditional>

        <conditional name="transformation">
            <param name="option_transformation" type="boolean" label="Add transformation to dataMatrix before withinVariation" />
            <when value="false">
            </when>
            <when value="true">
                <param name="trans" label="Transformation" type="select" help="">
                    <option value="none">none</option>
                    <option value="log2">log2</option>
                    <option value="log10">log10</option>
                    <option value="sqrt">square root</option>
                </param>
                <param name="scale" label="Scaling" type="select" help="">
                    <option value="none">none</option>
                    <option value="pareto">pareto</option>
                    <option value="vector">vector</option>
                    <option value="uv">UV</option>
                </param>
                <param name="center" label="Centering" type="boolean" />
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data name="dataMatrix_out" format="tabular" from_work_dir="dataMatrix_out.tsv" label="dataMatrix_out.tsv" />
        <data name="multilevelRData" format="rdata" from_work_dir="multilevel.RData" label="multilevel.RData" />
        <data name="log" format="txt" from_work_dir="multilevel.log" label="multilevel.log.txt" />
    </outputs>

    <tests>
        <!-- one level, no pre-processing -->
        <test>
            <param name="dataMatrix_in" value="dataMatrix.csv" ftype="tabular" />
            <param name="sampleMetadata_in" value="sampleMetadata.csv" ftype="tabular" />
            <param name="repmeasure" value="Subject" />
            <param name="nblevel|option_level" value="onelevel" />
            <param name="nblevel|respL" value="Occasion" />
            <output name="dataMatrix_out" file="dataMatrix_out.tsv" lines_diff="2" />
        </test>
        <!-- one level, log10 + pareto scaling, no centering ("transfo" is derived by the command template, not an input) -->
        <test>
            <param name="dataMatrix_in" value="dataMatrix.csv" ftype="tabular" />
            <param name="sampleMetadata_in" value="sampleMetadata.csv" ftype="tabular" />
            <param name="repmeasure" value="Subject" />
            <param name="nblevel|option_level" value="onelevel" />
            <param name="nblevel|respL" value="Occasion" />
            <param name="transformation|option_transformation" value="true" />
            <param name="transformation|trans" value="log10" />
            <param name="transformation|scale" value="pareto" />
            <param name="transformation|center" value="false" />
            <output name="dataMatrix_out" file="dataMatrix_out_log10.tsv" lines_diff="2" />
        </test>
    </tests>

    <help>

.. class:: infomark

**Author(s)** Benoit Liquet, Kim-Anh Le Cao, Benoit Gautier, Ignacio Gonzalez.

.. class:: infomark

**Galaxy wrapper and scripts developers for W4M integration** Guitton Yann LABERCA yann.guitton@oniris-nantes.fr

===========
Description
===========

The withinVariation function decomposes the within variation in the dataMatrix (one- or two-factor analyses are available). The resulting matrix can then be used as input for any multivariate analysis.

withinVariation simply returns the Xw within matrix, which can be input in the other multivariate approaches already implemented in mixOmics.


This tool makes use of the withinVariation function for cross-over design experiments (repeated measurements) from the mixOmics R package packurl_

.. _packurl: https://CRAN.R-project.org/package=mixOmics

For detailed information about mixOmics please follow this link_

.. _link: http://mixomics.org/

**Please cite:**
Kim-Anh Le Cao, Florian Rohart, Ignacio Gonzalez, Sebastien Dejean with key contributors Benoit Gautier, Francois Bartolo,
contributions from Pierre Monget, Jeff Coquery, FangZou Yao and Benoit Liquet (2016).
mixOmics: Omics Data Integration Project. R package version 6.1.1.

    </help>

    <citations>
        <citation type="doi">10.1186/1471-2105-13-325</citation>
        <citation type="doi">10.1007/s11306-009-0185-z</citation>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/withinvariation-26603602a823/test-data/dataMatrix.csv Fri May 05 05:04:36 2017 -0400 @@ -0,0 +1,4 @@ +name c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 +A 20 18 16 14 10 9 7 7 3 2 21 21 17 17 11 12 8 10 4 5 +B 10 12 15 16 2 3 7 7 9 9 12 14 17 18 4 5 9 9 11 11 +C 20 17 14 11 8 5 2 8 14 17 20 17 14 11 8 5 2 8 14 17
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/withinvariation-26603602a823/test-data/dataMatrix_out.tsv Fri May 05 05:04:36 2017 -0400 @@ -0,0 +1,4 @@ +dataMatrix c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 +A -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 -0.5 -1.5 0.5 1.5 0.5 1.5 0.5 1.5 0.5 1.5 0.5 1.5 +B -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1 +C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/withinvariation-26603602a823/test-data/dataMatrix_out_log10.tsv Fri May 05 05:04:36 2017 -0400 @@ -0,0 +1,4 @@ +dataMatrix c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 +A -0.020092280840537 -0.0633189420824745 -0.0246870704491013 -0.0787458016166358 -0.0375807488225819 -0.113316739305819 -0.0508712174396608 -0.137542124890243 -0.0963770720961892 -0.299374529985989 0.020092280840537 0.0633189420824749 0.0246870704491013 0.0787458016166362 0.0375807488225823 0.113316739305819 0.0508712174396608 0.137542124890243 0.0963770720961892 0.299374529985989 +B -0.0777778824969806 -0.0666256118450792 -0.0548380192312634 -0.0517849916432631 -0.237832767883695 -0.188778488108502 -0.103892299006456 -0.103892299006456 -0.084886189102046 -0.084886189102046 0.0777778824969806 0.0666256118450788 0.0548380192312639 0.0517849916432631 0.237832767883695 0.188778488108502 0.103892299006457 0.103892299006457 0.084886189102046 0.084886189102046 +C 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/withinvariation-26603602a823/test-data/sampleMetadata.csv Fri May 05 05:04:36 2017 -0400 @@ -0,0 +1,21 @@ +sampleMetadata Occasion Subject +c1 Control 1 +c2 Control 2 +c3 Control 3 +c4 Control 4 +c5 Control 5 +c6 Control 6 +c7 Control 7 +c8 Control 8 +c9 Control 9 +c10 Control 10 +t1 Treatment 1 +t2 Treatment 2 +t3 Treatment 3 +t4 Treatment 4 +t5 Treatment 5 +t6 Treatment 6 +t7 Treatment 7 +t8 Treatment 8 +t9 Treatment 9 +t10 Treatment 10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/withinvariation-26603602a823/test-data/variableMetadata.csv Fri May 05 05:04:36 2017 -0400 @@ -0,0 +1,4 @@ +name +A +B +C
## Origin: new file b/withinvariation-26603602a823/transformation_script.R
## (added Fri May 05 05:04:36 2017 -0400)

## Etienne Thevenot
## W4M Core Development Team
## etienne.thevenot@cea.fr
## 2015-04-25

## Transform the intensities of a data matrix.
##
## datMN: numeric matrix; must not contain negative values (NA is allowed).
## metC:  transformation method, one of
##          "log2"  -> log2(1 + x)
##          "log10" -> log10(1 + x)
##          "sqrt"  -> sqrt(x)
##          "none"  -> x returned unchanged
##
## Returns the transformed matrix (same dimensions as 'datMN').
## Prints the number of missing and zero values to stdout.
## Stops when 'datMN' contains negative values or 'metC' is not a known
## method; before stopping on negative values, one active stdout diversion
## (if any) is removed.
transformF <- function(datMN,
                       metC) {

  ## options
  ## on.exit() restores the caller's setting on every exit path, including
  ## the error paths below.
  optStrAsFacLs <- options(stringsAsFactors = FALSE)
  on.exit(options(optStrAsFacLs), add = TRUE)

  ## checking

  if (any(datMN < 0, na.rm = TRUE)) {
    cat("\nThe 'dataMatrix' contains negative values\n")
    ## only pop a diversion when one exists, so that a standalone call does
    ## not raise a "no sink to remove" warning
    if (sink.number() > 0) {
      sink()
    }
    stop("The 'dataMatrix' contains negative values", call. = FALSE)
  }

  ## total number of cells, used for the percentages below
  celN <- prod(dim(datMN))

  ## Number of missing values
  nasN <- sum(is.na(datMN))
  cat("\nMissing values in the 'dataMatrix': ",
      nasN,
      " (",
      round(nasN / celN * 100),
      "%)\n",
      sep = "")

  ## Number of zero values
  zerN <- sum(datMN == 0, na.rm = TRUE)
  cat("\nZero values in the 'dataMatrix': ",
      zerN,
      " (",
      round(zerN / celN * 100),
      "%)\n",
      sep = "")

  ## transformation

  trfMN <- switch(metC,
                  log2 = {

                    cat("\n'log2' transformation\n", sep = "")

                    log2(1 + datMN)

                  },
                  log10 = {

                    cat("\n'log10' transformation\n", sep = "")

                    log10(1 + datMN)

                  },
                  sqrt = {

                    cat("\n'Square root' transformation\n", sep = "")

                    sqrt(datMN)

                  },
                  none = {

                    cat("\nNo transformation\n", sep = "")

                    datMN

                  },
                  ## default branch: without it an unknown method left the
                  ## result undefined ("object 'trfMN' not found")
                  stop("Unknown transformation method '", metC,
                       "'; expected one of 'log2', 'log10', 'sqrt', 'none'",
                       call. = FALSE)) ## end of method

  ## returning

  trfMN

} ## end of transformF