Mercurial > repos > ethevenot > transformation
changeset 1:d9e05021553c draft
planemo upload for repository https://github.com/workflow4metabolomics/transformation.git commit c4b65942d74d5bdfd46e748c0040a8b5ebe4fd1d
author | ethevenot |
---|---|
date | Sat, 06 Aug 2016 12:02:52 -0400 |
parents | eacea1349a7c |
children | 0ccfc3e15710 |
files | README.md runit/input/dataMatrix.tsv runit/output/dataMatrix.tsv runit/output/information.txt runit/transformation_runtests.R runit/transformation_tests.R tests/input/dataMatrix.tsv tests/transformation_tests.R transformation_config.xml transformation_script.R transformation_wrapper.R |
diffstat | 11 files changed, 340 insertions(+), 359 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Fri Jul 29 12:11:01 2016 -0400 +++ b/README.md Sat Aug 06 12:02:52 2016 -0400 @@ -1,12 +1,14 @@ -## Transformation of the data matrix -#### A Galaxy module from the [Workflow4metabolomics](http://workflow4metabolomics.org) project +Transformation of the data matrix +================================= + +A Galaxy module from the [Workflow4metabolomics](http://workflow4metabolomics.org) infrastructure Status: [![Build Status](https://travis-ci.org/workflow4metabolomics/transformation.svg?branch=master)](https://travis-ci.org/workflow4metabolomics/transformation). ### Description -**Version:** 2.0.2 -**Date:** 2016-07-27 +**Version:** 2.2.0 +**Date:** 2016-08-04 **Author:** Etienne A. Thevenot (CEA, LIST, MetaboHUB, W4M Core Development Team) **Email:** [etienne.thevenot(at)cea.fr](mailto:etienne.thevenot@cea.fr) **Citation:** Thevenot E.A., Roux A., Xu Y., Ezan E. and Junot C. (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335. 
[doi:10.1021/acs.jproteome.5b00354](http://dx.doi.org/10.1021/acs.jproteome.5b00354) @@ -16,23 +18,39 @@ ### Installation -* Configuration file: **transformation_config.xml** -* Image files: **static/images/transformation_workflowPositionImage.png** -* Wrapper file: **transformation_wrapper.R** +* Configuration file: `transformation_config.xml` +* Image files: `static/images/transformation_workflowPositionImage.png` +* Wrapper file: `transformation_wrapper.R` * R packages + **batch** from CRAN -> install.packages("batch", dep=TRUE) - + + ```r + install.packages("batch", dep=TRUE) + ``` + ### Tests -The code in the wrapper can be tested by running the **tests/transformation_tests.R** in R +The code in the wrapper can be tested by running the `runit/transformation_runtests.R` R file + +You will need to install **RUnit** package in order to make it run: +```r +install.packages('RUnit', dependencies = TRUE) +``` + +### Working example + +See the **W4M00001b_sacurine-complete** or **W4M00002_mtbls2** shared histories in the **Shared Data/Published Histories** menu (https://galaxy.workflow4metabolomics.org/history/list_published) ### News +##### CHANGES IN VERSION 2.2.0 + +NEW FEATURE + + * Square root transformation now available + ##### CHANGES IN VERSION 2.0.2 INTERNAL MODIFICATION - o Creating tests for R code - -*** \ No newline at end of file + * Creating tests for R code \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/runit/input/dataMatrix.tsv Sat Aug 06 12:02:52 2016 -0400 @@ -0,0 +1,4 @@ +dataMatrix HU_017 HU_021 HU_027 HU_032 HU_041 HU_048 HU_049 HU_050 HU_052 HU_059 HU_060 HU_066 HU_072 HU_077 HU_090 HU_109 HU_110 HU_125 HU_126 HU_131 HU_134 HU_149 HU_150 HU_173 HU_179 HU_180 HU_182 HU_202 HU_204 HU_209 +HMDB01032 2569204.92420381 0 NA 1258838.24348419 13039543.0754619 1909391.77026598 3495.09386434063 2293521.90928998 128503.275117713 81872.5276382213 8103557.56578035 149574887.036181 1544036.41049333 7103429.53933206 14138796.50382 NA 263054.73056162 1671332.30008058 88433.1944958815 23602331.2894815 18648126.5206986 1554657.98756878 34152.3646391152 209372.71275317 33187733.370626 202438.591636003 13581070.0886437 354170.810678102 9120781.48986975 43419175.4051586 +HMDB03072 3628416.30251025 65626.9834353751 112170.118946651 3261804.34422417 42228.2787747563 343254.201250707 1958217.69317664 11983270.0435677 5932111.41638028 5511385.83359531 9154521.47755199 2632133.21209418 9500411.14556502 6551644.51726592 7204319.80891836 1273412.04795188 0 8932005.5351622 8340827.52597275 NA 11217839.169041 5919262.81433556 11790077.0657915 9567977.80797097 73717.5811684739 9991787.29074293 4208098.14739633 623970.649925847 10904221.2642849 2171793.93621067 +HMDB00792 429568.609438384 3887629.50527037 1330692.11658995 1367446.73023821 844197.447472453 2948090.71886592 1614157.90566884 3740009.19379795 3292251.66531919 2310688.79492013 4404239.59008605 3043289.12780863 825736.467181043 2523241.91730649 NA 474901.604069803 0 2955990.64049134 1917716.3427982 1767962.67737699 5926203.40397675 1639065.69474684 346810.763557826 1054776.22313737 2390258.27543894 1831346.37315857 1026696.36904362 7079792.50047866 4368341.01359769 3495986.87280275
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/runit/output/dataMatrix.tsv Sat Aug 06 12:02:52 2016 -0400 @@ -0,0 +1,4 @@ +dataMatrix HU_017 HU_021 HU_027 HU_032 HU_041 HU_048 HU_049 HU_050 HU_052 HU_059 HU_060 HU_066 HU_072 HU_077 HU_090 HU_109 HU_110 HU_125 HU_126 HU_131 HU_134 HU_149 HU_150 HU_173 HU_179 HU_180 HU_182 HU_202 HU_204 HU_209 +HMDB01032 1602.87395767846 0 NA 1121.97960921052 3611.03074972533 1381.80742879244 59.1193188758178 1514.43781955219 358.473534752167 286.133758298844 2846.6748261402 12230.0812358782 1242.592616465 2665.22598278871 3760.15910618421 NA 512.888614185985 1292.80017793957 297.377192292687 4858.22305884379 4318.34766093452 1246.85924930153 184.803583945537 457.572631123377 5760.87956571095 449.931763310841 3685.25034273707 595.122517367728 3020.063159914 6589.32283358151 +HMDB03072 1904.84023017949 256.177640389194 334.91807796333 1806.04660632669 205.495203775554 585.878998813498 1399.36331707553 3461.68601169541 2435.59262118694 2347.63409278263 3025.64397732978 1622.38503817503 3082.27369738072 2559.61804128388 2684.0864011649 1128.45560300434 0 2988.64610403477 2888.04908648949 NA 3349.30428134575 2432.95351668205 3433.66816477532 3093.21480145996 271.50981781231 3160.97885009421 2051.36494739389 789.918128622104 3302.1540340034 1473.70076209883 +HMDB00792 655.414837670299 1971.70725648367 1153.55629103653 1169.37877962541 918.802180816117 1717.00050054329 1270.49514193044 1933.91033757978 1814.45630019551 1520.09499536053 2098.62802566011 1744.50254451194 908.700427633355 1588.47156641423 NA 689.131049996881 0 1719.29946213315 1384.81635706624 1329.64757638142 2434.37947000396 1280.26001060208 588.906413242228 1027.0229905593 1546.04601336407 1353.27246818908 1013.26026717898 2660.78794729656 2090.05765796011 1869.75583240239
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/runit/output/information.txt Sat Aug 06 12:02:52 2016 -0400 @@ -0,0 +1,10 @@ + +Start of the 'Transformation' module: Thu 04 Aug 2016 08:58:11 PM + +Missing values in the 'dataMatrix': 4 (4%) + +Zero values in the 'dataMatrix': 3 (3%) + +'Square root' transformation + +End of the 'Transformation' Galaxy module call: Thu 04 Aug 2016 08:58:11 PM
#!/usr/bin/env Rscript

## runit/transformation_runtests.R (file added by this changeset)
##
## RUnit driver: runs every '*_tests.R' file located next to this script.
## The helper functions below are used by the test functions to call
## 'transformation_wrapper.R' as an external process and to read back the
## files it writes.

## Package
##--------

library(RUnit)

## Constants
##----------

testOutDirC <- "output"
argVc <- commandArgs(trailingOnly = FALSE)
scriptPathC <- sub("^--file=", "", grep("^--file=", argVc, value = TRUE))

## No '--file=' argument is available when the script is not started through
## Rscript (e.g. sourced from a session): the current directory is then
## assumed to be the folder containing this script
if (length(scriptPathC) == 0)
    scriptPathC <- file.path(".", "transformation_runtests.R")
scriptPathC <- scriptPathC[1]


## Functions
##-----------

## Reading tables (matrix or data frame)
##
## fileC: path to a tab-separated file with a header line and the row names
##        in the first column
## typeC: "matrix" (default) returns the transposed numeric matrix;
##        "dataframe" returns the data frame as read
## Stops when the file does not exist or when 'typeC' is not a known type
readTableF <- function(fileC, typeC = c("matrix", "dataframe")) {

    ## an unknown type is now an error instead of a silent NULL result
    typeC <- match.arg(typeC)

    if (!file.exists(fileC))
        stop("No output file \"", fileC, "\".", call. = FALSE)

    tabDF <- read.table(file = fileC,
                        header = TRUE,
                        row.names = 1,
                        sep = "\t",
                        stringsAsFactors = FALSE)

    if (typeC == "matrix")
        t(as.matrix(tabDF))
    else
        tabDF

}

## Call wrapper
##
## paramLs: named list of wrapper arguments (each a single value); names and
##          values are passed as 'name value' pairs on the command line
## Returns a list with the outputs which were requested in 'paramLs':
##   datMN (dataMatrix_out), samDF (sampleMetadata_out),
##   varDF (variableMetadata_out), infVc (lines of the information file)
## Stops when the wrapper exits with a non-zero status
wrapperCallF <- function(paramLs) {

    ## Set program path
    wrapperPathC <- file.path(dirname(scriptPathC), "..", "transformation_wrapper.R")

    ## Set arguments: interleave names and values (name1, value1, name2, ...)
    cmdArgVc <- as.vector(rbind(names(paramLs),
                                vapply(paramLs, as.character, character(1))))

    ## Call: always through the Rscript of the running R installation (no
    ## dependency on the executable bit or shebang of the wrapper); every
    ## argument is quoted so that paths containing spaces are preserved
    wrapperCodeN <- system2(file.path(R.home("bin"), "Rscript"),
                            args = shQuote(c(wrapperPathC, cmdArgVc)))

    if (wrapperCodeN != 0)
        stop("Error when running transformation_wrapper.R.")

    ## Get output
    outLs <- list()
    if ("dataMatrix_out" %in% names(paramLs))
        outLs[["datMN"]] <- readTableF(paramLs[["dataMatrix_out"]], "matrix")
    if ("sampleMetadata_out" %in% names(paramLs))
        outLs[["samDF"]] <- readTableF(paramLs[["sampleMetadata_out"]], "dataframe")
    if ("variableMetadata_out" %in% names(paramLs))
        outLs[["varDF"]] <- readTableF(paramLs[["variableMetadata_out"]], "dataframe")
    if ("information" %in% names(paramLs))
        outLs[["infVc"]] <- readLines(paramLs[["information"]])

    outLs

}

## Setting default parameters
##
## testInDirC: name of the input folder (relative to this script)
## Returns a named list with the '*_in' entries of the tables found in the
## input folder, plus the 'dataMatrix_out' and 'information' output paths
defaultArgF <- function(testInDirC) {

    baseDirC <- dirname(scriptPathC)
    defaultArgLs <- list()

    ## only the input tables which actually exist are passed to the wrapper
    for (tableC in c("dataMatrix", "sampleMetadata", "variableMetadata")) {
        tableFileC <- file.path(baseDirC, testInDirC, paste0(tableC, ".tsv"))
        if (file.exists(tableFileC))
            defaultArgLs[[paste0(tableC, "_in")]] <- tableFileC
    }

    defaultArgLs[["dataMatrix_out"]] <- file.path(baseDirC, testOutDirC, "dataMatrix.tsv")
    defaultArgLs[["information"]] <- file.path(baseDirC, testOutDirC, "information.txt")

    defaultArgLs

}

## Main
##-----

## Create output folder: it must be located next to this script (and not in
## the current working directory) since 'defaultArgF' builds the output paths
## from 'dirname(scriptPathC)'
outDirC <- file.path(dirname(scriptPathC), testOutDirC)
if (!file.exists(outDirC))
    dir.create(outDirC, recursive = TRUE)

## Run tests
test.suite <- defineTestSuite('tests', dirname(scriptPathC), testFileRegexp = '^.*_tests\\.R$', testFuncRegexp = '^.*$')
stopifnot(isValidTestSuite(test.suite))
test.results <- runTestSuite(test.suite)
print(test.results)

## Report failures and errors through the exit status, so that automated
## builds detect them (skipped in interactive sessions to keep them alive)
errLs <- getErrors(test.results)
if (!interactive() && (errLs$nFail > 0 || errLs$nErr > 0))
    quit(save = "no", status = 1)
## runit/transformation_tests.R (file added by this changeset)
##
## RUnit test functions; 'defaultArgF' and 'wrapperCallF' are provided by the
## 'transformation_runtests.R' driver, which must be the entry point.

## 'log10' method: checks the transformed intensity of the 'HMDB03072'
## variable in the 'HU_021' sample of the 'input' data set
test_input_log10 <- function() {

    paramLs <- c(defaultArgF("input"), list(method = "log10"))
    resLs <- wrapperCallF(paramLs)

    ## the matrix read back has samples as rows and variables as columns
    obsN <- resLs[["datMN"]]["HU_021", "HMDB03072"]

    checkEqualsNumeric(obsN, 4.817089, tolerance = 1e-6)

}

## 'sqrt' method: same data set and same cell as above
test_input_sqrt <- function() {

    paramLs <- c(defaultArgF("input"), list(method = "sqrt"))
    resLs <- wrapperCallF(paramLs)

    ## the matrix read back has samples as rows and variables as columns
    obsN <- resLs[["datMN"]]["HU_021", "HMDB03072"]

    checkEqualsNumeric(obsN, 256.177640, tolerance = 1e-6)

}
--- a/tests/input/dataMatrix.tsv Fri Jul 29 12:11:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -dataMatrix HU_017 HU_021 HU_027 HU_032 HU_041 HU_048 HU_049 HU_050 HU_052 HU_059 HU_060 HU_066 HU_072 HU_077 HU_090 HU_109 HU_110 HU_125 HU_126 HU_131 HU_134 HU_149 HU_150 HU_173 HU_179 HU_180 HU_182 HU_202 HU_204 HU_209 -HMDB01032 2569204.92420381 0 NA 1258838.24348419 13039543.0754619 1909391.77026598 3495.09386434063 2293521.90928998 128503.275117713 81872.5276382213 8103557.56578035 149574887.036181 1544036.41049333 7103429.53933206 14138796.50382 NA 263054.73056162 1671332.30008058 88433.1944958815 23602331.2894815 18648126.5206986 1554657.98756878 34152.3646391152 209372.71275317 33187733.370626 202438.591636003 13581070.0886437 354170.810678102 9120781.48986975 43419175.4051586 -HMDB03072 3628416.30251025 65626.9834353751 112170.118946651 3261804.34422417 42228.2787747563 343254.201250707 1958217.69317664 11983270.0435677 5932111.41638028 5511385.83359531 9154521.47755199 2632133.21209418 9500411.14556502 6551644.51726592 7204319.80891836 1273412.04795188 0 8932005.5351622 8340827.52597275 NA 11217839.169041 5919262.81433556 11790077.0657915 9567977.80797097 73717.5811684739 9991787.29074293 4208098.14739633 623970.649925847 10904221.2642849 2171793.93621067 -HMDB00792 429568.609438384 3887629.50527037 1330692.11658995 1367446.73023821 844197.447472453 2948090.71886592 1614157.90566884 3740009.19379795 3292251.66531919 2310688.79492013 4404239.59008605 3043289.12780863 825736.467181043 2523241.91730649 NA 474901.604069803 0 2955990.64049134 1917716.3427982 1767962.67737699 5926203.40397675 1639065.69474684 346810.763557826 1054776.22313737 2390258.27543894 1831346.37315857 1026696.36904362 7079792.50047866 4368341.01359769 3495986.87280275
--- a/tests/transformation_tests.R Fri Jul 29 12:11:01 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,142 +0,0 @@ -library(RUnit) - -wrapperF <- function(argVc) { - - source("../transformation_script.R") - - -#### Start_of_testing_code <- function() {} - - -##------------------------------ -## Initializing -##------------------------------ - -## options -##-------- - -strAsFacL <- options()[["stringsAsFactors"]] -options(stringsAsFactors=FALSE) - -## constants -##---------- - -modNamC <- "Transformation" ## module name -metVc <- c("log2", "log10") ## available methods - -topEnvC <- environment() -flagC <- "\n" - -## functions -##---------- - -flgF <- function(tesC, - envC = topEnvC, - txtC = NA) { ## management of warning and error messages - - tesL <- eval(parse(text = tesC), envir = envC) - - if(!tesL) { - - sink(NULL) - stpTxtC <- ifelse(is.na(txtC), - paste0(tesC, " is FALSE"), - txtC) - - stop(stpTxtC, - call. = FALSE) - - } - -} ## flgF - -## log file -##--------- - -sink(argVc[["information"]]) - -cat("\nStart of the '", modNamC, "' module: ", - format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") - -## loading -##-------- - -datMN <- t(as.matrix(read.table(argVc[["dataMatrix_in"]], - check.names = FALSE, - header = TRUE, - row.names = 1, - sep = "\t"))) - -metC <- argVc[["method"]] - -## checking -##--------- - -flgF("metC %in% metVc", txtC = paste0("Transformation method must be either '", paste(metVc, collapse = "', '"), "'")) - - -##------------------------------ -## Computation -##------------------------------ - - -datMN <- transformF(datMN = datMN, ## dataMatrix - metC = metC) ## transformation method - - -##------------------------------ -## Ending -##------------------------------ - - -## saving -##------- - -datDF <- cbind.data.frame(dataMatrix = colnames(datMN), - as.data.frame(t(datMN))) -write.table(datDF, - file = argVc[["dataMatrix_out"]], - quote = FALSE, - row.names = FALSE, - sep = "\t") - -## ending -##------- - -cat("\nEnd 
of the '", modNamC, "' Galaxy module call: ", - format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "") - -sink() - -options(stringsAsFactors = strAsFacL) - - -#### End_of_testing_code <- function() {} - - - return(list(datDF = datDF)) - - rm(list = ls()) - -} - -exaDirOutC <- "output" -file.exists(exaDirOutC) || dir.create(exaDirOutC) - -tesArgLs <- list(input_log10 = c(method = "log10", - .chkC = "checkEqualsNumeric(outLs[['datDF']]['HMDB03072', 'HU_021'], 4.817089, tolerance = 1e-6)")) - -for(tesC in names(tesArgLs)) - tesArgLs[[tesC]] <- c(tesArgLs[[tesC]], - dataMatrix_in = file.path(unlist(strsplit(tesC, "_"))[1], "dataMatrix.tsv"), - dataMatrix_out = file.path(exaDirOutC, "dataMatrix.tsv"), - information = file.path(exaDirOutC, "information.txt")) - -for(tesC in names(tesArgLs)) { - print(tesC) - outLs <- wrapperF(tesArgLs[[tesC]]) - if(".chkC" %in% names(tesArgLs[[tesC]])) - stopifnot(eval(parse(text = tesArgLs[[tesC]][[".chkC"]]))) -} - -message("Checks successfully completed")
--- a/transformation_config.xml Fri Jul 29 12:11:01 2016 -0400 +++ b/transformation_config.xml Sat Aug 06 12:02:52 2016 -0400 @@ -1,93 +1,98 @@ -<tool id="Transformation" name="Transformation" version="2.0.2; 2016-07-27"> - <description>Transforms the dataMatrix intensity values</description> - - <requirements> - <requirement type="package" version="3.2.2">R</requirement> - <requirement type="package">r-batch</requirement> - </requirements> - - <command><![CDATA[ - Rscript $__tool_directory__/transformation_wrapper.R - dataMatrix_in "$dataMatrix_in" - method "$method" - - dataMatrix_out "$dataMatrix_out" - information "$information" - ]]></command> +<tool id="Transformation" name="Transformation" version="2.2.0"> + <description>Transforms the dataMatrix intensity values</description> - <inputs> - <param name="dataMatrix_in" type="data" label="Data matrix file" help="" format="tabular" /> - <param name="method" label="Method" type="select" help=""> - <option value="log2">log2</option> - <option value="log10">log10</option> - </param> - </inputs> + <requirements> + <requirement type="package" version="3.2.2">R</requirement> + <requirement type="package">r-batch</requirement> + </requirements> - <outputs> - <data name="dataMatrix_out" label="${tool.name}_${dataMatrix_in.name}" format="tabular" ></data> - <data name="information" label="${tool.name}_information.txt" format="txt"/> - </outputs> - - <tests> - <test> - <param name="dataMatrix_in" value="input-dataMatrix.tsv"/> - <param name="method" value="log10"/> - <output name="dataMatrix_out" file="output-dataMatrix.tsv"/> - </test> - </tests> - - <help> - + <stdio> + <exit_code range="1:" level="fatal" /> + </stdio> + + <command><![CDATA[ + Rscript $__tool_directory__/transformation_wrapper.R + dataMatrix_in "$dataMatrix_in" + method "$method" + + dataMatrix_out "$dataMatrix_out" + information "$information" + ]]></command> + + <inputs> + <param name="dataMatrix_in" type="data" label="Data matrix file" help="" 
format="tabular" /> + <param name="method" label="Method" type="select" help=""> + <option value="log2">log2</option> + <option value="log10">log10</option> + <option value="sqrt">square root</option> + </param> + </inputs> + + <outputs> + <data name="dataMatrix_out" label="${tool.name}_${dataMatrix_in.name}" format="tabular" ></data> + <data name="information" label="${tool.name}_information.txt" format="txt"/> + </outputs> + + <tests> + <test> + <param name="dataMatrix_in" value="input-dataMatrix.tsv"/> + <param name="method" value="log10"/> + <output name="dataMatrix_out" file="output-dataMatrix.tsv"/> + </test> + </tests> + + <help> + .. class:: infomark - + **Author** Etienne Thevenot (W4M Core Development Team, MetaboHUB Paris, CEA) - + --------------------------------------------------- - + .. class:: infomark - + **Tool updates** - + See the **NEWS** section at the bottom of this page - + --------------------------------------------------- - + ======================== Transformation ======================== - + ----------- Description ----------- - - | Performs transformation of the dataMatrix intensity values aimed at stabilizing variance. - | For mass spectrometry data, where multiplicative noise has been reported, logarithm transformation can be useful to make the peak intensity variance independent of the intensity mean (see for example Veselkov et al, 2011). - | For the logarithm transformations, log(1+X) is used (to avoid returning -Inf for 0 values); NA values remain unchanged - + +| Performs transformation of the dataMatrix intensity values aimed at stabilizing variance. +| For mass spectrometry data, where multiplicative noise has been reported, logarithm transformation can be useful to make the peak intensity variance independent of the intensity mean (see for example Veselkov et al, 2011). +| For the logarithm transformations, log(1+X) is used (to avoid returning -Inf for 0 values); NA values remain unchanged + | `Veselkov et al (2011). 
Optimized Preprocessing of Ultra-Performance Liquid Chromatography/Mass Spectrometry Urinary Metabolic Profiles for Improved Information Recovery. Analytical Chemistry, 83:5864-5872. <http://dx.doi.org/10.1021/ac201065j>`_ - + ----------------- Workflow position ----------------- - + | In the workflow example below, the intensities are first transformed, before the signal drift and batch-effects are corrected; finally, the data quality is evaluated numerically and visually. | - + .. image:: transformation_workflowPositionImage.png - :width: 600 - - - +:width: 600 + + + ----------- Input files ----------- - + +----------------------------+---------+ | Parameter : num + label | Format | +============================+=========+ | 1 : Data matrix file | tabular | +----------------------------+---------+ - + | | **Required format for the dataMatrix is described in the HowTo entitled 'Format Data For Postprocessing' available on the main page of Workflow4Metabolomics.org** | @@ -95,82 +100,92 @@ ---------- Parameters ---------- - + Method - | Method to be used for transforming the intensity values of the dataMatrix: - | **log2** (resp. **log10**): intensities values are log2 (resp. log10) transformed: log(1+X) is used to avoid generating -Inf for 0 values - | in case of negative intensities in the initial dataMatrix, an error is returned - | in case of missing (NA) intensities in the initial dataMatrix, these intensities remain set to NA after transformation - +| Method to be used for transforming the intensity values of the dataMatrix: +| **log2** (resp. **log10**): intensities values are log2 (resp. 
log10) transformed: log(1+X) is used to avoid generating -Inf for 0 values +| in case of negative intensities in the initial dataMatrix, an error is returned +| in case of missing (NA) intensities in the initial dataMatrix, these intensities remain set to NA after transformation + ------------ Output files ------------ - + dataMatrix_out.tabular - | dataMatrix data file with the transformed intensity values - | +| dataMatrix data file with the transformed intensity values +| information.txt - | Text file giving some informations about the computation (eg, number of NA and 0 values in the initial dataMatrix) - | - +| Text file giving some information about the computation (e.g., number of NA and 0 values in the initial dataMatrix) +| + --------------------------------------------------- - + --------------- Working example --------------- - + .. class:: infomark - -See the **W4M00001b_sacurine-complete** shared history in the **Shared Data/Published Histories** menu - + +See the **W4M00001b_sacurine-complete** shared history in the **Shared Data/Published Histories** menu (https://galaxy.workflow4metabolomics.org/history/list_published) + --------------------------------------------------- - + ---- NEWS ---- -CHANGES IN VERSION 2.0.2 +CHANGES IN VERSION 2.2.0 ======================== +NEW FEATURE + +Square root transformation now available + +CHANGES IN VERSION 2.0.2 +======================== + INTERNAL MODIFICATIONS - + Creating tests for R code - + </help> - + <citations> <citation type="bibtex">@Article{Thevenot2015, - Title = {Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses}, - Author = {Thévenot, Etienne A. 
and Roux, Aurélie and Xu, Ying and Ezan, Eric and Junot, Christophe}, - Journal = {Journal of Proteome Research}, - Year = {2015}, - Note = {PMID: 26088811}, - Number = {8}, - Pages = {3322-3335}, - Volume = {14}, + Title = {Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses}, + Author = {Thévenot, Etienne A. and Roux, Aurélie and Xu, Ying and Ezan, Eric and Junot, Christophe}, + Journal = {Journal of Proteome Research}, + Year = {2015}, + Note = {PMID: 26088811}, + Number = {8}, + Pages = {3322-3335}, + Volume = {14}, + + Doi = {10.1021/acs.jproteome.5b00354}, + Url = {http://pubs.acs.org/doi/full/10.1021/acs.jproteome.5b00354} + }</citation> + <citation type="doi">10.1093/bioinformatics/btu813</citation> + </citations> - Doi = {10.1021/acs.jproteome.5b00354}, - Url = {http://pubs.acs.org/doi/full/10.1021/acs.jproteome.5b00354} -}</citation> - -</citations> + <!-- -Input files -=========== + Input files + =========== + + | **To generate the "dataMatrix":** + | **1) copy/paste the values below in a .txt files** + | **2) use the "Get Data" / "Upload File" in the "Tools" (left) panel from the Galaxy / ABiMS page by choosing:** + | **Set the option 'Convert spaces to tabs' to 'Yes'** + | + + **dataMatrix file**:: + + dataMatrix HU_017 HU_021 HU_027 HU_032 HU_041 HU_048 HU_049 HU_050 HU_052 HU_059 HU_060 HU_066 HU_072 HU_077 HU_090 HU_109 HU_110 HU_125 HU_126 HU_131 HU_134 HU_149 HU_150 HU_173 HU_179 HU_180 HU_182 HU_202 HU_204 HU_209 + HMDB01032 2569204.92420381 0 NA 1258838.24348419 13039543.0754619 1909391.77026598 3495.09386434063 2293521.90928998 128503.275117713 81872.5276382213 8103557.56578035 149574887.036181 1544036.41049333 7103429.53933206 14138796.50382 NA 263054.73056162 1671332.30008058 88433.1944958815 23602331.2894815 18648126.5206986 1554657.98756878 34152.3646391152 209372.71275317 33187733.370626 202438.591636003 
13581070.0886437 354170.810678102 9120781.48986975 43419175.4051586 + HMDB03072 3628416.30251025 65626.9834353751 112170.118946651 3261804.34422417 42228.2787747563 343254.201250707 1958217.69317664 11983270.0435677 5932111.41638028 5511385.83359531 9154521.47755199 2632133.21209418 9500411.14556502 6551644.51726592 7204319.80891836 1273412.04795188 0 8932005.5351622 8340827.52597275 NA 11217839.169041 5919262.81433556 11790077.0657915 9567977.80797097 73717.5811684739 9991787.29074293 4208098.14739633 623970.649925847 10904221.2642849 2171793.93621067 + HMDB00792 429568.609438384 3887629.50527037 1330692.11658995 1367446.73023821 844197.447472453 2948090.71886592 1614157.90566884 3740009.19379795 3292251.66531919 2310688.79492013 4404239.59008605 3043289.12780863 825736.467181043 2523241.91730649 NA 474901.604069803 0 2955990.64049134 1917716.3427982 1767962.67737699 5926203.40397675 1639065.69474684 346810.763557826 1054776.22313737 2390258.27543894 1831346.37315857 1026696.36904362 7079792.50047866 4368341.01359769 3495986.87280275 -| **To generate the "dataMatrix":** -| **1) copy/paste the values below in a .txt files** -| **2) use the "Get Data" / "Upload File" in the "Tools" (left) panel from the Galaxy / ABiMS page by choosing:** -| **Set the option 'Convert spaces to tabs' to 'Yes'** -| - -**dataMatrix file**:: - - dataMatrix HU_017 HU_021 HU_027 HU_032 HU_041 HU_048 HU_049 HU_050 HU_052 HU_059 HU_060 HU_066 HU_072 HU_077 HU_090 HU_109 HU_110 HU_125 HU_126 HU_131 HU_134 HU_149 HU_150 HU_173 HU_179 HU_180 HU_182 HU_202 HU_204 HU_209 - HMDB01032 2569204.92420381 0 NA 1258838.24348419 13039543.0754619 1909391.77026598 3495.09386434063 2293521.90928998 128503.275117713 81872.5276382213 8103557.56578035 149574887.036181 1544036.41049333 7103429.53933206 14138796.50382 NA 263054.73056162 1671332.30008058 88433.1944958815 23602331.2894815 18648126.5206986 1554657.98756878 34152.3646391152 209372.71275317 33187733.370626 202438.591636003 13581070.0886437 
354170.810678102 9120781.48986975 43419175.4051586 - HMDB03072 3628416.30251025 65626.9834353751 112170.118946651 3261804.34422417 42228.2787747563 343254.201250707 1958217.69317664 11983270.0435677 5932111.41638028 5511385.83359531 9154521.47755199 2632133.21209418 9500411.14556502 6551644.51726592 7204319.80891836 1273412.04795188 0 8932005.5351622 8340827.52597275 NA 11217839.169041 5919262.81433556 11790077.0657915 9567977.80797097 73717.5811684739 9991787.29074293 4208098.14739633 623970.649925847 10904221.2642849 2171793.93621067 - HMDB00792 429568.609438384 3887629.50527037 1330692.11658995 1367446.73023821 844197.447472453 2948090.71886592 1614157.90566884 3740009.19379795 3292251.66531919 2310688.79492013 4404239.59008605 3043289.12780863 825736.467181043 2523241.91730649 NA 474901.604069803 0 2955990.64049134 1917716.3427982 1767962.67737699 5926203.40397675 1639065.69474684 346810.763557826 1054776.22313737 2390258.27543894 1831346.37315857 1026696.36904362 7079792.50047866 4368341.01359769 3495986.87280275 - +--> </tool>
--- a/transformation_script.R Fri Jul 29 12:11:01 2016 -0400 +++ b/transformation_script.R Sat Aug 06 12:02:52 2016 -0400 @@ -5,42 +5,45 @@ transformF <- function(datMN, metC) { - + ## options - + optStrAsFacL <- options()[["stringsAsFactors"]] options(stringsAsFactors = FALSE) - + + ## checking + + if(length(which(datMN < 0))) { + cat("\nThe 'dataMatrix' contains negative values\n") + sink() + stop("The 'dataMatrix' contains negative values", call. = FALSE) + } + + ## Number of missing values + nasN <- length(which(is.na(datMN))) + cat("\nMissing values in the 'dataMatrix': ", + nasN, + " (", + round(nasN / cumprod(dim(datMN))[2] * 100), + "%)\n", + sep="") + + ## Number of zero values + zerN <- length(which(datMN == 0)) + cat("\nZero values in the 'dataMatrix': ", + zerN, + " (", + round(zerN / cumprod(dim(datMN))[2] * 100), + "%)\n", + sep="") + ## transformation - + switch(metC, log2 = { cat("\n'log2' transformation\n", sep="") - if(length(which(datMN < 0))) - stop("The 'dataMatrix' contains negative values") - - zerMN <- datMN == 0 - - ## Number of missing values - nasN <- length(which(is.na(datMN))) - cat("\nMissing values in the 'dataMatrix': ", - nasN, - " (", - round(nasN / cumprod(dim(datMN))[2] * 100), - "%)\n", - sep="") - - ## Number of zero values - zerN <- length(which(zerMN)) - cat("\nZero values in the 'dataMatrix': ", - zerN, - " (", - round(zerN / cumprod(dim(datMN))[2] * 100), - "%)\n", - sep="") - trfMN <- log2(1 + datMN) }, @@ -48,32 +51,17 @@ cat("\n'log10' transformation\n", sep="") - if(length(which(datMN < 0))) - stop("The 'dataMatrix' contains negative values") - - zerMN <- datMN == 0 - - ## Number of missing values - nasN <- length(which(is.na(datMN))) - cat("\nMissing values in the 'dataMatrix': ", - nasN, - " (", - round(nasN / cumprod(dim(datMN))[2] * 100), - "%)\n", - sep="") - - ## Number of zero values - zerN <- length(which(zerMN)) - cat("\nZero values in the 'dataMatrix': ", - zerN, - " (", - round(zerN / cumprod(dim(datMN))[2] * 
100), - "%)\n", - sep="") - trfMN <- log10(1 + datMN) - }) ## end of 'log10' method + }, + sqrt = { + + cat("\n'Square root' transformation\n", sep="") + + trfMN <- sqrt(datMN) + + + }) ## end of method ## returning
--- a/transformation_wrapper.R Fri Jul 29 12:11:01 2016 -0400 +++ b/transformation_wrapper.R Sat Aug 06 12:02:52 2016 -0400 @@ -1,5 +1,4 @@ -#!/usr/bin/Rscript --vanilla --slave --no-site-file - +#!/usr/bin/env Rscript library(batch) ## parseCommandArgs @@ -14,9 +13,6 @@ argVc <- unlist(parseCommandArgs(evaluate=FALSE)) -#### Start_of_tested_code <- function() {} - - ##------------------------------ ## Initializing ##------------------------------ @@ -31,33 +27,7 @@ ##---------- modNamC <- "Transformation" ## module name -metVc <- c("log2", "log10") ## available methods -topEnvC <- environment() -flagC <- "\n" - -## functions -##---------- - -flgF <- function(tesC, - envC = topEnvC, - txtC = NA) { ## management of warning and error messages - - tesL <- eval(parse(text = tesC), envir = envC) - - if(!tesL) { - - sink(NULL) - stpTxtC <- ifelse(is.na(txtC), - paste0(tesC, " is FALSE"), - txtC) - - stop(stpTxtC, - call. = FALSE) - - } - -} ## flgF ## log file ##--------- @@ -78,11 +48,6 @@ metC <- argVc[["method"]] -## checking -##--------- - -flgF("metC %in% metVc", txtC = paste0("Transformation method must be either '", paste(metVc, collapse = "', '"), "'")) - ##------------------------------ ## Computation @@ -120,7 +85,4 @@ options(stringsAsFactors = strAsFacL) -#### End_of_tested_code <- function() {} - - rm(list = ls())