# HG changeset patch # User lecorguille # Date 1460126372 14400 # Node ID 54cc3edfe35ce8e5c12d0d8b5d918a723a885cd6 # Parent da716e2937f21e4c95a9d527c477d3deaaaec408 planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 83b80dcd96b379518c2e4ace992affc889d32ca6 diff -r da716e2937f2 -r 54cc3edfe35c README.rst --- a/README.rst Mon Feb 22 16:38:15 2016 -0500 +++ b/README.rst Fri Apr 08 10:39:32 2016 -0400 @@ -2,6 +2,11 @@ Changelog/News -------------- +**Version 2.0.6 - 04/04/2016** + +- TEST: refactoring to pass planemo test using conda dependencies + + **Version 2.0.5 - 10/02/2016** - BUGFIX: better management of errors. Datasets remained green although the process failed @@ -19,3 +24,14 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. + +Test Status +----------- + +Planemo test using conda: passed + +Planemo test using source env.sh: passed + +Planemo shed_test : passed + + diff -r da716e2937f2 -r 54cc3edfe35c abims_xcms_retcor.xml --- a/abims_xcms_retcor.xml Mon Feb 22 16:38:15 2016 -0500 +++ b/abims_xcms_retcor.xml Fri Apr 08 10:39:32 2016 -0400 @@ -1,20 +1,16 @@ - + Retention Time Correction using retcor function from xcms R package - - R - Rscript - xcms - xcms_w4m_script - + + macros.xml + - - - + + @@ -64,6 +55,7 @@ + @@ -82,14 +74,15 @@ + - - + + @@ -108,25 +101,21 @@ - - - - - - - - - - - - - + + + + + + + + + - - - - + + + + @@ -135,21 +124,8 @@ - - 10.1021/ac051437y - 10.1093/bioinformatics/btu813 - + + diff -r da716e2937f2 -r 54cc3edfe35c lib.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib.r Fri Apr 08 10:39:32 2016 -0400 @@ -0,0 +1,400 @@ +# lib.r version="2.0.1" +#Authors ABiMS TEAM +#Lib.r for Galaxy Workflow4Metabo +#version 2.2 +#Based on lib.r 2.1 +#Modifications made by Guitton Yann +#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet +#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation + + + + +#@author Y. Guitton +getBPC <- function(file,rtcor=NULL, ...) { + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) +} + +#@author Y. Guitton +getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { + cat("Creating BIC pdf...\n") + + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } + + class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) + } + + N <- dim(phenoData(xcmsSet))[1] + + TIC <- vector("list",N) + + + for (j in 1:N) { + + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] else + rtcor <- NULL + + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } + + + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + #print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(class)==2){ + k=1 + l=2 + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + + dev.off() #pdf(pdfname,w=16,h=10) + + invisible(TIC) +} + + + +#@author Y. Guitton +getTIC <- function(file,rtcor=NULL) { + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) +} + +## +## overlay TIC from all files in current folder or from xcmsSet, create pdf +## +#@author Y. Guitton +getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { + cat("Creating TIC pdf...\n") + + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } + + class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + + classnames<-vector("list",length(class)) + for (i in 1:length(class)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) + } + + N <- length(files) + TIC <- vector("list",N) + + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } + + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) + + + ##plot start + if (length(class)>2){ + for (k in 1:(length(class)-1)){ + for (l in (k+1):length(class)){ + #print(paste(class[k],"vs",class[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(class)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==2 + dev.off() #pdf(pdfname,w=16,h=10) + + invisible(TIC) +} + + + +## +## Get the polarities from all the samples of a condition +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM +getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { + cat("Creating the sampleMetadata file...\n") + + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) + + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } + + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } + } + + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) + + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + +} + + +## +## This function check if xcms will found all the files +## +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM +checkFilesCompatibilityWithXcms <- function(directory) { + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) + + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + + } +} + + + +## +## This function check if XML contains special caracters. It also checks integrity and completness. +## +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +checkXmlStructure <- function (directory) { + cat("Checking XML structure...\n") + + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) + + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } + +} + + +## +## This function check if XML contain special characters +## +#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM +deleteXmlBadCharacters<- function (directory) { + cat("Checking Non ASCII characters in the XML...\n") + + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } + } + if (processed) cat("\n\n") + return(processed) +} + + +## +## This function will compute MD5 checksum to check the data integrity +## +#@author Gildas Le Corguille lecorguille@sb-roscoff.fr +getMd5sum <- function (directory) { + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] + + library(tools) + + #cat("\n\n") + + return(as.matrix(md5sum(files))) +} + diff -r da716e2937f2 -r 54cc3edfe35c macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Apr 08 10:39:32 2016 -0400 @@ -0,0 +1,51 @@ + + + + + R + r-snow + bioconductor-xcms + r-batch + + + + + + + + + + LANG=C Rscript $__tool_directory__/xcms.r + + + + ; + return=\$?; + mv log.txt $log; + cat $log; + sh -c "exit \$return" + + + +.. class:: infomark + +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu + +.. class:: infomark + +**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@oniris-nantes.fr - part of Workflow4Metabolomics.org [W4M] + + | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool. + +--------------------------------------------------- + + + + + + + 10.1021/ac051437y + 10.1093/bioinformatics/btu813 + + + diff -r da716e2937f2 -r 54cc3edfe35c planemo.sh --- a/planemo.sh Mon Feb 22 16:38:15 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -planemo shed_init -f --name=xcms_retcor --owner=lecorguille --description="[W4M][GC-MS] XCMS R Package - Preprocessing - Correct retention time from different samples" --homepage_url="http://workflow4metabolomics.org" --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nRetention Time Correction using retcor function from xcms R package\n\nBEWARE: this tool don't come with its script. You will need to install the dedicated package_xcms_w4m_script too" --category="Metabolomics" diff -r da716e2937f2 -r 54cc3edfe35c planemo_test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/planemo_test.sh Fri Apr 08 10:39:32 2016 -0400 @@ -0,0 +1,19 @@ +planemo conda_init +planemo conda_install . +planemo test --install_galaxy --conda_dependency_resolution --galaxy_branch "dev" + +#All 1 test(s) executed passed. +#abims_xcms_retcor[0]: passed + + +source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh +source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh +planemo test --install_galaxy --galaxy_branch "dev" + +#All 1 test(s) executed passed. +#abims_xcms_retcor[0]: passed + + +planemo shed_test --install_galaxy --galaxy_branch "dev" -t testtoolshed +#All 1 test(s) executed passed. +#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_retcor/abims_xcms_retcor/2.0.6[0]: passed \ No newline at end of file diff -r da716e2937f2 -r 54cc3edfe35c repository_dependencies.xml --- a/repository_dependencies.xml Mon Feb 22 16:38:15 2016 -0500 +++ b/repository_dependencies.xml Fri Apr 08 10:39:32 2016 -0400 @@ -1,5 +1,5 @@ - + diff -r da716e2937f2 -r 54cc3edfe35c test-data/sacuri.zip Binary file test-data/sacuri.zip has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/sacuri_dir_root.zip Binary file test-data/sacuri_dir_root.zip has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/xset.group.RData Binary file test-data/xset.group.RData has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/xset.group.retcor.BPCs_corrected.pdf Binary file test-data/xset.group.retcor.BPCs_corrected.pdf has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/xset.group.retcor.RData Binary file test-data/xset.group.retcor.RData has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/xset.group.retcor.Rplots.pdf Binary file test-data/xset.group.retcor.Rplots.pdf has changed diff -r da716e2937f2 -r 54cc3edfe35c test-data/xset.group.retcor.TICs_corrected.pdf Binary file test-data/xset.group.retcor.TICs_corrected.pdf has changed diff -r da716e2937f2 -r 54cc3edfe35c tool_dependencies.xml --- a/tool_dependencies.xml Mon Feb 22 16:38:15 2016 -0500 +++ b/tool_dependencies.xml Fri Apr 08 10:39:32 2016 -0400 @@ -1,12 +1,9 @@ - + - - - - - + + diff -r da716e2937f2 -r 54cc3edfe35c xcms.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms.r Fri Apr 08 10:39:32 2016 -0400 @@ -0,0 +1,246 @@ +#!/usr/bin/env Rscript +# xcms.r version="2.2.0" +#Authors ABIMS TEAM +#BPC Addition from Y.guitton + + +# ----- LOG FILE ----- +log_file=file("log.txt", open = "wt") +sink(log_file) +sink(log_file, type = "output") + + +# ----- PACKAGE ----- +cat("\tPACKAGE INFO\n") +#pkgs=c("xcms","batch") +pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") +for(pkg in pkgs) { + suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") +} +source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } +cat("\n\n"); + + + + + +# ----- ARGUMENTS ----- +cat("\tARGUMENTS INFO\n") +listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects +write.table(as.matrix(listArguments), col.names=F, quote=F, sep='\t') + +cat("\n\n"); + + +# ----- ARGUMENTS PROCESSING ----- +cat("\tINFILE PROCESSING INFO\n") + +#image is an .RData file necessary to use xset variable given by previous tools +if (!is.null(listArguments[["image"]])){ + load(listArguments[["image"]]); listArguments[["image"]]=NULL +} + +#Import the different functions +source_local("lib.r") + +cat("\n\n") + +#Import the different functions + +# ----- PROCESSING INFILE ----- +cat("\tARGUMENTS PROCESSING INFO\n") + +# Save arguments to generate a report +if (!exists("listOFlistArguments")) listOFlistArguments=list() +listOFlistArguments[[paste(format(Sys.time(), "%y%m%d-%H:%M:%S_"),listArguments[["xfunction"]],sep="")]] = listArguments + + +#saving the commun parameters +thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments + +xsetRdataOutput = paste(thefunction,"RData",sep=".") +if (!is.null(listArguments[["xsetRdataOutput"]])){ + xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL +} + +rplotspdf = "Rplots.pdf" +if (!is.null(listArguments[["rplotspdf"]])){ + rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL +} + +sampleMetadataOutput = "sampleMetadata.tsv" +if (!is.null(listArguments[["sampleMetadataOutput"]])){ + sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL +} + + + + +if (thefunction %in% c("xcmsSet","retcor")) { + ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL + bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL +} + +#necessary to unzip .zip file uploaded to Galaxy +#thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories + + +if (!is.null(listArguments[["zipfile"]])){ + zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL +} + +if (!is.null(listArguments[["library"]])){ + directory=listArguments[["library"]]; listArguments[["library"]]=NULL + if(!file.exists(directory)){ + error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") + print(error_message) + stop(error_message) + } +} + +# We unzip automatically the chromatograms from the zip files. +if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { + if(exists("zipfile") && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } + + #list all file in the zip file + #zip_files=unzip(zipfile,list=T)[,"Name"] + + + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) + + #get the directory name + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories + + cat("files_root_directory\t",directory,"\n") + + # + md5sumList=list("origin"=getMd5sum(directory)) + + # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. + # Remove because can create issue with some clean files + #@TODO: fix me + #if (deleteXmlBadCharacters(directory)) { + # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) + #} + + } +} + +#addition of the directory to the list of arguments in the first position +if (thefunction == "xcmsSet") { + checkXmlStructure(directory) + checkFilesCompatibilityWithXcms(directory) + listArguments=append(directory, listArguments) +} + + +#addition of xset object to the list of arguments in the first position +if (exists("xset")){ + listArguments=append(list(xset), listArguments) +} + +cat("\n\n") + + + + + + +# ----- MAIN PROCESSING INFO ----- +cat("\tMAIN PROCESSING INFO\n") + + +#Verification of a group step before doing the fillpeaks job. + +if (thefunction == "fillPeaks") { + res=try(is.null(groupnames(xset))) + if (class(res) == "try-error"){ + error<-geterrmessage() + write(error, stderr()) + stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") + } + +} + +#change the default display settings +#dev.new(file="Rplots.pdf", width=16, height=12) +pdf(file=rplotspdf, width=16, height=12) +if (thefunction == "group") { + par(mfrow=c(2,2)) +} +#else if (thefunction == "retcor") { +#try to change the legend display +# par(xpd=NA) +# par(xpd=T, mar=par()$mar+c(0,0,0,4)) +#} + + +#execution of the function "thefunction" with the parameters given in "listArguments" +xset = do.call(thefunction, listArguments) + + +cat("\n\n") + +dev.off() #dev.new(file="Rplots.pdf", width=16, height=12) + +if (thefunction == "xcmsSet") { + + #transform the files absolute pathways into relative pathways + xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + + if(exists("zipfile") && (zipfile!="")) { + + #Modify the samples names (erase the path) + for(i in 1:length(sampnames(xset))){ + + sample_name=unlist(strsplit(sampnames(xset)[i], "/")) + sample_name=sample_name[length(sample_name)] + sample_name= unlist(strsplit(sample_name,"[.]"))[1] + sampnames(xset)[i]=sample_name + + } + + } + +} + +# -- TIC -- +if (thefunction == "xcmsSet") { + sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") + getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) +} else if (thefunction == "retcor") { + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") + getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) +} + +cat("\n\n") + + +# ----- EXPORT ----- + +cat("\tXSET OBJECT INFO\n") +print(xset) +#delete the parameters to avoid the passage to the next tool in .RData image + + +#saving R data in .Rdata file to save the variables used in the present tool +objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") +save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) + +cat("\n\n") + + +cat("\tDONE\n") +