Mercurial > repos > lecorguille > xcms_retcor
changeset 8:4bfef820569b draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit a6f5f18b3d6130f7d7fbb9f2df856838c6217797
author | lecorguille |
---|---|
date | Fri, 07 Apr 2017 07:36:24 -0400 |
parents | bb602a5b8819 |
children | e4e0254a3c0a |
files | Makefile README.rst abims_xcms_retcor.xml lib.r macros.xml planemo_test.sh static/images/xcms_retcor_workflow.png test-data/faahKO-single-class.xset.group.RData test-data/ko15.CDF test-data/ko16.CDF test-data/wt15.CDF test-data/wt16.CDF tool_dependencies.xml xcms.r |
diffstat | 14 files changed, 573 insertions(+), 513 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Mon Jan 30 08:53:30 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -# USAGE: make [install|clean] - -# -------- VARIABLE -------- - -OBJ=xcms_retcor.tgz -DEP=abims_xcms_retcor.xml tool_dependencies.xml repository_dependencies.xml static test-data - - -# ------------------------ - -all: $(OBJ) - -$(OBJ): $(DEP) - tar --exclude=".svn" -zchf $@ $^ - -# ------------------------ - -install: $(OBJ) - mv *.tgz ~ - -clean: - rm *.tgz -
--- a/README.rst Mon Jan 30 08:53:30 2017 -0500 +++ b/README.rst Fri Apr 07 07:36:24 2017 -0400 @@ -2,6 +2,10 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph @@ -31,12 +35,3 @@ - IMPROVEMENT: parameter labels have changed to facilitate their reading. - -Test Status ------------ - -Planemo test using conda: passed - -Planemo test using source env.sh: passed - -Planemo shed_test : passed
--- a/abims_xcms_retcor.xml Mon Jan 30 08:53:30 2017 -0500 +++ b/abims_xcms_retcor.xml Fri Apr 07 07:36:24 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.0.8"> +<tool id="abims_xcms_retcor" name="xcms.retcor" version="2.1.0"> <description>Retention Time Correction using retcor function from xcms R package </description> @@ -11,13 +11,13 @@ <command><![CDATA[ @COMMAND_XCMS_SCRIPT@ - image $image + image '$image' xfunction retcor - xsetRdataOutput $xsetRData - ticspdf $ticsCorPdf - bicspdf $bpcsCorPdf - rplotspdf $rplotsPdf + xsetRdataOutput '$xsetRData' + ticspdf '$ticsCorPdf' + bicspdf '$bpcsCorPdf' + rplotspdf '$rplotsPdf' method $methods.method #if $methods.method == "obiwarp": @@ -33,7 +33,7 @@ #end if #end if - @COMMAND_ZIPFILE_LOAD@ + @COMMAND_FILE_LOAD@ @COMMAND_LOG_EXIT@ ]]></command> @@ -43,7 +43,7 @@ <conditional name="methods"> <param name="method" type="select" label="Method to use for retention time correction" help="[method] See the help section below" > <option value="obiwarp" >obiwarp</option> - <option value="peakgroups" selected="peakgroups">peakgroups</option> + <option value="peakgroups" selected="true">peakgroups</option> </param> <when value="obiwarp"> <param name="profStep" type="float" value="1" label="Step size (in m/z)" help="[profStep] to use for profile generation from the raw data files" /> @@ -82,7 +82,7 @@ </when> </conditional> - <expand macro="zipfile_load"/> + <expand macro="input_file_load"/> </inputs> @@ -133,8 +133,29 @@ <param name="methods|options|span" value="0.2"/> <param name="methods|options|family" value="gaussian"/> <param name="methods|options|plottype" value="deviation"/> - <param name="zipfile_load_conditional|zipfile_load_select" value="yes" /> - <param name="zipfile_load_conditional|zip_file" value="faahKO_reduce.zip" ftype="zip" /> + <expand macro="test_file_load_zip"/> + <output name="log"> + <assert_contents> + <has_text text="object with 4 samples" /> + <has_text text="Time range: 2507.7-4481.7 seconds (41.8-74.7 minutes)" /> + <has_text text="Mass range: 200.1-600 m/z" /> + <has_text text="Peaks: 9251 (about 2313 per sample)" /> + <has_text text="Peak Groups: 0" /> + <has_text text="Sample classes: KO, WT" /> + </assert_contents> + </output> + </test> + <test> + <param name="image" value="faahKO-single-class.xset.group.RData"/> + <param name="methods|method" value="peakgroups"/> + <param name="methods|smooth" value="loess"/> + <param name="methods|extra" value="1"/> + <param name="methods|missing" value="1"/> + <param name="methods|options|option" value="show"/> + <param name="methods|options|span" value="0.2"/> + <param name="methods|options|family" value="gaussian"/> + <param name="methods|options|plottype" value="deviation"/> + <expand macro="test_file_load_single"/> <output name="log"> <assert_contents> <has_text text="object with 4 samples" /> @@ -298,6 +319,10 @@ Changelog/News -------------- +**Version 2.1.0 - 03/02/2017** + +- IMPROVEMENT: xcms.retcor can deal with merged individual data + **Version 2.0.8 - 22/12/2016** - BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph
--- a/lib.r Mon Jan 30 08:53:30 2017 -0500 +++ b/lib.r Fri Apr 07 07:36:24 2017 -0400 @@ -27,168 +27,165 @@ #@author G. Le Corguille #This function format ions identifiers -formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) { - return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT)))) +formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { + splitDeco = strsplit(as.character(variableMetadata$name),"_") + idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) + namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) + variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) + return(variableMetadata) } #@author G. Le Corguille # value: intensity values to be used into, maxo or intb getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { - groups <- xset@groups - values <- groupval(xset, "medret", value=intval) - - # renamming of the column rtmed to rt to fit with camera peaklist function output - colnames(groups)[colnames(groups)=="rtmed"] <- "rt" - colnames(groups)[colnames(groups)=="mzmed"] <- "mz" - - ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) - groups = RTSecondToMinute(groups, convertRTMinute) + variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) + variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) + + dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] - rownames(groups) = ids - rownames(values) = ids + variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] + variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) + variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) - #@TODO: add "name" as the first column name - #colnames(groups)[1] = "name" - #colnames(values)[1] = "name" - - write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA) - write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) + write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) + write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) } #@author Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { - object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) - #plotChrom(xcmsRaw(file), base=T) + object <- xcmsRaw(file) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + #plotChrom(xcmsRaw(file), base=T) } #@author Y. Guitton getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { - cat("Creating BIC pdf...\n") + cat("Creating BIC pdf...\n") - if (is.null(xcmsSet)) { - cat("Enter an xcmsSet \n") - stop() - } else { - files <- filepaths(xcmsSet) - } + if (is.null(xcmsSet)) { + cat("Enter an xcmsSet \n") + stop() + } else { + files <- filepaths(xcmsSet) + } - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- dim(phenoData(xcmsSet))[1] + N <- dim(phenoData(xcmsSet))[1] - TIC <- vector("list",N) + TIC <- vector("list",N) - for (j in 1:N) { + for (j in 1:N) { - TIC[[j]] <- getBPC(files[j]) - #good for raw - # seems strange for corrected - #errors if scanrange used in xcmsSetgeneration - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[j]] else - rtcor <- NULL + TIC[[j]] <- getBPC(files[j]) + #good for raw + # seems strange for corrected + #errors if scanrange used in xcmsSetgeneration + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[j]] + else + rtcor <- NULL - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) - # TIC[[j]][,1]<-rtcor - } + TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + # TIC[[j]][,1]<-rtcor + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in BPCs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start + ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + + if (length(phenoDataClass)==2){ + k=1 + l=2 colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 + + }#end length ==2 - if (length(class)==2){ - k=1 - l=2 - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - - for (j in 1:length(classnames[[k]])) { + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) + colvect<-NULL + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==2 + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) - colvect<-NULL - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + dev.off() #pdf(pdfname,w=16,h=10) - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 - - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } #@author Y. Guitton getTIC <- function(file,rtcor=NULL) { - object <- xcmsRaw(file) - cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) + object <- xcmsRaw(file) + cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) } ## @@ -196,114 +193,112 @@ ## #@author Y. Guitton getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { - cat("Creating TIC pdf...\n") + cat("Creating TIC pdf...\n") - if (is.null(xcmsSet)) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") - if (is.null(files)) - files <- getwd() - info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) - } else { - files <- filepaths(xcmsSet) - } - - class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + if (is.null(xcmsSet)) { + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") + if (is.null(files)) + files <- getwd() + info <- file.info(files) + listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) + files <- c(files[!info$isdir], listed) + } else { + files <- filepaths(xcmsSet) + } - classnames<-vector("list",length(class)) - for (i in 1:length(class)){ - classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) - } + phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class + classnames<-vector("list",length(phenoDataClass)) + for (i in 1:length(phenoDataClass)){ + classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) + } - N <- length(files) - TIC <- vector("list",N) + N <- length(files) + TIC <- vector("list",N) - for (i in 1:N) { - if (!is.null(xcmsSet) && rt == "corrected") - rtcor <- xcmsSet@rt$corrected[[i]] else - rtcor <- NULL - TIC[[i]] <- getTIC(files[i],rtcor=rtcor) - } + for (i in 1:N) { + if (!is.null(xcmsSet) && rt == "corrected") + rtcor <- xcmsSet@rt$corrected[[i]] else + rtcor <- NULL + TIC[[i]] <- getTIC(files[i],rtcor=rtcor) + } - pdf(pdfname,w=16,h=10) - cols <- rainbow(N) - lty = 1:N - pch = 1:N - #search for max x and max y in TICs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) - ylim = c(-ylim[2], ylim[2]) + pdf(pdfname,w=16,h=10) + cols <- rainbow(N) + lty = 1:N + pch = 1:N + #search for max x and max y in TICs + xlim = range(sapply(TIC, function(x) range(x[,1]))) + ylim = range(sapply(TIC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) - ##plot start - if (length(class)>2){ - for (k in 1:(length(class)-1)){ - for (l in (k+1):length(class)){ - #print(paste(class[k],"vs",class[l],sep=" ")) - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + ##plot start + if (length(phenoDataClass)>2){ + for (k in 1:(length(phenoDataClass)-1)){ + for (l in (k+1):length(phenoDataClass)){ + #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } + for (j in 1:length(classnames[[l]])) { + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) + } + legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + } + } + }#end if length >2 + if (length(phenoDataClass)==2){ + k=1 + l=2 + + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { - - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) + # i=class2names[j] + tic <- TIC[[classnames[[l]][j]]] + points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) - } - } - }#end if length >2 - if (length(class)==2){ - k=1 - l=2 + + }#end length ==2 - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } - for (j in 1:length(classnames[[l]])) { - # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") - colvect<-append(colvect,cols[classnames[[l]][j]]) - } - legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) + #case where only one class + if (length(phenoDataClass)==1){ + k=1 + ylim = range(sapply(TIC, function(x) range(x[,2]))) - }#end length ==2 - - #case where only one class - if (length(class)==1){ - k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) + plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") + colvect<-NULL + for (j in 1:length(classnames[[k]])) { + tic <- TIC[[classnames[[k]][j]]] + # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") + points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + colvect<-append(colvect,cols[classnames[[k]][j]]) + } - plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") - colvect<-NULL - for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") - colvect<-append(colvect,cols[classnames[[k]][j]]) - } + legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) + + }#end length ==1 - legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) - - }#end length ==1 + dev.off() #pdf(pdfname,w=16,h=10) - dev.off() #pdf(pdfname,w=16,h=10) - - invisible(TIC) + invisible(TIC) } @@ -313,70 +308,70 @@ #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { - cat("Creating the sampleMetadata file...\n") - - #Create the sampleMetada dataframe - sampleMetadata=xset@phenoData - sampleNamesOrigin=rownames(sampleMetadata) - sampleNamesMakeNames=make.names(sampleNamesOrigin) + cat("Creating the sampleMetadata file...\n") - if (any(duplicated(sampleNamesMakeNames))) { - write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) - for (sampleName in sampleNamesOrigin) { - write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) - } - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - } - - if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { - cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") - for (sampleName in sampleNamesOrigin) { - cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) - } - } - - sampleMetadata$sampleMetadata=sampleNamesMakeNames - sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns - rownames(sampleMetadata)=NULL + #Create the sampleMetada dataframe + sampleMetadata=xset@phenoData + sampleNamesOrigin=rownames(sampleMetadata) + sampleNamesMakeNames=make.names(sampleNamesOrigin) - #Create a list of files name in the current directory - list_files=xset@filepaths - #For each sample file, the following actions are done - for (file in list_files){ - #Check if the file is in the CDF format - if (!mzR:::netCDFIsFile(file)){ - - # If the column isn't exist, with add one filled with NA - if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA + if (any(duplicated(sampleNamesMakeNames))) { + write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) + for (sampleName in sampleNamesOrigin) { + write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) + } + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } - #Create a simple xcmsRaw object for each sample - xcmsRaw=xcmsRaw(file) - #Extract the polarity (a list of polarities) - polarity=xcmsRaw@polarity - #Verify if all the scans have the same polarity - uniq_list=unique(polarity) - if (length(uniq_list)>1){ - polarity="mixed" - } else { - polarity=as.character(uniq_list) - } - #Transforms the character to obtain only the sample name - filename=basename(file) - library(tools) - samplename=file_path_sans_ext(filename) - - #Set the polarity attribute - sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity - - #Delete xcmsRaw object because it creates a bug for the fillpeaks step - rm(xcmsRaw) + if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { + cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") + for (sampleName in sampleNamesOrigin) { + cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) + } } - } + sampleMetadata$sampleMetadata=sampleNamesMakeNames + sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns + rownames(sampleMetadata)=NULL + + #Create a list of files name in the current directory + list_files=xset@filepaths + #For each sample file, the following actions are done + for (file in list_files){ + #Check if the file is in the CDF format + if (!mzR:::netCDFIsFile(file)){ + + # If the column isn't exist, with add one filled with NA + if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA - write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + #Create a simple xcmsRaw object for each sample + xcmsRaw=xcmsRaw(file) + #Extract the polarity (a list of polarities) + polarity=xcmsRaw@polarity + #Verify if all the scans have the same polarity + uniq_list=unique(polarity) + if (length(uniq_list)>1){ + polarity="mixed" + } else { + polarity=as.character(uniq_list) + } + #Transforms the character to obtain only the sample name + filename=basename(file) + library(tools) + samplename=file_path_sans_ext(filename) - return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) + #Set the polarity attribute + sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity + + #Delete xcmsRaw object because it creates a bug for the fillpeaks step + rm(xcmsRaw) + } + + } + + write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) + + return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) } @@ -386,29 +381,28 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) + cat("Checking files filenames compatibilities with xmcs...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + files_abs <- file.path(getwd(), files) + exists <- file.exists(files_abs) + files[exists] <- files_abs[exists] + files[exists] <- sub("//","/",files[exists]) - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) - filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] + # WHAT IS ON THE FILESYSTEM + filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) + filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - - } + # COMPARISON + if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { + write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) + write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) + stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") + } } @@ -418,17 +412,17 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM checkXmlStructure <- function (directory) { - cat("Checking XML structure...\n") + cat("Checking XML structure...\n") - cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture=system(cmd,intern=TRUE) + cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") + capture=system(cmd,intern=TRUE) - if (length(capture)>0){ - #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) - write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) - write(capture, stderr()) - stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") - } + if (length(capture)>0){ + #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) + write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) + write(capture, stderr()) + stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") + } } @@ -438,23 +432,23 @@ ## #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM deleteXmlBadCharacters<- function (directory) { - cat("Checking Non ASCII characters in the XML...\n") + cat("Checking Non ASCII characters in the XML...\n") - processed=F - l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) - for (i in l){ - cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") - capture=suppressWarnings(system(cmd,intern=TRUE)) - if (length(capture)>0){ - cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) - print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) - c=system(cmd,intern=TRUE) - capture="" - processed=T + processed=F + l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) + for (i in l){ + cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") + capture=suppressWarnings(system(cmd,intern=TRUE)) + if (length(capture)>0){ + cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) + print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) + c=system(cmd,intern=TRUE) + capture="" + processed=T + } } - } - if (processed) cat("\n\n") - return(processed) + if (processed) cat("\n\n") + return(processed) } @@ -463,19 +457,99 @@ ## #@author Gildas Le Corguille lecorguille@sb-roscoff.fr getMd5sum <- function (directory) { - cat("Compute md5 checksum...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] + cat("Compute md5 checksum...\n") + # WHAT XCMS WILL FIND + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] + + library(tools) + + #cat("\n\n") + + return(as.matrix(md5sum(files))) +} + + +# This function get the raw file path from the arguments +getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { + if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] + if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] + if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] + + if (!is.null(listArguments[["singlefile_galaxyPath"]])) { + singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; + singlefile_sampleNames = listArguments[["singlefile_sampleName"]] + } + if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { + singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; + singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] + } + if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { + singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; + singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] + } + if (exists("singlefile_galaxyPaths")){ + singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) + singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) - library(tools) + singlefile=NULL + for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { + singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] + singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] + singlefile[[singlefile_sampleName]] = singlefile_galaxyPath + } + } + for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { + listArguments[[argument]]=NULL + } + return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments)) +} + + +# This function retrieve the raw file in the working directory +# - if zipfile: unzip the file with its directory tree +# - if singlefiles: set symlink with the good filename +retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { + if(!is.null(singlefile) && (length("singlefile")>0)) { + for (singlefile_sampleName in names(singlefile)) { + singlefile_galaxyPath = singlefile[[singlefile_sampleName]] + if(!file.exists(singlefile_galaxyPath)){ + error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") + print(error_message); stop(error_message) + } - #cat("\n\n") + file.symlink(singlefile_galaxyPath,singlefile_sampleName) + } + directory = "." + + } + if(!is.null(zipfile) && (zipfile!="")) { + if(!file.exists(zipfile)){ + error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") + print(error_message) + stop(error_message) + } + + #list all file in the zip file + #zip_files=unzip(zipfile,list=T)[,"Name"] - return(as.matrix(md5sum(files))) + #unzip + suppressWarnings(unzip(zipfile, unzip="unzip")) + + #get the directory name + filesInZip=unzip(zipfile, list=T); + directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); + directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] + directory = "." + if (length(directories) == 1) directory = directories + + cat("files_root_directory\t",directory,"\n") + + } + return (directory) }
--- a/macros.xml Mon Jan 30 08:53:30 2017 -0500 +++ b/macros.xml Fri Apr 07 07:36:24 2017 -0400 @@ -7,6 +7,11 @@ <requirement type="package" version="1.1_4">r-batch</requirement> </requirements> </xml> + <xml name="requirements_light"> + <requirements> + <requirement type="package" version="1.46.0">bioconductor-xcms</requirement> + </requirements> + </xml> <xml name="stdio"> <stdio> <exit_code range="1" level="fatal" /> @@ -20,41 +25,101 @@ <token name="@COMMAND_LOG_EXIT@"> ; return=\$?; - mv log.txt $log; - cat $log; + mv log.txt '$log'; + cat '$log'; sh -c "exit \$return" </token> <!-- zipfile load for planemo test --> - <token name="@COMMAND_ZIPFILE_LOAD@"> - #if $zipfile_load_conditional.zipfile_load_select == "yes": - #if $zipfile_load_conditional.zip_file: - zipfile $zipfile_load_conditional.zip_file + <token name="@COMMAND_FILE_LOAD@"> + #if $file_load_section.file_load_conditional.file_load_select == "yes": + #if $file_load_section.file_load_conditional.input[0].is_of_type("mzxml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzml") or $file_load_section.file_load_conditional.input[0].is_of_type("mzdata") or $file_load_section.file_load_conditional.input[0].is_of_type("netcdf"): + #set singlefile_galaxyPath = ','.join( [ str( $single_file ) for $single_file in $file_load_section.file_load_conditional.input ] ) + #set singlefile_sampleName = ','.join( [ str( $single_file.name ) for $single_file in $file_load_section.file_load_conditional.input ] ) + + singlefile_galaxyPath '$singlefile_galaxyPath' singlefile_sampleName '$singlefile_sampleName' + #else + zipfile '$file_load_section.file_load_conditional.input' #end if #end if </token> - <xml name="zipfile_load"> - <conditional name="zipfile_load_conditional"> - <param name="zipfile_load_select" type="select" label="Resubmit your zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." > - <option value="no" >no need</option> - <option value="yes">yes</option> - </param> - <when value="no"> + <xml name="input_file_load"> + <section name="file_load_section" title="Resubmit your raw dataset or your zip file"> + <conditional name="file_load_conditional"> + <param name="file_load_select" type="select" label="Resubmit your dataset or your zip file" help="Use only if you get a message which say that your original dataset or zip file have been deleted on the server." > + <option value="no" >no need</option> + <option value="yes" >yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="input" type="data" format="mzxml,mzml,mzdata,netcdf,no_unzip.zip,zip" multiple="true" label="File(s) from your history containing your chromatograms" help="Single file mode for the format: mzxml, mzml, mzdata and netcdf. Zip file mode for the format: no_unzip.zip, zip. See the help section below." /> + </when> + </conditional> + </section> + </xml> + + <xml name="test_file_load_zip"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="faahKO_reduce.zip" ftype="zip" /> + </conditional> + </section> + </xml> + + <xml name="test_file_load_single"> + <section name="file_load_section"> + <conditional name="file_load_conditional"> + <param name="file_load_select" value="yes" /> + <param name="input" value="wt15.CDF,ko16.CDF,ko15.CDF,wt16.CDF" ftype="netcdf" /> + </conditional> + </section> + </xml> + + <token name="@COMMAND_PEAKLIST@"> + #if $peaklist.peaklistBool + variableMetadataOutput '$variableMetadata' + dataMatrixOutput '$dataMatrix' + convertRTMinute $peaklist.convertRTMinute + numDigitsMZ $peaklist.numDigitsMZ + numDigitsRT $peaklist.numDigitsRT + intval $peaklist.intval + #end if + </token> + + <xml name="input_peaklist"> + <conditional name="peaklist"> + <param name="peaklistBool" type="boolean" label="Get a Peak List" /> + <when value="true"> + <param name="convertRTMinute" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Convert retention time (seconds) into minutes" help="Convert the columns rtmed, rtmin and rtmax into minutes"/> + <param name="numDigitsMZ" type="integer" value="4" label="Number of decimal places for mass values reported in ions' identifiers." help="A minimum of 4 decimal places is recommended. Useful to avoid duplicates within identifiers" /> + <param name="numDigitsRT" type="integer" value="0" label="Number of decimal places for retention time values reported in ions' identifiers." help="Useful to avoid duplicates within identifiers" /> + <param name="intval" type="select" label="Reported intensity values" help="[intval] See the help section below"> + <option value="into" selected="true">into</option> + <option value="maxo">maxo</option> + <option value="intb">intb</option> + </param> </when> - <when value="yes"> - <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" /> - </when> + <when value="false" /> </conditional> </xml> - + <xml name="output_peaklist" token_function=""> + <data name="variableMetadata" format="tabular" label="${image.name[:-6]}.@FUNCTION@.variableMetadata.tsv"> + <filter>(peaklist['peaklistBool'])</filter> + </data> + <data name="dataMatrix" format="tabular" label="${image.name[:-6]}.@FUNCTION@.dataMatrix.tsv" > + <filter>(peaklist['peaklistBool'])</filter> + </data> + </xml> <token name="@HELP_AUTHORS@"> .. class:: infomark -**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu +**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu .. class:: infomark
--- a/planemo_test.sh Mon Jan 30 08:53:30 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -# Example of planemo command to launch test - -# -- Use of installed package environments -# after having installing package on a local galaxy instance -source /w/galaxy/dev/shed_tools_tool_dependency_dir/R/3.1.2/iuc/package_r_3_1_2/1ca39eb16186/env.sh -source /w/galaxy/dev/shed_tools_tool_dependency_dir/bioconductor-xcms/1.44.0/lecorguille/package_bioconductor_xcms_1_44_0/0c38f7d43e08/env.sh -planemo test --install_galaxy - -#All 1 test(s) executed passed. -#abims_xcms_retcor[0]: passed - - -# -- Use of conda dependencies -planemo conda_init --conda_prefix /tmp/mc -planemo conda_install --conda_prefix /tmp/mc . -planemo test --install_galaxy --conda_prefix /tmp/mc --conda_dependency_resolution - -#All 1 test(s) executed passed. -#abims_xcms_retcor[0]: passed - - -# -- Use of shed_test -planemo shed_test --install_galaxy --galaxy_branch "dev" -t testtoolshed -#All 1 test(s) executed passed. -#testtoolshed.g2.bx.psu.edu/repos/lecorguille/xcms_retcor/abims_xcms_retcor/2.0.6[0]: passed
--- a/tool_dependencies.xml Mon Jan 30 08:53:30 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="R" version="3.1.2"> - <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="bioconductor-xcms" version="1.46.0"> - <repository changeset_revision="779207ed5674" name="package_bioconductor_xcms_1_46_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>
--- a/xcms.r Mon Jan 30 08:53:30 2017 -0500 +++ b/xcms.r Fri Apr 07 07:36:24 2017 -0400 @@ -15,8 +15,8 @@ #pkgs=c("xcms","batch") pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") for(pkg in pkgs) { - suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) - cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") + suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) + cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") } source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } cat("\n\n"); @@ -38,7 +38,7 @@ #image is an .RData file necessary to use xset variable given by previous tools if (!is.null(listArguments[["image"]])){ - load(listArguments[["image"]]); listArguments[["image"]]=NULL + load(listArguments[["image"]]); listArguments[["image"]]=NULL } #Import the different functions @@ -61,110 +61,67 @@ xsetRdataOutput = paste(thefunction,"RData",sep=".") if (!is.null(listArguments[["xsetRdataOutput"]])){ - xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL + xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL } #saving the specific parameters rplotspdf = "Rplots.pdf" if (!is.null(listArguments[["rplotspdf"]])){ - rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL + rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL } sampleMetadataOutput = "sampleMetadata.tsv" if (!is.null(listArguments[["sampleMetadataOutput"]])){ - sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL + sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL } variableMetadataOutput = "variableMetadata.tsv" if (!is.null(listArguments[["variableMetadataOutput"]])){ - variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL + variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL } dataMatrixOutput = "dataMatrix.tsv" if (!is.null(listArguments[["dataMatrixOutput"]])){ - dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL + dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL } if (!is.null(listArguments[["convertRTMinute"]])){ - convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL + convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL } if (!is.null(listArguments[["numDigitsMZ"]])){ - numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL + numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL } if (!is.null(listArguments[["numDigitsRT"]])){ numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL } if (!is.null(listArguments[["intval"]])){ - intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL + intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL } if (thefunction %in% c("xcmsSet","retcor")) { - ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL - bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL -} - -#necessary to unzip .zip file uploaded to Galaxy -#thanks to .zip file it's possible to upload many file as the same time conserving the tree hierarchy of directories - - -if (!is.null(listArguments[["zipfile"]])){ - zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL -} - -if (!is.null(listArguments[["library"]])){ - directory=listArguments[["library"]]; listArguments[["library"]]=NULL - if(!file.exists(directory)){ - error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") - print(error_message) - stop(error_message) - } + ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL + bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL } -# We unzip automatically the chromatograms from the zip files. + if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { - if(exists("zipfile") && (zipfile!="")) { - if(!file.exists(zipfile)){ - error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") - print(error_message) - stop(error_message) - } - - #list all file in the zip file - #zip_files=unzip(zipfile,list=T)[,"Name"] - - - #unzip - suppressWarnings(unzip(zipfile, unzip="unzip")) - - #get the directory name - filesInZip=unzip(zipfile, list=T); - directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); - directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] - directory = "." - if (length(directories) == 1) directory = directories - - cat("files_root_directory\t",directory,"\n") - - # + if (!exists("singlefile")) singlefile=NULL + if (!exists("zipfile")) zipfile=NULL + rawFilePath = getRawfilePathFromArguments(singlefile, zipfile, listArguments) + zipfile = rawFilePath$zipfile + singlefile = rawFilePath$singlefile + listArguments = rawFilePath$listArguments + directory = retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) md5sumList=list("origin"=getMd5sum(directory)) - - # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. - # Remove because can create issue with some clean files - #@TODO: fix me - #if (deleteXmlBadCharacters(directory)) { - # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) - #} - - } } #addition of the directory to the list of arguments in the first position if (thefunction == "xcmsSet") { - checkXmlStructure(directory) - checkFilesCompatibilityWithXcms(directory) - listArguments=append(directory, listArguments) + checkXmlStructure(directory) + checkFilesCompatibilityWithXcms(directory) + listArguments=append(directory, listArguments) } #addition of xset object to the list of arguments in the first position if (exists("xset")){ - listArguments=append(list(xset), listArguments) + listArguments=append(list(xset), listArguments) } cat("\n\n") @@ -172,8 +129,6 @@ - - # ----- MAIN PROCESSING INFO ----- cat("\tMAIN PROCESSING INFO\n") @@ -181,12 +136,12 @@ #Verification of a group step before doing the fillpeaks job. if (thefunction == "fillPeaks") { - res=try(is.null(groupnames(xset))) - if (class(res) == "try-error"){ - error<-geterrmessage() - write(error, stderr()) - stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") - } + res=try(is.null(groupnames(xset))) + if (class(res) == "try-error"){ + error<-geterrmessage() + write(error, stderr()) + stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") + } } @@ -194,7 +149,7 @@ #dev.new(file="Rplots.pdf", width=16, height=12) pdf(file=rplotspdf, width=16, height=12) if (thefunction == "group") { - par(mfrow=c(2,2)) + par(mfrow=c(2,2)) } #else if (thefunction == "retcor") { #try to change the legend display @@ -208,6 +163,11 @@ cat("\t\tCOMPUTE\n") xset = do.call(thefunction, listArguments) +# check if there are no peaks +if (nrow(peaks(xset)) == 0) { + stop("No peaks were detected. You should review your settings") +} + cat("\n\n") @@ -215,40 +175,38 @@ if (thefunction == "xcmsSet") { - #transform the files absolute pathways into relative pathways - xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + #transform the files absolute pathways into relative pathways + xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) + if(exists("zipfile") && !is.null(zipfile) && (zipfile!="")) { - if(exists("zipfile") && (zipfile!="")) { + #Modify the samples names (erase the path) + for(i in 1:length(sampnames(xset))){ - #Modify the samples names (erase the path) - for(i in 1:length(sampnames(xset))){ + sample_name=unlist(strsplit(sampnames(xset)[i], "/")) + sample_name=sample_name[length(sample_name)] + sample_name= unlist(strsplit(sample_name,"[.]"))[1] + sampnames(xset)[i]=sample_name - sample_name=unlist(strsplit(sampnames(xset)[i], "/")) - sample_name=sample_name[length(sample_name)] - sample_name= unlist(strsplit(sample_name,"[.]"))[1] - sampnames(xset)[i]=sample_name + } } - } - } # -- TIC -- if (thefunction == "xcmsSet") { - cat("\t\tGET TIC GRAPH\n") - sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") - getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") + getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) } else if (thefunction == "retcor") { - cat("\t\tGET TIC GRAPH\n") - getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") - getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) + cat("\t\tGET TIC GRAPH\n") + getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") + getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) } -if (thefunction == "fillPeaks") { - cat("\t\tGET THE PEAK LIST\n") - getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) +if ((thefunction == "group" || thefunction == "fillPeaks") && exists("intval")) { + getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) } @@ -262,7 +220,7 @@ #saving R data in .Rdata file to save the variables used in the present tool -objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") +objects2save = c("xset","zipfile","singlefile","listOFlistArguments","md5sumList","sampleNamesList") save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) cat("\n\n")