Mercurial > repos > lecorguille > msnbase_readmsdata
changeset 13:226fb89cacc4 draft
"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f1caf2a3bf23cf319a75dd12c86402555dd02617"
author | workflow4metabolomics |
---|---|
date | Wed, 12 Feb 2020 08:27:38 -0500 |
parents | f6b2750ea32e |
children | 11ab2081bd4a |
files | lib.r msnbase_readmsdata.r msnbase_readmsdata.xml |
diffstat | 3 files changed, 49 insertions(+), 152 deletions(-) [+] |
line wrap: on
line diff
--- a/lib.r Tue Jan 07 04:30:02 2020 -0500 +++ b/lib.r Wed Feb 12 08:27:38 2020 -0500 @@ -42,16 +42,16 @@ chromBPI <- NULL chromTIC_adjusted <- NULL chromBPI_adjusted <- NULL + md5sumList <- NULL for(image in args$images) { load(image) # Handle infiles if (!exists("singlefile")) singlefile <- NULL if (!exists("zipfile")) zipfile <- NULL - rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) + rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) zipfile <- rawFilePath$zipfile singlefile <- rawFilePath$singlefile - retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) if (exists("raw_data")) xdata <- raw_data if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") @@ -149,7 +149,7 @@ par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) - group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] + group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") names(group_colors) <- unique(xdata$sample_group) xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) @@ -170,7 +170,7 @@ pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) # Color by group - group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] + group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") if (length(group_colors) > 1) { names(group_colors) <- unique(xdata$sample_group) plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) @@ -239,15 +239,15 @@ pdf(pdfname, width=16, height=10) # Color by group - group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] + group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1") if (length(group_colors) > 1) { names(group_colors) <- unique(xdata$sample_group) - plot(chrom, col = group_colors[chrom$sample_group], main=main) + plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none") legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) } # Color by sample - plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) + plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none") legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) dev.off() @@ -317,142 +317,43 @@ } -# This function check if xcms will found all the files -#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM -checkFilesCompatibilityWithXcms <- function(directory) { - cat("Checking files filenames compatibilities with xmcs...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) - files <- c(directory[!info$isdir], listed) - files_abs <- file.path(getwd(), files) - exists <- file.exists(files_abs) - files[exists] <- files_abs[exists] - files[exists] <- sub("//","/",files[exists]) - - # WHAT IS ON THE FILESYSTEM - filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) - filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] - - # COMPARISON - if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { - write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) - write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) - stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") - } -} - - -#This function list the compatible files within the directory as xcms did -#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM -getMSFiles <- function (directory) { - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] - return(files) -} - -# This function check if XML contains special caracters. It also checks integrity and completness. -#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM -checkXmlStructure <- function (directory) { - cat("Checking XML structure...\n") - - cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") - capture <- system(cmd, intern=TRUE) - - if (length(capture)>0){ - #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) - write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) - write(capture, stderr()) - stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") - } - -} - - -# This function check if XML contain special characters -#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM -deleteXmlBadCharacters<- function (directory) { - cat("Checking Non ASCII characters in the XML...\n") - - processed <- F - l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) - for (i in l){ - cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") - capture <- suppressWarnings(system(cmd, intern=TRUE)) - if (length(capture)>0){ - cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) - print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) - c <- system(cmd, intern=TRUE) - capture <- "" - processed <- T - } - } - if (processed) cat("\n\n") - return(processed) -} - - # This function will compute MD5 checksum to check the data integrity #@author Gildas Le Corguille lecorguille@sb-roscoff.fr -getMd5sum <- function (directory) { +getMd5sum <- function (files) { cat("Compute md5 checksum...\n") - # WHAT XCMS WILL FIND - filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") - filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") - info <- file.info(directory) - listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) - files <- c(directory[!info$isdir], listed) - exists <- file.exists(files) - files <- files[exists] - library(tools) - - #cat("\n\n") - return(as.matrix(md5sum(files))) } - -# This function get the raw file path from the arguments -#@author Gildas Le Corguille lecorguille@sb-roscoff.fr -getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { - if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") - - if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] - - if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { - singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] - singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] - } - if (exists("singlefile_galaxyPaths")){ - singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) - singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) - - singlefile <- NULL - for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { - singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] - singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] - # In case, an url is used to import data within Galaxy - singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) - singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath - } - } - return(list(zipfile=zipfile, singlefile=singlefile)) -} - # This function retrieve the raw file in the working directory # - if zipfile: unzip the file with its directory tree # - if singlefiles: set symlink with the good filename #@author Gildas Le Corguille lecorguille@sb-roscoff.fr -retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { +retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") { + + if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") + + # single - if the file are passed in the command arguments -> refresh singlefile + if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { + singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|")) + singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|")) + + singlefile <- NULL + for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { + singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] + singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] + # In case, an url is used to import data within Galaxy + singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) + singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath + } + } + # zipfile - if the file are passed in the command arguments -> refresh zipfile + if (!is.null(args[[paste0("zipfile",prefix)]])) + zipfile <- args[[paste0("zipfile",prefix)]] + + # single if(!is.null(singlefile) && (length("singlefile")>0)) { + files <- vector() for (singlefile_sampleName in names(singlefile)) { singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] if(!file.exists(singlefile_galaxyPath)){ @@ -462,22 +363,16 @@ if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) file.copy(singlefile_galaxyPath, singlefile_sampleName) - + files <- c(files, singlefile_sampleName) } - directory <- "." - } + # zipfile if(!is.null(zipfile) && (zipfile != "")) { if(!file.exists(zipfile)){ error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") print(error_message) stop(error_message) } - - #list all file in the zip file - #zip_files <- unzip(zipfile,list=T)[,"Name"] - - #unzip suppressWarnings(unzip(zipfile, unzip="unzip")) #get the directory name @@ -489,8 +384,17 @@ cat("files_root_directory\t",directory,"\n") + filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") + filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") + info <- file.info(directory) + listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) + files <- c(directory[!info$isdir], listed) + exists <- file.exists(files) + files <- files[exists] + } - return (directory) + return(list(zipfile=zipfile, singlefile=singlefile, files=files)) + }
--- a/msnbase_readmsdata.r Tue Jan 07 04:30:02 2020 -0500 +++ b/msnbase_readmsdata.r Wed Feb 12 08:27:38 2020 -0500 @@ -38,16 +38,12 @@ # Handle infiles if (!exists("singlefile")) singlefile <- NULL if (!exists("zipfile")) zipfile <- NULL -rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) +rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args) zipfile <- rawFilePath$zipfile singlefile <- rawFilePath$singlefile -directory <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) +files <- rawFilePath$files -# Check some character issues -md5sumList <- list("origin" = getMd5sum(directory)) -checkXmlStructure(directory) -checkFilesCompatibilityWithXcms(directory) - +md5sumList <- list("origin" = getMd5sum(files)) cat("\n\n") @@ -58,9 +54,6 @@ cat("\t\tCOMPUTE\n") -## Get the full path to the files -files <- getMSFiles(directory) - cat("\t\t\tCreate a phenodata data.frame\n") s_groups <- sapply(files, function(x) tail(unlist(strsplit(dirname(x),"/")), n=1)) s_name <- tools::file_path_sans_ext(basename(files))
--- a/msnbase_readmsdata.xml Tue Jan 07 04:30:02 2020 -0500 +++ b/msnbase_readmsdata.xml Wed Feb 12 08:27:38 2020 -0500 @@ -58,11 +58,11 @@ <test> <param name="input" value="ko15.CDF" ftype="netcdf" /> <assert_stdout> - <has_text text="rowNames: ./ko15.CDF" /> + <has_text text="rowNames: ko15.CDF" /> <has_text text="ko15.CDF" /> <has_text text="featureNames: F1.S0001 F1.S0002 ... F1.S1278 (1278 total)" /> <has_text text="fvarLabels: fileIdx spIdx ... spectrum (28 total)" /> - <has_text text="./ko15.CDF ko15 ." /> + <has_text text="ko15.CDF ko15 ." /> </assert_stdout> </test> <!-- DISABLE FOR TRAVIS