comparison lib.r @ 25:acc66e5c23d7 draft

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f1caf2a3bf23cf319a75dd12c86402555dd02617"
author workflow4metabolomics
date Wed, 12 Feb 2020 08:30:41 -0500
parents acc0a4a366c1
children db6549f27ad1
comparison
equal deleted inserted replaced
24:acc0a4a366c1 25:acc66e5c23d7
40 mergeXData <- function(args) { 40 mergeXData <- function(args) {
41 chromTIC <- NULL 41 chromTIC <- NULL
42 chromBPI <- NULL 42 chromBPI <- NULL
43 chromTIC_adjusted <- NULL 43 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL 44 chromBPI_adjusted <- NULL
45 md5sumList <- NULL
45 for(image in args$images) { 46 for(image in args$images) {
46 47
47 load(image) 48 load(image)
48 # Handle infiles 49 # Handle infiles
49 if (!exists("singlefile")) singlefile <- NULL 50 if (!exists("singlefile")) singlefile <- NULL
50 if (!exists("zipfile")) zipfile <- NULL 51 if (!exists("zipfile")) zipfile <- NULL
51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) 52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args)
52 zipfile <- rawFilePath$zipfile 53 zipfile <- rawFilePath$zipfile
53 singlefile <- rawFilePath$singlefile 54 singlefile <- rawFilePath$singlefile
54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
55 55
56 if (exists("raw_data")) xdata <- raw_data 56 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") 57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58 58
59 cat(sampleNamesList$sampleNamesOrigin,"\n") 59 cat(sampleNamesList$sampleNamesOrigin,"\n")
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) { 147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) 148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
149 149
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) 150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151 151
152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] 152 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
153 names(group_colors) <- unique(xdata$sample_group) 153 names(group_colors) <- unique(xdata$sample_group)
154 154
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) 155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
156 for (i in 1:nrow(featureDefinitions(xdata))) { 156 for (i in 1:nrow(featureDefinitions(xdata))) {
157 mzmin = featureDefinitions(xdata)[i,]$mzmin 157 mzmin = featureDefinitions(xdata)[i,]$mzmin
168 getPlotAdjustedRtime <- function(xdata) { 168 getPlotAdjustedRtime <- function(xdata) {
169 169
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) 170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
171 171
172 # Color by group 172 # Color by group
173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] 173 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
174 if (length(group_colors) > 1) { 174 if (length(group_colors) > 1) {
175 names(group_colors) <- unique(xdata$sample_group) 175 names(group_colors) <- unique(xdata$sample_group)
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
178 } 178 }
237 main <- paste(type,":",adjusted,"data") 237 main <- paste(type,":",adjusted,"data")
238 238
239 pdf(pdfname, width=16, height=10) 239 pdf(pdfname, width=16, height=10)
240 240
241 # Color by group 241 # Color by group
242 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] 242 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
243 if (length(group_colors) > 1) { 243 if (length(group_colors) > 1) {
244 names(group_colors) <- unique(xdata$sample_group) 244 names(group_colors) <- unique(xdata$sample_group)
245 plot(chrom, col = group_colors[chrom$sample_group], main=main) 245 plot(chrom, col = group_colors[as.factor(chrom$sample_group)], main=main, peakType = "none")
246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) 246 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
247 } 247 }
248 248
249 # Color by sample 249 # Color by sample
250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) 250 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none")
251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) 251 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
252 252
253 dev.off() 253 dev.off()
254 } 254 }
255 255
315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) 315 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
316 316
317 } 317 }
318 318
319 319
320 # This function check if xcms will found all the files
321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
322 checkFilesCompatibilityWithXcms <- function(directory) {
323 cat("Checking files filenames compatibilities with xmcs...\n")
324 # WHAT XCMS WILL FIND
325 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
326 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
327 info <- file.info(directory)
328 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
329 files <- c(directory[!info$isdir], listed)
330 files_abs <- file.path(getwd(), files)
331 exists <- file.exists(files_abs)
332 files[exists] <- files_abs[exists]
333 files[exists] <- sub("//","/",files[exists])
334
335 # WHAT IS ON THE FILESYSTEM
336 filesystem_filepaths <- system(paste0("find \"",getwd(),"/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
337 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
338
339 # COMPARISON
340 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
341 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
342 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
343 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
344 }
345 }
346
347
348 #This function list the compatible files within the directory as xcms did
349 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
350 getMSFiles <- function (directory) {
351 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
352 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
353 info <- file.info(directory)
354 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
355 files <- c(directory[!info$isdir], listed)
356 exists <- file.exists(files)
357 files <- files[exists]
358 return(files)
359 }
360
361 # This function check if XML contains special caracters. It also checks integrity and completness.
362 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
363 checkXmlStructure <- function (directory) {
364 cat("Checking XML structure...\n")
365
366 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
367 capture <- system(cmd, intern=TRUE)
368
369 if (length(capture)>0){
370 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
371 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
372 write(capture, stderr())
373 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
374 }
375
376 }
377
378
379 # This function check if XML contain special characters
380 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
381 deleteXmlBadCharacters<- function (directory) {
382 cat("Checking Non ASCII characters in the XML...\n")
383
384 processed <- F
385 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
386 for (i in l){
387 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
388 capture <- suppressWarnings(system(cmd, intern=TRUE))
389 if (length(capture)>0){
390 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
391 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
392 c <- system(cmd, intern=TRUE)
393 capture <- ""
394 processed <- T
395 }
396 }
397 if (processed) cat("\n\n")
398 return(processed)
399 }
400
401
402 # This function will compute MD5 checksum to check the data integrity 320 # This function will compute MD5 checksum to check the data integrity
403 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 321 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
404 getMd5sum <- function (directory) { 322 getMd5sum <- function (files) {
405 cat("Compute md5 checksum...\n") 323 cat("Compute md5 checksum...\n")
406 # WHAT XCMS WILL FIND
407 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
408 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
409 info <- file.info(directory)
410 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
411 files <- c(directory[!info$isdir], listed)
412 exists <- file.exists(files)
413 files <- files[exists]
414
415 library(tools) 324 library(tools)
416
417 #cat("\n\n")
418
419 return(as.matrix(md5sum(files))) 325 return(as.matrix(md5sum(files)))
420 }
421
422
423 # This function get the raw file path from the arguments
424 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
425 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") {
426 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
427
428 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]]
429
430 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
431 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]]
432 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]]
433 }
434 if (exists("singlefile_galaxyPaths")){
435 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
436 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
437
438 singlefile <- NULL
439 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
440 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
441 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
442 # In case, an url is used to import data within Galaxy
443 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
444 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
445 }
446 }
447 return(list(zipfile=zipfile, singlefile=singlefile))
448 } 326 }
449 327
450 # This function retrieve the raw file in the working directory 328 # This function retrieve the raw file in the working directory
451 # - if zipfile: unzip the file with its directory tree 329 # - if zipfile: unzip the file with its directory tree
452 # - if singlefiles: set symlink with the good filename 330 # - if singlefiles: set symlink with the good filename
453 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 331 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
454 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { 332 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") {
333
334 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
335
336 # single - if the file are passed in the command arguments -> refresh singlefile
337 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
338 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|"))
339 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|"))
340
341 singlefile <- NULL
342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
345 # In case, an url is used to import data within Galaxy
346 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
347 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
348 }
349 }
350 # zipfile - if the file are passed in the command arguments -> refresh zipfile
351 if (!is.null(args[[paste0("zipfile",prefix)]]))
352 zipfile <- args[[paste0("zipfile",prefix)]]
353
354 # single
455 if(!is.null(singlefile) && (length("singlefile")>0)) { 355 if(!is.null(singlefile) && (length("singlefile")>0)) {
356 files <- vector()
456 for (singlefile_sampleName in names(singlefile)) { 357 for (singlefile_sampleName in names(singlefile)) {
457 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] 358 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
458 if(!file.exists(singlefile_galaxyPath)){ 359 if(!file.exists(singlefile_galaxyPath)){
459 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 360 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
460 print(error_message); stop(error_message) 361 print(error_message); stop(error_message)
461 } 362 }
462 363
463 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) 364 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
464 file.copy(singlefile_galaxyPath, singlefile_sampleName) 365 file.copy(singlefile_galaxyPath, singlefile_sampleName)
465 366 files <- c(files, singlefile_sampleName)
466 } 367 }
467 directory <- "." 368 }
468 369 # zipfile
469 }
470 if(!is.null(zipfile) && (zipfile != "")) { 370 if(!is.null(zipfile) && (zipfile != "")) {
471 if(!file.exists(zipfile)){ 371 if(!file.exists(zipfile)){
472 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 372 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
473 print(error_message) 373 print(error_message)
474 stop(error_message) 374 stop(error_message)
475 } 375 }
476
477 #list all file in the zip file
478 #zip_files <- unzip(zipfile,list=T)[,"Name"]
479
480 #unzip
481 suppressWarnings(unzip(zipfile, unzip="unzip")) 376 suppressWarnings(unzip(zipfile, unzip="unzip"))
482 377
483 #get the directory name 378 #get the directory name
484 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) 379 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
485 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) 380 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
487 directory <- "." 382 directory <- "."
488 if (length(directories) == 1) directory <- directories 383 if (length(directories) == 1) directory <- directories
489 384
490 cat("files_root_directory\t",directory,"\n") 385 cat("files_root_directory\t",directory,"\n")
491 386
492 } 387 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
493 return (directory) 388 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
389 info <- file.info(directory)
390 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
391 files <- c(directory[!info$isdir], listed)
392 exists <- file.exists(files)
393 files <- files[exists]
394
395 }
396 return(list(zipfile=zipfile, singlefile=singlefile, files=files))
397
494 } 398 }
495 399
496 400
497 # This function retrieve a xset like object 401 # This function retrieve a xset like object
498 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 402 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr