diff purityX.R @ 6:2f71b3495221 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:27:21 +0000
parents f52287a06c02
children efd14b326007
line wrap: on
line diff
--- a/purityX.R	Fri Nov 13 10:05:38 2020 +0000
+++ b/purityX.R	Thu Mar 04 12:27:21 2021 +0000
@@ -3,67 +3,66 @@
 print(sessionInfo())
 
 option_list <- list(
-  make_option(c("--xset_path"), type="character"),
-  make_option(c("-o", "--out_dir"), type="character"),
-  make_option(c("--mzML_path"), type="character"),
-  make_option("--minOffset", default=0.5),
-  make_option("--maxOffset", default=0.5),
-  make_option("--ilim", default=0.05),
-  make_option("--iwNorm", default="none", type="character"),
-  make_option("--exclude_isotopes", action="store_true"),
-  make_option("--isotope_matrix", type="character"),
-  make_option("--purityType", default="purityFWHMmedian"),
-  make_option("--singleFile", default=0),
-  make_option("--cores", default=4),
-  make_option("--xgroups", type="character"),
-  make_option("--rdata_name", default='xset'),
-  make_option("--camera_xcms", default='xset'),
-  make_option("--files", type="character"),
-  make_option("--galaxy_files", type="character"),
-  make_option("--choose_class", type="character"),
-  make_option("--ignore_files", type="character"),
-  make_option("--rtraw_columns",  action="store_true")
+  make_option(c("--xset_path"), type = "character"),
+  make_option(c("-o", "--out_dir"), type = "character"),
+  make_option(c("--mzML_path"), type = "character"),
+  make_option("--minOffset", default = 0.5),
+  make_option("--maxOffset", default = 0.5),
+  make_option("--ilim", default = 0.05),
+  make_option("--iwNorm", default = "none", type = "character"),
+  make_option("--exclude_isotopes", action = "store_true"),
+  make_option("--isotope_matrix", type = "character"),
+  make_option("--purityType", default = "purityFWHMmedian"),
+  make_option("--singleFile", default = 0),
+  make_option("--cores", default = 4),
+  make_option("--xgroups", type = "character"),
+  make_option("--rdata_name", default = "xset"),
+  make_option("--camera_xcms", default = "xset"),
+  make_option("--files", type = "character"),
+  make_option("--galaxy_files", type = "character"),
+  make_option("--choose_class", type = "character"),
+  make_option("--ignore_files", type = "character"),
+  make_option("--rtraw_columns",  action = "store_true")
 )
 
 
-opt<- parse_args(OptionParser(option_list=option_list))
+opt <- parse_args(OptionParser(option_list = option_list))
 print(opt)
 
 
-if (!is.null(opt$xgroups)){
-    xgroups = as.numeric(strsplit(opt$xgroups, ',')[[1]])
+if (!is.null(opt$xgroups)) {
+    xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
 }else{
-    xgroups = NULL
+    xgroups <- NULL
 }
 
 
-
 print(xgroups)
 
-if (!is.null(opt$remove_nas)){
-  df <- df[!is.na(df$mz),]
+if (!is.null(opt$remove_nas)) {
+  df <- df[!is.na(df$mz), ]
 }
 
-if (is.null(opt$isotope_matrix)){
+if (is.null(opt$isotope_matrix)) {
     im <- NULL
 }else{
     im <- read.table(opt$isotope_matrix,
-                     header = TRUE, sep='\t', stringsAsFactors = FALSE)
+                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)
 }
 
-if (is.null(opt$exclude_isotopes)){
+if (is.null(opt$exclude_isotopes)) {
     isotopes <- FALSE
 }else{
     isotopes <- TRUE
 }
 
-if (is.null(opt$rtraw_columns)){
+if (is.null(opt$rtraw_columns)) {
     rtraw_columns <- FALSE
 }else{
     rtraw_columns <- TRUE
 }
 
-loadRData <- function(rdata_path, xset_name){
+loadRData <- function(rdata_path, xset_name) {
 #loads an RData file, and returns the named xset object if it is there
     load(rdata_path)
     return(get(ls()[ls() == xset_name]))
@@ -71,7 +70,7 @@
 
 target_obj <- loadRData(opt$xset_path, opt$rdata_name)
 
-if (opt$camera_xcms=='camera'){
+if (opt$camera_xcms == "camera") {
     xset <- target_obj@xcmsSet
 }else{
     xset <- target_obj
@@ -79,36 +78,35 @@
 
 print(xset)
 
-minOffset = as.numeric(opt$minOffset)
-maxOffset = as.numeric(opt$maxOffset)
-
+minOffset <- as.numeric(opt$minOffset)
+maxOffset <- as.numeric(opt$maxOffset)
 
-if (opt$iwNorm=='none'){
-    iwNorm = FALSE
-    iwNormFun = NULL
-}else if (opt$iwNorm=='gauss'){
-    iwNorm = TRUE
-    iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset)
-}else if (opt$iwNorm=='rcosine'){
-    iwNorm = TRUE
-    iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset)
-}else if (opt$iwNorm=='QE5'){
-    iwNorm = TRUE
-    iwNormFun = msPurity::iwNormQE.5()
+if (opt$iwNorm == "none") {
+    iwNorm <- FALSE
+    iwNormFun <- NULL
+}else if (opt$iwNorm == "gauss") {
+    iwNorm <- TRUE
+    iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
+}else if (opt$iwNorm == "rcosine") {
+    iwNorm <- TRUE
+    iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
+}else if (opt$iwNorm == "QE5") {
+    iwNorm <- TRUE
+    iwNormFun <- msPurity::iwNormQE.5()
 }
 
 print(xset@filepaths)
 
-if (!is.null(opt$files)){
-  updated_filepaths <- trimws(strsplit(opt$files, ',')[[1]])
+if (!is.null(opt$files)) {
+  updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]])
   updated_filepaths <- updated_filepaths[updated_filepaths != ""]
   print(updated_filepaths)
-  updated_filenames = basename(updated_filepaths)
-  original_filenames = basename(xset@filepaths)
-  update_idx = match(updated_filenames, original_filenames)
+  updated_filenames <- basename(updated_filepaths)
+  original_filenames <- basename(xset@filepaths)
+  update_idx <- match(updated_filenames, original_filenames)
 
-    if (!is.null(opt$galaxy_files)){
-        galaxy_files <- trimws(strsplit(opt$galaxy_files, ',')[[1]])
+    if (!is.null(opt$galaxy_files)) {
+        galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
         galaxy_files <- galaxy_files[galaxy_files != ""]
         xset@filepaths <- galaxy_files[update_idx]
     }else{
@@ -116,27 +114,26 @@
     }
 }
 
-if (!is.null(opt$choose_class)){
-  classes <- trimws(strsplit(opt$choose_class, ',')[[1]])
-
+if (!is.null(opt$choose_class)) {
+  classes <- trimws(strsplit(opt$choose_class, ",")[[1]])
 
   ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)
 
-  print('choose class')
+  print("choose class")
   print(ignore_files_class)
 }else{
   ignore_files_class <- NA
 }
 
-if (!is.null(opt$ignore_files)){
-  ignore_files_string <- trimws(strsplit(opt$ignore_files, ',')[[1]])
+if (!is.null(opt$ignore_files)) {
+  ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]])
   filenames <- rownames(xset@phenoData)
   ignore_files <- which(filenames %in% ignore_files_string)
 
   ignore_files <- unique(c(ignore_files, ignore_files_class))
   ignore_files <- ignore_files[ignore_files != ""]
 }else{
-  if (anyNA(ignore_files_class)){
+  if (anyNA(ignore_files_class)) {
     ignore_files <- NULL
   }else{
     ignore_files <- ignore_files_class
@@ -144,41 +141,40 @@
 
 }
 
-print('ignore_files')
+print("ignore_files")
 print(ignore_files)
 
 
-ppLCMS <- msPurity::purityX(xset=xset,
-                                offsets=c(minOffset, maxOffset),
-                                cores=opt$cores,
-                                xgroups=xgroups,
-                                purityType=opt$purityType,
-                                ilim = opt$ilim,
-                                isotopes = isotopes,
-                                im = im,
-                                iwNorm = iwNorm,
-                                iwNormFun = iwNormFun,
-                                singleFile = opt$singleFile,
-                                fileignore = ignore_files,
-                                rtrawColumns=rtraw_columns)
-
+ppLCMS <- msPurity::purityX(xset = xset,
+                            offsets = c(minOffset, maxOffset),
+                            cores = opt$cores,
+                            xgroups = xgroups,
+                            purityType = opt$purityType,
+                            ilim = opt$ilim,
+                            isotopes = isotopes,
+                            im = im,
+                            iwNorm = iwNorm,
+                            iwNormFun = iwNormFun,
+                            singleFile = opt$singleFile,
+                            fileignore = ignore_files,
+                            rtrawColumns = rtraw_columns)
 
 dfp <- ppLCMS@predictions
 
 # to make compatable with deconrank
-colnames(dfp)[colnames(dfp)=='grpid'] = 'peakID'
-colnames(dfp)[colnames(dfp)=='median'] = 'medianPurity'
-colnames(dfp)[colnames(dfp)=='mean'] = 'meanPurity'
-colnames(dfp)[colnames(dfp)=='sd'] = 'sdPurity'
-colnames(dfp)[colnames(dfp)=='stde'] = 'sdePurity'
-colnames(dfp)[colnames(dfp)=='RSD'] = 'cvPurity'
-colnames(dfp)[colnames(dfp)=='pknm'] = 'pknmPurity'
-if(sum(is.na(dfp$medianPurity))>0){
-    dfp[is.na(dfp$medianPurity),]$medianPurity = 0
+colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID"
+colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity"
+colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity"
+colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity"
+colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity"
+colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity"
+colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity"
+
+if (sum(is.na(dfp$medianPurity)) > 0) {
+  dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0
 }
 
+print(head(dfp))
+write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t")
 
-print(head(dfp))
-write.table(dfp, file.path(opt$out_dir, 'purityX_output.tsv'), row.names=FALSE, sep='\t')
-
-save.image(file.path(opt$out_dir, 'purityX_output.RData'))
+save.image(file.path(opt$out_dir, "purityX_output.RData"))