diff preprocess.R @ 0:4d539083cf7f draft

planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author sblanck
date Tue, 12 May 2020 10:40:36 -0400
parents
children 3fcbb8030fcc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocess.R	Tue May 12 10:40:36 2020 -0400
@@ -0,0 +1,158 @@
+#!/usr/bin/env Rscript
+# setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("optparse")
+
+##### Read options
+option_list=list(
+		make_option("--summary",type="character",default=NULL, dest="summary"),
+		make_option("--dataSetName",type="character",default=NULL, dest="dataSetName"),
+		make_option("--new_file_path",type="character",default=NULL, dest="new_file_path"),
+		make_option("--inputcdffull_name",type="character",default=NULL, dest="inputcdffull_name"),
+		make_option("--inputufl_name",type="character",default=NULL, dest="inputufl_name"),
+		make_option("--inputugp_name",type="character",default=NULL, dest="inputugp_name"),
+		make_option("--inputacs_name",type="character",default=NULL, dest="inputacs_name"),
+		make_option("--inputcdffull",type="character",default=NULL, dest="inputcdffull"),
+		make_option("--inputufl",type="character",default=NULL, dest="inputufl"),
+		make_option("--inputugp",type="character",default=NULL, dest="inputugp"),
+		make_option("--inputacs",type="character",default=NULL, dest="inputacs"),
+		make_option("--tumorcsv",type="character",default=NULL, dest="tumorcsv"),
+		make_option("--settingsType",type="character",default=NULL, dest="settingsType"),
+		make_option("--outputgraph",type="character",default=NULL, dest="outputgraph"),
+		make_option("--zipfigures",type="character",default=NULL, dest="zipfigures"),
+		make_option("--outputlog",type="character",default=NULL, dest="outputlog"),
+		make_option("--log",type="character",default=NULL, dest="log"),
+		make_option("--user_id",type="character",default=NULL, dest="user_id"),
+		make_option("--input",type="character",default=NULL, dest="input")
+);
+
+opt_parser = OptionParser(option_list=option_list);
+opt = parse_args(opt_parser);
+
+if(is.null(opt$input)){
+	print_help(opt_parser)
+	stop("input required.", call.=FALSE)
+}
+
+#loading libraries
+
+summary=opt$summary
+dataSetName=opt$dataSetName
+newFilePath=opt$new_file_path
+inputCDFName=opt$inputcdffull_name
+inputUFLName=opt$inputufl_name
+inputUGPName=opt$inputugp_name
+inputACSName=opt$inputacs_name
+inputCDF=opt$inputcdffull
+inputUFL=opt$inputufl
+inputUGP=opt$inputugp
+inputACS=opt$inputacs
+tumorcsv=opt$tumorcsv
+settingsType=opt$settingsType
+outputGraph=opt$outputgraph
+zipfigures=opt$zipfigures
+outputlog=opt$outputlog
+log=opt$log
+userId=opt$user_id
+
+destinationPath=file.path(newFilePath, userId, dataSetName)
+mpagenomicsDir = file.path(newFilePath,"mpagenomics",userId)
+dataDir = file.path(newFilePath, userId)
+chipDir = file.path(newFilePath,"mpagenomics",userId,"annotationData","chipTypes")
+createArchitecture=TRUE
+
+if (dir.exists(chipDir))
+	system(paste0("rm -r ", chipDir))
+
+if (!dir.exists(mpagenomicsDir))
+	dir.create(mpagenomicsDir, showWarnings = TRUE, recursive = TRUE)
+
+if (!dir.exists(dataDir))
+	dir.create(dataDir, showWarnings = TRUE, recursive = TRUE)
+
+listInput <- trimws( unlist( strsplit(trimws(opt$input), ",") ) )
+if(length(listInput)<2){
+	stop("To few .CEL files selected : At least 2 .CEL files are required", call.=FALSE)
+}
+
+
+celList=vector()
+celFileNameList=vector()
+
+for (i in 1:length(listInput))
+{
+	inputFileInfo <- unlist( strsplit( listInput[i], ';' ) )
+	celList=c(celList,inputFileInfo[1])
+	celFileNameList=c(celFileNameList,inputFileInfo[2])
+}
+
+
+for (i in 1:length(celFileNameList))
+	{
+		source = celList[i]
+		destination=file.path(dataDir,celFileNameList[i])
+		file.copy(source, destination)
+}
+split=unlist(strsplit(inputCDFName,",",fixed=TRUE))
+tag=NULL
+if (length(split) != 0) {
+	chipType=split[1]
+	tagExt=split[2]
+	tag=unlist(strsplit(tagExt,".",fixed=TRUE))[1]
+	} else {
+	chipType=split[1]
+}
+
+if(!file.exists(file.path(dataDir,inputCDFName)))
+	file.symlink(inputCDF,file.path(dataDir,inputCDFName))
+if(!file.exists(file.path(dataDir,inputACSName)))
+	file.symlink(inputACS,file.path(dataDir,inputACSName))
+if(!file.exists(file.path(dataDir,inputUFLName)))
+	file.symlink(inputUFL,file.path(dataDir,inputUFLName))
+if(!file.exists(file.path(dataDir,inputUGPName)))
+	file.symlink(inputUGP,file.path(dataDir,inputUGPName))
+
+fig_dir = file.path("mpagenomics", userId, "figures", dataSetName, "signal")
+abs_fig_dir = file.path(newFilePath, fig_dir)
+
+chip=chipType
+dataset=dataSetName
+workdir=mpagenomicsDir
+celPath=dataDir
+chipPath=dataDir		
+tumor=tumorcsv
+outputgraph=type.convert(outputGraph)
+
+
+library(MPAgenomics)
+setwd(workdir)
+
+if (outputlog){
+	sinklog <- file(log, open = "wt")
+	sink(sinklog ,type = "output")
+	sink(sinklog, type = "message")
+} 
+
+if (settingsType=="standard")
+{
+	signalPreProcess(dataSetName=dataset, chipType=chip, dataSetPath=celPath,chipFilesPath=chipPath, path=workdir,createArchitecture=createArchitecture, savePlot=outputgraph, tags=tag)
+} else {
+	signalPreProcess(dataSetName=dataset, chipType=chip, dataSetPath=celPath,chipFilesPath=chipPath, normalTumorArray=tumor, path=workdir,createArchitecture=createArchitecture, savePlot=outputgraph, tags=tag)
+}
+setwd(abs_fig_dir)
+files2zip <- dir(abs_fig_dir)
+zip(zipfile = "figures.zip", files = files2zip)
+file.rename("figures.zip",zipfigures)
+summarydf=data.frame(celFileNameList,rep(dataSetName,length(celFileNameList)),rep(chipType,length(celFileNameList)))
+write.table(summarydf,file=summary,quote=FALSE,row.names=FALSE,col.names=FALSE,sep="\t")
+
+if (outputlog){
+	sink(type="output")
+	sink(type="message")
+	close(sinklog)
+} 
+