diff extractCN.R @ 0:4d539083cf7f draft

planemo upload for repository https://github.com/sblanck/MPAgenomics4Galaxy/tree/master/mpagenomics_wrappers commit 689d0d8dc899a683ee18700ef385753559850233-dirty
author sblanck
date Tue, 12 May 2020 10:40:36 -0400
parents
children 3fcbb8030fcc
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extractCN.R	Tue May 12 10:40:36 2020 -0400
@@ -0,0 +1,170 @@
+#!/usr/bin/env Rscript
+# On any R error: print the error message to stderr, then quit with exit status 1 without saving the workspace.
+options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q(save = "no", status = 1, runLast = FALSE) })
+
+# Force English messages: a German LC_MESSAGES setting otherwise crashes Galaxy with a UTF-8 error.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+library("optparse")
+
+##### Read options
+# Every flag takes a character value, defaults to NULL and is stored under
+# its own name (e.g. --chrom is stored as "chrom").
+# The order of the names below is the order of the options in option_list.
+option_names <- c(
+	"chrom", "input", "output", "new_file_path",
+	"settings_type", "settings_tumor", "symmetrize",
+	"settings_signal", "settings_snp",
+	"outputlog", "log", "userid"
+)
+option_list <- lapply(option_names, function(flag) {
+	make_option(paste0("--", flag), type = "character", default = NULL, dest = flag)
+})
+
+opt_parser <- OptionParser(option_list = option_list)
+opt <- parse_args(opt_parser)
+
+# --input is the only option checked here: when it is absent, print the
+# usage text and abort.
+if (is.null(opt$input)) {
+	print_help(opt_parser)
+	stop("input required.", call. = FALSE)
+}
+
+# Copy the parsed options into the variables used by the rest of the script. [[ ]] matches names exactly,
+# whereas $ matches partially: opt$output could return the "outputlog" entry when "output" itself is absent.
+chrom <- opt[["chrom"]]
+input <- opt[["input"]]
+tmp_dir <- opt[["new_file_path"]]
+output <- opt[["output"]]
+settingsType <- opt[["settings_type"]]
+tumorcsv <- opt[["settings_tumor"]]
+symmetrize <- opt[["symmetrize"]]
+signal <- opt[["settings_signal"]]
+snp <- type.convert(opt[["settings_snp"]], as.is = TRUE)  # text converted to the type it represents; omitting as.is warns on R >= 4.0
+outputlog <- opt[["outputlog"]]
+log <- opt[["log"]]
+user <- opt[["userid"]]
+
+library(MPAgenomics)
+# Work from the per-user "mpagenomics" directory located under tmp_dir.
+setwd(file.path(tmp_dir, "mpagenomics", user))
+
+# The data set name is the value in row 1, column 2 of the input table.
+dataset <- read.table(file = input, stringsAsFactors = FALSE)[1, 2]
+
+# outputlog is a character string: only when it reads as TRUE are stdout and messages redirected to the file `log`; a missing or unreadable value leaves them alone.
+if (isTRUE(as.logical(outputlog))) {
+	sinklog <- file(log, open = "wt")
+	sink(sinklog, type = "output")
+	sink(sinklog, type = "message")
+}
+
+# Chromosomes to extract: a value containing "all" (any case) or equal to "None" selects 1 to 25;
+# anything else is read as a comma-separated list of chromosome numbers.
+if (grepl("all", tolower(chrom)) || chrom == "None") {
+	chrom_vec <- 1:25
+} else {
+	chrom_vec <- as.numeric(unlist(strsplit(chrom, ",")))
+}
+if (signal == "CN")
+{
+	# Assemble the arguments of getCopyNumberSignal(): the data set, chromosomes
+	# and onlySNP are always passed, the two optional ones only when in use.
+	cn_args <- list(
+		dataset,
+		chromosome = chrom_vec,
+		onlySNP = snp
+	)
+
+	if (tumorcsv != "None") {
+		cn_args$normalTumorArray <- tumorcsv
+	}
+	if (settingsType != "dataset") {
+		# settingsType is then a comma-separated list of file names; each name
+		# is cut at its first "." before being passed on.
+		file_names <- unlist(strsplit(settingsType, ","))
+		cn_args$listOfFiles <- sub("^([^.]*).*", "\\1", file_names)
+	}
+
+	CN <- do.call(getCopyNumberSignal, cn_args)
+	
+	# Turn each per-chromosome element of CN into a data frame and stack them,
+	# in list order, onto an initially empty data frame.
+	chr_tables <- lapply(names(CN), function(chr) data.frame(CN[[chr]], check.names = FALSE))
+	CN_global <- Reduce(rbind, chr_tables, data.frame(check.names = FALSE))
+
+	# Export as tab-separated text, with the "featureNames" column renamed "probeName".
+	names(CN_global)[names(CN_global) == "featureNames"] <- "probeName"
+	write.table(format(CN_global), output, row.names = FALSE, quote = FALSE, sep = "\t")
+	
+} else {
+	if (symmetrize=="TRUE")	{
+		# Names of the files to process: the result of getListOfFiles() for the data
+		# set, or the comma-separated names in settingsType cut at their first ".".
+		input_vecstring <- if (settingsType == "dataset") {
+			getListOfFiles(dataset)
+		} else {
+			sub("^([^.]*).*", "\\1", unlist(strsplit(settingsType, ",")))
+		}
+		
+		# One pass per file: fetch its symmetrized fracB signal, stack the "tumor"
+		# element of every chromosome, then add the result to the global table.
+		symFracB_global <- data.frame(check.names = FALSE)
+		for (currentFile in input_vecstring) {
+			cat(paste0("extracting signal from ", currentFile, ".\n"))
+			currentSymFracB <- data.frame()
+			symFracB <- getSymFracBSignal(dataset, chromosome = chrom_vec, file = currentFile, normalTumorArray = tumorcsv)
+			for (i in names(symFracB)) {
+				cat(paste0("   extracting ", i, ".\n"))
+				currentSymFracB <- rbind(currentSymFracB, data.frame(symFracB[[i]]$tumor, check.names = FALSE))
+			}
+			if (nrow(symFracB_global) == 0) {
+				# Nothing stored yet: keep this file's whole table.
+				symFracB_global <- currentSymFracB
+			} else {
+				# Append only the third column and label it with the file name (a `currentFile = ...` argument to cbind() would give the literal label "currentFile").
+				symFracB_global <- cbind(symFracB_global, currentSymFracB[[3]])
+				names(symFracB_global)[ncol(symFracB_global)] <- currentFile
+			}
+		}
+		names(symFracB_global)[names(symFracB_global) == "featureNames"] <- "probeName"
+		write.table(format(symFracB_global), output, row.names = FALSE, quote = FALSE, sep = "\t")
+	} else {
+		# Assemble the arguments of getFracBSignal(): the data set and the
+		# chromosomes are always passed, normalTumorArray and listOfFiles only
+		# when they are in use.
+		fracb_args <- list(
+			dataset,
+			chromosome = chrom_vec
+		)
+
+		if (tumorcsv != "None") {
+			fracb_args$normalTumorArray <- tumorcsv
+		}
+		if (settingsType != "dataset") {
+			# settingsType is then a comma-separated list of file names; each
+			# name is cut at its first "." before being passed on.
+			file_names <- unlist(strsplit(settingsType, ","))
+			fracb_args$listOfFiles <- sub("^([^.]*).*", "\\1", file_names)
+		}
+
+		fracB <- do.call(getFracBSignal, fracb_args)
+		# Format the data: take the "tumor" element of each chromosome entry of
+		# fracB, turn it into a data frame and stack the results, in list order,
+		# onto an initially empty data frame.
+		chr_tables <- lapply(names(fracB), function(chr) data.frame(fracB[[chr]]$tumor, check.names = FALSE))
+		fracB_global <- Reduce(rbind, chr_tables, data.frame(check.names = FALSE))
+
+		# Export as tab-separated text, with the "featureNames" column renamed "probeName".
+		names(fracB_global)[names(fracB_global) == "featureNames"] <- "probeName"
+		write.table(format(fracB_global), output, row.names = FALSE, quote = FALSE, sep = "\t")
+	}
+	
+}
+
+if (isTRUE(as.logical(outputlog))) {  # logging was requested: undo both redirections, then close the log connection
+	sink(type = "output")
+	sink(type = "message")
+	close(sinklog)
+}
\ No newline at end of file