comparison help/minfi_pipeline_test.R @ 0:84361ce36a11 draft

planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author nturaga
date Tue, 19 Apr 2016 11:10:25 -0400
# setup R error handling to go to stderr
options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
spec <- matrix(c(
  "quiet",          "q", 2, "logical",
  "help",           "h", 0, "logical",
  "preprocess",     "p", 1, "character",
  "numPositions",   "n", 2, "integer",
  "shrinkVar",      "s", 2, "logical",
  "b_permutations", "b", 2, "integer",
  "smooth",         "m", 2, "logical",
  "cutoff",         "t", 2, "double",
  "l_value",        "l", 2, "integer",
  "cores",          "c", 1, "integer"),
  byrow=TRUE, ncol=4)
opt <- getopt(spec)
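
## Illustrative invocation (a sketch; the option values below are hypothetical
## examples, not taken from the Galaxy tool wrapper that normally calls this script):
##   Rscript minfi_pipeline_test.R --preprocess quantile --numPositions 1000 \
##     --b_permutations 100 --cutoff 0.2 --l_value 3 --cores 4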


# If help was asked for, print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
  cat(getopt(spec, usage=TRUE))
  q(status=1)
}


## Set verbose mode
verbose <- is.null(opt$quiet)
if (verbose) {
  cat("Verbose mode is ON\n\n")
}

# Enforce the following required arguments
if (is.null(opt$preprocess)) {
  cat("'preprocess' is required\n")
  q(status=1)
}

# Load required libraries

suppressPackageStartupMessages({
  library("minfi")
  library("FlowSorted.Blood.450k")
  library("doParallel")
  library("TxDb.Hsapiens.UCSC.hg19.knownGene")
})


## Parse the Cheetah-generated config and build a data frame for creating the tmp dir
minfi_config_file = "/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/43/minfi_temp/minfi_config.txt"
minfi_config = read.table(minfi_config_file)
colnames(minfi_config) = c("status","green","red","name")

if (verbose) {
  cat("Minfi configuration file:\n\n")
  print(minfi_config)
}
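
## For reference: minfi_config.txt is read with read.table() and given the column
## names status, green, red, name above, i.e. one whitespace-separated row per
## sample. The row below is a purely hypothetical example, not real data:
##   normal  /path/to/sample1_Grn.idat  /path/to/sample1_Red.idat  sample1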

## Make the tmp dir for symlinking data
base_dir = paste0("/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/73/minfi_temp","/base")


### Make symlinks of files
#for (i in 1:nrow(minfi_config)){
#  stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))

### Make green idat file symlinks
#  file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
#  cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
#  cat("Reading file ",i,"GREEN Channel ", file_green)
#  system(cmd_green)

### Make red idat file symlinks
#  file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
#  cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
#  cat("Reading file ",i,"RED Channel ", file_red)
#  system(cmd_red)
#}
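
## A minimal sketch of the same symlinking step, kept disabled like the block
## above; it uses base R's dir.create()/file.symlink() instead of shelling out
## to `ln -s`, and assumes only the minfi_config columns assigned earlier.
#dir.create(base_dir, recursive = TRUE, showWarnings = FALSE)
#for (i in seq_len(nrow(minfi_config))) {
#  sample_name <- as.character(minfi_config[i, "name"])
#  file.symlink(as.character(minfi_config[i, "green"]), file.path(base_dir, paste0(sample_name, "_Grn.idat")))
#  file.symlink(as.character(minfi_config[i, "red"]), file.path(base_dir, paste0(sample_name, "_Red.idat")))
#}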

## Make dataframe with Basenames
Basename = paste0(base_dir,"/",unique(substr(list.files(base_dir),1,17)))
status = minfi_config[match(gsub(".+/","",Basename), minfi_config$name),"status"]
targets = data.frame(Basename=Basename, status=status)

if (verbose) {
  cat("Minfi targets file:\n\n")
  print(targets)
}

## Read 450k files
RGset = read.450k.exp(targets=targets, verbose=FALSE)

if (verbose) {
  cat("RGset has been read: \n\n")
  print(RGset)
}


## Preprocess data with the chosen normalization method
if (opt$preprocess == "quantile") {
  normalized_RGset = preprocessQuantile(RGset)
  if (verbose) {cat("Preprocessed using Quantile normalization\n")}
} else if (opt$preprocess == "noob") {
  normalized_RGset = preprocessNoob(RGset)
  if (verbose) {cat("Preprocessed using Noob normalization\n")}
} else if (opt$preprocess == "raw") {
  normalized_RGset = preprocessRaw(RGset)
  if (verbose) {cat("Preprocessed using Raw normalization\n")}
} else if (opt$preprocess == "illumina") {
  normalized_RGset = preprocessIllumina(RGset, bg.correct = TRUE, normalize = "controls", reference = 1)
  if (verbose) {cat("Preprocessed using Illumina normalization\n")}
} else if (opt$preprocess == "preprocessFunnorm") {
  normalized_RGset = preprocessFunnorm(RGset)
  if (verbose) {cat("Preprocessed using Functional normalization\n")}
} else {
  normalized_RGset = RGset
  if (verbose) {cat("Preprocessed using NO normalization\n")}
}


## Get beta values from preprocessed data
beta = getBeta(normalized_RGset)
## Set phenotype data
pd = pData(normalized_RGset)


## QC REPORT
files = gsub(".+/","",pd$filenames)
## Produce PDF file
if (!is.null(RGset)) {
  # Make PDF of QC report
  minfi::qcReport(rgSet=RGset, sampNames=files, sampGroups=pd$status, pdf="qc_report.pdf")
}

## MDS Plot
files = gsub(".+/","",pd$filenames)

## Produce PDF file
if (!is.null(RGset)) {
  ## Make PDF of MDS plot
  pdf("mds_plot.pdf")
  minfi::mdsPlot(dat=RGset, sampNames=files, sampGroups=pd$status, main="Beta MDS", numPositions=opt$numPositions, pch=19)
  dev.off()
}


if (verbose) {
  cat("Made QC report and MDS plot\n\n")
}


#Estimate Cell counts
#if(!is.null(RGset)){
#  cell_counts = minfi::estimateCellCounts(rgSet=RGset, meanPlot=TRUE)
#  write.csv(cell_counts, file="estimated_cell_counts.csv", quote=FALSE, row.names=TRUE)
#}
#if(verbose){
#  cat("Cell Counts estimated\n\n")
#}

## DMP finder
dmp = dmpFinder(dat=beta, pheno=pd$status, type="categorical", shrinkVar=opt$shrinkVar)
write.csv(dmp, file="dmps.csv", quote=FALSE, row.names=TRUE)
if (verbose) {
  cat("DMP finder successful\n")
}
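
## Note on the output above: with a categorical phenotype, minfi's dmpFinder
## should return one row per probe, sorted by p-value, with intercept,
## F-statistic, p-value and q-value columns; dmps.csv mirrors that table.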


# Model matrix to pass into the bumphunter function
pd = pData(normalized_RGset)
T1 = levels(pd$status)[2]
T2 = levels(pd$status)[1]

stopifnot(T1 != T2)
keep = pd$status %in% c(T1, T2)
tt = factor(pd$status[keep], c(T1, T2))
design = model.matrix(~tt)

if (verbose) {
  cat("Model matrix is:\n")
  print(design)
}
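
## Worked example, for illustration only (the sample labels are hypothetical):
## if pd$status has levels c("normal","tumor"), then T1 = "tumor", T2 = "normal",
## tt is releveled to c("tumor","normal"), and model.matrix(~tt) gives an
## intercept column of 1s plus a 0/1 indicator column for "normal"; bumphunter's
## default coefficient (the second column) therefore tests normal vs tumor.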

# Start bumphunter in a parallel environment
# Parallelize over cores on the machine
registerDoParallel(cores = opt$cores)

## Bumphunter run on the normalized set produced above

res = bumphunter(normalized_RGset[,keep], design, B=opt$b_permutations, smooth=opt$smooth, cutoff=opt$cutoff, type="Beta")
bumps = res$table

if (verbose) {
  cat("Bumphunter result\n")
  print(head(bumps))
}

## Keep only DMRs spanning more than l_value probes (the L column of the
## bumphunter table). Filtering early keeps the table small before matching
## each region to its closest gene.
bumps = bumps[bumps$L > opt$l_value,]
genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
tab = matchGenes(bumps, genes)
result = cbind(bumps, tab)

if (verbose) {
  cat("Match with annotation\n")
  print(head(result))
}

# Save result, which contains DMRs and their closest genes
write.csv(result, file="dmrs.csv", quote=FALSE, row.names=TRUE)

# Garbage collect
gc()

# Block finder
#library(sva)
#pheno <- pData(GRset)
#mod <- model.matrix(~as.factor(status), data=pheno)
#mod0 <- model.matrix(~1, data=pheno)
#sva.results <- sva(mval, mod, mod0)