Mercurial > repos > nturaga > minfi_pipeline
comparison help/minfi_pipeline_test.R @ 0:84361ce36a11 draft
planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
| author | nturaga | 
|---|---|
| date | Tue, 19 Apr 2016 11:10:25 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:84361ce36a11 | 
|---|---|
| 1 # setup R error handling: write the error message to stderr, then quit with status 1 without saving | |
| 2 options(show.error.messages=FALSE, error=function(){cat(geterrmessage(),file=stderr());q("no",1,FALSE)}) | |
| 3 | |
| 4 # we need that to not crash galaxy with a UTF-8 error on German LC settings. | |
| 5 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 6 | |
| 7 library("getopt") | |
| 8 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) # NOTE(review): "stringAsfactors" is not R's "stringsAsFactors" option, so it has no effect; levels() is later called on the status column, so confirm before correcting the spelling | |
| 9 args <- commandArgs(trailingOnly = TRUE) | |
| 10 | |
| 11 # get options, using the spec as defined by the enclosed matrix (one row per option: | |
| 12 # long name, short flag, argument mask, data type); getopt reads commandArgs(TRUE) by default. | |
| 13 spec <- matrix(c( | |
| 14 'quiet', 'q', 2, "logical", | |
| 15 'help' , 'h', 0, "logical", | |
| 16 "preprocess","p",1,"character", | |
| 17 "numPositions","n",2,"integer", | |
| 18 "shrinkVar","s",2,"logical", | |
| 19 "b_permutations","b",2,"integer", | |
| 20 "smooth","m",2,"logical", | |
| 21 "cutoff","t",2,"double", # "double" is the numeric mode getopt can cast to; "float" is not a valid R storage mode | |
| 22 "l_value","l",2,"integer", | |
| 23 "cores","c",1,"integer") | |
| 24 ,byrow=TRUE, ncol=4) | |
| 25 opt <- getopt(spec) | |
| 26 | |
| 27 | |
| 28 # If help was asked for, print the usage message generated from spec | |
| 29 # and exit with a non-zero status code | |
| 30 if (!is.null(opt$help)) { | |
| 31 cat(getopt(spec, usage=TRUE)) | |
| 32 q(status=1) | |
| 33 } | |
| 34 | |
| 35 | |
| 36 ## Set verbose mode: TRUE unless opt contains a 'quiet' entry | |
| 37 verbose = if(is.null(opt$quiet)){TRUE}else{FALSE} | |
| 38 if(verbose){ | |
| 39 cat("Verbose mode is ON\n\n") | |
| 40 } | |
| 41 | |
| 42 # Enforce the required arguments: quit with status 1 when 'preprocess' is missing | |
| 43 if (is.null(opt$preprocess)) { | |
| 44 cat("'preprocess' is required\n") | |
| 45 q(status=1) | |
| 46 } | |
| 47 | |
| 48 # Load required libraries | |
| 49 | |
| 50 suppressPackageStartupMessages({ | |
| 51 library("minfi") | |
| 52 library("FlowSorted.Blood.450k") | |
| 53 library("doParallel") | |
| 54 library("TxDb.Hsapiens.UCSC.hg19.knownGene") | |
| 55 }) | |
| 56 | |
| 57 | |
| 58 ## Read the minfi configuration table and name its four columns | |
| 59 minfi_config_file = "/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/43/minfi_temp/minfi_config.txt" | |
| 60 minfi_config = read.table(minfi_config_file) | |
| 61 colnames(minfi_config) = c("status","green","red","name") | |
| 62 | |
| 63 if ( verbose ) { | |
| 64 cat("Minfi configuration file:\n\n "); | |
| 65 print(minfi_config) | |
| 66 } | |
| 67 # NOTE(review): both paths are hard-coded and point at different job directories (000/43 above, 000/73 below) - confirm this is intended | |
| 68 ## Path of the directory holding the symlinked data (only the path string is built here; nothing is created) | |
| 69 base_dir = paste0("/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/73/minfi_temp","/base") | |
| 70 | |
| 71 | |
| 72 ### Make symlinks of files | |
| 73 #for (i in 1:nrow(minfi_config)){ | |
| 74 #stopifnot(nrow(minfi_config) == nrow(minfi_config["name"])) | |
| 75 | |
| 76 ### Make green idat file symlinks | |
| 77 #file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat") | |
| 78 #cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ") | |
| 79 #cat("Reading file ",i,"GREEN Channel ", file_green) | |
| 80 #system(cmd_green) | |
| 81 | |
| 82 ### Make red idat file symlinks | |
| 83 #file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat") | |
| 84 #cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ") | |
| 85 #cat("Reading file ",i,"RED Channel ", file_red) | |
| 86 #system(cmd_red) | |
| 87 #} | |
| 88 | |
| 89 ## Make dataframe with Basenames: strip the channel suffix (e.g. _Grn.idat / _Red.idat) instead of assuming a 17-character name | |
| 90 idat_files <- list.files(base_dir, pattern = "\\.idat$") | |
| 91 Basename <- file.path(base_dir, unique(sub("_[[:alpha:]]+\\.idat$", "", idat_files))) | |
| 92 status <- minfi_config[match(basename(Basename), minfi_config$name), "status"] # look up each sample's status by name | |
| 93 targets <- data.frame(Basename = Basename, status = status) | |
| 94 if ( verbose ) { | |
| 95 cat("Minfi targets file:\n\n ") | |
| 96 print(targets) | |
| 97 } | |
| 98 | |
| 99 ## Read the 450k files listed in targets into RGset (read.450k.exp is called with verbose=FALSE) | |
| 100 RGset = read.450k.exp(targets=targets,verbose=FALSE) | |
| 101 | |
| 102 if (verbose){ | |
| 103 cat("RGset has been read: \n\n") | |
| 104 print(RGset) | |
| 105 } | |
| 106 | |
| 107 | |
| 108 ## Preprocess data with the normalization method chosen via opt$preprocess; the result is stored in normalized_RGset | |
| 109 if(opt$preprocess == "quantile"){ | |
| 110 normalized_RGset = preprocessQuantile(RGset) | |
| 111 if (verbose){cat("Preprocessed using Quantile normalization")}; | |
| 112 } else if (opt$preprocess == "noob"){ | |
| 113 normalized_RGset = preprocessNoob(RGset) | |
| 114 if (verbose){cat("Preprocessed using Noob normalization")}; | |
| 115 } else if (opt$preprocess == "raw"){ | |
| 116 normalized_RGset = preprocessRaw(RGset) | |
| 117 if (verbose){print("Preprocessed using Raw normalization")}; | |
| 118 } else if (opt$preprocess == "illumina"){ | |
| 119 normalized_RGset = preprocessIllumina(RGset,bg.correct = TRUE, normalize = c("controls", "no"),reference = 1) | |
| 120 if(verbose){print("Preprocessed using Illumina normalization")} | |
| 121 } else if (opt$preprocess == "preprocessFunnorm"){ # note: this key is the full function name, unlike the short names of the other branches | |
| 122 normalized_RGset = preprocessFunnorm(RGset) | |
| 123 if(verbose){print("Preprocessed using Functional normalization")} | |
| 124 } else { # any other value: RGset is used as-is, without normalization | |
| 125 normalized_RGset = RGset | |
| 126 if(verbose){print("Preprocessed using NO normalization")} | |
| 127 } | |
| 128 | |
| 129 | |
| 130 ## Get beta values from preprocessed data | |
| 131 beta = getBeta(normalized_RGset) | |
| 132 ## Set phenotype data | |
| 133 pd=pData(normalized_RGset) | |
| 134 | |
| 135 | |
| 136 ## QC REPORT | |
| 137 files = gsub(".+/","",pd$filenames) # drop everything up to the last "/" to keep the bare file names | |
| 138 ## Produce PDF file | |
| 139 if (!is.null(RGset)) { | |
| 140 # Make PDF of QC report (uses RGset, not normalized_RGset); written to qc_report.pdf | |
| 141 minfi::qcReport(rgSet=RGset,sampNames=files,sampGroups=pd$status,pdf="qc_report.pdf") | |
| 142 } | |
| 143 | |
| 144 ## MDS Plot | |
| 145 ## Sample file names (same expression as in the QC report section) | |
| 146 files = gsub(".+/","",pd$filenames) | |
| 147 | |
| 148 ## Produce PDF file | |
| 149 if (!is.null(RGset)) { | |
| 150 ## Make PDF of the MDS plot (uses RGset, not normalized_RGset); written to mds_plot.pdf | |
| 151 pdf("mds_plot.pdf") | |
| 152 minfi::mdsPlot(dat=RGset,sampNames=files,sampGroups=pd$status,main="Beta MDS",numPositions = opt$numPositions,pch=19) | |
| 153 dev.off() | |
| 154 } | |
| 155 | |
| 156 | |
| 157 if(verbose){ | |
| 158 cat("Made plot of QC report and MDS plot\n\n") | |
| 159 } | |
| 160 | |
| 161 | |
| 162 #Estimate Cell counts | |
| 163 #if(!is.null(RGset)){ | |
| 164 #cell_counts = minfi::estimateCellCounts(rgSet=RGset,meanPlot=TRUE) | |
| 165 #write.csv(cell_counts,file="estimated_cell_counts.csv",quote=FALSE,row.names=TRUE) | |
| 166 #} | |
| 167 #if(verbose){ | |
| 168 #cat("Cell Counts estimated\n\n") | |
| 169 #} | |
| 170 | |
| 171 ## DMP finder: run dmpFinder on the beta values with status as a categorical phenotype; the result is written to dmps.csv | |
| 172 dmp = dmpFinder(dat=beta,pheno=pd$status,type="categorical",shrinkVar=opt$shrinkVar) | |
| 173 write.csv(dmp,file="dmps.csv",quote=FALSE,row.names=TRUE) | |
| 174 if(verbose){ | |
| 175 cat("DMP Finder successful \n") | |
| 176 } | |
| 177 | |
| 178 | |
| 179 # Model matrix to pass into the bumphunter function | |
| 180 pd <- pData(normalized_RGset) | |
| 181 # Coerce status to a factor first: levels() of a plain character vector is NULL, which would silently select no samples | |
| 182 status_levels <- levels(factor(pd$status)) | |
| 183 T1 <- status_levels[2] | |
| 184 T2 <- status_levels[1] | |
| 185 stopifnot(!is.na(T1), !is.na(T2), T1 != T2) # need two distinct status groups | |
| 186 keep <- pd$status %in% c(T1, T2) | |
| 187 tt <- factor(pd$status[keep], c(T1, T2)) | |
| 188 design <- model.matrix(~tt) | |
| 189 if (verbose) { | |
| 190 cat("Model matrix is: \n") | |
| 191 print(design) | |
| 192 } | |
| 193 | |
| 194 # Start bumphunter in a parallel environment | |
| 195 # Register the doParallel backend with cores = opt$cores | |
| 196 registerDoParallel(cores = opt$cores) | |
| 197 | |
| 198 ## Bumphunter run on the samples selected by keep, using whichever preprocessing was chosen above (not only quantile) | |
| 199 | |
| 200 res=bumphunter(normalized_RGset[,keep],design,B=opt$b_permutations,smooth=opt$smooth,cutoff= opt$cutoff,type="Beta") | |
| 201 bumps= res$tab # NOTE(review): if the component is actually named "table", this works only through partial matching of $ - confirm | |
| 202 | |
| 203 if(verbose){ | |
| 204 cat("Bumphunter result", "\n") | |
| 205 head(bumps) | |
| 206 } | |
| 207 | |
| 208 ## Keep only DMRs whose L value is above the opt$l_value threshold. | |
| 209 ## Filtering early reduces the number of DMRs before they are | |
| 210 ## matched with the genes closest to each region | |
| 211 bumps = bumps[bumps$L>opt$l_value,] | |
| 212 genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
| 213 tab=matchGenes(bumps,genes) | |
| 214 result=cbind(bumps,tab) | |
| 215 | |
| 216 if(verbose){ | |
| 217 cat("Match with annotation\n") | |
| 218 head(result) | |
| 219 } | |
| 220 | |
| 221 # Save result, which contains DMRs and closest genes, to dmrs.csv | |
| 222 write.csv(result,file = "dmrs.csv",quote=FALSE,row.names=TRUE) | |
| 223 | |
| 224 # Garbage collect | |
| 225 gc() | |
| 226 | |
| 227 # Block finder | |
| 228 #library(sva) | |
| 229 #pheno <- pData(GRset) | |
| 230 #mod <- model.matrix(~as.factor(status), data=pheno) | |
| 231 #mod0 <- model.matrix(~1, data=pheno) | |
| 232 #sva.results <- sva(mval, mod, mod0) | 
