comparison minfi_pipeline.R @ 0:84361ce36a11 draft

planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author nturaga
date Tue, 19 Apr 2016 11:10:25 -0400
parents
children a78f84fc4873
comparison
equal deleted inserted replaced
-1:000000000000 0:84361ce36a11
1 # setup R error handling to go to stderr
2 options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
3
4 # we need that to not crash galaxy with an UTF8 error on German LC settings.
5 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
6
7 library("getopt")
8 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
9 args <- commandArgs(trailingOnly = TRUE)
10
11 # get options, using the spec as defined by the enclosed list.
12 # we read the options from the default: commandArgs(TRUE).
13 spec <- matrix(c(
14 'quiet', 'q', 2, "logical",
15 'help' , 'h', 0, "logical",
16 "preprocess","p",1,"character",
17 "cores","c",1,"integer",
18 "numPositions","n",2,"integer",
19 "shrinkVar","s",2,"logical",
20 "b_permutations","b",2,"integer",
21 "smooth","m",2,"logical",
22 "cutoff","t",2,"double",
23 "l_value","l",2,"integer")
24 ,byrow=TRUE, ncol=4)
25 opt <- getopt(spec)
26
27 # If help was asked for print a friendly message
28 # and exit with a non-zero error code
29 if (!is.null(opt$help)) {
30 cat(getopt(spec, usage=TRUE))
31 q(status=1)
32 }
33
34
35 ## Set verbose mode
36 verbose = if(is.null(opt$quiet)){TRUE}else{FALSE}
37 if(verbose){
38 cat("Verbose mode is ON\n\n")
39 }
40
41 # Enforce the following required arguments
42 if (is.null(opt$preprocess)) {
43 cat("'--preprocess' is required\n")
44 q(status=1)
45 }
46 cat("verbose = ", opt$quiet,"\n")
47 cat("preprocess = ",opt$preprocess,"\n")
48 cat("cores = ", opt$cores, "\n")
49 cat("b_permutations = ",opt$b_permutations,"\n")
50 cat("smooth = ",opt$smooth,"\n")
51 cat("cutoff = ",opt$cutoff,"\n")
52 cat("l_value = ",opt$l_value,"\n")
53 cat("numPositions = ",opt$numPositions,"\n")
54 cat("shrinkVar = ",opt$shrinkVar,"\n")
55
56
57 # Load required libraries
58 suppressPackageStartupMessages({
59 library("minfi")
60 library("FlowSorted.Blood.450k")
61 library("TxDb.Hsapiens.UCSC.hg19.knownGene")
62 library("doParallel")
63 })
64
65
66 ## Parse cheetah code and make dataframe for creating tmp dir
67 minfi_config_file = paste0("minfi_temp","/minfi_config.txt")
68 minfi_config = read.table(minfi_config_file)
69 colnames(minfi_config) = c("status","green","red","name")
70
71
72 ## Make the tmpdir for symlinking data
73 base_dir = paste0("minfi_temp","/base")
74 system(paste0("mkdir ",base_dir))
75
76
77 ## Make symlinks of files
78 for (i in 1:nrow(minfi_config)){
79 stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))
80
81 ## Make green idat file symlinks
82 file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
83 cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
84 cat("Reading file ",i,"GREEN Channel ", file_green)
85 system(cmd_green)
86
87 ## Make red idat file symlinks
88 file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
89 cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
90 cat("Reading file ",i,"RED Channel ", file_red)
91 system(cmd_red)
92 }
93
94 ## Make dataframe with Basenames
95 Basename = paste0(base_dir,"/",unique(substr(list.files(base_dir),1,17)))
96 status = minfi_config[match(gsub(".+/","",Basename), minfi_config$name),"status"]
97 targets = data.frame(Basename=Basename,status=status)
98
99 if ( verbose ) {
100 cat("Minfi targets file:\n\n ")
101 print(targets)
102 }
103
104 ## Read 450k files
105 RGset = read.450k.exp(targets=targets,verbose=T)
106
107 if (verbose){
108 cat("RGset has been read: \n\n")
109 print(RGset)
110 }
111
112 pd = pData(RGset)
113
114 ## NOTE: QC report is for samples before normalization
115 ## QC REPORT
116 files = gsub(".+/","",pd$filenames)
117 ## Produce PDF file
118 if (!is.null(RGset)) {
119 # Make PDF of QC report
120 minfi::qcReport(rgSet=RGset,sampNames=files,sampGroups=pd$status,pdf="qc_report.pdf")
121 }
122
123
124 ## MDS Plot
125 ## Set phenotype data
126 files = gsub(".+/","",pd$filenames)
127 #numPositions=as.integer("${numPositions}")
128
129 ## Produce PDF file
130 if (!is.null(RGset)) {
131 ## Make PDF of MDS plot
132 pdf("mds_plot.pdf")
133 minfi::mdsPlot(dat=RGset,sampNames=files,sampGroups=pd$status,main="Beta MDS",numPositions = opt$numPositions,pch=19)
134 dev.off()
135 }
136
137 if(verbose){
138 cat("Made plot of QC report and MDS plot\n\n")
139 }
140
141
142 ## Preprocess data with the normalization method chosen
143 if(opt$preprocess == "quantile"){
144 normalized_RGset = preprocessQuantile(RGset)
145 if (verbose){cat("Preprocessed using Quantile normalization")};
146 } else if (opt$preprocess == "funnorm"){
147 normalized_RGset = preprocessFunnorm(RGset)
148 if(verbose){print("Preprocessed using Functional normalization")}
149 } else if (opt$preprocess == "noob"){
150 normalized_RGset = preprocessNoob(RGset)
151 if (verbose){cat("Preprocessed using Noob normalization")};
152 } else if (opt$preprocess == "illumina"){
153 normalized_RGset = preprocessIllumina(RGset,bg.correct = TRUE, normalize = c("controls", "no"),reference = 1)
154 if(verbose){print("Preprocessed using Illumina normalization")}
155 } else if (opt$preprocess == "swan"){
156 normalized_RGset = preprocessSWAN(RGset)
157 if(verbose){print("Preprocessed using SWAN normalization")}
158 }else {
159 normalized_RGset = RGset
160 if(verbose){print("Preprocessed using No normalization")}
161 }
162
163
164
165 ## Get beta values from Proprocessed data
166 beta = getBeta(normalized_RGset)
167 ## Set phenotype data
168 pd = pData(normalized_RGset)
169
170
171 ## DMP finder
172 dmp = dmpFinder(dat=beta,pheno=pd$status,type="categorical",shrinkVar=opt$shrinkVar)
173 write.csv(dmp,file="dmps.csv",quote=FALSE,row.names=TRUE)
174 if(verbose){
175 cat("\n","DMP Finder successful \n")
176 }
177
178
179 # Model Matrix to pass into the bumphunter function
180 T1= levels(pd$status)[2]
181 T2= levels(pd$status)[1]
182
183 # Introduce error if levels are different
184 stopifnot(T1!=T2)
185
186 keep=pd$status%in%c(T1,T2)
187 tt=factor(pd$status[keep],c(T1,T2))
188 design=model.matrix(~tt)
189
190 if(verbose){
191 cat("Model matrix is: \n")
192 design
193 }
194
195 # Start bumphunter in a parallel environment
196 # Parallelize over cores on machine
197 registerDoParallel(cores = opt$cores)
198
199 ## Bumphunter Run with normalized_RGset processed with Quantile Normalization
200 if (is(normalized_RGset,"GenomicRatioSet")) {
201 res=bumphunter(normalized_RGset[,keep],design,B=opt$b_permutations,smooth=opt$smooth,cutoff= opt$cutoff,type="Beta")
202 bumps= res$tab
203 } else if(is(normalized_RGset,"MethylSet")) {
204 # convert MethylSet (norm.swan) into GenomicRatioSet through Ratioset
205 normalized_RGset = ratioConvert(normalized_RGset, what = "both", keepCN = TRUE)
206 normalized_RGset = mapToGenome(normalized_RGset)
207 res = bumphunter(normalized_RGset[,keep],design=design,B=opt$b_permutations,smooth=opt$smooth,cutoff=opt$cutoff)
208 bumps = res$tab
209 } else {
210 # This else case is never supposed to run,
211 # it will run only if the normalized_RGset object
212 # did not have the expected class type
213 cat("Bumphunter did not run properly","\n")
214 stopifnot(!is.null(bumps))
215 }
216
217 if(verbose){
218 cat("Bumphunter result", "\n")
219 head(bumps)
220 }
221
222
223 ## Choose DMR's of a certain length threshold.
224 ## This helps reduce the size of DMRs early, and match
225 ## with genes closest to region
226 bumps = bumps[bumps$L > opt$l_value,]
227 genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
228 tab=matchGenes(bumps,genes)
229 annotated_dmrs=cbind(bumps,tab)
230
231 if(verbose){
232 cat("Match with annotation\n")
233 head(annotated_dmrs)
234 }
235
236 # Save result, which contains DMR's and closest genes
237 write.csv(annotated_dmrs,file = "dmrs.csv",quote=FALSE,row.names=FALSE)
238
239 # Garbage collect
240 gc()
241
242
243 ## TODO: FIX BLOCK FINDER
244 # Block finder
245 #library(sva)
246 #pheno <- pData(GRset)
247 #mod <- model.matrix(~as.factor(status), data=pheno)
248 #mod0 <- model.matrix(~1, data=pheno)
249 #sva.results <- sva(mval, mod, mod0)