Mercurial repository: nturaga / minfi_analyze_tcga
minfi_pipeline.R @ 0:8b26eeb2da29 (draft)
planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty

author:   nturaga
date:     Tue, 19 Apr 2016 11:11:16 -0400
parents:  (none)
children: 3cd8ea4f7079
# Send R error messages to stderr
options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})

# Needed so that Galaxy does not crash with a UTF-8 error on German LC settings
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# Get options, using the spec as defined by the enclosed list.
# We read the options from the default: commandArgs(TRUE).
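# getopt spec columns: long option name, short flag, argument mask
# (0 = no argument, 1 = required argument, 2 = optional argument), and data type.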
spec <- matrix(c(
    "quiet",          "q", 2, "logical",
    "help",           "h", 0, "logical",
    "preprocess",     "p", 1, "character",
    "cores",          "c", 1, "integer",
    "numPositions",   "n", 2, "integer",
    "shrinkVar",      "s", 2, "logical",
    "b_permutations", "b", 2, "integer",
    "smooth",         "m", 2, "logical",
    "cutoff",         "t", 2, "double",
    "l_value",        "l", 2, "integer"),
    byrow=TRUE, ncol=4)
opt <- getopt(spec)

# If help was asked for, print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
    cat(getopt(spec, usage=TRUE))
    q(status=1)
}


## Set verbose mode
verbose = is.null(opt$quiet)
if(verbose){
    cat("Verbose mode is ON\n\n")
}

# Enforce the following required arguments
if (is.null(opt$preprocess)) {
    cat("'--preprocess' is required\n")
    q(status=1)
}
cat("verbose = ", verbose, "\n")
cat("preprocess = ", opt$preprocess, "\n")
cat("cores = ", opt$cores, "\n")
cat("b_permutations = ", opt$b_permutations, "\n")
cat("smooth = ", opt$smooth, "\n")
cat("cutoff = ", opt$cutoff, "\n")
cat("l_value = ", opt$l_value, "\n")
cat("numPositions = ", opt$numPositions, "\n")
cat("shrinkVar = ", opt$shrinkVar, "\n")


# Load required libraries
suppressPackageStartupMessages({
    library("minfi")
    library("FlowSorted.Blood.450k")
    library("TxDb.Hsapiens.UCSC.hg19.knownGene")
    library("doParallel")
})


## Read the sample config file written by the tool's Cheetah template
minfi_config_file = paste0("minfi_temp","/minfi_config.txt")
minfi_config = read.table(minfi_config_file)
colnames(minfi_config) = c("status","green","red","name")
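# minfi_config.txt has one row per sample with four whitespace-separated columns:
# group label (status), path to the green-channel idat, path to the red-channel idat,
# and the sample name used to build the idat basenames below.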


## Make the tmp dir for symlinking data
base_dir = paste0("minfi_temp","/base")
system(paste0("mkdir ",base_dir))


## Make symlinks of files
for (i in 1:nrow(minfi_config)){
    stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))

    ## Make green idat file symlinks
    file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
    cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
    cat("Reading file ",i," GREEN channel ",file_green,"\n")
    system(cmd_green)

    ## Make red idat file symlinks
    file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
    cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
    cat("Reading file ",i," RED channel ",file_red,"\n")
    system(cmd_red)
}

## Make dataframe with Basenames
Basename = paste0(base_dir,"/",unique(substr(list.files(base_dir),1,17)))
status = minfi_config[match(gsub(".+/","",Basename), minfi_config$name),"status"]
targets = data.frame(Basename=Basename,status=status)
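# read.450k.exp expects targets$Basename to be the path prefix shared by each
# sample's _Grn.idat/_Red.idat pair; minfi appends the channel suffixes itself.
# substr(..., 1, 17) assumes standard idat basenames (10-digit Sentrix barcode,
# an underscore, and the R##C## chip position, i.e. 17 characters).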

if ( verbose ) {
    cat("Minfi targets file:\n\n")
    print(targets)
}

## Read 450k files
RGset = read.450k.exp(targets=targets,verbose=TRUE)

if (verbose){
    cat("RGset has been read:\n\n")
    print(RGset)
}

pd = pData(RGset)

## NOTE: QC report is for samples before normalization
## QC REPORT
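# qcReport writes a multi-page PDF of raw-intensity diagnostics (density plots,
# bean plots and control-probe plots), grouped by sample status.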
files = gsub(".+/","",pd$filenames)
## Produce PDF file
if (!is.null(RGset)) {
    # Make PDF of QC report
    minfi::qcReport(rgSet=RGset,sampNames=files,sampGroups=pd$status,pdf="qc_report.pdf")
}


## MDS Plot
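# mdsPlot performs multi-dimensional scaling on the numPositions most variable
# CpG positions and plots the first two dimensions, coloring samples by group.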
## Sample names for plot labels
files = gsub(".+/","",pd$filenames)
#numPositions=as.integer("${numPositions}")

## Produce PDF file
if (!is.null(RGset)) {
    ## Make PDF of MDS plot
    pdf("mds_plot.pdf")
    minfi::mdsPlot(dat=RGset,sampNames=files,sampGroups=pd$status,main="Beta MDS",numPositions=opt$numPositions,pch=19)
    dev.off()
}

if(verbose){
    cat("Wrote QC report and MDS plot\n\n")
}


## Preprocess data with the chosen normalization method
if(opt$preprocess == "quantile"){
    normalized_RGset = preprocessQuantile(RGset)
    if(verbose){cat("Preprocessed using quantile normalization\n")}
} else if (opt$preprocess == "funnorm"){
    normalized_RGset = preprocessFunnorm(RGset)
    if(verbose){cat("Preprocessed using functional normalization\n")}
} else if (opt$preprocess == "noob"){
    normalized_RGset = preprocessNoob(RGset)
    if(verbose){cat("Preprocessed using noob normalization\n")}
} else if (opt$preprocess == "illumina"){
    normalized_RGset = preprocessIllumina(RGset,bg.correct = TRUE, normalize = c("controls", "no"),reference = 1)
    if(verbose){cat("Preprocessed using Illumina normalization\n")}
} else if (opt$preprocess == "swan"){
    normalized_RGset = preprocessSWAN(RGset)
    if(verbose){cat("Preprocessed using SWAN normalization\n")}
} else {
    normalized_RGset = RGset
    if(verbose){cat("No normalization applied; keeping the raw RGChannelSet\n")}
}
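# Note on return types: preprocessQuantile and preprocessFunnorm return a
# GenomicRatioSet, while preprocessNoob, preprocessIllumina and preprocessSWAN
# return a MethylSet; the bumphunter step below branches on this. The raw
# fallback keeps an RGChannelSet, which that step does not handle.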


## Get beta values from the preprocessed data
beta = getBeta(normalized_RGset)
## Set phenotype data
pd = pData(normalized_RGset)


## DMP finder
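# dmpFinder tests each position for association between methylation and the
# phenotype (an F-test for a categorical phenotype) and returns positions ranked
# by p-value; shrinkVar applies variance shrinkage, recommended for small sample sizes.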
dmp = dmpFinder(dat=beta,pheno=pd$status,type="categorical",shrinkVar=opt$shrinkVar)
write.csv(dmp,file="dmps.csv",quote=FALSE,row.names=TRUE)
if(verbose){
    cat("\n","DMP finder successful\n")
}


# Model matrix to pass into the bumphunter function
T1 = levels(pd$status)[2]
T2 = levels(pd$status)[1]

# Stop if the two group labels are not distinct
stopifnot(T1 != T2)

keep = pd$status %in% c(T1,T2)
tt = factor(pd$status[keep],c(T1,T2))
design = model.matrix(~tt)
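# The design matrix has an intercept and an indicator for the second level of tt;
# bumphunter's default (coef = 2) tests this group-difference coefficient.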

if(verbose){
    cat("Model matrix is:\n")
    print(design)
}

# Start bumphunter in a parallel environment
# Parallelize over cores on the machine
registerDoParallel(cores = opt$cores)

## Run bumphunter on the normalized data
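# B is the number of permutations/bootstraps used to build the null distribution
# for the region p-values, cutoff is the threshold the (smoothed) coefficient must
# exceed for probes to enter a candidate bump, and smooth toggles smoothing of the
# coefficient estimates before segmentation.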
if (is(normalized_RGset,"GenomicRatioSet")) {
    res = bumphunter(normalized_RGset[,keep],design,B=opt$b_permutations,smooth=opt$smooth,cutoff=opt$cutoff,type="Beta")
    bumps = res$tab
} else if (is(normalized_RGset,"MethylSet")) {
    # Convert the MethylSet (e.g. from noob/Illumina/SWAN) into a GenomicRatioSet via a RatioSet
    normalized_RGset = ratioConvert(normalized_RGset, what = "both", keepCN = TRUE)
    normalized_RGset = mapToGenome(normalized_RGset)
    res = bumphunter(normalized_RGset[,keep],design=design,B=opt$b_permutations,smooth=opt$smooth,cutoff=opt$cutoff)
    bumps = res$tab
} else {
    # This branch should never run; it is reached only if normalized_RGset
    # does not have one of the expected class types
    stop("Bumphunter did not run properly: unexpected class ", class(normalized_RGset))
}

if(verbose){
    cat("Bumphunter result", "\n")
    print(head(bumps))
}


## Keep only DMRs spanning more than l_value probes (column L). This reduces the
## number of candidate regions early, before matching them to the nearest genes.
bumps = bumps[bumps$L > opt$l_value,]
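# annotateTranscripts builds a transcript-level annotation from the UCSC hg19 TxDb,
# and matchGenes (both from the bumphunter package) assigns each region to its
# nearest gene together with a description of the relationship
# (e.g. inside, promoter, upstream).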
genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
tab = matchGenes(bumps,genes)
annotated_dmrs = cbind(bumps,tab)

if(verbose){
    cat("Match with annotation\n")
    print(head(annotated_dmrs))
}

# Save result, which contains the DMRs and their closest genes
write.csv(annotated_dmrs,file = "dmrs.csv",quote=FALSE,row.names=FALSE)

# Garbage collect
gc()


## TODO: FIX BLOCK FINDER
# Block finder
#library(sva)
#pheno <- pData(GRset)
#mod <- model.matrix(~as.factor(status), data=pheno)
#mod0 <- model.matrix(~1, data=pheno)
#sva.results <- sva(mval, mod, mod0)
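
# A possible sketch for the block-finder step (untested; assumes normalized_RGset
# is a GenomicRatioSet and uses minfi's cpgCollapse/blockFinder rather than sva):
#collapsed <- cpgCollapse(normalized_RGset[,keep], what = "Beta")
#block_res <- blockFinder(collapsed$object, design = design, cutoff = opt$cutoff,
#                         B = opt$b_permutations, smooth = TRUE)
#write.csv(block_res$table, file = "blocks.csv", quote = FALSE, row.names = FALSE)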