comparison help/minfi_pipeline_test.R @ 0:84361ce36a11 draft

planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author nturaga
date Tue, 19 Apr 2016 11:10:25 -0400
# setup R error handling to go to stderr
options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
spec <- matrix(c(
  "quiet",          "q", 2, "logical",
  "help",           "h", 0, "logical",
  "preprocess",     "p", 1, "character",
  "numPositions",   "n", 2, "integer",
  "shrinkVar",      "s", 2, "logical",
  "b_permutations", "b", 2, "integer",
  "smooth",         "m", 2, "logical",
  "cutoff",         "t", 2, "double",
  "l_value",        "l", 2, "integer",
  "cores",          "c", 1, "integer"),
  byrow=TRUE, ncol=4)
opt <- getopt(spec)
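
## Illustrative invocation (a sketch; the option values below are hypothetical
## examples, not taken from the Galaxy tool wrapper that normally calls this script):
##   Rscript minfi_pipeline_test.R --preprocess quantile --numPositions 1000 \
##     --b_permutations 100 --cutoff 0.2 --l_value 3 --cores 4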


# If help was asked for, print a friendly message
# and exit with a non-zero error code
if (!is.null(opt$help)) {
  cat(getopt(spec, usage=TRUE))
  q(status=1)
}


## Set verbose mode
verbose <- is.null(opt$quiet)
if (verbose) {
  cat("Verbose mode is ON\n\n")
}

# Enforce the following required arguments
if (is.null(opt$preprocess)) {
  cat("'preprocess' is required\n")
  q(status=1)
}

# Load required libraries

suppressPackageStartupMessages({
  library("minfi")
  library("FlowSorted.Blood.450k")
  library("doParallel")
  library("TxDb.Hsapiens.UCSC.hg19.knownGene")
})


## Parse the Cheetah-generated config and build a data frame for creating the tmp dir
minfi_config_file = "/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/43/minfi_temp/minfi_config.txt"
minfi_config = read.table(minfi_config_file)
colnames(minfi_config) = c("status","green","red","name")

if (verbose) {
  cat("Minfi configuration file:\n\n")
  print(minfi_config)
}
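
## For reference: minfi_config.txt is read with read.table() and given the column
## names status, green, red, name above, i.e. one whitespace-separated row per
## sample. The row below is a purely hypothetical example, not real data:
##   normal  /path/to/sample1_Grn.idat  /path/to/sample1_Red.idat  sample1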

## Make the tmp dir for symlinking data
base_dir = paste0("/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/73/minfi_temp","/base")


### Make symlinks of files
#for (i in 1:nrow(minfi_config)){
#  stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))

### Make green idat file symlinks
#  file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
#  cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
#  cat("Reading file ",i,"GREEN Channel ", file_green)
#  system(cmd_green)

### Make red idat file symlinks
#  file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
#  cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
#  cat("Reading file ",i,"RED Channel ", file_red)
#  system(cmd_red)
#}
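
## A minimal sketch of the same symlinking step, kept disabled like the block
## above; it uses base R's dir.create()/file.symlink() instead of shelling out
## to `ln -s`, and assumes only the minfi_config columns assigned earlier.
#dir.create(base_dir, recursive = TRUE, showWarnings = FALSE)
#for (i in seq_len(nrow(minfi_config))) {
#  sample_name <- as.character(minfi_config[i, "name"])
#  file.symlink(as.character(minfi_config[i, "green"]), file.path(base_dir, paste0(sample_name, "_Grn.idat")))
#  file.symlink(as.character(minfi_config[i, "red"]), file.path(base_dir, paste0(sample_name, "_Red.idat")))
#}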

## Make dataframe with Basenames
Basename = paste0(base_dir,"/",unique(substr(list.files(base_dir),1,17)))
status = minfi_config[match(gsub(".+/","",Basename), minfi_config$name),"status"]
targets = data.frame(Basename=Basename, status=status)

if (verbose) {
  cat("Minfi targets file:\n\n")
  print(targets)
}

## Read 450k files
RGset = read.450k.exp(targets=targets, verbose=FALSE)

if (verbose) {
  cat("RGset has been read: \n\n")
  print(RGset)
}


## Preprocess data with the chosen normalization method
if (opt$preprocess == "quantile") {
  normalized_RGset = preprocessQuantile(RGset)
  if (verbose) {cat("Preprocessed using Quantile normalization\n")}
} else if (opt$preprocess == "noob") {
  normalized_RGset = preprocessNoob(RGset)
  if (verbose) {cat("Preprocessed using Noob normalization\n")}
} else if (opt$preprocess == "raw") {
  normalized_RGset = preprocessRaw(RGset)
  if (verbose) {cat("Preprocessed using Raw normalization\n")}
} else if (opt$preprocess == "illumina") {
  normalized_RGset = preprocessIllumina(RGset, bg.correct = TRUE, normalize = "controls", reference = 1)
  if (verbose) {cat("Preprocessed using Illumina normalization\n")}
} else if (opt$preprocess == "preprocessFunnorm") {
  normalized_RGset = preprocessFunnorm(RGset)
  if (verbose) {cat("Preprocessed using Functional normalization\n")}
} else {
  normalized_RGset = RGset
  if (verbose) {cat("Preprocessed using NO normalization\n")}
}


## Get beta values from preprocessed data
beta = getBeta(normalized_RGset)
## Set phenotype data
pd = pData(normalized_RGset)


## QC REPORT
files = gsub(".+/","",pd$filenames)
## Produce PDF file
if (!is.null(RGset)) {
  # Make PDF of QC report
  minfi::qcReport(rgSet=RGset, sampNames=files, sampGroups=pd$status, pdf="qc_report.pdf")
}

## MDS Plot
files = gsub(".+/","",pd$filenames)

## Produce PDF file
if (!is.null(RGset)) {
  ## Make PDF of MDS plot
  pdf("mds_plot.pdf")
  minfi::mdsPlot(dat=RGset, sampNames=files, sampGroups=pd$status, main="Beta MDS", numPositions=opt$numPositions, pch=19)
  dev.off()
}


if (verbose) {
  cat("Made QC report and MDS plot\n\n")
}


#Estimate Cell counts
#if(!is.null(RGset)){
#  cell_counts = minfi::estimateCellCounts(rgSet=RGset, meanPlot=TRUE)
#  write.csv(cell_counts, file="estimated_cell_counts.csv", quote=FALSE, row.names=TRUE)
#}
#if(verbose){
#  cat("Cell Counts estimated\n\n")
#}

## DMP finder
dmp = dmpFinder(dat=beta, pheno=pd$status, type="categorical", shrinkVar=opt$shrinkVar)
write.csv(dmp, file="dmps.csv", quote=FALSE, row.names=TRUE)
if (verbose) {
  cat("DMP finder successful\n")
}
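
## Note on the output above: with a categorical phenotype, minfi's dmpFinder
## should return one row per probe, sorted by p-value, with intercept,
## F-statistic, p-value and q-value columns; dmps.csv mirrors that table.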


# Model matrix to pass into the bumphunter function
pd = pData(normalized_RGset)
T1 = levels(pd$status)[2]
T2 = levels(pd$status)[1]

stopifnot(T1 != T2)
keep = pd$status %in% c(T1, T2)
tt = factor(pd$status[keep], c(T1, T2))
design = model.matrix(~tt)

if (verbose) {
  cat("Model matrix is:\n")
  print(design)
}
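
## Worked example, for illustration only (the sample labels are hypothetical):
## if pd$status has levels c("normal","tumor"), then T1 = "tumor", T2 = "normal",
## tt is releveled to c("tumor","normal"), and model.matrix(~tt) gives an
## intercept column of 1s plus a 0/1 indicator column for "normal"; bumphunter's
## default coefficient (the second column) therefore tests normal vs tumor.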

# Start bumphunter in a parallel environment
# Parallelize over cores on the machine
registerDoParallel(cores = opt$cores)

## Bumphunter run on the normalized set produced above

res = bumphunter(normalized_RGset[,keep], design, B=opt$b_permutations, smooth=opt$smooth, cutoff=opt$cutoff, type="Beta")
bumps = res$table

if (verbose) {
  cat("Bumphunter result\n")
  print(head(bumps))
}

## Keep only DMRs spanning more than l_value probes (the L column of the
## bumphunter table). Filtering early keeps the table small before matching
## each region to its closest gene.
bumps = bumps[bumps$L > opt$l_value,]
genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
tab = matchGenes(bumps, genes)
result = cbind(bumps, tab)

if (verbose) {
  cat("Match with annotation\n")
  print(head(result))
}

# Save result, which contains DMRs and their closest genes
write.csv(result, file="dmrs.csv", quote=FALSE, row.names=TRUE)

# Garbage collect
gc()

# Block finder
#library(sva)
#pheno <- pData(GRset)
#mod <- model.matrix(~as.factor(status), data=pheno)
#mod0 <- model.matrix(~1, data=pheno)
#sva.results <- sva(mval, mod, mod0)