Mercurial > repos > nturaga > minfi_pipeline
comparison help/minfi_pipeline_test.R @ 0:84361ce36a11 draft
planemo upload commit fb90aafc93e5e63acfcdac4c27cfd865cdf06c5a-dirty
author | nturaga |
---|---|
date | Tue, 19 Apr 2016 11:10:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:84361ce36a11 |
---|---|
# Set up R error handling: write the error message to stderr and quit with a
# non-zero exit status without saving the workspace.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", status = 1, runLast = FALSE)
  }
)

# We need this to not crash Galaxy with a UTF-8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

library("getopt")
# NOTE: this call used to also pass a misspelled `stringAsfactors = FALSE`,
# which only stored an unused option. It is dropped instead of being corrected
# to `stringsAsFactors`, because setting the real option would change how the
# read.table()/data.frame() calls later in this script treat string columns.
options(useFancyQuotes = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# Option specification, one row per option:
#   long name, short flag, argument flag (0 = none, 1 = required,
#   2 = optional), storage mode.
# getopt() reads the options from its default source, commandArgs(TRUE).
spec <- matrix(c(
  "quiet",          "q", 2, "logical",
  "help",           "h", 0, "logical",
  "preprocess",     "p", 1, "character",
  "numPositions",   "n", 2, "integer",
  "shrinkVar",      "s", 2, "logical",
  "b_permutations", "b", 2, "integer",
  "smooth",         "m", 2, "logical",
  # "float" is not an R storage mode; real-valued options must use "double".
  "cutoff",         "t", 2, "double",
  "l_value",        "l", 2, "integer",
  "cores",          "c", 1, "integer"
), byrow = TRUE, ncol = 4)
opt <- getopt(spec)
# When --help is given, print the usage text built from the option spec and
# quit with a non-zero exit status.
if (!is.null(opt$help)) {
  usage_text <- getopt(spec, usage = TRUE)
  cat(usage_text)
  q(status = 1)
}

## Verbose mode is on unless --quiet was supplied
verbose <- is.null(opt$quiet)
if (verbose) {
  cat("Verbose mode is ON\n\n")
}

# 'preprocess' is mandatory: report and quit when it is missing
if (is.null(opt$preprocess)) {
  cat("'preprocess' is required\n")
  q(status = 1)
}
# Attach the analysis packages, in this order, with their startup messages
# silenced.
suppressPackageStartupMessages(
  invisible(lapply(
    c(
      "minfi",
      "FlowSorted.Blood.450k",
      "doParallel",
      "TxDb.Hsapiens.UCSC.hg19.knownGene"
    ),
    library,
    character.only = TRUE
  ))
)
## Read the minfi configuration table (one row per sample) and name its four
## columns: status, green-channel file, red-channel file, sample name.
## NOTE(review): this path points at job directory 000/43 while `base_dir`
## below points at 000/73 -- confirm that the difference is intended.
minfi_config_file <- "/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/43/minfi_temp/minfi_config.txt"
minfi_config <- read.table(minfi_config_file)
names(minfi_config) <- c("status", "green", "red", "name")

if (verbose) {
  cat("Minfi configuration file:\n\n ")
  print(minfi_config)
}

## Directory that holds the (symlinked) IDAT files
base_dir <- file.path(
  "/Users/nturaga/Documents/workspace/minfi_galaxy/galaxy/database/job_working_directory/000/73/minfi_temp",
  "base"
)
### Make symlinks of files (currently disabled)
#for (i in 1:nrow(minfi_config)){
#stopifnot(nrow(minfi_config) == nrow(minfi_config["name"]))

### Make green idat file symlinks
#file_green = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Grn.idat")
#cmd_green = paste("ln -s",as.character(minfi_config[i,"green"]),file_green,sep=" ")
#cat("Reading file ",i,"GREEN Channel ", file_green)
#system(cmd_green)

### Make red idat file symlinks
#file_red = paste0(base_dir,"/",as.character(minfi_config[i,"name"]),"_Red.idat")
#cmd_red = paste("ln -s",as.character(minfi_config[i,"red"]),file_red,sep=" ")
#cat("Reading file ",i,"RED Channel ", file_red)
#system(cmd_red)
#}

## Build the targets table, one row per array.
## The first 17 characters of every file name found in base_dir are taken as
## the array prefix; each prefix is looked up in the `name` column of the
## configuration table to obtain its status.
idat_files <- list.files(base_dir)
array_prefixes <- unique(substr(idat_files, 1, 17))
Basename <- paste0(base_dir, "/", array_prefixes)
config_rows <- match(gsub(".+/", "", Basename), minfi_config$name)
status <- minfi_config[config_rows, "status"]
targets <- data.frame(Basename = Basename, status = status)

if (verbose) {
  cat("Minfi targets file:\n\n ")
  print(targets)
}

## Read the 450k arrays listed in the targets table
RGset <- read.450k.exp(targets = targets, verbose = FALSE)

if (verbose) {
  cat("RGset has been read: \n\n")
  print(RGset)
}
## Normalize the data with the method named by --preprocess.
## Any value other than the five listed below leaves RGset untouched.
switch(opt$preprocess,
  quantile = {
    normalized_RGset <- preprocessQuantile(RGset)
    if (verbose) {
      cat("Preprocessed using Quantile normalization")
    }
  },
  noob = {
    normalized_RGset <- preprocessNoob(RGset)
    if (verbose) {
      cat("Preprocessed using Noob normalization")
    }
  },
  raw = {
    normalized_RGset <- preprocessRaw(RGset)
    if (verbose) {
      print("Preprocessed using Raw normalization")
    }
  },
  illumina = {
    normalized_RGset <- preprocessIllumina(
      RGset,
      bg.correct = TRUE,
      normalize = c("controls", "no"),
      reference = 1
    )
    if (verbose) {
      print("Preprocessed using Illumina normalization")
    }
  },
  preprocessFunnorm = {
    normalized_RGset <- preprocessFunnorm(RGset)
    if (verbose) {
      print("Preprocessed using Functional normalization")
    }
  },
  {
    normalized_RGset <- RGset
    if (verbose) {
      print("Preprocessed using NO normalization")
    }
  }
)
129 | |
130 ## Get beta values from Proprocessed data | |
131 beta = getBeta(normalized_RGset) | |
132 ## Set phenotype data | |
133 pd=pData(normalized_RGset) | |
134 | |
135 | |
136 ## QC REPORT | |
137 files = gsub(".+/","",pd$filenames) | |
138 ## Produce PDF file | |
139 if (!is.null(RGset)) { | |
140 # Make PDF of QC report | |
141 minfi::qcReport(rgSet=RGset,sampNames=files,sampGroups=pd$status,pdf="qc_report.pdf") | |
142 } | |
143 | |
144 ## MDS Plot | |
145 ## Set phenotype data | |
146 files = gsub(".+/","",pd$filenames) | |
147 | |
148 ## Produce PDF file | |
149 if (!is.null(RGset)) { | |
150 ## Make PDF of density plot | |
151 pdf("mds_plot.pdf") | |
152 minfi::mdsPlot(dat=RGset,sampNames=files,sampGroups=pd$status,main="Beta MDS",numPositions = opt$numPositions,pch=19) | |
153 dev.off() | |
154 } | |
155 | |
156 | |
157 if(verbose){ | |
158 cat("Made plot of QC report and MDS plot\n\n") | |
159 } | |
160 | |
161 | |
# Estimate cell counts (currently disabled)
#if(!is.null(RGset)){
#cell_counts = minfi::estimateCellCounts(rgSet=RGset,meanPlot=TRUE)
#write.csv(cell_counts,file="estimated_cell_counts.csv",quote=FALSE,row.names=TRUE)
#}
#if(verbose){
#cat("Cell Counts estimated\n\n")
#}

## DMP finder: test each position for association with the sample status and
## write the resulting table to dmps.csv.
## --shrinkVar is optional; when it is absent opt$shrinkVar is NULL, so fall
## back to dmpFinder's documented default (FALSE) instead of passing NULL.
shrink_var <- if (is.null(opt$shrinkVar)) FALSE else opt$shrinkVar
dmp <- dmpFinder(
  dat = beta,
  pheno = pd$status,
  type = "categorical",
  shrinkVar = shrink_var
)
write.csv(dmp, file = "dmps.csv", quote = FALSE, row.names = TRUE)
if (verbose) {
  cat("DMP Finder successful \n")
}
# Model matrix to pass into the bumphunter function
pd <- pData(normalized_RGset)

# Coerce to a factor first: levels() returns NULL for a character vector,
# and factor() also drops levels that no sample actually carries.
sample_status <- factor(pd$status)
status_levels <- levels(sample_status)
if (length(status_levels) < 2L) {
  stop(
    "Need at least two distinct 'status' groups to build the model matrix, found ",
    length(status_levels),
    call. = FALSE
  )
}

# Compare the second level (T1) against the first level (T2); samples that
# belong to any further level are excluded through `keep`.
T1 <- status_levels[2]
T2 <- status_levels[1]
keep <- sample_status %in% c(T1, T2)
tt <- factor(sample_status[keep], levels = c(T1, T2))
design <- model.matrix(~tt)

if (verbose) {
  cat("Model matrix is: \n")
  print(design)
}
# Start bumphunter in a parallel environment:
# register a parallel backend with the number of cores given by --cores
registerDoParallel(cores = opt$cores)

## Run bumphunter on the kept samples of the preprocessed data.
## --smooth is optional; when it is absent opt$smooth is NULL, so fall back to
## bumphunter's documented default (FALSE) instead of passing NULL.
smooth_flag <- if (is.null(opt$smooth)) FALSE else opt$smooth
res <- bumphunter(
  normalized_RGset[, keep],
  design,
  B = opt$b_permutations,
  smooth = smooth_flag,
  cutoff = opt$cutoff,
  type = "Beta"
)
bumps <- res$tab

if (verbose) {
  cat("Bumphunter result", "\n")
  print(head(bumps))
}
## Choose DMRs above a certain length threshold.
## This helps reduce the number of DMRs early, before matching them
## with the genes closest to each region.
## --l_value is optional; comparing against a NULL threshold would silently
## drop every row, so the filter is applied only when a value was supplied.
if (!is.null(opt$l_value)) {
  bumps <- bumps[bumps$L > opt$l_value, ]
}
genes <- annotateTranscripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
tab <- matchGenes(bumps, genes)
result <- cbind(bumps, tab)

if (verbose) {
  cat("Match with annotation\n")
  print(head(result))
}

# Save result, which contains the DMRs and their closest genes
write.csv(result, file = "dmrs.csv", quote = FALSE, row.names = TRUE)

# Garbage collect
gc()
# Block finder (currently disabled)
#library(sva)
#pheno <- pData(GRset)
#mod <- model.matrix(~as.factor(status), data=pheno)
#mod0 <- model.matrix(~1, data=pheno)
#sva.results <- sva(mval, mod, mod0)