Mercurial > repos > modencode-dcc > spp_package
comparison run_spp.R @ 2:86146a313b66 draft
Uploaded
author | modencode-dcc |
---|---|
date | Thu, 17 Jan 2013 14:36:52 -0500 |
parents | |
children | 495a6d033ca1 |
comparison
equal
deleted
inserted
replaced
1:6ed68e907ff6 | 2:86146a313b66 |
---|---|
1 # run_spp.R | |
2 # ============= | |
3 # Author: Anshul Kundaje, Computer Science Dept., Stanford University | |
4 # Email: akundaje@stanford.edu | |
5 # Last updated: Oct 8, 2010 | |
6 # ============= | |
7 # MANDATORY ARGUMENTS | |
8 # -c=<ChIP_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) | |
9 # MANDATORY ARGUMENT FOR PEAK CALLING | |
10 # -i=<Input_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) | |
11 # OPTIONAL ARGUMENTS | |
12 # -s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600 | |
13 # -speak=<strPeak>, user-defined cross-correlation peak strandshift | |
14 # -x=<min>:<max>, strand shifts to exclude (This is mainly to avoid phantom peaks) default=10:(readlen+10) | |
15 # -p=<nodes> , number of parallel processing nodes, default=NULL | |
16 # -fdr=<falseDiscoveryRate> , false discovery rate threshold for peak calling | |
17 # -npeak=<numPeaks>, threshold on number of peaks to call | |
18 # -tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used) | |
19 # -filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name | |
20 # OUTPUT PARAMETERS | |
21 # -odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used) | |
22 # -savn=<narrowpeakfilename> OR -savn NarrowPeak file name | |
23 # -savr=<regionpeakfilename> OR -savr RegionPeak file name | |
24 # -savd=<rdatafile> OR -savd , save Rdata file | |
25 # -savp=<plotdatafile> OR -savp , save cross-correlation plot | |
26 # -out=<resultfile>, append peakshift result to a file | |
27 # format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag | |
28 # -rf , if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists | |
29 # -clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files | |
30 | |
# Options passed on the command line after the script name
args <- commandArgs(trailingOnly = TRUE)
# How many options were supplied
nargs <- length(args)
33 | |
34 # ########################################################################### | |
35 # AUXILIARY FUNCTIONS | |
36 # ########################################################################### | |
37 | |
print.usage <- function() {
  # ===================================
  # Print the command-line usage summary of run_spp.R to stderr.
  # Takes no arguments and returns NULL invisibly.
  # The text is kept in one vector (one entry per output line) so it is
  # easy to keep in sync with the options handled by parse.arguments().
  # ===================================
  usage.lines <- c(
    'Usage: Rscript run_spp.R <options>',
    'MANDATORY ARGUMENTS',
    '-c=<ChIP_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped)(FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) ',
    'MANDATORY ARGUMENTS FOR PEAK CALLING',
    '-i=<Input_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) ',
    'OPTIONAL ARGUMENTS',
    '-s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600',
    '-speak=<strPeak>, user-defined cross-correlation peak strandshift',
    '-x=<min>:<max>, strand shifts to exclude (This is mainly to avoid region around phantom peak) default=10:(readlen+10)',
    # The parser leaves the node count unset unless -p is given, so no cluster is created by default
    '-p=<nodes> , number of parallel processing nodes, default=NULL (no parallel processing)',
    '-fdr=<falseDiscoveryRate> , false discovery rate threshold for peak calling',
    '-npeak=<numPeaks>, threshold on number of peaks to call',
    '-tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)',
    '-filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name',
    'OUTPUT ARGUMENTS',
    '-odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)',
    '-savn=<narrowpeakfilename> OR -savn NarrowPeak file name (fixed width peaks)',
    '-savr=<regionpeakfilename> OR -savr RegionPeak file name (variable width peaks with regions of enrichment)',
    '-savd=<rdatafile> OR -savd, save Rdata file',
    '-savp=<plotdatafile> OR -savp, save cross-correlation plot',
    '-out=<resultfile>, append peakshift/phantomPeak results to a file',
    ' format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag',
    '-rf, if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists',
    '-clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files'
  )
  # Terminate every entry with a newline and write the whole block in one call
  cat(paste(usage.lines, '\n', sep=''), sep='', file=stderr())
  invisible(NULL)
} # end: print.usage()
67 | |
get.file.parts <- function(file.fullpath) {
  # ===================================
  # Decompose a file name (optionally with a directory) into:
  #   path     - directory part ('.' when the name has no directory)
  #   fullname - last path component, extension included
  #   name     - fullname without its final extension
  #   ext      - final extension including the leading '.', or ''
  # All four fields are '' when the input splits into no components.
  # Stops if the input is not a character value.
  # ===================================
  if (! is.character(file.fullpath)) {
    stop('File name must be a string')
  }

  dir.sep <- .Platform$file.sep
  path.tokens <- strsplit(as.character(file.fullpath), dir.sep, fixed=TRUE)[[1]]
  n.tokens <- length(path.tokens)

  # Nothing to split: report every part as empty
  if (n.tokens == 0) {
    return(list(path='', fullname='', name='', ext=''))
  }

  # Last token is the file name; everything before it is the directory
  base.name <- path.tokens[n.tokens]
  dir.name <- if (n.tokens == 1) {
    '.'
  } else {
    paste(path.tokens[-n.tokens], collapse=dir.sep)
  }

  # Only the text after the last '.' counts as the extension
  name.tokens <- strsplit(base.name, '.', fixed=TRUE)[[1]]
  n.name.tokens <- length(name.tokens)
  if (n.name.tokens == 1) {
    extension <- ''
    stem <- name.tokens
  } else {
    extension <- paste('.', name.tokens[n.name.tokens], sep="")
    stem <- paste(name.tokens[1:(n.name.tokens - 1)], collapse=".")
  }

  list(path=dir.name, fullname=base.name, name=stem, ext=extension)
} # end: get.file.parts()
107 | |
parse.arguments <- function(args) {
  # ===================================
  # Parse the command-line options of run_spp.R.
  #
  # args: character vector of options, each either a bare flag ('-rf')
  #       or '-key=value' (as returned by commandArgs(trailingOnly=TRUE)).
  #
  # Returns a named list with the elements
  #   chip.file, isurl.chip.file, control.file, isurl.control.file,
  #   sep.range (min, step, max), sep.peak, ex.range (min, max), n.nodes,
  #   fdr, npeak, temp.dir, chrname.rm.pattern, output.odir,
  #   output.npeak.file, output.rpeak.file, output.rdata.file,
  #   output.plot.file, output.result.file, replace.flag, clean.files.flag.
  # Options that were not supplied keep their defaults (mostly NA).
  #
  # Stops with an error on an unknown or malformed option, a missing
  # mandatory option, or an input file/URL that cannot be found.
  # ===================================

  # ---- local helpers ----

  option.value <- function(arg) {
    # Text after the FIRST '=' so that values may themselves contain '=';
    # NA when there is no '=' or nothing follows it
    if (! grepl('=', arg, fixed=TRUE)) {
      return(NA_character_)
    }
    value <- sub('^[^=]*=', '', arg)
    if (nchar(value) == 0) NA_character_ else value
  }

  required.value <- function(arg, missing.msg) {
    # Value of a '-key=value' option, or stop with missing.msg
    value <- option.value(arg)
    if (is.na(value)) {
      stop(missing.msg, call.=FALSE)
    }
    value
  }

  numeric.value <- function(text, nonnumeric.msg) {
    # Convert text to numbers, or stop with nonnumeric.msg if any part fails
    value <- suppressWarnings(as.numeric(text))
    if (any(is.na(value))) {
      stop(nonnumeric.msg, call.=FALSE)
    }
    value
  }

  save.target <- function(arg, flag, usage.msg) {
    # '-flag=<file>' gives the file name; a bare '-flag' gives NULL, which
    # means "derive the name from the input file names" further below
    value <- option.value(arg)
    if (! is.na(value)) {
      return(value)
    }
    if (arg == flag) {
      return(NULL)
    }
    stop(usage.msg, call.=FALSE)
  }

  # ---- defaults ----
  chip.file <- NA # main ChIP tagAlign/BAM file name
  isurl.chip.file <- FALSE # flag indicating whether ChIP file is a URL
  control.file <- NA # control tagAlign/BAM file name
  isurl.control.file <- FALSE # flag indicating whether control file is a URL
  sep.min <- -100 # min strand shift
  sep.max <- 600 # max strand shift
  sep.bin <- 5 # increment for strand shift
  sep.peak <- NA # user-defined peak shift
  exclude.min <- 10 # lowerbound of strand shift exclusion region
  exclude.max <- NaN # upperbound of strand shift exclusion region
  n.nodes <- NA # number of parallel processing nodes
  fdr <- 0.01 # false discovery rate threshold for peak calling
  npeak <- NA # threshold on number of peaks to call
  temp.dir <- tempdir() # temporary directory
  chrname.rm.pattern <- NA # chromosome name pattern used to remove tags
  output.odir <- NA # Output directory name
  output.npeak.file <- NA # Output narrowPeak file name
  output.rpeak.file <- NA # Output regionPeak file name
  output.rdata.file <- NA # Rdata file
  output.plot.file <- NA # cross correlation plot file
  output.result.file <- NA # result file
  replace.flag <- FALSE # replace file flag
  clean.files.flag <- FALSE # file deletion flag

  # ---- parse each option ----
  for (each.arg in args) {

    if (grepl('^-c=',each.arg)) { #-c=<chip.file>

      chip.file <- required.value(each.arg, 'No tagAlign/BAM file name provided for parameter -c=')

    } else if (grepl('^-i=',each.arg)) { #-i=<control.file>

      control.file <- required.value(each.arg, 'No tagAlign/BAM file name provided for parameter -i=')

    } else if (grepl('^-s=',each.arg)) { #-s=<sep.min>:<sep.bin>:<sep.max>

      sep.text <- strsplit(required.value(each.arg, 'Strand shift limits must be specified as -s=sepmin:sepbin:sepmax'),
                           ':', fixed=TRUE)[[1]]
      if (length(sep.text) != 3) { # must have 3 parts
        stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
      }
      sep.vals <- round(numeric.value(sep.text, 'Strand shift limits must be numeric values'))
      sep.min <- sep.vals[1]
      sep.bin <- sep.vals[2]
      sep.max <- sep.vals[3]
      if ((sep.min > sep.max) || (sep.bin > (sep.max - sep.min)) || (sep.bin < 0)) {
        stop('Illegal separation values -s=sepmin:sepbin:sepmax')
      }

    } else if (grepl('^-speak=',each.arg)) { #-speak=<sep.peak> , user-defined cross-correlation peak strandshift

      sep.peak <- numeric.value(required.value(each.arg, 'User defined peak shift must be provided as -speak=<sep.peak>'),
                                '-speak=<sep.peak>: User defined peak shift must be numeric')

    } else if (grepl('^-x=',each.arg)) { #-x=<exclude.min>:<exclude.max>

      exclude.text <- strsplit(required.value(each.arg, 'Exclusion limits must be specified as -x=excludemin:excludemax'),
                               ':', fixed=TRUE)[[1]]
      if (length(exclude.text) != 2) { # must have 2 parts
        stop('Exclusion limits must be specified as -x=excludemin:excludemax')
      }
      exclude.vals <- round(numeric.value(exclude.text, 'Exclusion limits must be numeric values'))
      exclude.min <- exclude.vals[1]
      exclude.max <- exclude.vals[2]
      if (exclude.min > exclude.max) {
        stop('Illegal exclusion limits -x=excludemin:excludemax')
      }

    } else if (grepl('^-p=',each.arg)) { #-p=<n.nodes> , number of parallel processing nodes, default=NULL

      n.nodes <- round(numeric.value(required.value(each.arg, 'Number of parallel nodes must be provided as -p=<numnodes>'),
                                     '-p=<numnodes>: numnodes must be numeric'))

    } else if (grepl('^-fdr=',each.arg)) { #-fdr=<fdr> , false discovery rate, default=0.01

      fdr <- numeric.value(required.value(each.arg, 'False discovery rate must be provided as -fdr=<fdr>'),
                           '-fdr=<falseDiscoveryRate>: false discovery rate must be numeric')

    } else if (grepl('^-npeak=',each.arg)) { #-npeak=<numPeaks> , number of peaks threshold, default=NA

      npeak <- round(numeric.value(required.value(each.arg, 'Threshold on number of peaks must be provided as -npeak=<numPeaks>'),
                                   '-npeak=<numPeaks>: threshold on number of peaks must be numeric'))

    } else if (grepl('^-tmpdir=',each.arg)) { #-tmpdir=<temp.dir>

      temp.dir <- required.value(each.arg, 'No temporary directory provided for parameter -tmpdir=')

    } else if (grepl('^-filtchr=',each.arg)) { #-filtchr=<chrname.rm.pattern>

      chrname.rm.pattern <- required.value(each.arg, 'No pattern provided for parameter -filtchr=')

    } else if (grepl('^-odir=',each.arg)) { #-odir=<output.odir>

      output.odir <- required.value(each.arg, 'No output directory provided for parameter -odir=')

    } else if (grepl('^-savn',each.arg)) { # -savn=<output.npeak.file> OR -savn , save narrowpeak

      output.npeak.file <- save.target(each.arg, '-savn',
                                       'Argument for saving narrowPeak file must be -savn or -savn=<filename>')

    } else if (grepl('^-savr',each.arg)) { # -savr=<output.rpeak.file> OR -savr , save regionpeak

      output.rpeak.file <- save.target(each.arg, '-savr',
                                       'Argument for saving regionPeak file must be -savr or -savr=<filename>')

    } else if (grepl('^-savd',each.arg)) { # -savd=<output.rdata.file> OR -savd , save Rdata file

      output.rdata.file <- save.target(each.arg, '-savd',
                                       'Argument for saving Rdata file must be -savd or -savd=<filename>')

    } else if (grepl('^-savp',each.arg)) { # -savp=<output.plot.file> OR -savp , save cross-correlation plot

      output.plot.file <- save.target(each.arg, '-savp',
                                      'Argument for saving plot file must be -savp or -savp=<filename>')

    } else if (grepl('^-out=',each.arg)) { #-out=<output.result.file>

      output.result.file <- required.value(each.arg, 'No result file provided for parameter -out=')

    } else if (each.arg == '-rf') {

      replace.flag <- TRUE

    } else if (each.arg == '-clean') {

      clean.files.flag <- TRUE

    } else {

      stop('Illegal argument ',each.arg)
    }
  }
  # End: for loop

  # ---- check mandatory arguments ----
  if (is.na(chip.file)) {
    stop('-c=<tagAlign/BAMFileName> is a mandatory argument')
  }

  # A bare -savn/-savr is stored as NULL, and is.na(NULL) is zero-length, so
  # NULL must be tested explicitly before is.na() is used inside a condition.
  npeak.requested <- is.null(output.npeak.file) || ! is.na(output.npeak.file)
  # A bare -savr builds its default file name from the control file name, so
  # it needs -i as well. An explicit -savr=<file> without -i is still
  # accepted here, as before.
  rpeak.default.name <- is.null(output.rpeak.file)
  if (is.na(control.file) && (npeak.requested || rpeak.default.name)) {
    stop('-i=<tagAlign/BAMFileName> is required for peak calling')
  }

  # ---- check if ChIP and control files are URLs ----
  url.pattern <- '^(https?|ftp)://'
  isurl.chip.file <- grepl(url.pattern, chip.file)
  isurl.control.file <- (! is.na(control.file)) && grepl(url.pattern, control.file)

  # If ChIP file is a URL output.odir MUST be specified
  if (isurl.chip.file && is.na(output.odir)) {
    stop('If ChIP file is a URL, then output directory MUST be specified')
  }

  # ---- check that ChIP and control files exist ----
  # URLs are shell-quoted because they may contain characters such as & or ?
  if (isurl.chip.file) {
    if (system(paste('wget -q --spider', shQuote(chip.file))) != 0) {
      stop('ChIP file URL not valid: ',chip.file)
    }
  } else if (!file.exists(chip.file)) {
    stop('ChIP File:',chip.file,' does not exist')
  }

  if (!is.na(control.file)) {
    if (isurl.control.file) {
      if (system(paste('wget -q --spider', shQuote(control.file))) != 0) {
        stop('Control file URL not valid: ',control.file)
      }
    } else if (!file.exists(control.file)) {
      stop('Control File:',control.file,' does not exist')
    }
  }

  # ---- fill in derived defaults ----
  if (is.na(output.odir)) { # Reconstruct output.odir if not provided
    output.odir <- get.file.parts(chip.file)$path
  }

  if (is.null(output.npeak.file)) { # Reconstruct output.npeak.file if NULL
    output.npeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.narrowPeak', sep=""))
  }

  if (is.null(output.rpeak.file)) { # Reconstruct output.rpeak.file if NULL
    output.rpeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.regionPeak', sep=""))
  }

  if (is.null(output.rdata.file)) { # Reconstruct output.rdata.file if NULL
    output.rdata.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.Rdata', sep=""))
  }

  if (is.null(output.plot.file)) { # Reconstruct output.plot.file if NULL
    output.plot.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.pdf', sep=""))
  }

  return(list(chip.file=chip.file,
              isurl.chip.file=isurl.chip.file,
              control.file=control.file,
              isurl.control.file=isurl.control.file,
              sep.range=c(sep.min,sep.bin,sep.max),
              sep.peak=sep.peak,
              ex.range=c(exclude.min,exclude.max),
              n.nodes=n.nodes,
              fdr=fdr,
              npeak=npeak,
              temp.dir=temp.dir,
              chrname.rm.pattern=chrname.rm.pattern,
              output.odir=output.odir,
              output.npeak.file=output.npeak.file,
              output.rpeak.file=output.rpeak.file,
              output.rdata.file=output.rdata.file,
              output.plot.file=output.plot.file,
              output.result.file=output.result.file,
              replace.flag=replace.flag,
              clean.files.flag=clean.files.flag))
} # end: parse.arguments()
433 | |
read.align <- function(align.filename) {
  # ===================================
  # Read a tagAlign or BAM file.
  #
  # align.filename: path of an uncompressed file whose name contains
  #                 '.tagAlign' or '.bam'.
  # Returns the object produced by read.tagalign.tags() with an added
  # $read.length element: the rounded median of (column 3 - column 2)
  # over the first 500 rows of the tagAlign data.
  #
  # A BAM file is first converted to a temporary tagAlign file next to
  # it with samtools + awk; that temporary file is deleted afterwards.
  # On conversion failure or an unrecognised file name an error is
  # written to stderr and R quits with status 1.
  # ===================================

  add.read.length <- function(tag.data, tagalign.filename) {
    # Estimate the read length from the first rows of the tagAlign file
    head.rows <- read.table(tagalign.filename,nrows=500)
    tag.data$read.length <- round(median(head.rows$V3 - head.rows$V2))
    tag.data
  }

  if (grepl('(\\.bam)?.*(\\.tagAlign)',align.filename)) { # if tagalign file
    chip.data <- add.read.length(read.tagalign.tags(align.filename), align.filename)
  } else if (grepl('(\\.tagAlign)?.*(\\.bam)',align.filename)) { # if bam file
    # name of the temporary tagAlign file
    bam2align.filename <- sub('\\.bam','.tagAlign',align.filename)
    # generate command to convert bam to tagalign
    # (file names are shell-quoted so paths with spaces or metacharacters work)
    command <- vector(length=2)
    command[1] <- sprintf("samtools view -F 0x0204 -o - %s",shQuote(align.filename))
    command[2] <- paste("awk 'BEGIN{FS=" , '"\t"' , ";OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", shQuote(bam2align.filename), sep="")
    command <- paste(command,collapse=" | ")
    # Run command
    status <- system(command,intern=FALSE,ignore.stderr=FALSE)
    if ((status != 0) || !file.exists(bam2align.filename)) {
      cat(sprintf("Error converting BAM to tagalign file: %s\n",align.filename),file=stderr())
      q(save="no",status=1)
    }
    # read converted BAM file and get readlength info
    chip.data <- add.read.length(read.tagalign.tags(bam2align.filename), bam2align.filename)
    # delete temporary tagalign file
    file.remove(bam2align.filename)
  } else {
    # was align.fname, an undefined name that raised an R error instead of
    # printing this message
    cat(sprintf("Error:Unknown file format for file:%s\n",align.filename),file=stderr())
    q(save="no",status=1)
  }
  return(chip.data)
} # end: read.align()
471 | |
print.run.params <- function(params){
  # ===================================
  # Output run parameters to stdout.
  #
  # params: the list returned by parse.arguments().
  #
  # The values are written with one call to cat(); the 'labels' vector
  # supplies the text placed at the start of each output line and
  # fill=18 makes cat() start a new line between values. The 18 labels
  # correspond, in order, to the 18 values printed (sep.range contributes
  # three values and ex.range two).
  # ===================================
  cat('################\n',file=stdout())
  # Read from the 'params' argument, not from the global 'iparams'
  cat(params$chip.file,
      params$control.file,
      params$sep.range,
      params$sep.peak,
      params$ex.range,
      params$n.nodes,
      params$fdr,
      params$npeak,
      params$output.odir,
      params$output.npeak.file,
      params$output.rpeak.file,
      params$output.rdata.file,
      params$output.plot.file,
      params$output.result.file,
      params$replace.flag,
      labels=c('ChIP data:','Control data:', 'strandshift(min):','strandshift(step):','strandshift(max)','user-defined peak shift',
               'exclusion(min):','exclusion(max):','num parallel nodes:','FDR threshold:','NumPeaks Threshold:','Output Directory:',
               'narrowPeak output file name:', 'regionPeak output file name:', 'Rdata filename:',
               'plot pdf filename:','result filename:','Overwrite files?:'),
      fill=18,
      file=stdout())
  cat('\n',file=stdout())
} # end: print.run.params()
500 | |
check.replace.flag <- function(params){
  # ===================================
  # Abort the run if an output file already exists and overwriting was
  # not requested.
  #
  # params: the list returned by parse.arguments(). Uses $replace.flag
  #         and the four output file names ($output.npeak.file,
  #         $output.rpeak.file, $output.plot.file, $output.rdata.file);
  #         a name that is NA is skipped.
  #
  # When $replace.flag is FALSE and one of the files exists, a message is
  # written to stderr and R quits with status 1. Otherwise returns NULL
  # invisibly.
  # ===================================
  # Read from the 'params' argument, not from the global 'iparams'
  if (params$replace.flag) {
    return(invisible(NULL))
  }

  # Field name -> word used in the message, in the order the files are checked
  output.labels <- c(output.npeak.file='narrowPeak',
                     output.rpeak.file='regionPeak',
                     output.plot.file='Plot',
                     output.rdata.file='Rdata')

  for (field in names(output.labels)) {
    out.file <- params[[field]]
    if (! is.na(out.file)) {
      if (file.exists(out.file)) {
        cat(output.labels[[field]], ' file already exists. Aborting Run. Use -rf if you want to overwrite\n', sep='', file=stderr())
        q(save="no",status=1)
      }
    }
  }
  invisible(NULL)
} # end: check.replace.flag()
533 | |
534 # ############################################################################# | |
535 # MAIN FUNCTION | |
536 # ############################################################################# | |
537 | |
# Check number of arguments
# At least one option (-c) is needed. parse.arguments() recognises 18
# distinct options (-c -i -s -speak -x -p -fdr -npeak -tmpdir -filtchr
# -odir -savn -savr -savd -savp -out -rf -clean), so a call that uses each
# of them once must be accepted.
minargs <- 1
maxargs <- 18
if (nargs < minargs || nargs > maxargs) {
  print.usage()
  q(save="no",status=1)
}

# Parse arguments
# iparams$chip.file
# iparams$isurl.chip.file
# iparams$control.file
# iparams$isurl.control.file
# iparams$sep.range
# iparams$sep.peak
# iparams$ex.range
# iparams$n.nodes
# iparams$fdr
# iparams$npeak
# iparams$temp.dir
# iparams$chrname.rm.pattern
# iparams$output.odir
# iparams$output.npeak.file
# iparams$output.rpeak.file
# iparams$output.rdata.file
# iparams$output.plot.file
# iparams$output.result.file
# iparams$replace.flag
# iparams$clean.files.flag
iparams <- parse.arguments(args)

# Print run parameters
print.run.params(iparams)

# Check if output files exist
check.replace.flag(iparams)
573 | |
# curr.chip.file and curr.control.file always point to the original ChIP and control files on disk
# ta.chip.filename & ta.control.filename always point to the final but temporary versions of the ChIP and control files that will be passed to read.align
# All file names and URLs handed to the shell are quoted with shQuote() so
# that spaces and shell metacharacters (e.g. & or ? in URLs) are safe.

# Download ChIP and control files if necessary to temp.dir
if (iparams$isurl.chip.file) {
  curr.chip.file <- file.path(iparams$temp.dir, get.file.parts(iparams$chip.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
  cat('Downloading ChIP file:',iparams$chip.file,"\n",file=stdout())
  if (system(paste('wget -N -q -P',shQuote(iparams$temp.dir),shQuote(iparams$chip.file))) != 0) {
    stop('Error downloading ChIP file:',iparams$chip.file)
  }
} else {
  curr.chip.file <- iparams$chip.file # file is in original directory
}

if (iparams$isurl.control.file) {
  curr.control.file <- file.path(iparams$temp.dir, get.file.parts(iparams$control.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
  cat('Downloading control file:',iparams$control.file,"\n",file=stdout())
  if (system(paste('wget -N -q -P',shQuote(iparams$temp.dir),shQuote(iparams$control.file))) != 0) {
    stop('Error downloading Control file:',iparams$control.file)
  }
} else {
  curr.control.file <- iparams$control.file # file is in original directory
}

# unzip ChIP and input files if required AND copy to temp directory
if (get.file.parts(curr.chip.file)$ext == '.gz') {
  ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
  cat('Decompressing ChIP file\n',file=stdout())
  if (system(paste("gunzip -c",shQuote(curr.chip.file),">",shQuote(ta.chip.filename))) != 0) {
    stop('Unable to decompress file:', iparams$chip.file)
  }
  if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
    file.remove(curr.chip.file)
  }
} else {
  ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$fullname, tmpdir=iparams$temp.dir)
  if (iparams$clean.files.flag) {
    chip.staged <- file.rename(curr.chip.file,ta.chip.filename) # move file to temp.dir/[filename][randsuffix]
  } else {
    chip.staged <- file.copy(curr.chip.file,ta.chip.filename) # copy file to temp.dir/[filename][randsuffix]
  }
  # Fail here rather than later when the temporary file turns out to be missing
  if (! chip.staged) {
    stop('Unable to copy file to temporary directory:', iparams$chip.file)
  }
}

if (! is.na(iparams$control.file)) {
  if (get.file.parts(curr.control.file)$ext == '.gz') {
    ta.control.filename <- tempfile(get.file.parts(curr.control.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
    cat('Decompressing control file\n',file=stdout())
    if (system(paste("gunzip -c",shQuote(curr.control.file),">",shQuote(ta.control.filename))) != 0) {
      stop('Unable to decompress file:', iparams$control.file)
    }
    if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
      file.remove(curr.control.file)
    }
  } else {
    ta.control.filename <- tempfile(get.file.parts(curr.control.file)$fullname, tmpdir=iparams$temp.dir)
    if (iparams$clean.files.flag) {
      control.staged <- file.rename(curr.control.file,ta.control.filename) # move file to temp.dir/[filename][randsuffix]
    } else {
      control.staged <- file.copy(curr.control.file,ta.control.filename) # copy file to temp.dir/[filename][randsuffix]
    }
    if (! control.staged) {
      stop('Unable to copy file to temporary directory:', iparams$control.file)
    }
  }
}

# Remove downloaded files
if (iparams$isurl.chip.file && file.exists(curr.chip.file)) {
  file.remove(curr.chip.file)
}

if (! is.na(iparams$control.file)) {
  if (iparams$isurl.control.file && file.exists(curr.control.file)) {
    file.remove(curr.control.file)
  }
}
648 | |
649 # Load SPP library | |
650 library(spp) | |
651 | |
# Load the ChIP alignments from the staged temporary copy, then discard the copy
cat("Reading ChIP tagAlign/BAM file",iparams$chip.file,"\n",file=stdout())
chip.data <- read.align(ta.chip.filename)
cat("ChIP data read length",chip.data$read.length,"\n",file=stdout())
file.remove(ta.chip.filename)

# A file that yields no tags is treated as malformed
if (length(chip.data$tags)==0) {
  stop('Error in ChIP file format:', iparams$chip.file)
}

# Drop every chromosome whose name matches the user-supplied removal pattern,
# keeping the tag and quality lists aligned
if (! is.na(iparams$chrname.rm.pattern)) {
  chr.matches <- grepl(iparams$chrname.rm.pattern, names(chip.data$tags))
  selectidx <- which(!chr.matches)
  chip.data$tags <- chip.data$tags[selectidx]
  chip.data$quality <- chip.data$quality[selectidx]
}

# Total number of tags over the remaining chromosomes
tags.per.chr <- lapply(chip.data$tags, length)
chip.data$num.tags <- sum(unlist(tags.per.chr))
667 | |
# Read Control tagAlign/BAM files (only when a control file was supplied)
if (! is.na(iparams$control.file)) {
  cat("Reading Control tagAlign/BAM file",iparams$control.file,"\n",file=stdout())
  control.data <- read.align(ta.control.filename)
  file.remove(ta.control.filename) # Delete temporary file
  if (length(control.data$tags)==0) {
    # Name the control file in the message: it is the input that failed to parse
    stop('Error in control file format:', iparams$control.file)
  }
  cat("Control data read length",control.data$read.length,"\n",file=stdout())
  # Remove chromosomes whose names match the removal pattern, keeping the tag
  # and quality lists aligned
  if (! is.na(iparams$chrname.rm.pattern)) {
    selectidx <- which(!grepl(iparams$chrname.rm.pattern,names(control.data$tags)))
    control.data$tags <- control.data$tags[selectidx]
    control.data$quality <- control.data$quality[selectidx]
  }
  # Total number of control tags over the remaining chromosomes
  control.data$num.tags <- sum(unlist(lapply(control.data$tags, length)))
}
685 | |
# Open multiple processes if required
# (cluster.nodes stays NULL when no node count was given)
if (is.na(iparams$n.nodes)) {
  cluster.nodes <- NULL
} else {
  library(snow)
  cluster.nodes <- makeCluster(iparams$n.nodes)
}

# #################################
# Calculate cross-correlation for various strand shifts
# #################################
cat("Calculating peak characteristics\n",file=stdout())
# crosscorr
# $cross.correlation : Cross-correlation profile as an $x/$y data.frame
# $peak : Position ($x) and height ($y) of automatically detected cross-correlation peak.
# $whs: Optimized window half-size for binding detection (based on the width of the cross-correlation peak)
#
# sep.range is <min>:<step>:<max>; elements 1 and 3 give the shift range and
# element 2 the bin (step) size.
# The 'finally' clause shuts the worker processes down even when the
# computation fails, so an error does not leave them running.
crosscorr <- tryCatch(
  get.binding.characteristics(chip.data,
                              srange=iparams$sep.range[c(1,3)],
                              bin=iparams$sep.range[2],
                              accept.all.tags=TRUE,
                              cluster=cluster.nodes),
  finally = if (!is.null(cluster.nodes)) stopCluster(cluster.nodes))
710 | |
# Work on a copy (cc) of the cross-correlation profile; the raw profile in
# crosscorr$cross.correlation is left untouched
cc <- crosscorr$cross.correlation

# Record the row (shift $x, value $y) where the raw cross-correlation is lowest
min.row <- which.min(crosscorr$cross.correlation$y)
crosscorr$min.cc <- crosscorr$cross.correlation[min.row, ]
cat("Minimum cross-correlation value", crosscorr$min.cc$y,"\n",file=stdout())
cat("Minimum cross-correlation shift", crosscorr$min.cc$x,"\n",file=stdout())

# Smoothing bandwidth: an odd number of points derived from the strand-shift step
shift.step <- iparams$sep.range[2]
sbw <- 2*floor(ceiling(5/shift.step) / 2) + 1
cc$y <- runmean(cc$y, sbw, alg="fast")
718 | |
# Locate local maxima of the smoothed profile.
# cc[i] is compared with cc[i +/- bw]: a peak is a point where the lagged
# difference switches from non-negative (rising) to negative (falling).
bw <- ceiling(2/iparams$sep.range[2])
is.rising <- (diff(cc$y, bw) >= 0)      # cc[i] >= cc[i-bw]
slope.change <- diff(is.rising, bw)     # -1 marks a rising -> falling transition
peakidx <- which(slope.change == -1) + bw

# Discard peaks that fall inside the excluded strand-shift range.
# When no upper bound was supplied (NaN) it defaults to read length + 10.
if ( is.nan(iparams$ex.range[2]) ) {
  iparams$ex.range[2] <- chip.data$read.length+10
}
peak.shifts <- cc$x[peakidx]
outside.excluded <- (peak.shifts < iparams$ex.range[1]) | (peak.shifts > iparams$ex.range[2])
peakidx <- peakidx[outside.excluded]
cc <- cc[peakidx,]
731 | |
# Find max peak position and other peaks within 0.9*max_peakvalue that are further away from maxpeakposition
maxpeakidx <- which.max(cc$y)
maxpeakshift <- cc$x[maxpeakidx]
maxpeakval <- cc$y[maxpeakidx]
peakidx <- which((cc$y >= 0.9*maxpeakval) & (cc$x >= maxpeakshift))
cc <- cc[peakidx,]

# sort the peaks and get the top 3
# seq_len() is used instead of 1:min(3, n): when no candidate peak survives,
# 1:0 would expand to c(1, 0) and fabricate an all-NA row in cc.peak.
sortidx <- order(cc$y,decreasing=TRUE)
sortidx <- sortidx[seq_len(min(3,length(sortidx)))]
cc.peak <- cc[sortidx,]
743 | |
# A user-supplied peak strand shift replaces the detected peaks. Its
# cross-correlation value is interpolated from the raw (unsmoothed) profile;
# rule=2 makes approx() use the nearest end value outside the profiled range.
if (! is.na(iparams$sep.peak)) {
  raw.profile <- crosscorr$cross.correlation
  cc.peak <- approx(x=raw.profile$x, y=raw.profile$y, xout=iparams$sep.peak, rule=2)
}
peak.values <- paste(cc.peak$y,collapse=",")
peak.shifts <- paste(cc.peak$x,collapse=",")
cat("Peak cross-correlation value", peak.values,"\n",file=stdout())
cat("Peak strand shift",peak.shifts,"\n",file=stdout())

# Store the first reported peak back into crosscorr
crosscorr$peak$x <- cc.peak$x[1]
crosscorr$peak$y <- cc.peak$y[1]
754 | |
# Window half size: the largest strand shift whose raw cross-correlation is
# still at least one third of the way up from the minimum to the peak value
cc.floor <- crosscorr$min.cc$y
whs.thresh <- cc.floor + (crosscorr$peak$y - cc.floor)/3
above.thresh <- crosscorr$cross.correlation$y >= whs.thresh
crosscorr$whs <- max(crosscorr$cross.correlation$x[above.thresh])
cat("Window half size",crosscorr$whs,"\n",file=stdout())
759 | |
# Compute phantom peak coefficient
# The phantom peak is the highest raw cross-correlation value among the strand
# shifts lying in [read.length - round(2*step), read.length + round(1.5*step)],
# where step is the strand-shift step size (sep.range[2]).
ph.lower <- chip.data$read.length - round(2*iparams$sep.range[2])
ph.upper <- chip.data$read.length + round(1.5*iparams$sep.range[2])
ph.peakidx <- which( ( crosscorr$cross.correlation$x >= ph.lower ) &
                     ( crosscorr$cross.correlation$x <= ph.upper ) )
ph.peakidx <- ph.peakidx[ which.max(crosscorr$cross.correlation$y[ph.peakidx]) ]
crosscorr$phantom.cc <- crosscorr$cross.correlation[ph.peakidx,]
cat("Phantom peak location",crosscorr$phantom.cc$x,"\n",file=stdout())
cat("Phantom peak Correlation",crosscorr$phantom.cc$y,"\n",file=stdout())

# Normalized coefficient (labelled NSC in the plot title): peak value relative
# to the minimum. An earlier peak/phantom assignment to the same field was
# immediately overwritten by this one and has been dropped as dead code.
crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$min.cc$y
cat("Normalized cross-correlation coefficient (NCCC)",crosscorr$phantom.coeff,"\n",file=stdout())

# Relative coefficient (labelled RSC in the plot title): peak height above the
# minimum divided by phantom-peak height above the minimum
crosscorr$rel.phantom.coeff <- (crosscorr$peak$y - crosscorr$min.cc$y) / (crosscorr$phantom.cc$y - crosscorr$min.cc$y)
cat("Relative Cross correlation Coefficient (RCCC)",crosscorr$rel.phantom.coeff,"\n",file=stdout())

# Quality tag from the relative coefficient:
#   [0,0.25) -> -2, [0.25,0.5) -> -1, [0.5,1) -> 0, [1,1.5) -> 1, >=1.5 -> 2
# A negative coefficient leaves the tag as NA.
rel.coeff <- crosscorr$rel.phantom.coeff
if (rel.coeff < 0) {
  crosscorr$phantom.quality.tag <- NA
} else if (rel.coeff < 0.25) {
  crosscorr$phantom.quality.tag <- -2
} else if (rel.coeff < 0.5) {
  crosscorr$phantom.quality.tag <- -1
} else if (rel.coeff < 1) {
  crosscorr$phantom.quality.tag <- 0
} else if (rel.coeff < 1.5) {
  crosscorr$phantom.quality.tag <- 1
} else {
  crosscorr$phantom.quality.tag <- 2
}
cat("Phantom Peak Quality Tag",crosscorr$phantom.quality.tag,"\n",file=stdout())
785 | |
# Append one tab-separated row to the result file if one was requested.
# Columns, in the order written:
#   file name, number of ChIP tags, peak strand shift(s), peak correlation(s),
#   phantom peak shift, phantom peak correlation, minimum-correlation shift,
#   minimum correlation, normalized coefficient, relative coefficient, quality tag
# Multiple peaks are comma-joined inside their column.
if (! is.na(iparams$output.result.file)) {
  result.fields <- list(get.file.parts(iparams$chip.file)$fullname,
                        chip.data$num.tags,
                        paste(cc.peak$x,collapse=","),
                        paste(cc.peak$y,collapse=","),
                        crosscorr$phantom.cc$x,
                        crosscorr$phantom.cc$y,
                        crosscorr$min.cc$x,
                        crosscorr$min.cc$y,
                        crosscorr$phantom.coeff,
                        crosscorr$rel.phantom.coeff,
                        crosscorr$phantom.quality.tag)
  cat.options <- list(sep="\t",
                      file=iparams$output.result.file,
                      append=TRUE)
  do.call(cat, c(result.fields, cat.options))
  # Terminate the row
  cat("\n",
      file=iparams$output.result.file,
      append=TRUE)
}
807 | |
# Write the cross-correlation plot to a 5x5 inch PDF if a plot file was requested.
# Dashed vertical lines mark the reported peak shift(s) (colour 2) and the
# phantom peak shift (colour 4).
if (! is.na(iparams$output.plot.file)) {
  shift.label <- sprintf("strand-shift (%s)",paste(cc.peak$x,collapse=","))
  plot.title <- get.file.parts(iparams$chip.file)$fullname
  plot.subtitle <- sprintf("NSC=%g,RSC=%g,Qtag=%d",crosscorr$phantom.coeff,crosscorr$rel.phantom.coeff,crosscorr$phantom.quality.tag)

  pdf(file=iparams$output.plot.file,width=5,height=5)
  par(mar = c(4,3.5,2,0.5), mgp = c(1.5,0.5,0), cex = 0.8)
  plot(crosscorr$cross.correlation,
       type='l',
       xlab=shift.label,
       ylab="cross-correlation")
  abline(v=cc.peak$x,lty=2,col=2)
  abline(v=crosscorr$phantom.cc$x,lty=2,col=4)
  title(main=plot.title,
        sub=plot.subtitle)
  dev.off()
}
822 | |
# Store the input parameters and the cross-correlation results in an RData
# file if one was requested
if (! is.na(iparams$output.rdata.file)) {
  rdata.objects <- c("iparams", "crosscorr", "cc.peak")
  save(list=rdata.objects, file=iparams$output.rdata.file)
}
830 | |
# #################################
# Call peaks
# #################################
# Runs only when a narrowPeak and/or regionPeak output file was requested.

if ( !is.na(iparams$output.npeak.file) || !is.na(iparams$output.rpeak.file) ) {

  # The steps below read control.data; fail with a clear message instead of a
  # bare "object not found" when it was never created.
  if (!exists("control.data")) {
    stop('Peak calling requires a control file (-i=<Input_tagAlign/BAMFile>)')
  }

  # Remove local tag anomalies
  # (from here on chip.data and control.data hold the filtered tag lists)
  cat('Removing read stacks\n',file=stdout())
  chip.data <- remove.local.tag.anomalies(chip.data$tags)
  control.data <- remove.local.tag.anomalies(control.data$tags)

  # Open multiple processes if required
  if (is.na(iparams$n.nodes)) {
    cluster.nodes <- NULL
  } else {
    cluster.nodes <- makeCluster(iparams$n.nodes)
  }

  # Find peaks
  cat('Finding peaks\n',file=stdout())
  # When a peak-count threshold is given, the FDR is overridden with 0.96;
  # the npeak limit is then applied when the peak files are written.
  if (!is.na(iparams$npeak)) {
    iparams$fdr <- 0.96
  }
  # 'finally' shuts the worker processes down even if peak finding fails
  narrow.peaks <- tryCatch(
    find.binding.positions(signal.data=chip.data,
                           control.data=control.data,
                           fdr=iparams$fdr,
                           method=tag.lwcc,
                           whs=crosscorr$whs,
                           cluster=cluster.nodes),
    finally = if (!is.null(cluster.nodes)) stopCluster(cluster.nodes))
  cat(paste("Detected",sum(unlist(lapply(narrow.peaks$npl,function(d) length(d$x)))),"peaks"),"\n",file=stdout())

  # Write to narrowPeak file
  if (!is.na(iparams$output.npeak.file)) {
    write.narrowpeak.binding(narrow.peaks,iparams$output.npeak.file,margin=round(crosscorr$whs/2),npeaks=iparams$npeak)
    # Quote the path for the shell; path.expand() first because quoting
    # disables the shell's own '~' expansion.
    if (system(paste('gzip -f',shQuote(path.expand(iparams$output.npeak.file)))) != 0) {
      warning('Unable to compress file: ', iparams$output.npeak.file)
    }
  }

  # Compute and write regionPeak file
  if (!is.na(iparams$output.rpeak.file)) {
    region.peaks <- add.broad.peak.regions(chip.data,control.data,narrow.peaks,window.size=max(50,round(crosscorr$whs/4)),z.thr=10)
    write.narrowpeak.binding(region.peaks,iparams$output.rpeak.file,margin=round(crosscorr$whs/2),npeaks=iparams$npeak)
    if (system(paste('gzip -f',shQuote(path.expand(iparams$output.rpeak.file)))) != 0) {
      warning('Unable to compress file: ', iparams$output.rpeak.file)
    }
  }

  # Save Rdata file
  # region.peaks exists only when a regionPeak file was requested, so it is
  # added to the saved objects conditionally; naming it unconditionally made
  # save() fail for narrowPeak-only runs.
  if (! is.na(iparams$output.rdata.file)) {
    rdata.objects <- c("iparams", "crosscorr", "cc.peak", "narrow.peaks")
    if (!is.na(iparams$output.rpeak.file)) {
      rdata.objects <- c(rdata.objects, "region.peaks")
    }
    save(list=rdata.objects, file=iparams$output.rdata.file)
  }

}
884 | |
885 |