annotate run_spp.R @ 0:b15734276ca3 draft

Initial upload.
author stemcellcommons
date Thu, 17 Oct 2013 12:39:45 -0400
parents
children 23b22c1692fa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
1 # run_spp.R
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
2 # =============
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
3 # Author: Anshul Kundaje, Computer Science Dept., Stanford University
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
4 # Email: akundaje@stanford.edu
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
5 # Last updated: Oct 8, 2010
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
6 # =============
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
7 # MANDATORY ARGUMENTS
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
8 # -c=<ChIP_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
9 # MANDATORY ARGUMENT FOR PEAK CALLING
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
10 # -i=<Input_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
11 # OPTIONAL ARGUMENTS
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
12 # -s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
13 # -speak=<strPeak>, user-defined cross-correlation peak strandshift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
14 # -x=<min>:<max>, strand shifts to exclude (This is mainly to avoid phantom peaks) default=10:(readlen+10)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
15 # -p=<nodes> , number of parallel processing nodes, default=NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
16 # -fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
17 # -npeak=<numPeaks>, threshold on number of peaks to call
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
18 # -tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
19 # -filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
20 # OUTPUT PARAMETERS
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
21 # -odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
22 # -savn=<narrowpeakfilename> OR -savn NarrowPeak file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
23 # -savr=<regionpeakfilename> OR -savr RegionPeak file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
24 # -savd=<rdatafile> OR -savd , save Rdata file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
25 # -savp=<plotdatafile> OR -savp , save cross-correlation plot
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
26 # -out=<resultfile>, append peakshift result to a file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
27 # format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
28 # -rf , if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
29 # -clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
30
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
31 args <- commandArgs(trailingOnly=TRUE); # Read Arguments from command line
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
32 nargs = length(args); # number of arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
33
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
34 # ###########################################################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
35 # AUXILIARY FUNCTIONS
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
36 # ###########################################################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
37
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
38 print.usage <- function() {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
39 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
40 # Function will print function usage
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
41 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
42 cat('Usage: Rscript run_spp.R <options>\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
43 cat('MANDATORY ARGUMENTS\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
44 cat('-c=<ChIP_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped)(FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
45 cat('MANDATORY ARGUMENTS FOR PEAK CALLING\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
46 cat('-i=<Input_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
47 cat('OPTIONAL ARGUMENTS\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
48 cat('-s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
49 cat('-speak=<strPeak>, user-defined cross-correlation peak strandshift\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
50 cat('-x=<min>:<max>, strand shifts to exclude (This is mainly to avoid region around phantom peak) default=10:(readlen+10)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
51 cat('-p=<nodes> , number of parallel processing nodes, default=0\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
52 cat('-fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
53 cat('-npeak=<numPeaks>, threshold on number of peaks to call\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
54 cat('-tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
55 cat('-filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
56 cat('OUTPUT ARGUMENTS\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
57 cat('-odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
58 cat('-savn=<narrowpeakfilename> OR -savn NarrowPeak file name (fixed width peaks)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
59 cat('-savr=<regionpeakfilename> OR -savr RegionPeak file name (variable width peaks with regions of enrichment)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
60 cat('-savd=<rdatafile> OR -savd, save Rdata file\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
61 cat('-savp=<plotdatafile> OR -savp, save cross-correlation plot\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
62 cat('-out=<resultfile>, append peakshift/phantomPeak results to a file\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
63 cat(' format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag)\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
64 cat('-rf, if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
65 cat('-clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
66 } # end: print.usage()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
67
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
68 get.file.parts <- function(file.fullpath) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
69 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
70 # Function will take a file name with path and split the file name into
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
71 # path, fullname, name and ext
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
72 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
73 if (! is.character(file.fullpath)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
74 stop('File name must be a string')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
75 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
76
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
77 file.parts <- strsplit(as.character(file.fullpath), .Platform$file.sep, fixed=TRUE)[[1]] # split on file separator
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
78
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
79 if (length(file.parts) == 0) { # if empty file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
80 return(list(path='',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
81 fullname='',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
82 name='',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
83 ext='')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
84 )
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
85 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
86 if (length(file.parts) == 1) { # if no path then just the file name itself
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
87 file.path <- '.'
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
88 file.fullname <- file.parts
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
89 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
90 file.path <- paste(file.parts[1:(length(file.parts)-1)], collapse=.Platform$file.sep) # 1:last-1 token is path
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
91 file.fullname <- file.parts[length(file.parts)] # last token is filename
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
92 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
93 file.fullname.parts <- strsplit(file.fullname,'.',fixed=TRUE)[[1]] # split on .
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
94 if (length(file.fullname.parts) == 1) { # if no extension
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
95 file.ext <- ''
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
96 file.name <- file.fullname.parts
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
97 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
98 file.ext <- paste('.', file.fullname.parts[length(file.fullname.parts)], sep="") # add the . to the last token
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
99 file.name <- paste(file.fullname.parts[1:(length(file.fullname.parts)-1)], collapse=".")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
100 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
101 return(list(path=file.path,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
102 fullname=file.fullname,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
103 name=file.name,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
104 ext=file.ext))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
105 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
106 } # end: get.file.parts()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
107
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
108 parse.arguments <- function(args) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
109 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
110 # Function will parse arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
111 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
112 # Set arguments to default values
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
113 chip.file <- NA # main ChIP tagAlign/BAM file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
114 isurl.chip.file <- FALSE # flag indicating whether ChIP file is a URL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
115 control.file <- NA # control tagAlign/BAM file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
116 isurl.control.file <- FALSE # flag indicating whether control file is a URL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
117 sep.min <- -100 # min strand shift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
118 sep.max <- 600 # max strand shift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
119 sep.bin <- 5 # increment for strand shift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
120 sep.peak <- NA # user-defined peak shift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
121 exclude.min <- 10 # lowerbound of strand shift exclusion region
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
122 exclude.max <- NaN # upperbound of strand shift exclusion region
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
123 n.nodes <- NA # number of parallel processing nodes
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
124 fdr <- 0.01 # false discovery rate threshold for peak calling
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
125 npeak <- NA # threshold on number of peaks to call
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
126 temp.dir <- tempdir() # temporary directory
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
127 chrname.rm.pattern <- NA # chromosome name pattern used to remove tags
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
128 output.odir <- NA # Output directory name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
129 output.npeak.file <- NA # Output narrowPeak file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
130 output.rpeak.file <- NA # Output regionPeak file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
131 output.rdata.file <- NA # Rdata file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
132 output.plot.file <- NA # cross correlation plot file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
133 output.result.file <- NA # result file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
134 replace.flag <- FALSE # replace file flag
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
135 clean.files.flag <- FALSE # file deletion flag
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
136
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
137 # Parse arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
138 for (each.arg in args) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
139
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
140 if (grepl('^-c=',each.arg)) { #-c=<chip.file>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
141
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
142 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
143 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
144 chip.file <- arg.split[2] # second part is chip.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
145 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
146 stop('No tagAlign/BAM file name provided for parameter -c=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
147 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
148
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
149 } else if (grepl('^-i=',each.arg)) { #-i=<control.file>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
150
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
151 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
152 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
153 control.file <- arg.split[2] # second part is control.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
154 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
155 stop('No tagAlign/BAM file name provided for parameter -i=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
156 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
157
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
158 } else if (grepl('^-s=',each.arg)) { #-s=<sep.min>:<sep.bin>:<sep.max>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
159
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
160 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
161 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
162 sep.vals <- arg.split[2] # second part is sepmin:sepbin:sepmax
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
163 sep.vals.split <- strsplit(sep.vals,':',fixed=TRUE)[[1]] # split on :
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
164 if (length(sep.vals.split) != 3) { # must have 3 parts
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
165 stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
166 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
167 if (any(is.na(as.numeric(sep.vals.split)))) { # check that sep vals are numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
168 stop('Strand shift limits must be numeric values')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
169 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
170 sep.min <- round(as.numeric(sep.vals.split[1]))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
171 sep.bin <- round(as.numeric(sep.vals.split[2]))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
172 sep.max <- round(as.numeric(sep.vals.split[3]))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
173 if ((sep.min > sep.max) || (sep.bin > (sep.max - sep.min)) || (sep.bin < 0)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
174 stop('Illegal separation values -s=sepmin:sepbin:sepmax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
175 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
176 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
177 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
178 stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
179 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
180
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
181 } else if (grepl('^-speak=',each.arg)) { #-speak=<sep.peak> , user-defined cross-correlation peak strandshift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
182
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
183 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
184 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
185 sep.peak <- arg.split[2] # second part is <sep.peak>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
186 if (is.na(as.numeric(sep.peak))) { # check that sep.peak is numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
187 stop('-speak=<sep.peak>: User defined peak shift must be numeric')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
188 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
189 sep.peak <- as.numeric(sep.peak)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
190 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
191 stop('User defined peak shift must be provided as -speak=<sep.peak>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
192 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
193
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
194 } else if (grepl('^-x=',each.arg)) { #-x=<exclude.min>:<exclude.max>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
195
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
196 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
197 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
198 exclude.vals <- arg.split[2] # second part is excludemin:excludemax
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
199 exclude.vals.split <- strsplit(exclude.vals,':',fixed=TRUE)[[1]] # split on :
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
200 if (length(exclude.vals.split) != 2) { # must have 2 parts
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
201 stop('Exclusion limits must be specified as -x=excludemin:excludemax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
202 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
203 if (any(is.na(as.numeric(exclude.vals.split)))) { # check that exclude vals are numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
204 stop('Exclusion limits must be numeric values')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
205 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
206 exclude.min <- round(as.numeric(exclude.vals.split[1]))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
207 exclude.max <- round(as.numeric(exclude.vals.split[2]))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
208 if (exclude.min > exclude.max) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
209 stop('Illegal exclusion limits -x=excludemin:excludemax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
210 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
211 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
212 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
213 stop('Exclusion limits must be specified as -x=excludemin:excludemax')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
214 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
215
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
216 } else if (grepl('^-p=',each.arg)) { #-p=<n.nodes> , number of parallel processing nodes, default=NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
217
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
218 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
219 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
220 n.nodes <- arg.split[2] # second part is numnodes
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
221 if (is.na(as.numeric(n.nodes))) { # check that n.nodes is numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
222 stop('-p=<numnodes>: numnodes must be numeric')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
223 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
224 n.nodes <- round(as.numeric(n.nodes))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
225 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
226 stop('Number of parallel nodes must be provided as -p=<numnodes>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
227 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
228
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
229 } else if (grepl('^-fdr=',each.arg)) { #-fdr=<fdr> , false discovery rate, default=0.01
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
230
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
231 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
232 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
233 fdr <- arg.split[2] # second part is fdr
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
234 if (is.na(as.numeric(fdr))) { # check that fdr is numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
235 stop('-fdr=<falseDiscoveryRate>: false discovery rate must be numeric')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
236 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
237 fdr <- as.numeric(fdr)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
238 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
239 stop('False discovery rate must be provided as -fdr=<fdr>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
240 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
241
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
242 } else if (grepl('^-npeak=',each.arg)) { #-npeak=<numPeaks> , number of peaks threshold, default=NA
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
243
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
244 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
245 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
246 npeak <- arg.split[2] # second part is npeak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
247 if (is.na(as.numeric(npeak))) { # check that npeak is numeric
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
248 stop('-npeak=<numPeaks>: threshold on number of peaks must be numeric')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
249 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
250 npeak <- round(as.numeric(npeak))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
251 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
252 stop('Threshold on number of peaks must be provided as -npeak=<numPeaks>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
253 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
254
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
255 } else if (grepl('^-tmpdir=',each.arg)) { #-tmpdir=<temp.dir>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
256
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
257 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
258 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
259 temp.dir <- arg.split[2] # second part is temp.dir
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
260 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
261 stop('No temporary directory provided for parameter -tmpdir=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
262 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
263
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
264 } else if (grepl('^-filtchr=',each.arg)) { #-filtchr=<chrname.rm.pattern>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
265
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
266 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
267 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
268 chrname.rm.pattern <- arg.split[2] # second part is chrname.rm.pattern
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
269 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
270 stop('No pattern provided for parameter -filtchr=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
271 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
272
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
273 } else if (grepl('^-odir=',each.arg)) { #-odir=<output.odir>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
274
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
275 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
276 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
277 output.odir <- arg.split[2] # second part is output.odir
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
278 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
279 stop('No output directory provided for parameter -odir=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
280 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
281
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
282 } else if (grepl('^-savn',each.arg)) { # -savn=<output.npeak.file> OR -savn , save narrowpeak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
283
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
284 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
285 if (! is.na(arg.split[2])) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
286 output.npeak.file <- arg.split[2] #-savn=
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
287 } else if (each.arg=='-savn') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
288 output.npeak.file <- NULL # NULL indicates get the name from the main file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
289 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
290 stop('Argument for saving narrowPeak file must be -savn or -savn=<filename>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
291 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
292
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
293 } else if (grepl('^-savr',each.arg)) { # -savr=<output.rpeak.file> OR -savr , save regionpeak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
294
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
295 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
296 if (! is.na(arg.split[2])) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
297 output.rpeak.file <- arg.split[2] #-savr=
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
298 } else if (each.arg=='-savr') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
299 output.rpeak.file <- NULL # NULL indicates get the name from the main file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
300 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
301 stop('Argument for saving regionPeak file must be -savr or -savr=<filename>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
302 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
303
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
304 } else if (grepl('^-savd',each.arg)) { # -savd=<output.rdata.file> OR -savd , save Rdata file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
305
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
306 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
307 if (! is.na(arg.split[2])) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
308 output.rdata.file <- arg.split[2] #-savd=
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
309 } else if (each.arg=='-savd') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
310 output.rdata.file <- NULL # NULL indicates get the name from the main file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
311 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
312 stop('Argument for saving Rdata file must be -savd or -savd=<filename>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
313 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
314
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
315 } else if (grepl('^-savp',each.arg)) { # -savp=<output.plot.file> OR -savp , save cross-correlation plot
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
316
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
317 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
318 if (! is.na(arg.split[2])) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
319 output.plot.file <- arg.split[2] #-savp=
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
320 } else if (each.arg=='-savp') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
321 output.plot.file <- NULL # NULL indicates get the name from the main file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
322 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
323 stop('Argument for saving Rdata file must be -savp or -savp=<filename>')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
324 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
325
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
326 } else if (grepl('^-out=',each.arg)) { #-out=<output.result.file>
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
327
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
328 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
329 if (! is.na(arg.split[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
330 output.result.file <- arg.split[2] # second part is output.result.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
331 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
332 stop('No result file provided for parameter -out=')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
333 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
334
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
335 } else if (each.arg == '-rf') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
336
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
337 replace.flag <- TRUE
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
338
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
339 } else if (each.arg == '-clean') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
340
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
341 clean.files.flag <- TRUE
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
342
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
343 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
344
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
345 stop('Illegal argument ',each.arg)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
346 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
347 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
348 # End: for loop
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
349
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
350 # Check mandatory arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
351 if (is.na(chip.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
352 stop('-c=<tagAlign/BAMFileName> is a mandatory argument')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
353 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
354
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
355 if (is.na(control.file) && ! is.na(output.npeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
356 stop('-i=<tagAlign/BAMFileName> is required for peak calling')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
357 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
358
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
359 # Check if ChIP and control files are URLs
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
360 if (grepl('^http://',chip.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
361 isurl.chip.file <- TRUE
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
362 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
363 if (grepl('^http://',control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
364 isurl.control.file <- TRUE
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
365 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
366
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
367 # If ChIP file is a URL output.odir MUST be specified
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
368 if (isurl.chip.file && is.na(output.odir)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
369 stop('If ChIP file is a URL, then output directory MUST be specified')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
370 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
371
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
372 # Check that ChIP and control files exist
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
373 if (isurl.chip.file) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
374 if (system(paste('wget -q --spider',chip.file)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
375 stop('ChIP file URL not valid: ',chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
376 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
377 } else if (!file.exists(chip.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
378 stop('ChIP File:',chip.file,' does not exist')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
379 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
380
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
381 if (!is.na(control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
382 if (isurl.control.file) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
383 if (system(paste('wget -q --spider',control.file)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
384 stop('Control file URL not valid: ',control.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
385 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
386 } else if (!file.exists(control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
387 stop('Control File:',control.file,' does not exist')
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
388 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
389 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
390
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
391 # Correct other arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
392 if (is.na(output.odir)) { # Reconstruct output.odir if not provided
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
393 output.odir <- get.file.parts(chip.file)$path
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
394 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
395
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
396 if (is.null(output.npeak.file)) { # Reconstruct output.npeak.file if NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
397 output.npeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.narrowPeak', sep=""))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
398 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
399
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
400 if (is.null(output.rpeak.file)) { # Reconstruct output.rpeak.file if NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
401 output.rpeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.regionPeak', sep=""))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
402 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
403
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
404 if (is.null(output.rdata.file)) { # Reconstruct output.rdata.file if NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
405 output.rdata.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.Rdata', sep=""))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
406 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
407
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
408 if (is.null(output.plot.file)) { # Reconstruct output.plot.file if NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
409 output.plot.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.pdf', sep=""))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
410 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
411
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
412 return(list(chip.file=chip.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
413 isurl.chip.file=isurl.chip.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
414 control.file=control.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
415 isurl.control.file=isurl.control.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
416 sep.range=c(sep.min,sep.bin,sep.max),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
417 sep.peak=sep.peak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
418 ex.range=c(exclude.min,exclude.max),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
419 n.nodes=n.nodes,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
420 fdr=fdr,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
421 npeak=npeak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
422 temp.dir=temp.dir,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
423 chrname.rm.pattern=chrname.rm.pattern,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
424 output.odir=output.odir,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
425 output.npeak.file=output.npeak.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
426 output.rpeak.file=output.rpeak.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
427 output.rdata.file=output.rdata.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
428 output.plot.file=output.plot.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
429 output.result.file=output.result.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
430 replace.flag=replace.flag,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
431 clean.files.flag=clean.files.flag))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
432 } # end: parse.arguments()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
433
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
434 read.align <- function(align.filename) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
435 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
436 # Function will read a tagAlign or BAM file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
437 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
438 if (grepl('(\\.bam)?.*(\\.tagAlign)',align.filename)) { # if tagalign file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
439 chip.data <- read.tagalign.tags(align.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
440 # get readlength info
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
441 tmpDataRows <- read.table(align.filename,nrows=500)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
442 chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
443 } else if (grepl('(\\.tagAlign)?.*(\\.bam)',align.filename)) { # if bam file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
444 # create BAM file name
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
445 bam2align.filename <- sub('\\.bam','.tagAlign',align.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
446 # generate command to convert bam to tagalign
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
447 command <- vector(length=2)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
448 command[1] <- sprintf("samtools view -F 0x0204 -o - %s",align.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
449 command[2] <- paste("awk 'BEGIN{FS=" , '"\t"' , ";OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
450 # command[2] <- paste("awk 'BEGIN{OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
451 command <- paste(command,collapse=" | ")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
452 # Run command
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
453 status <- system(command,intern=FALSE,ignore.stderr=FALSE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
454 if ((status != 0) || !file.exists(bam2align.filename)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
455 cat(sprintf("Error converting BAM to tagalign file: %s\n",align.filename),file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
456 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
457 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
458 # read converted BAM file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
459 chip.data <- read.tagalign.tags(bam2align.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
460 # get readlength info
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
461 tmpDataRows <- read.table(bam2align.filename,nrows=500)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
462 chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
463 # delete temporary tagalign file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
464 file.remove(bam2align.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
465 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
466 cat(sprintf("Error:Unknown file format for file:%s\n",align.fname),file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
467 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
468 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
469 return(chip.data)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
470 } # end: read.align()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
471
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
472 print.run.params <- function(params){
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
473 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
474 # Output run parameters
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
475 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
476 cat('################\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
477 cat(iparams$chip.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
478 iparams$control.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
479 iparams$sep.range,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
480 iparams$sep.peak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
481 iparams$ex.range,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
482 iparams$n.nodes,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
483 iparams$fdr,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
484 iparams$npeak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
485 iparams$output.odir,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
486 iparams$output.npeak.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
487 iparams$output.rpeak.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
488 iparams$output.rdata.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
489 iparams$output.plot.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
490 iparams$output.result.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
491 iparams$replace.flag,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
492 labels=c('ChIP data:','Control data:', 'strandshift(min):','strandshift(step):','strandshift(max)','user-defined peak shift',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
493 'exclusion(min):','exclusion(max):','num parallel nodes:','FDR threshold:','NumPeaks Threshold:','Output Directory:',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
494 'narrowPeak output file name:', 'regionPeak output file name:', 'Rdata filename:',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
495 'plot pdf filename:','result filename:','Overwrite files?:'),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
496 fill=18,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
497 file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
498 cat('\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
499 } # end: print.run.parameters()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
500
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
501 check.replace.flag <- function(params){
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
502 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
503 # Check if files exist
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
504 # ===================================
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
505 # If replace.flag is NOT set, check if output files exist and abort if necessary
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
506 if (! iparams$replace.flag) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
507 if (! is.na(iparams$output.npeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
508 if (file.exists(iparams$output.npeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
509 cat('narrowPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
510 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
511 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
512 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
513 if (! is.na(iparams$output.rpeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
514 if (file.exists(iparams$output.rpeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
515 cat('regionPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
516 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
517 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
518 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
519 if (! is.na(iparams$output.plot.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
520 if (file.exists(iparams$output.plot.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
521 cat('Plot file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
522 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
523 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
524 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
525 if (! is.na(iparams$output.rdata.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
526 if (file.exists(iparams$output.rdata.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
527 cat('Rdata file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
528 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
529 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
530 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
531 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
532 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
533
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
534 # #############################################################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
535 # MAIN FUNCTION
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
536 # #############################################################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
537
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
538 # Check number of arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
539 minargs = 1;
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
540 maxargs = 17;
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
541 if (nargs < minargs | nargs > maxargs) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
542 print.usage()
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
543 q(save="no",status=1)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
544 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
545
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
546 # Parse arguments
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
547 # iparams$chip.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
548 # iparams$isurl.chip.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
549 # iparams$control.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
550 # iparams$isurl.control.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
551 # iparams$sep.range
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
552 # iparams$sep.peak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
553 # iparams$ex.range
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
554 # iparams$n.nodes
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
555 # iparams$fdr
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
556 # iparams$npeak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
557 # iparams$temp.dir
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
558 # iparams$output.odir
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
559 # iparams$output.npeak.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
560 # iparams$output.rpeak.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
561 # iparams$output.rdata.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
562 # iparams$output.plot.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
563 # iparams$output.result.file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
564 # iparams$replace.flag
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
565 # iparams$clean.files.flag
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
566 iparams <- parse.arguments(args)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
567
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
568 # Print run parameters
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
569 print.run.params(iparams)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
570
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
571 # Check if output files exist
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
572 check.replace.flag(iparams)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
573
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
574 # curr.chip.file and curr.control.file always point to the original ChIP and control files on disk
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
575 # ta.chip.filename & ta.control.filename always point to the final but temporary versions of the ChIP and control files that will be passed to read.align
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
576
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
577 # Download ChIP and control files if necessary to temp.dir
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
578 if (iparams$isurl.chip.file) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
579 curr.chip.file <- file.path(iparams$temp.dir, get.file.parts(iparams$chip.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
580 cat('Downloading ChIP file:',iparams$chip.file,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
581 if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$chip.file)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
582 stop('Error downloading ChIP file:',iparams$chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
583 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
584 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
585 curr.chip.file <- iparams$chip.file # file is in original directory
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
586 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
587
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
588 if (iparams$isurl.control.file) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
589 curr.control.file <- file.path(iparams$temp.dir, get.file.parts(iparams$control.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
590 cat('Downloading control file:',iparams$control.file,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
591 if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$control.file)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
592 stop('Error downloading Control file:',iparams$control.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
593 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
594 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
595 curr.control.file <- iparams$control.file # file is in original directory
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
596 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
597
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
598 # unzip ChIP and input files if required AND copy to temp directory
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
599 if (get.file.parts(curr.chip.file)$ext == '.gz') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
600 ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
601 cat('Decompressing ChIP file\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
602 if (system(paste("gunzip -c",curr.chip.file,">",ta.chip.filename)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
603 stop('Unable to decompress file:', iparams$chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
604 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
605 if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
606 file.remove(curr.chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
607 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
608 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
609 ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$fullname, tmpdir=iparams$temp.dir)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
610 if (iparams$clean.files.flag) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
611 file.rename(curr.chip.file,ta.chip.filename) # move file to temp.dir/[filename][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
612 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
613 file.copy(curr.chip.file,ta.chip.filename) # copy file to temp.dir/[filename][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
614 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
615 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
616
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
617 if (! is.na(iparams$control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
618 if (get.file.parts(curr.control.file)$ext == '.gz') {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
619 ta.control.filename <- tempfile(get.file.parts(curr.control.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
620 cat('Decompressing control file\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
621 if (system(paste("gunzip -c",curr.control.file,">",ta.control.filename)) != 0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
622 stop('Unable to decompress file:', iparams$control.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
623 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
624 if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
625 file.remove(curr.control.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
626 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
627 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
628 ta.control.filename <- tempfile(get.file.parts(curr.control.file)$fullname, tmpdir=iparams$temp.dir) # copy file to temp.dir/[filename][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
629
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
630 if (iparams$clean.files.flag) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
631 file.rename(curr.control.file,ta.control.filename) # move file to temp.dir/[filename][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
632 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
633 file.copy(curr.control.file,ta.control.filename) # copy file to temp.dir/[filename][randsuffix]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
634 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
635 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
636 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
637
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
638 # Remove downloaded files
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
639 if (iparams$isurl.chip.file & file.exists(curr.chip.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
640 file.remove(curr.chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
641 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
642
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
643 if (! is.na(iparams$control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
644 if (iparams$isurl.control.file & file.exists(curr.control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
645 file.remove(curr.control.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
646 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
647 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
648
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
649 # Load SPP library
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
650 library(spp)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
651
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
652 # Read ChIP tagAlign/BAM files
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
653 cat("Reading ChIP tagAlign/BAM file",iparams$chip.file,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
654 chip.data <- read.align(ta.chip.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
655 cat("ChIP data read length",chip.data$read.length,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
656 file.remove(ta.chip.filename) # Delete temporary file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
657 if (length(chip.data$tags)==0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
658 stop('Error in ChIP file format:', iparams$chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
659 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
660 # Remove illegal chromosome names
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
661 if (! is.na(iparams$chrname.rm.pattern)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
662 selectidx <- which(grepl(iparams$chrname.rm.pattern,names(chip.data$tags))==FALSE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
663 chip.data$tags <- chip.data$tags[selectidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
664 chip.data$quality <- chip.data$quality[selectidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
665 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
666 chip.data$num.tags <- sum(unlist(lapply(chip.data$tags,function(d) length(d))))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
667
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
668 # Read Control tagAlign/BAM files
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
669 if (! is.na(iparams$control.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
670 cat("Reading Control tagAlign/BAM file",iparams$control.file,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
671 control.data <- read.align(ta.control.filename)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
672 file.remove(ta.control.filename) # Delete temporary file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
673 if (length(control.data$tags)==0) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
674 stop('Error in control file format:', iparams$chip.file)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
675 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
676 cat("Control data read length",control.data$read.length,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
677 # Remove illegal chromosome names
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
678 if (! is.na(iparams$chrname.rm.pattern)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
679 selectidx <- which(grepl(iparams$chrname.rm.pattern,names(control.data$tags))==FALSE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
680 control.data$tags <- control.data$tags[selectidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
681 control.data$quality <- control.data$quality[selectidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
682 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
683 control.data$num.tags <- sum(unlist(lapply(control.data$tags,function(d) length(d))))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
684 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
685
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
686 # Open multiple processes if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
687 if (is.na(iparams$n.nodes)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
688 cluster.nodes <- NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
689 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
690 library(snow)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
691 cluster.nodes <- makeCluster(iparams$n.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
692 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
693
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
694 # #################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
695 # Calculate cross-correlation for various strand shifts
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
696 # #################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
697 cat("Calculating peak characteristics\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
698 # crosscorr
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
699 # $cross.correlation : Cross-correlation profile as an $x/$y data.frame
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
700 # $peak : Position ($x) and height ($y) of automatically detected cross-correlation peak.
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
701 # $whs: Optimized window half-size for binding detection (based on the width of the cross-correlation peak)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
702 crosscorr <- get.binding.characteristics(chip.data,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
703 srange=iparams$sep.range[c(1,3)],
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
704 bin=iparams$sep.range[2],
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
705 accept.all.tags=T,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
706 cluster=cluster.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
707 if (!is.na(iparams$n.nodes)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
708 stopCluster(cluster.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
709 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
710
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
711 # Smooth the cross-correlation curve if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
712 cc <- crosscorr$cross.correlation
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
713 crosscorr$min.cc <- crosscorr$cross.correlation[ which.min(crosscorr$cross.correlation$y) , ] # minimum value and shift of cross-correlation
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
714 cat("Minimum cross-correlation value", crosscorr$min.cc$y,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
715 cat("Minimum cross-correlation shift", crosscorr$min.cc$x,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
716 sbw <- 2*floor(ceiling(5/iparams$sep.range[2]) / 2) + 1 # smoothing bandwidth
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
717 cc$y <- runmean(cc$y,sbw,alg="fast")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
718
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
719 # Compute cross-correlation peak
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
720 bw <- ceiling(2/iparams$sep.range[2]) # crosscorr[i] is compared to crosscorr[i+/-bw] to find peaks
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
721 peakidx <- (diff(cc$y,bw)>=0) # cc[i] > cc[i-bw]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
722 peakidx <- diff(peakidx,bw)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
723 peakidx <- which(peakidx==-1) + bw
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
724
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
725 # exclude peaks from the excluded region
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
726 if ( is.nan(iparams$ex.range[2]) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
727 iparams$ex.range[2] <- chip.data$read.length+10
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
728 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
729 peakidx <- peakidx[(cc$x[peakidx] < iparams$ex.range[1]) | (cc$x[peakidx] > iparams$ex.range[2])]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
730 cc <- cc[peakidx,]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
731
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
732 # Find max peak position and other peaks within 0.9*max_peakvalue that are further away from maxpeakposition
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
733 maxpeakidx <- which.max(cc$y)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
734 maxpeakshift <- cc$x[maxpeakidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
735 maxpeakval <- cc$y[maxpeakidx]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
736 peakidx <-which((cc$y >= 0.9*maxpeakval) & (cc$x >= maxpeakshift))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
737 cc <- cc[peakidx,]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
738
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
739 # sort the peaks and get the top 3
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
740 sortidx <- order(cc$y,decreasing=TRUE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
741 sortidx <- sortidx[c(1:min(3,length(sortidx)))]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
742 cc.peak <- cc[sortidx,]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
743
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
744 # Override peak shift if user supplies peak shift
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
745 if (! is.na(iparams$sep.peak)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
746 cc.peak <- approx(crosscorr$cross.correlation$x,crosscorr$cross.correlation$y,iparams$sep.peak,rule=2)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
747 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
748 cat("Peak cross-correlation value", paste(cc.peak$y,collapse=","),"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
749 cat("Peak strand shift",paste(cc.peak$x,collapse=","),"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
750
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
751 # Reset values in crosscorr
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
752 crosscorr$peak$x <- cc.peak$x[1]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
753 crosscorr$peak$y <- cc.peak$y[1]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
754
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
755 # Compute window half size
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
756 whs.thresh <- crosscorr$min.cc$y + (crosscorr$peak$y - crosscorr$min.cc$y)/3
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
757 crosscorr$whs <- max(crosscorr$cross.correlation$x[crosscorr$cross.correlation$y >= whs.thresh])
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
758 cat("Window half size",crosscorr$whs,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
759
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
760 # Compute phantom peak coefficient
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
761 ph.peakidx <- which( ( crosscorr$cross.correlation$x >= ( chip.data$read.length - round(2*iparams$sep.range[2]) ) ) &
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
762 ( crosscorr$cross.correlation$x <= ( chip.data$read.length + round(1.5*iparams$sep.range[2]) ) ) )
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
763 ph.peakidx <- ph.peakidx[ which.max(crosscorr$cross.correlation$y[ph.peakidx]) ]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
764 crosscorr$phantom.cc <- crosscorr$cross.correlation[ph.peakidx,]
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
765 cat("Phantom peak location",crosscorr$phantom.cc$x,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
766 cat("Phantom peak Correlation",crosscorr$phantom.cc$y,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
767 crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$phantom.cc$y
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
768 crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$min.cc$y
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
769 cat("Normalized cross-correlation coefficient (NCCC)",crosscorr$phantom.coeff,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
770 crosscorr$rel.phantom.coeff <- (crosscorr$peak$y - crosscorr$min.cc$y) / (crosscorr$phantom.cc$y - crosscorr$min.cc$y)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
771 cat("Relative Cross correlation Coefficient (RCCC)",crosscorr$rel.phantom.coeff,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
772 crosscorr$phantom.quality.tag <- NA
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
773 if ( (crosscorr$rel.phantom.coeff >= 0) & (crosscorr$rel.phantom.coeff < 0.25) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
774 crosscorr$phantom.quality.tag <- -2
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
775 } else if ( (crosscorr$rel.phantom.coeff >= 0.25) & (crosscorr$rel.phantom.coeff < 0.5) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
776 crosscorr$phantom.quality.tag <- -1
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
777 } else if ( (crosscorr$rel.phantom.coeff >= 0.5) & (crosscorr$rel.phantom.coeff < 1) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
778 crosscorr$phantom.quality.tag <- 0
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
779 } else if ( (crosscorr$rel.phantom.coeff >= 1) & (crosscorr$rel.phantom.coeff < 1.5) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
780 crosscorr$phantom.quality.tag <- 1
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
781 } else if ( (crosscorr$rel.phantom.coeff >= 1.5) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
782 crosscorr$phantom.quality.tag <- 2
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
783 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
784 cat("Phantom Peak Quality Tag",crosscorr$phantom.quality.tag,"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
785
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
786 # Output result to result file if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
787 #Filename\tnumReads\tPeak_shift\tPeak_Correlation\tRead_length\tPhantomPeak_Correlation\tMin_Correlation_Shift\tMin_Correlation\tNormalized_CrossCorrelation_Coefficient\tRelative_CrossCorrelation_Coefficient\tQualityTag)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
788 if (! is.na(iparams$output.result.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
789 cat(get.file.parts(iparams$chip.file)$fullname,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
790 chip.data$num.tags,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
791 paste(cc.peak$x,collapse=","),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
792 paste(cc.peak$y,collapse=","),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
793 crosscorr$phantom.cc$x,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
794 crosscorr$phantom.cc$y,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
795 crosscorr$min.cc$x,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
796 crosscorr$min.cc$y,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
797 crosscorr$phantom.coeff,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
798 crosscorr$rel.phantom.coeff,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
799 crosscorr$phantom.quality.tag,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
800 sep="\t",
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
801 file=iparams$output.result.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
802 append=TRUE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
803 cat("\n",
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
804 file=iparams$output.result.file,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
805 append=TRUE)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
806 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
807
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
808 # Save figure if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
809 if (! is.na(iparams$output.plot.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
810 pdf(file=iparams$output.plot.file,width=5,height=5)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
811 par(mar = c(4,3.5,2,0.5), mgp = c(1.5,0.5,0), cex = 0.8);
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
812 plot(crosscorr$cross.correlation,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
813 type='l',
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
814 xlab=sprintf("strand-shift (%s)",paste(cc.peak$x,collapse=",")),
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
815 ylab="cross-correlation")
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
816 abline(v=cc.peak$x,lty=2,col=2)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
817 abline(v=crosscorr$phantom.cc$x,lty=2,col=4)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
818 title(main=get.file.parts(iparams$chip.file)$fullname,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
819 sub=sprintf("NSC=%g,RSC=%g,Qtag=%d",crosscorr$phantom.coeff,crosscorr$rel.phantom.coeff,crosscorr$phantom.quality.tag))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
820 dev.off();
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
821 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
822
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
823 # Save RData file if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
824 if (! is.na(iparams$output.rdata.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
825 save(iparams,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
826 crosscorr,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
827 cc.peak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
828 file=iparams$output.rdata.file);
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
829 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
830
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
831 # #################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
832 # Call peaks
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
833 # #################################
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
834
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
835 if ( !is.na(iparams$output.npeak.file) || !is.na(iparams$output.rpeak.file) ) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
836
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
837 # Remove local tag anomalies
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
838 cat('Removing read stacks\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
839 chip.data <- remove.local.tag.anomalies(chip.data$tags)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
840 control.data <- remove.local.tag.anomalies(control.data$tags)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
841
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
842 # Open multiple processes if required
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
843 if (is.na(iparams$n.nodes)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
844 cluster.nodes <- NULL
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
845 } else {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
846 cluster.nodes <- makeCluster(iparams$n.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
847 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
848
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
849 # Find peaks
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
850 cat('Finding peaks\n',file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
851 if (!is.na(iparams$npeak)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
852 iparams$fdr <- 0.96
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
853 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
854 narrow.peaks <- find.binding.positions(signal.data=chip.data,control.data=control.data,fdr=iparams$fdr,method=tag.lwcc,whs=crosscorr$whs,cluster=cluster.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
855 if (!is.na(iparams$n.nodes)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
856 stopCluster(cluster.nodes)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
857 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
858 cat(paste("Detected",sum(unlist(lapply(narrow.peaks$npl,function(d) length(d$x)))),"peaks"),"\n",file=stdout())
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
859
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
860 # Write to narrowPeak file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
861 if (!is.na(iparams$output.npeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
862 write.narrowpeak.binding(narrow.peaks,iparams$output.npeak.file,margin=round(crosscorr$whs/2))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
863 #system(paste('gzip -f ',iparams$output.npeak.file))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
864 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
865
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
866 # Compute and write regionPeak file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
867 if (!is.na(iparams$output.rpeak.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
868 region.peaks <- add.broad.peak.regions(chip.data,control.data,narrow.peaks,window.size=max(50,round(crosscorr$whs/4)),z.thr=10)
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
869 write.narrowpeak.binding(region.peaks,iparams$output.rpeak.file,margin=round(crosscorr$whs/2))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
870 #system(paste('gzip -f ',iparams$output.rpeak.file))
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
871 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
872
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
873 # Save Rdata file
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
874 if (! is.na(iparams$output.rdata.file)) {
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
875 save(iparams,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
876 crosscorr,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
877 cc.peak,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
878 narrow.peaks,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
879 region.peaks,
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
880 file=iparams$output.rdata.file);
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
881 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
882
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
883 }
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
884
b15734276ca3 Initial upload.
stemcellcommons
parents:
diff changeset
885