annotate run_spp.R @ 5:b5c32b6c6a0b draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 18:41:08 -0500
parents 86146a313b66
children 495a6d033ca1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
1 # run_spp.R
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
2 # =============
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
3 # Author: Anshul Kundaje, Computer Science Dept., Stanford University
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
4 # Email: akundaje@stanford.edu
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
5 # Last updated: Oct 8, 2010
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
6 # =============
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
7 # MANDATORY ARGUMENTS
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
8 # -c=<ChIP_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
9 # MANDATORY ARGUMENT FOR PEAK CALLING
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
10 # -i=<Input_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
11 # OPTIONAL ARGUMENTS
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
12 # -s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
13 # -speak=<strPeak>, user-defined cross-correlation peak strandshift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
14 # -x=<min>:<max>, strand shifts to exclude (This is mainly to avoid phantom peaks) default=10:(readlen+10)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
15 # -p=<nodes> , number of parallel processing nodes, default=NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
16 # -fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
17 # -npeak=<numPeaks>, threshold on number of peaks to call
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
18 # -tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
19 # -filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
20 # OUTPUT PARAMETERS
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
21 # -odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
22 # -savn=<narrowpeakfilename> OR -savn NarrowPeak file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
23 # -savr=<regionpeakfilename> OR -savr RegionPeak file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
24 # -savd=<rdatafile> OR -savd , save Rdata file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
25 # -savp=<plotdatafile> OR -savp , save cross-correlation plot
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
26 # -out=<resultfile>, append peakshift result to a file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
27 # format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
28 # -rf , if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
29 # -clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
30
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
31 args <- commandArgs(trailingOnly=TRUE); # Read Arguments from command line
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
32 nargs = length(args); # number of arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
33
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
34 # ###########################################################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
35 # AUXILIARY FUNCTIONS
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
36 # ###########################################################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
37
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
38 print.usage <- function() {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
39 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
40 # Function will print function usage
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
41 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
42 cat('Usage: Rscript run_spp.R <options>\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
43 cat('MANDATORY ARGUMENTS\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
44 cat('-c=<ChIP_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped)(FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
45 cat('MANDATORY ARGUMENTS FOR PEAK CALLING\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
46 cat('-i=<Input_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
47 cat('OPTIONAL ARGUMENTS\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
48 cat('-s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
49 cat('-speak=<strPeak>, user-defined cross-correlation peak strandshift\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
50 cat('-x=<min>:<max>, strand shifts to exclude (This is mainly to avoid region around phantom peak) default=10:(readlen+10)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
51 cat('-p=<nodes> , number of parallel processing nodes, default=0\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
52 cat('-fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
53 cat('-npeak=<numPeaks>, threshold on number of peaks to call\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
54 cat('-tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
55 cat('-filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
56 cat('OUTPUT ARGUMENTS\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
57 cat('-odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
58 cat('-savn=<narrowpeakfilename> OR -savn NarrowPeak file name (fixed width peaks)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
59 cat('-savr=<regionpeakfilename> OR -savr RegionPeak file name (variable width peaks with regions of enrichment)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
60 cat('-savd=<rdatafile> OR -savd, save Rdata file\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
61 cat('-savp=<plotdatafile> OR -savp, save cross-correlation plot\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
62 cat('-out=<resultfile>, append peakshift/phantomPeak results to a file\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
63 cat(' format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag)\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
64 cat('-rf, if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
65 cat('-clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
66 } # end: print.usage()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
67
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
68 get.file.parts <- function(file.fullpath) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
69 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
70 # Function will take a file name with path and split the file name into
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
71 # path, fullname, name and ext
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
72 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
73 if (! is.character(file.fullpath)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
74 stop('File name must be a string')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
75 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
76
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
77 file.parts <- strsplit(as.character(file.fullpath), .Platform$file.sep, fixed=TRUE)[[1]] # split on file separator
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
78
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
79 if (length(file.parts) == 0) { # if empty file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
80 return(list(path='',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
81 fullname='',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
82 name='',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
83 ext='')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
84 )
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
85 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
86 if (length(file.parts) == 1) { # if no path then just the file name itself
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
87 file.path <- '.'
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
88 file.fullname <- file.parts
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
89 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
90 file.path <- paste(file.parts[1:(length(file.parts)-1)], collapse=.Platform$file.sep) # 1:last-1 token is path
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
91 file.fullname <- file.parts[length(file.parts)] # last token is filename
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
92 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
93 file.fullname.parts <- strsplit(file.fullname,'.',fixed=TRUE)[[1]] # split on .
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
94 if (length(file.fullname.parts) == 1) { # if no extension
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
95 file.ext <- ''
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
96 file.name <- file.fullname.parts
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
97 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
98 file.ext <- paste('.', file.fullname.parts[length(file.fullname.parts)], sep="") # add the . to the last token
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
99 file.name <- paste(file.fullname.parts[1:(length(file.fullname.parts)-1)], collapse=".")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
100 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
101 return(list(path=file.path,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
102 fullname=file.fullname,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
103 name=file.name,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
104 ext=file.ext))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
105 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
106 } # end: get.file.parts()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
107
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
108 parse.arguments <- function(args) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
109 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
110 # Function will parse arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
111 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
112 # Set arguments to default values
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
113 chip.file <- NA # main ChIP tagAlign/BAM file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
114 isurl.chip.file <- FALSE # flag indicating whether ChIP file is a URL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
115 control.file <- NA # control tagAlign/BAM file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
116 isurl.control.file <- FALSE # flag indicating whether control file is a URL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
117 sep.min <- -100 # min strand shift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
118 sep.max <- 600 # max strand shift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
119 sep.bin <- 5 # increment for strand shift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
120 sep.peak <- NA # user-defined peak shift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
121 exclude.min <- 10 # lowerbound of strand shift exclusion region
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
122 exclude.max <- NaN # upperbound of strand shift exclusion region
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
123 n.nodes <- NA # number of parallel processing nodes
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
124 fdr <- 0.01 # false discovery rate threshold for peak calling
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
125 npeak <- NA # threshold on number of peaks to call
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
126 temp.dir <- tempdir() # temporary directory
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
127 chrname.rm.pattern <- NA # chromosome name pattern used to remove tags
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
128 output.odir <- NA # Output directory name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
129 output.npeak.file <- NA # Output narrowPeak file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
130 output.rpeak.file <- NA # Output regionPeak file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
131 output.rdata.file <- NA # Rdata file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
132 output.plot.file <- NA # cross correlation plot file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
133 output.result.file <- NA # result file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
134 replace.flag <- FALSE # replace file flag
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
135 clean.files.flag <- FALSE # file deletion flag
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
136
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
137 # Parse arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
138 for (each.arg in args) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
139
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
140 if (grepl('^-c=',each.arg)) { #-c=<chip.file>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
141
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
142 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
143 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
144 chip.file <- arg.split[2] # second part is chip.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
145 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
146 stop('No tagAlign/BAM file name provided for parameter -c=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
147 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
148
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
149 } else if (grepl('^-i=',each.arg)) { #-i=<control.file>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
150
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
151 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
152 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
153 control.file <- arg.split[2] # second part is control.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
154 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
155 stop('No tagAlign/BAM file name provided for parameter -i=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
156 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
157
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
158 } else if (grepl('^-s=',each.arg)) { #-s=<sep.min>:<sep.bin>:<sep.max>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
159
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
160 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
161 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
162 sep.vals <- arg.split[2] # second part is sepmin:sepbin:sepmax
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
163 sep.vals.split <- strsplit(sep.vals,':',fixed=TRUE)[[1]] # split on :
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
164 if (length(sep.vals.split) != 3) { # must have 3 parts
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
165 stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
166 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
167 if (any(is.na(as.numeric(sep.vals.split)))) { # check that sep vals are numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
168 stop('Strand shift limits must be numeric values')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
169 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
170 sep.min <- round(as.numeric(sep.vals.split[1]))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
171 sep.bin <- round(as.numeric(sep.vals.split[2]))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
172 sep.max <- round(as.numeric(sep.vals.split[3]))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
173 if ((sep.min > sep.max) || (sep.bin > (sep.max - sep.min)) || (sep.bin < 0)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
174 stop('Illegal separation values -s=sepmin:sepbin:sepmax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
175 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
176 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
177 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
178 stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
179 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
180
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
181 } else if (grepl('^-speak=',each.arg)) { #-speak=<sep.peak> , user-defined cross-correlation peak strandshift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
182
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
183 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
184 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
185 sep.peak <- arg.split[2] # second part is <sep.peak>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
186 if (is.na(as.numeric(sep.peak))) { # check that sep.peak is numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
187 stop('-speak=<sep.peak>: User defined peak shift must be numeric')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
188 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
189 sep.peak <- as.numeric(sep.peak)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
190 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
191 stop('User defined peak shift must be provided as -speak=<sep.peak>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
192 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
193
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
194 } else if (grepl('^-x=',each.arg)) { #-x=<exclude.min>:<exclude.max>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
195
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
196 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
197 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
198 exclude.vals <- arg.split[2] # second part is excludemin:excludemax
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
199 exclude.vals.split <- strsplit(exclude.vals,':',fixed=TRUE)[[1]] # split on :
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
200 if (length(exclude.vals.split) != 2) { # must have 2 parts
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
201 stop('Exclusion limits must be specified as -x=excludemin:excludemax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
202 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
203 if (any(is.na(as.numeric(exclude.vals.split)))) { # check that exclude vals are numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
204 stop('Exclusion limits must be numeric values')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
205 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
206 exclude.min <- round(as.numeric(exclude.vals.split[1]))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
207 exclude.max <- round(as.numeric(exclude.vals.split[2]))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
208 if (exclude.min > exclude.max) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
209 stop('Illegal exclusion limits -x=excludemin:excludemax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
210 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
211 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
212 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
213 stop('Exclusion limits must be specified as -x=excludemin:excludemax')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
214 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
215
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
216 } else if (grepl('^-p=',each.arg)) { #-p=<n.nodes> , number of parallel processing nodes, default=NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
217
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
218 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
219 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
220 n.nodes <- arg.split[2] # second part is numnodes
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
221 if (is.na(as.numeric(n.nodes))) { # check that n.nodes is numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
222 stop('-p=<numnodes>: numnodes must be numeric')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
223 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
224 n.nodes <- round(as.numeric(n.nodes))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
225 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
226 stop('Number of parallel nodes must be provided as -p=<numnodes>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
227 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
228
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
229 } else if (grepl('^-fdr=',each.arg)) { #-fdr=<fdr> , false discovery rate, default=0.01
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
230
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
231 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
232 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
233 fdr <- arg.split[2] # second part is fdr
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
234 if (is.na(as.numeric(fdr))) { # check that fdr is numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
235 stop('-fdr=<falseDiscoveryRate>: false discovery rate must be numeric')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
236 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
237 fdr <- as.numeric(fdr)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
238 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
239 stop('False discovery rate must be provided as -fdr=<fdr>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
240 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
241
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
242 } else if (grepl('^-npeak=',each.arg)) { #-npeak=<numPeaks> , number of peaks threshold, default=NA
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
243
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
244 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
245 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
246 npeak <- arg.split[2] # second part is npeak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
247 if (is.na(as.numeric(npeak))) { # check that npeak is numeric
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
248 stop('-npeak=<numPeaks>: threshold on number of peaks must be numeric')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
249 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
250 npeak <- round(as.numeric(npeak))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
251 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
252 stop('Threshold on number of peaks must be provided as -npeak=<numPeaks>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
253 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
254
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
255 } else if (grepl('^-tmpdir=',each.arg)) { #-tmpdir=<temp.dir>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
256
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
257 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
258 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
259 temp.dir <- arg.split[2] # second part is temp.dir
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
260 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
261 stop('No temporary directory provided for parameter -tmpdir=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
262 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
263
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
264 } else if (grepl('^-filtchr=',each.arg)) { #-filtchr=<chrname.rm.pattern>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
265
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
266 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
267 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
268 chrname.rm.pattern <- arg.split[2] # second part is chrname.rm.pattern
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
269 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
270 stop('No pattern provided for parameter -filtchr=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
271 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
272
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
273 } else if (grepl('^-odir=',each.arg)) { #-odir=<output.odir>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
274
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
275 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
276 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
277 output.odir <- arg.split[2] # second part is output.odir
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
278 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
279 stop('No output directory provided for parameter -odir=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
280 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
281
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
282 } else if (grepl('^-savn',each.arg)) { # -savn=<output.npeak.file> OR -savn , save narrowpeak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
283
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
284 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
285 if (! is.na(arg.split[2])) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
286 output.npeak.file <- arg.split[2] #-savn=
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
287 } else if (each.arg=='-savn') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
288 output.npeak.file <- NULL # NULL indicates get the name from the main file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
289 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
290 stop('Argument for saving narrowPeak file must be -savn or -savn=<filename>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
291 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
292
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
293 } else if (grepl('^-savr',each.arg)) { # -savr=<output.rpeak.file> OR -savr , save regionpeak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
294
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
295 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
296 if (! is.na(arg.split[2])) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
297 output.rpeak.file <- arg.split[2] #-savr=
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
298 } else if (each.arg=='-savr') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
299 output.rpeak.file <- NULL # NULL indicates get the name from the main file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
300 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
301 stop('Argument for saving regionPeak file must be -savr or -savr=<filename>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
302 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
303
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
304 } else if (grepl('^-savd',each.arg)) { # -savd=<output.rdata.file> OR -savd , save Rdata file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
305
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
306 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
307 if (! is.na(arg.split[2])) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
308 output.rdata.file <- arg.split[2] #-savd=
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
309 } else if (each.arg=='-savd') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
310 output.rdata.file <- NULL # NULL indicates get the name from the main file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
311 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
312 stop('Argument for saving Rdata file must be -savd or -savd=<filename>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
313 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
314
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
315 } else if (grepl('^-savp',each.arg)) { # -savp=<output.plot.file> OR -savp , save cross-correlation plot
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
316
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
317 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
318 if (! is.na(arg.split[2])) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
319 output.plot.file <- arg.split[2] #-savp=
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
320 } else if (each.arg=='-savp') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
321 output.plot.file <- NULL # NULL indicates get the name from the main file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
322 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
323 stop('Argument for saving Rdata file must be -savp or -savp=<filename>')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
324 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
325
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
326 } else if (grepl('^-out=',each.arg)) { #-out=<output.result.file>
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
327
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
328 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
329 if (! is.na(arg.split[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
330 output.result.file <- arg.split[2] # second part is output.result.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
331 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
332 stop('No result file provided for parameter -out=')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
333 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
334
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
335 } else if (each.arg == '-rf') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
336
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
337 replace.flag <- TRUE
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
338
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
339 } else if (each.arg == '-clean') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
340
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
341 clean.files.flag <- TRUE
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
342
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
343 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
344
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
345 stop('Illegal argument ',each.arg)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
346 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
347 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
348 # End: for loop
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
349
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
350 # Check mandatory arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
351 if (is.na(chip.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
352 stop('-c=<tagAlign/BAMFileName> is a mandatory argument')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
353 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
354
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
355 if (is.na(control.file) && ! is.na(output.npeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
356 stop('-i=<tagAlign/BAMFileName> is required for peak calling')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
357 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
358
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
359 # Check if ChIP and control files are URLs
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
360 if (grepl('^http://',chip.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
361 isurl.chip.file <- TRUE
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
362 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
363 if (grepl('^http://',control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
364 isurl.control.file <- TRUE
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
365 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
366
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
367 # If ChIP file is a URL output.odir MUST be specified
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
368 if (isurl.chip.file && is.na(output.odir)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
369 stop('If ChIP file is a URL, then output directory MUST be specified')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
370 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
371
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
372 # Check that ChIP and control files exist
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
373 if (isurl.chip.file) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
374 if (system(paste('wget -q --spider',chip.file)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
375 stop('ChIP file URL not valid: ',chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
376 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
377 } else if (!file.exists(chip.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
378 stop('ChIP File:',chip.file,' does not exist')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
379 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
380
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
381 if (!is.na(control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
382 if (isurl.control.file) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
383 if (system(paste('wget -q --spider',control.file)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
384 stop('Control file URL not valid: ',control.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
385 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
386 } else if (!file.exists(control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
387 stop('Control File:',control.file,' does not exist')
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
388 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
389 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
390
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
391 # Correct other arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
392 if (is.na(output.odir)) { # Reconstruct output.odir if not provided
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
393 output.odir <- get.file.parts(chip.file)$path
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
394 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
395
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
396 if (is.null(output.npeak.file)) { # Reconstruct output.npeak.file if NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
397 output.npeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.narrowPeak', sep=""))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
398 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
399
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
400 if (is.null(output.rpeak.file)) { # Reconstruct output.rpeak.file if NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
401 output.rpeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.regionPeak', sep=""))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
402 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
403
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
404 if (is.null(output.rdata.file)) { # Reconstruct output.rdata.file if NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
405 output.rdata.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.Rdata', sep=""))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
406 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
407
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
408 if (is.null(output.plot.file)) { # Reconstruct output.plot.file if NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
409 output.plot.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.pdf', sep=""))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
410 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
411
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
412 return(list(chip.file=chip.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
413 isurl.chip.file=isurl.chip.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
414 control.file=control.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
415 isurl.control.file=isurl.control.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
416 sep.range=c(sep.min,sep.bin,sep.max),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
417 sep.peak=sep.peak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
418 ex.range=c(exclude.min,exclude.max),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
419 n.nodes=n.nodes,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
420 fdr=fdr,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
421 npeak=npeak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
422 temp.dir=temp.dir,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
423 chrname.rm.pattern=chrname.rm.pattern,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
424 output.odir=output.odir,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
425 output.npeak.file=output.npeak.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
426 output.rpeak.file=output.rpeak.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
427 output.rdata.file=output.rdata.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
428 output.plot.file=output.plot.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
429 output.result.file=output.result.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
430 replace.flag=replace.flag,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
431 clean.files.flag=clean.files.flag))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
432 } # end: parse.arguments()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
433
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
434 read.align <- function(align.filename) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
435 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
436 # Function will read a tagAlign or BAM file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
437 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
438 if (grepl('(\\.bam)?.*(\\.tagAlign)',align.filename)) { # if tagalign file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
439 chip.data <- read.tagalign.tags(align.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
440 # get readlength info
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
441 tmpDataRows <- read.table(align.filename,nrows=500)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
442 chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
443 } else if (grepl('(\\.tagAlign)?.*(\\.bam)',align.filename)) { # if bam file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
444 # create BAM file name
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
445 bam2align.filename <- sub('\\.bam','.tagAlign',align.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
446 # generate command to convert bam to tagalign
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
447 command <- vector(length=2)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
448 command[1] <- sprintf("samtools view -F 0x0204 -o - %s",align.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
449 command[2] <- paste("awk 'BEGIN{FS=" , '"\t"' , ";OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
450 # command[2] <- paste("awk 'BEGIN{OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
451 command <- paste(command,collapse=" | ")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
452 # Run command
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
453 status <- system(command,intern=FALSE,ignore.stderr=FALSE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
454 if ((status != 0) || !file.exists(bam2align.filename)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
455 cat(sprintf("Error converting BAM to tagalign file: %s\n",align.filename),file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
456 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
457 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
458 # read converted BAM file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
459 chip.data <- read.tagalign.tags(bam2align.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
460 # get readlength info
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
461 tmpDataRows <- read.table(bam2align.filename,nrows=500)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
462 chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
463 # delete temporary tagalign file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
464 file.remove(bam2align.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
465 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
466 cat(sprintf("Error:Unknown file format for file:%s\n",align.fname),file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
467 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
468 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
469 return(chip.data)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
470 } # end: read.align()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
471
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
472 print.run.params <- function(params){
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
473 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
474 # Output run parameters
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
475 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
476 cat('################\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
477 cat(iparams$chip.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
478 iparams$control.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
479 iparams$sep.range,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
480 iparams$sep.peak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
481 iparams$ex.range,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
482 iparams$n.nodes,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
483 iparams$fdr,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
484 iparams$npeak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
485 iparams$output.odir,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
486 iparams$output.npeak.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
487 iparams$output.rpeak.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
488 iparams$output.rdata.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
489 iparams$output.plot.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
490 iparams$output.result.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
491 iparams$replace.flag,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
492 labels=c('ChIP data:','Control data:', 'strandshift(min):','strandshift(step):','strandshift(max)','user-defined peak shift',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
493 'exclusion(min):','exclusion(max):','num parallel nodes:','FDR threshold:','NumPeaks Threshold:','Output Directory:',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
494 'narrowPeak output file name:', 'regionPeak output file name:', 'Rdata filename:',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
495 'plot pdf filename:','result filename:','Overwrite files?:'),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
496 fill=18,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
497 file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
498 cat('\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
499 } # end: print.run.parameters()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
500
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
501 check.replace.flag <- function(params){
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
502 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
503 # Check if files exist
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
504 # ===================================
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
505 # If replace.flag is NOT set, check if output files exist and abort if necessary
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
506 if (! iparams$replace.flag) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
507 if (! is.na(iparams$output.npeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
508 if (file.exists(iparams$output.npeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
509 cat('narrowPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
510 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
511 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
512 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
513 if (! is.na(iparams$output.rpeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
514 if (file.exists(iparams$output.rpeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
515 cat('regionPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
516 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
517 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
518 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
519 if (! is.na(iparams$output.plot.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
520 if (file.exists(iparams$output.plot.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
521 cat('Plot file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
522 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
523 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
524 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
525 if (! is.na(iparams$output.rdata.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
526 if (file.exists(iparams$output.rdata.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
527 cat('Rdata file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
528 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
529 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
530 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
531 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
532 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
533
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
534 # #############################################################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
535 # MAIN FUNCTION
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
536 # #############################################################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
537
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
538 # Check number of arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
539 minargs = 1;
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
540 maxargs = 17;
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
541 if (nargs < minargs | nargs > maxargs) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
542 print.usage()
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
543 q(save="no",status=1)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
544 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
545
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
546 # Parse arguments
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
547 # iparams$chip.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
548 # iparams$isurl.chip.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
549 # iparams$control.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
550 # iparams$isurl.control.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
551 # iparams$sep.range
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
552 # iparams$sep.peak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
553 # iparams$ex.range
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
554 # iparams$n.nodes
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
555 # iparams$fdr
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
556 # iparams$npeak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
557 # iparams$temp.dir
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
558 # iparams$output.odir
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
559 # iparams$output.npeak.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
560 # iparams$output.rpeak.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
561 # iparams$output.rdata.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
562 # iparams$output.plot.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
563 # iparams$output.result.file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
564 # iparams$replace.flag
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
565 # iparams$clean.files.flag
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
566 iparams <- parse.arguments(args)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
567
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
568 # Print run parameters
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
569 print.run.params(iparams)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
570
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
571 # Check if output files exist
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
572 check.replace.flag(iparams)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
573
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
574 # curr.chip.file and curr.control.file always point to the original ChIP and control files on disk
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
575 # ta.chip.filename & ta.control.filename always point to the final but temporary versions of the ChIP and control files that will be passed to read.align
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
576
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
577 # Download ChIP and control files if necessary to temp.dir
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
578 if (iparams$isurl.chip.file) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
579 curr.chip.file <- file.path(iparams$temp.dir, get.file.parts(iparams$chip.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
580 cat('Downloading ChIP file:',iparams$chip.file,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
581 if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$chip.file)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
582 stop('Error downloading ChIP file:',iparams$chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
583 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
584 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
585 curr.chip.file <- iparams$chip.file # file is in original directory
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
586 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
587
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
588 if (iparams$isurl.control.file) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
589 curr.control.file <- file.path(iparams$temp.dir, get.file.parts(iparams$control.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
590 cat('Downloading control file:',iparams$control.file,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
591 if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$control.file)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
592 stop('Error downloading Control file:',iparams$control.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
593 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
594 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
595 curr.control.file <- iparams$control.file # file is in original directory
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
596 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
597
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
598 # unzip ChIP and input files if required AND copy to temp directory
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
599 if (get.file.parts(curr.chip.file)$ext == '.gz') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
600 ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
601 cat('Decompressing ChIP file\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
602 if (system(paste("gunzip -c",curr.chip.file,">",ta.chip.filename)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
603 stop('Unable to decompress file:', iparams$chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
604 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
605 if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
606 file.remove(curr.chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
607 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
608 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
609 ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$fullname, tmpdir=iparams$temp.dir)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
610 if (iparams$clean.files.flag) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
611 file.rename(curr.chip.file,ta.chip.filename) # move file to temp.dir/[filename][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
612 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
613 file.copy(curr.chip.file,ta.chip.filename) # copy file to temp.dir/[filename][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
614 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
615 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
616
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
617 if (! is.na(iparams$control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
618 if (get.file.parts(curr.control.file)$ext == '.gz') {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
619 ta.control.filename <- tempfile(get.file.parts(curr.control.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
620 cat('Decompressing control file\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
621 if (system(paste("gunzip -c",curr.control.file,">",ta.control.filename)) != 0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
622 stop('Unable to decompress file:', iparams$control.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
623 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
624 if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
625 file.remove(curr.control.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
626 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
627 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
628 ta.control.filename <- tempfile(get.file.parts(curr.control.file)$fullname, tmpdir=iparams$temp.dir) # copy file to temp.dir/[filename][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
629
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
630 if (iparams$clean.files.flag) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
631 file.rename(curr.control.file,ta.control.filename) # move file to temp.dir/[filename][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
632 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
633 file.copy(curr.control.file,ta.control.filename) # copy file to temp.dir/[filename][randsuffix]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
634 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
635 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
636 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
637
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
638 # Remove downloaded files
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
639 if (iparams$isurl.chip.file & file.exists(curr.chip.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
640 file.remove(curr.chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
641 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
642
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
643 if (! is.na(iparams$control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
644 if (iparams$isurl.control.file & file.exists(curr.control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
645 file.remove(curr.control.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
646 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
647 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
648
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
649 # Load SPP library
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
650 library(spp)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
651
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
652 # Read ChIP tagAlign/BAM files
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
653 cat("Reading ChIP tagAlign/BAM file",iparams$chip.file,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
654 chip.data <- read.align(ta.chip.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
655 cat("ChIP data read length",chip.data$read.length,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
656 file.remove(ta.chip.filename) # Delete temporary file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
657 if (length(chip.data$tags)==0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
658 stop('Error in ChIP file format:', iparams$chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
659 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
660 # Remove illegal chromosome names
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
661 if (! is.na(iparams$chrname.rm.pattern)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
662 selectidx <- which(grepl(iparams$chrname.rm.pattern,names(chip.data$tags))==FALSE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
663 chip.data$tags <- chip.data$tags[selectidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
664 chip.data$quality <- chip.data$quality[selectidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
665 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
666 chip.data$num.tags <- sum(unlist(lapply(chip.data$tags,function(d) length(d))))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
667
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
668 # Read Control tagAlign/BAM files
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
669 if (! is.na(iparams$control.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
670 cat("Reading Control tagAlign/BAM file",iparams$control.file,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
671 control.data <- read.align(ta.control.filename)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
672 file.remove(ta.control.filename) # Delete temporary file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
673 if (length(control.data$tags)==0) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
674 stop('Error in control file format:', iparams$chip.file)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
675 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
676 cat("Control data read length",control.data$read.length,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
677 # Remove illegal chromosome names
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
678 if (! is.na(iparams$chrname.rm.pattern)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
679 selectidx <- which(grepl(iparams$chrname.rm.pattern,names(control.data$tags))==FALSE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
680 control.data$tags <- control.data$tags[selectidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
681 control.data$quality <- control.data$quality[selectidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
682 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
683 control.data$num.tags <- sum(unlist(lapply(control.data$tags,function(d) length(d))))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
684 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
685
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
686 # Open multiple processes if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
687 if (is.na(iparams$n.nodes)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
688 cluster.nodes <- NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
689 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
690 library(snow)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
691 cluster.nodes <- makeCluster(iparams$n.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
692 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
693
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
694 # #################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
695 # Calculate cross-correlation for various strand shifts
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
696 # #################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
697 cat("Calculating peak characteristics\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
698 # crosscorr
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
699 # $cross.correlation : Cross-correlation profile as an $x/$y data.frame
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
700 # $peak : Position ($x) and height ($y) of automatically detected cross-correlation peak.
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
701 # $whs: Optimized window half-size for binding detection (based on the width of the cross-correlation peak)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
702 crosscorr <- get.binding.characteristics(chip.data,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
703 srange=iparams$sep.range[c(1,3)],
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
704 bin=iparams$sep.range[2],
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
705 accept.all.tags=T,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
706 cluster=cluster.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
707 if (!is.na(iparams$n.nodes)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
708 stopCluster(cluster.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
709 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
710
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
711 # Smooth the cross-correlation curve if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
712 cc <- crosscorr$cross.correlation
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
713 crosscorr$min.cc <- crosscorr$cross.correlation[ which.min(crosscorr$cross.correlation$y) , ] # minimum value and shift of cross-correlation
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
714 cat("Minimum cross-correlation value", crosscorr$min.cc$y,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
715 cat("Minimum cross-correlation shift", crosscorr$min.cc$x,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
716 sbw <- 2*floor(ceiling(5/iparams$sep.range[2]) / 2) + 1 # smoothing bandwidth
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
717 cc$y <- runmean(cc$y,sbw,alg="fast")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
718
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
719 # Compute cross-correlation peak
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
720 bw <- ceiling(2/iparams$sep.range[2]) # crosscorr[i] is compared to crosscorr[i+/-bw] to find peaks
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
721 peakidx <- (diff(cc$y,bw)>=0) # cc[i] > cc[i-bw]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
722 peakidx <- diff(peakidx,bw)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
723 peakidx <- which(peakidx==-1) + bw
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
724
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
725 # exclude peaks from the excluded region
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
726 if ( is.nan(iparams$ex.range[2]) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
727 iparams$ex.range[2] <- chip.data$read.length+10
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
728 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
729 peakidx <- peakidx[(cc$x[peakidx] < iparams$ex.range[1]) | (cc$x[peakidx] > iparams$ex.range[2])]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
730 cc <- cc[peakidx,]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
731
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
732 # Find max peak position and other peaks within 0.9*max_peakvalue that are further away from maxpeakposition
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
733 maxpeakidx <- which.max(cc$y)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
734 maxpeakshift <- cc$x[maxpeakidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
735 maxpeakval <- cc$y[maxpeakidx]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
736 peakidx <-which((cc$y >= 0.9*maxpeakval) & (cc$x >= maxpeakshift))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
737 cc <- cc[peakidx,]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
738
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
739 # sort the peaks and get the top 3
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
740 sortidx <- order(cc$y,decreasing=TRUE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
741 sortidx <- sortidx[c(1:min(3,length(sortidx)))]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
742 cc.peak <- cc[sortidx,]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
743
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
744 # Override peak shift if user supplies peak shift
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
745 if (! is.na(iparams$sep.peak)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
746 cc.peak <- approx(crosscorr$cross.correlation$x,crosscorr$cross.correlation$y,iparams$sep.peak,rule=2)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
747 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
748 cat("Peak cross-correlation value", paste(cc.peak$y,collapse=","),"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
749 cat("Peak strand shift",paste(cc.peak$x,collapse=","),"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
750
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
751 # Reset values in crosscorr
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
752 crosscorr$peak$x <- cc.peak$x[1]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
753 crosscorr$peak$y <- cc.peak$y[1]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
754
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
755 # Compute window half size
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
756 whs.thresh <- crosscorr$min.cc$y + (crosscorr$peak$y - crosscorr$min.cc$y)/3
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
757 crosscorr$whs <- max(crosscorr$cross.correlation$x[crosscorr$cross.correlation$y >= whs.thresh])
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
758 cat("Window half size",crosscorr$whs,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
759
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
760 # Compute phantom peak coefficient
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
761 ph.peakidx <- which( ( crosscorr$cross.correlation$x >= ( chip.data$read.length - round(2*iparams$sep.range[2]) ) ) &
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
762 ( crosscorr$cross.correlation$x <= ( chip.data$read.length + round(1.5*iparams$sep.range[2]) ) ) )
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
763 ph.peakidx <- ph.peakidx[ which.max(crosscorr$cross.correlation$y[ph.peakidx]) ]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
764 crosscorr$phantom.cc <- crosscorr$cross.correlation[ph.peakidx,]
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
765 cat("Phantom peak location",crosscorr$phantom.cc$x,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
766 cat("Phantom peak Correlation",crosscorr$phantom.cc$y,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
767 crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$phantom.cc$y
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
768 crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$min.cc$y
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
769 cat("Normalized cross-correlation coefficient (NCCC)",crosscorr$phantom.coeff,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
770 crosscorr$rel.phantom.coeff <- (crosscorr$peak$y - crosscorr$min.cc$y) / (crosscorr$phantom.cc$y - crosscorr$min.cc$y)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
771 cat("Relative Cross correlation Coefficient (RCCC)",crosscorr$rel.phantom.coeff,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
772 crosscorr$phantom.quality.tag <- NA
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
773 if ( (crosscorr$rel.phantom.coeff >= 0) & (crosscorr$rel.phantom.coeff < 0.25) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
774 crosscorr$phantom.quality.tag <- -2
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
775 } else if ( (crosscorr$rel.phantom.coeff >= 0.25) & (crosscorr$rel.phantom.coeff < 0.5) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
776 crosscorr$phantom.quality.tag <- -1
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
777 } else if ( (crosscorr$rel.phantom.coeff >= 0.5) & (crosscorr$rel.phantom.coeff < 1) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
778 crosscorr$phantom.quality.tag <- 0
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
779 } else if ( (crosscorr$rel.phantom.coeff >= 1) & (crosscorr$rel.phantom.coeff < 1.5) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
780 crosscorr$phantom.quality.tag <- 1
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
781 } else if ( (crosscorr$rel.phantom.coeff >= 1.5) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
782 crosscorr$phantom.quality.tag <- 2
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
783 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
784 cat("Phantom Peak Quality Tag",crosscorr$phantom.quality.tag,"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
785
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
786 # Output result to result file if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
787 #Filename\tnumReads\tPeak_shift\tPeak_Correlation\tRead_length\tPhantomPeak_Correlation\tMin_Correlation_Shift\tMin_Correlation\tNormalized_CrossCorrelation_Coefficient\tRelative_CrossCorrelation_Coefficient\tQualityTag)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
788 if (! is.na(iparams$output.result.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
789 cat(get.file.parts(iparams$chip.file)$fullname,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
790 chip.data$num.tags,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
791 paste(cc.peak$x,collapse=","),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
792 paste(cc.peak$y,collapse=","),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
793 crosscorr$phantom.cc$x,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
794 crosscorr$phantom.cc$y,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
795 crosscorr$min.cc$x,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
796 crosscorr$min.cc$y,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
797 crosscorr$phantom.coeff,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
798 crosscorr$rel.phantom.coeff,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
799 crosscorr$phantom.quality.tag,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
800 sep="\t",
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
801 file=iparams$output.result.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
802 append=TRUE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
803 cat("\n",
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
804 file=iparams$output.result.file,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
805 append=TRUE)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
806 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
807
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
808 # Save figure if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
809 if (! is.na(iparams$output.plot.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
810 pdf(file=iparams$output.plot.file,width=5,height=5)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
811 par(mar = c(4,3.5,2,0.5), mgp = c(1.5,0.5,0), cex = 0.8);
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
812 plot(crosscorr$cross.correlation,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
813 type='l',
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
814 xlab=sprintf("strand-shift (%s)",paste(cc.peak$x,collapse=",")),
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
815 ylab="cross-correlation")
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
816 abline(v=cc.peak$x,lty=2,col=2)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
817 abline(v=crosscorr$phantom.cc$x,lty=2,col=4)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
818 title(main=get.file.parts(iparams$chip.file)$fullname,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
819 sub=sprintf("NSC=%g,RSC=%g,Qtag=%d",crosscorr$phantom.coeff,crosscorr$rel.phantom.coeff,crosscorr$phantom.quality.tag))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
820 dev.off();
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
821 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
822
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
823 # Save RData file if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
824 if (! is.na(iparams$output.rdata.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
825 save(iparams,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
826 crosscorr,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
827 cc.peak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
828 file=iparams$output.rdata.file);
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
829 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
830
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
831 # #################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
832 # Call peaks
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
833 # #################################
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
834
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
835 if ( !is.na(iparams$output.npeak.file) || !is.na(iparams$output.rpeak.file) ) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
836
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
837 # Remove local tag anomalies
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
838 cat('Removing read stacks\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
839 chip.data <- remove.local.tag.anomalies(chip.data$tags)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
840 control.data <- remove.local.tag.anomalies(control.data$tags)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
841
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
842 # Open multiple processes if required
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
843 if (is.na(iparams$n.nodes)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
844 cluster.nodes <- NULL
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
845 } else {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
846 cluster.nodes <- makeCluster(iparams$n.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
847 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
848
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
849 # Find peaks
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
850 cat('Finding peaks\n',file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
851 if (!is.na(iparams$npeak)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
852 iparams$fdr <- 0.96
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
853 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
854 narrow.peaks <- find.binding.positions(signal.data=chip.data,control.data=control.data,fdr=iparams$fdr,method=tag.lwcc,whs=crosscorr$whs,cluster=cluster.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
855 if (!is.na(iparams$n.nodes)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
856 stopCluster(cluster.nodes)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
857 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
858 cat(paste("Detected",sum(unlist(lapply(narrow.peaks$npl,function(d) length(d$x)))),"peaks"),"\n",file=stdout())
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
859
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
860 # Write to narrowPeak file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
861 if (!is.na(iparams$output.npeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
862 write.narrowpeak.binding(narrow.peaks,iparams$output.npeak.file,margin=round(crosscorr$whs/2),npeaks=iparams$npeak)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
863 system(paste('gzip -f ',iparams$output.npeak.file))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
864 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
865
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
866 # Compute and write regionPeak file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
867 if (!is.na(iparams$output.rpeak.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
868 region.peaks <- add.broad.peak.regions(chip.data,control.data,narrow.peaks,window.size=max(50,round(crosscorr$whs/4)),z.thr=10)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
869 write.narrowpeak.binding(region.peaks,iparams$output.rpeak.file,margin=round(crosscorr$whs/2),npeaks=iparams$npeak)
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
870 system(paste('gzip -f ',iparams$output.rpeak.file))
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
871 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
872
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
873 # Save Rdata file
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
874 if (! is.na(iparams$output.rdata.file)) {
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
875 save(iparams,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
876 crosscorr,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
877 cc.peak,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
878 narrow.peaks,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
879 region.peaks,
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
880 file=iparams$output.rdata.file);
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
881 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
882
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
883 }
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
884
86146a313b66 Uploaded
modencode-dcc
parents:
diff changeset
885