Mercurial > repos > stemcellcommons > spp_tool

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run_spp.R	Thu Oct 17 12:39:45 2013 -0400
@@ -0,0 +1,885 @@
+# run_spp.R
+# =============
+# Author: Anshul Kundaje, Computer Science Dept., Stanford University
+# Email: akundaje@stanford.edu
+# Last updated: Oct 8, 2010
+# =============
+# MANDATORY ARGUMENTS
+# -c=<ChIP_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
+# MANDATORY ARGUMENT FOR PEAK CALLING
+# -i=<Input_tagAlign/BAMFile>, full path and name of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz)
+# OPTIONAL ARGUMENTS
+# -s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600
+# -speak=<strPeak>, user-defined cross-correlation peak strandshift
+# -x=<min>:<max>, strand shifts to exclude (This is mainly to avoid phantom peaks) default=10:(readlen+10)
+# -p=<nodes> , number of parallel processing nodes, default=NULL
+# -fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling
+# -npeak=<numPeaks>, threshold on number of peaks to call
+# -tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)
+# -filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name
+# OUTPUT PARAMETERS
+# -odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)
+# -savn=<narrowpeakfilename> OR -savn NarrowPeak file name
+# -savr=<regionpeakfilename> OR -savr RegionPeak file name
+# -savd=<rdatafile> OR -savd , save Rdata file
+# -savp=<plotdatafile> OR -savp , save cross-correlation plot
+# -out=<resultfile>, append peakshift result to a file
+#      format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag
+# -rf , if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists
+# -clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files
+
+args <- commandArgs(trailingOnly=TRUE); # Read Arguments from command line
+nargs = length(args); # number of arguments
+
+# ###########################################################################
+# AUXILIARY FUNCTIONS
+# ###########################################################################
+
+print.usage <- function() {
+# ===================================
+# Function will print function usage
+# ===================================
+	cat('Usage: Rscript run_spp.R <options>\n',file=stderr())
+	cat('MANDATORY ARGUMENTS\n',file=stderr())
+	cat('-c=<ChIP_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped)(FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
+	cat('MANDATORY ARGUMENTS FOR PEAK CALLING\n',file=stderr())
+	cat('-i=<Input_alignFile>, full path and name (or URL) of tagAlign/BAM file (can be gzipped) (FILE EXTENSION MUST BE tagAlign.gz, tagAlign, bam or bam.gz) \n',file=stderr())
+	cat('OPTIONAL ARGUMENTS\n',file=stderr())
+	cat('-s=<min>:<step>:<max> , strand shifts at which cross-correlation is evaluated, default=-100:5:600\n',file=stderr())
+	cat('-speak=<strPeak>, user-defined cross-correlation peak strandshift\n',file=stderr())
+	cat('-x=<min>:<max>, strand shifts to exclude (This is mainly to avoid region around phantom peak) default=10:(readlen+10)\n',file=stderr())
+	cat('-p=<nodes> , number of parallel processing nodes, default=0\n',file=stderr())
+	cat('-fdr=<falseDisoveryRate> , false discovery rate threshold for peak calling\n',file=stderr())
+	cat('-npeak=<numPeaks>, threshold on number of peaks to call\n',file=stderr())
+	cat('-tmpdir=<tempdir> , Temporary directory (if not specified R function tempdir() is used)\n',file=stderr())
+	cat('-filtchr=<chrnamePattern> , Pattern to use to remove tags that map to specific chromosomes e.g. _ will remove all tags that map to chromosomes with _ in their name\n',file=stderr())
+	cat('OUTPUT ARGUMENTS\n',file=stderr())
+	cat('-odir=<outputDirectory> name of output directory (If not set same as ChIP file directory is used)\n',file=stderr())
+	cat('-savn=<narrowpeakfilename> OR -savn NarrowPeak file name (fixed width peaks)\n',file=stderr())
+	cat('-savr=<regionpeakfilename> OR -savr RegionPeak file name (variable width peaks with regions of enrichment)\n',file=stderr())
+	cat('-savd=<rdatafile> OR -savd, save Rdata file\n',file=stderr())
+	cat('-savp=<plotdatafile> OR -savp, save cross-correlation plot\n',file=stderr())
+	cat('-out=<resultfile>, append peakshift/phantomPeak results to a file\n',file=stderr())
+	cat('     format:Filename<tab>numReads<tab>estFragLen<tab>corr_estFragLen<tab>PhantomPeak<tab>corr_phantomPeak<tab>argmin_corr<tab>min_corr<tab>phantomPeakCoef<tab>relPhantomPeakCoef<tab>QualityTag)\n',file=stderr())
+	cat('-rf, if plot or rdata or narrowPeak file exists replace it. If not used then the run is aborted if the plot or Rdata or narrowPeak file exists\n',file=stderr())
+	cat('-clean, if present will remove the original chip and control files after reading them in. CAUTION: Use only if the script calling run_spp.R is creating temporary files\n',file=stderr())
+} # end: print.usage()
+
+get.file.parts <- function(file.fullpath) {
+# ===================================
+# Function will take a file name with path and split the file name into
+# path, fullname, name and ext
+# ===================================
+	if (! is.character(file.fullpath)) {
+		stop('File name must be a string')
+	}
+
+	file.parts <- strsplit(as.character(file.fullpath), .Platform$file.sep, fixed=TRUE)[[1]] # split on file separator
+
+	if (length(file.parts) == 0) { # if empty file name
+		return(list(path='',
+						fullname='',
+						name='',
+						ext='')
+		)
+	} else {
+		if (length(file.parts) == 1) { # if no path then just the file name itself
+			file.path <- '.'
+			file.fullname <- file.parts
+		} else {
+			file.path <- paste(file.parts[1:(length(file.parts)-1)], collapse=.Platform$file.sep) # 1:last-1 token is path
+			file.fullname <- file.parts[length(file.parts)] # last token is filename
+		}
+		file.fullname.parts <- strsplit(file.fullname,'.',fixed=TRUE)[[1]] # split on .
+		if (length(file.fullname.parts) == 1) { # if no extension
+			file.ext <- ''
+			file.name <- file.fullname.parts
+		} else {
+			file.ext <- paste('.', file.fullname.parts[length(file.fullname.parts)], sep="") # add the . to the last token
+			file.name <- paste(file.fullname.parts[1:(length(file.fullname.parts)-1)], collapse=".")
+		}
+		return(list(path=file.path,
+						fullname=file.fullname,
+						name=file.name,
+						ext=file.ext))
+	}
+} # end: get.file.parts()
+
+parse.arguments <- function(args) {
+# ===================================
+# Function will parse arguments
+# ===================================
+	# Set arguments to default values
+	chip.file <- NA  # main ChIP tagAlign/BAM file name
+	isurl.chip.file <- FALSE # flag indicating whether ChIP file is a URL
+	control.file <- NA # control tagAlign/BAM file name
+	isurl.control.file <- FALSE # flag indicating whether control file is a URL
+	sep.min <- -100  # min strand shift
+	sep.max <- 600  # max strand shift
+	sep.bin <- 5    # increment for strand shift
+	sep.peak <- NA # user-defined peak shift
+	exclude.min <- 10 # lowerbound of strand shift exclusion region
+	exclude.max <- NaN # upperbound of strand shift exclusion region
+	n.nodes <- NA # number of parallel processing nodes
+	fdr <- 0.01 # false discovery rate threshold for peak calling
+	npeak <- NA # threshold on number of peaks to call
+	temp.dir <- tempdir() # temporary directory
+	chrname.rm.pattern <- NA # chromosome name pattern used to remove tags
+	output.odir <- NA # Output directory name
+	output.npeak.file <- NA # Output narrowPeak file name
+	output.rpeak.file <- NA # Output regionPeak file name
+	output.rdata.file <- NA # Rdata file
+	output.plot.file <- NA  # cross correlation plot file
+	output.result.file <- NA # result file
+	replace.flag <- FALSE # replace file flag
+	clean.files.flag <- FALSE # file deletion flag
+
+	# Parse arguments
+	for (each.arg in args) {
+
+		if (grepl('^-c=',each.arg)) { #-c=<chip.file>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				chip.file <- arg.split[2] # second part is chip.file
+			} else {
+				stop('No tagAlign/BAM file name provided for parameter -c=')
+			}
+
+		} else if (grepl('^-i=',each.arg)) { #-i=<control.file>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				control.file <- arg.split[2] # second part is control.file
+			} else {
+				stop('No tagAlign/BAM file name provided for parameter -i=')
+			}
+
+		} else if (grepl('^-s=',each.arg)) { #-s=<sep.min>:<sep.bin>:<sep.max>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				sep.vals <- arg.split[2] # second part is sepmin:sepbin:sepmax
+				sep.vals.split <- strsplit(sep.vals,':',fixed=TRUE)[[1]] # split on :
+				if (length(sep.vals.split) != 3) { # must have 3 parts
+					stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
+				} else {
+					if (any(is.na(as.numeric(sep.vals.split)))) { # check that sep vals are numeric
+						stop('Strand shift limits must be numeric values')
+					}
+					sep.min <- round(as.numeric(sep.vals.split[1]))
+					sep.bin <- round(as.numeric(sep.vals.split[2]))
+					sep.max <- round(as.numeric(sep.vals.split[3]))
+					if ((sep.min > sep.max) || (sep.bin > (sep.max - sep.min)) || (sep.bin < 0)) {
+						stop('Illegal separation values -s=sepmin:sepbin:sepmax')
+					}
+				}
+			} else {
+				stop('Strand shift limits must be specified as -s=sepmin:sepbin:sepmax')
+			}
+
+		} else if (grepl('^-speak=',each.arg)) { #-speak=<sep.peak> , user-defined cross-correlation peak strandshift
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				sep.peak <- arg.split[2] # second part is <sep.peak>
+				if (is.na(as.numeric(sep.peak))) { # check that sep.peak is numeric
+					stop('-speak=<sep.peak>: User defined peak shift must be numeric')
+				}
+				sep.peak <- as.numeric(sep.peak)
+			} else {
+				stop('User defined peak shift must be provided as -speak=<sep.peak>')
+			}
+
+		} else if (grepl('^-x=',each.arg)) { #-x=<exclude.min>:<exclude.max>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				exclude.vals <- arg.split[2] # second part is excludemin:excludemax
+				exclude.vals.split <- strsplit(exclude.vals,':',fixed=TRUE)[[1]] # split on :
+				if (length(exclude.vals.split) != 2) { # must have 2 parts
+					stop('Exclusion limits must be specified as -x=excludemin:excludemax')
+				} else {
+					if (any(is.na(as.numeric(exclude.vals.split)))) { # check that exclude vals are numeric
+						stop('Exclusion limits must be numeric values')
+					}
+					exclude.min <- round(as.numeric(exclude.vals.split[1]))
+					exclude.max <- round(as.numeric(exclude.vals.split[2]))
+					if (exclude.min > exclude.max) {
+						stop('Illegal exclusion limits -x=excludemin:excludemax')
+					}
+				}
+			} else {
+				stop('Exclusion limits must be specified as -x=excludemin:excludemax')
+			}
+
+		} else if (grepl('^-p=',each.arg)) { #-p=<n.nodes> , number of parallel processing nodes, default=NULL
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				n.nodes <- arg.split[2] # second part is numnodes
+				if (is.na(as.numeric(n.nodes))) { # check that n.nodes is numeric
+					stop('-p=<numnodes>: numnodes must be numeric')
+				}
+				n.nodes <- round(as.numeric(n.nodes))
+			} else {
+				stop('Number of parallel nodes must be provided as -p=<numnodes>')
+			}
+
+		} else if (grepl('^-fdr=',each.arg)) { #-fdr=<fdr> , false discovery rate, default=0.01
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				fdr <- arg.split[2] # second part is fdr
+				if (is.na(as.numeric(fdr))) { # check that fdr is numeric
+					stop('-fdr=<falseDiscoveryRate>: false discovery rate must be numeric')
+				}
+				fdr <- as.numeric(fdr)
+			} else {
+				stop('False discovery rate must be provided as -fdr=<fdr>')
+			}
+
+		} else if (grepl('^-npeak=',each.arg)) { #-npeak=<numPeaks> , number of peaks threshold, default=NA
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				npeak <- arg.split[2] # second part is npeak
+				if (is.na(as.numeric(npeak))) { # check that npeak is numeric
+					stop('-npeak=<numPeaks>: threshold on number of peaks must be numeric')
+				}
+				npeak <- round(as.numeric(npeak))
+			} else {
+				stop('Threshold on number of peaks must be provided as -npeak=<numPeaks>')
+			}
+
+		} else if (grepl('^-tmpdir=',each.arg)) { #-tmpdir=<temp.dir>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				temp.dir <- arg.split[2] # second part is temp.dir
+			} else {
+				stop('No temporary directory provided for parameter -tmpdir=')
+			}
+
+		} else if (grepl('^-filtchr=',each.arg)) { #-filtchr=<chrname.rm.pattern>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				chrname.rm.pattern <- arg.split[2] # second part is chrname.rm.pattern
+			} else {
+				stop('No pattern provided for parameter -filtchr=')
+			}
+
+		} else if (grepl('^-odir=',each.arg)) { #-odir=<output.odir>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				output.odir <- arg.split[2] # second part is output.odir
+			} else {
+				stop('No output directory provided for parameter -odir=')
+			}
+
+		} else if (grepl('^-savn',each.arg)) { # -savn=<output.npeak.file> OR -savn , save narrowpeak
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2])) {
+				output.npeak.file <- arg.split[2] #-savn=
+			} else if (each.arg=='-savn') {
+				output.npeak.file <- NULL # NULL indicates get the name from the main file name
+			} else {
+				stop('Argument for saving narrowPeak file must be -savn or -savn=<filename>')
+			}
+
+		} else if (grepl('^-savr',each.arg)) { # -savr=<output.rpeak.file> OR -savr , save regionpeak
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2])) {
+				output.rpeak.file <- arg.split[2] #-savr=
+			} else if (each.arg=='-savr') {
+				output.rpeak.file <- NULL # NULL indicates get the name from the main file name
+			} else {
+				stop('Argument for saving regionPeak file must be -savr or -savr=<filename>')
+			}
+
+		} else if (grepl('^-savd',each.arg)) { # -savd=<output.rdata.file> OR -savd , save Rdata file
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2])) {
+				output.rdata.file <- arg.split[2] #-savd=
+			} else if (each.arg=='-savd') {
+				output.rdata.file <- NULL # NULL indicates get the name from the main file name
+			} else {
+				stop('Argument for saving Rdata file must be -savd or -savd=<filename>')
+			}
+
+		} else if (grepl('^-savp',each.arg)) { # -savp=<output.plot.file> OR -savp , save cross-correlation plot
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2])) {
+				output.plot.file <- arg.split[2] #-savp=
+			} else if (each.arg=='-savp') {
+				output.plot.file <- NULL # NULL indicates get the name from the main file name
+			} else {
+				stop('Argument for saving Rdata file must be -savp or -savp=<filename>')
+			}
+
+		} else if (grepl('^-out=',each.arg)) { #-out=<output.result.file>
+
+			arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
+			if (! is.na(arg.split[2]) ) {
+				output.result.file <- arg.split[2] # second part is output.result.file
+			} else {
+				stop('No result file provided for parameter -out=')
+			}
+
+		} else if (each.arg == '-rf') {
+
+			replace.flag <- TRUE
+
+		} else if (each.arg == '-clean') {
+
+			clean.files.flag <- TRUE
+
+		} else {
+
+			stop('Illegal argument ',each.arg)
+		}
+	}
+	# End: for loop
+
+	# Check mandatory arguments
+	if (is.na(chip.file)) {
+		stop('-c=<tagAlign/BAMFileName> is a mandatory argument')
+	}
+
+	if (is.na(control.file) && ! is.na(output.npeak.file)) {
+		stop('-i=<tagAlign/BAMFileName> is required for peak calling')
+	}
+
+	# Check if ChIP and control files are URLs
+	if (grepl('^http://',chip.file)) {
+		isurl.chip.file <- TRUE
+	}
+	if (grepl('^http://',control.file)) {
+		isurl.control.file <- TRUE
+	}
+
+	# If ChIP file is a URL output.odir MUST be specified
+	if (isurl.chip.file && is.na(output.odir)) {
+		stop('If ChIP file is a URL, then output directory MUST be specified')
+	}
+
+	# Check that ChIP and control files exist
+	if (isurl.chip.file) {
+		if (system(paste('wget -q --spider',chip.file)) != 0) {
+			stop('ChIP file URL not valid: ',chip.file)
+		}
+	} else if (!file.exists(chip.file)) {
+		stop('ChIP File:',chip.file,' does not exist')
+	}
+
+	if (!is.na(control.file)) {
+		if (isurl.control.file) {
+			if (system(paste('wget -q --spider',control.file)) != 0) {
+				stop('Control file URL not valid: ',control.file)
+			}
+		} else if (!file.exists(control.file)) {
+			stop('Control File:',control.file,' does not exist')
+		}
+	}
+
+	# Correct other arguments
+	if (is.na(output.odir)) { # Reconstruct output.odir if not provided
+		output.odir <- get.file.parts(chip.file)$path
+	}
+
+	if (is.null(output.npeak.file)) { # Reconstruct output.npeak.file if NULL
+		output.npeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.narrowPeak', sep=""))
+	}
+
+	if (is.null(output.rpeak.file)) { # Reconstruct output.rpeak.file if NULL
+		output.rpeak.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '_VS_', get.file.parts(control.file)$name,'.regionPeak', sep=""))
+	}
+
+	if (is.null(output.rdata.file)) { # Reconstruct output.rdata.file if NULL
+		output.rdata.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.Rdata', sep=""))
+	}
+
+	if (is.null(output.plot.file)) { # Reconstruct output.plot.file if NULL
+		output.plot.file <- file.path(output.odir, paste(get.file.parts(chip.file)$name, '.pdf', sep=""))
+	}
+
+	return(list(chip.file=chip.file,
+					isurl.chip.file=isurl.chip.file,
+					control.file=control.file,
+					isurl.control.file=isurl.control.file,
+					sep.range=c(sep.min,sep.bin,sep.max),
+					sep.peak=sep.peak,
+					ex.range=c(exclude.min,exclude.max),
+					n.nodes=n.nodes,
+					fdr=fdr,
+					npeak=npeak,
+					temp.dir=temp.dir,
+					chrname.rm.pattern=chrname.rm.pattern,
+					output.odir=output.odir,
+					output.npeak.file=output.npeak.file,
+					output.rpeak.file=output.rpeak.file,
+					output.rdata.file=output.rdata.file,
+					output.plot.file=output.plot.file,
+					output.result.file=output.result.file,
+					replace.flag=replace.flag,
+					clean.files.flag=clean.files.flag))
+} # end: parse.arguments()
+
+read.align <- function(align.filename) {
+# ===================================
+# Function will read a tagAlign or BAM file
+# ===================================
+	if (grepl('(\\.bam)?.*(\\.tagAlign)',align.filename)) { # if tagalign file
+		chip.data <- read.tagalign.tags(align.filename)
+		# get readlength info
+		tmpDataRows <- read.table(align.filename,nrows=500)
+		chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
+	} else if (grepl('(\\.tagAlign)?.*(\\.bam)',align.filename)) { # if bam file
+		# create BAM file name
+		bam2align.filename <- sub('\\.bam','.tagAlign',align.filename)
+		# generate command to convert bam to tagalign
+		command <- vector(length=2)
+		command[1] <- sprintf("samtools view -F 0x0204 -o - %s",align.filename)
+		command[2] <- paste("awk 'BEGIN{FS=" , '"\t"' , ";OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
+		# command[2] <- paste("awk 'BEGIN{OFS=", '"\t"} {if (and($2,16) > 0) {print $3,($4-1),($4-1+length($10)),"N","1000","-"} else {print $3,($4-1),($4-1+length($10)),"N","1000","+"}}', "' 1> ", bam2align.filename, sep="")
+		command <- paste(command,collapse=" | ")
+		# Run command
+		status <- system(command,intern=FALSE,ignore.stderr=FALSE)
+		if ((status != 0) || !file.exists(bam2align.filename)) {
+			cat(sprintf("Error converting BAM to tagalign file: %s\n",align.filename),file=stderr())
+			q(save="no",status=1)
+		}
+		# read converted BAM file
+		chip.data <- read.tagalign.tags(bam2align.filename)
+		# get readlength info
+		tmpDataRows <- read.table(bam2align.filename,nrows=500)
+		chip.data$read.length <- round(median(tmpDataRows$V3 - tmpDataRows$V2))
+		# delete temporary tagalign file
+		file.remove(bam2align.filename)
+	} else {
+		cat(sprintf("Error:Unknown file format for file:%s\n",align.fname),file=stderr())
+		q(save="no",status=1)
+	}
+	return(chip.data)
+} # end: read.align()
+
+print.run.params <- function(params){
+# ===================================
+# Output run parameters
+# ===================================
+	cat('################\n',file=stdout())
+	cat(iparams$chip.file,
+			iparams$control.file,
+			iparams$sep.range,
+			iparams$sep.peak,
+			iparams$ex.range,
+			iparams$n.nodes,
+			iparams$fdr,
+			iparams$npeak,
+			iparams$output.odir,
+			iparams$output.npeak.file,
+			iparams$output.rpeak.file,
+			iparams$output.rdata.file,
+			iparams$output.plot.file,
+			iparams$output.result.file,
+			iparams$replace.flag,
+			labels=c('ChIP data:','Control data:', 'strandshift(min):','strandshift(step):','strandshift(max)','user-defined peak shift',
+					'exclusion(min):','exclusion(max):','num parallel nodes:','FDR threshold:','NumPeaks Threshold:','Output Directory:',
+					'narrowPeak output file name:', 'regionPeak output file name:', 'Rdata filename:',
+					'plot pdf filename:','result filename:','Overwrite files?:'),
+			fill=18,
+			file=stdout())
+	cat('\n',file=stdout())
+} # end: print.run.parameters()
+
+check.replace.flag <- function(params){
+# ===================================
+# Check if files exist
+# ===================================
+# If replace.flag is NOT set, check if output files exist and abort if necessary
+	if (! iparams$replace.flag) {
+		if (! is.na(iparams$output.npeak.file)) {
+			if (file.exists(iparams$output.npeak.file)) {
+				cat('narrowPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
+				q(save="no",status=1)
+			}
+		}
+		if (! is.na(iparams$output.rpeak.file)) {
+			if (file.exists(iparams$output.rpeak.file)) {
+				cat('regionPeak file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
+				q(save="no",status=1)
+			}
+		}
+		if (! is.na(iparams$output.plot.file)) {
+			if (file.exists(iparams$output.plot.file)) {
+				cat('Plot file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
+				q(save="no",status=1)
+			}
+		}
+		if (! is.na(iparams$output.rdata.file)) {
+			if (file.exists(iparams$output.rdata.file)) {
+				cat('Rdata file already exists. Aborting Run. Use -rf if you want to overwrite\n',file=stderr())
+				q(save="no",status=1)
+			}
+		}
+	}
+}
+
+# #############################################################################
+# MAIN FUNCTION
+# #############################################################################
+
+# Check number of arguments
+minargs = 1;
+maxargs = 17;
+if (nargs < minargs | nargs > maxargs) {
+	print.usage()
+	q(save="no",status=1)
+}
+
+# Parse arguments
+# iparams$chip.file
+# iparams$isurl.chip.file
+# iparams$control.file
+# iparams$isurl.control.file
+# iparams$sep.range
+# iparams$sep.peak
+# iparams$ex.range
+# iparams$n.nodes
+# iparams$fdr
+# iparams$npeak
+# iparams$temp.dir
+# iparams$output.odir
+# iparams$output.npeak.file
+# iparams$output.rpeak.file
+# iparams$output.rdata.file
+# iparams$output.plot.file
+# iparams$output.result.file
+# iparams$replace.flag
+# iparams$clean.files.flag
+iparams <- parse.arguments(args)
+
+# Print run parameters
+print.run.params(iparams)
+
+# Check if output files exist
+check.replace.flag(iparams)
+
+# curr.chip.file and curr.control.file always point to the original ChIP and control files on disk
+# ta.chip.filename & ta.control.filename always point to the final but temporary versions of the ChIP and control files that will be passed to read.align
+
+# Download ChIP and control files if necessary to temp.dir
+if (iparams$isurl.chip.file) {
+	curr.chip.file <- file.path(iparams$temp.dir, get.file.parts(iparams$chip.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
+	cat('Downloading ChIP file:',iparams$chip.file,"\n",file=stdout())
+	if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$chip.file)) != 0) {
+		stop('Error downloading ChIP file:',iparams$chip.file)
+	}
+} else {
+	curr.chip.file <- iparams$chip.file # file is in original directory
+}
+
+if (iparams$isurl.control.file) {
+	curr.control.file <- file.path(iparams$temp.dir, get.file.parts(iparams$control.file)$fullname) # file is downloaded to temp.dir. Has same name as URL suffix
+	cat('Downloading control file:',iparams$control.file,"\n",file=stdout())
+	if (system(paste('wget -N -q -P',iparams$temp.dir,iparams$control.file)) != 0) {
+		stop('Error downloading Control file:',iparams$control.file)
+	}
+} else {
+	curr.control.file <- iparams$control.file # file is in original directory
+}
+
+# unzip ChIP and input files if required AND copy to temp directory
+if (get.file.parts(curr.chip.file)$ext == '.gz') {
+	ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
+	cat('Decompressing ChIP file\n',file=stdout())
+	if (system(paste("gunzip -c",curr.chip.file,">",ta.chip.filename)) != 0) {
+		stop('Unable to decompress file:', iparams$chip.file)
+	}
+	if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
+		file.remove(curr.chip.file)
+	}
+} else {
+	ta.chip.filename <- tempfile(get.file.parts(curr.chip.file)$fullname, tmpdir=iparams$temp.dir)
+	if (iparams$clean.files.flag) {
+		file.rename(curr.chip.file,ta.chip.filename) # move file to temp.dir/[filename][randsuffix]
+	} else {
+		file.copy(curr.chip.file,ta.chip.filename) # copy file to temp.dir/[filename][randsuffix]
+	}
+}
+
+if (! is.na(iparams$control.file)) {
+	if (get.file.parts(curr.control.file)$ext == '.gz') {
+		ta.control.filename <- tempfile(get.file.parts(curr.control.file)$name, tmpdir=iparams$temp.dir) # unzip file to temp.dir/[filename with .gz removed][randsuffix]
+		cat('Decompressing control file\n',file=stdout())
+		if (system(paste("gunzip -c",curr.control.file,">",ta.control.filename)) != 0) {
+			stop('Unable to decompress file:', iparams$control.file)
+		}
+		if (iparams$clean.files.flag) { # Remove original file if clean.files.flag is set
+			file.remove(curr.control.file)
+		}
+	} else {
+		ta.control.filename <- tempfile(get.file.parts(curr.control.file)$fullname, tmpdir=iparams$temp.dir) # copy file to temp.dir/[filename][randsuffix]
+
+		if (iparams$clean.files.flag) {
+			file.rename(curr.control.file,ta.control.filename) # move file to temp.dir/[filename][randsuffix]
+		} else {
+			file.copy(curr.control.file,ta.control.filename) # copy file to temp.dir/[filename][randsuffix]
+		}
+	}
+}
+
+# Remove downloaded files
+if (iparams$isurl.chip.file & file.exists(curr.chip.file))  {
+	file.remove(curr.chip.file)
+}
+
+if (! is.na(iparams$control.file)) {
+	if (iparams$isurl.control.file & file.exists(curr.control.file)) {
+		file.remove(curr.control.file)
+	}
+}
+
+# Load SPP library
+library(spp)
+
+# Read ChIP tagAlign/BAM files
+cat("Reading ChIP tagAlign/BAM file",iparams$chip.file,"\n",file=stdout())
+chip.data <- read.align(ta.chip.filename)
+cat("ChIP data read length",chip.data$read.length,"\n",file=stdout())
+file.remove(ta.chip.filename) # Delete temporary file
+if (length(chip.data$tags)==0) {
+	stop('Error in ChIP file format:', iparams$chip.file)
+}
+# Remove illegal chromosome names
+if (! is.na(iparams$chrname.rm.pattern)) {
+	selectidx <- which(grepl(iparams$chrname.rm.pattern,names(chip.data$tags))==FALSE)
+	chip.data$tags <- chip.data$tags[selectidx]
+	chip.data$quality <- chip.data$quality[selectidx]
+}
+chip.data$num.tags <- sum(unlist(lapply(chip.data$tags,function(d) length(d))))
+
+# Read Control tagAlign/BAM files
+if (! is.na(iparams$control.file)) {
+	cat("Reading Control tagAlign/BAM file",iparams$control.file,"\n",file=stdout())
+	control.data <- read.align(ta.control.filename)
+	file.remove(ta.control.filename) # Delete temporary file
+	if (length(control.data$tags)==0) {
+		stop('Error in control file format:', iparams$chip.file)
+	}
+	cat("Control data read length",control.data$read.length,"\n",file=stdout())
+	# Remove illegal chromosome names
+	if (! is.na(iparams$chrname.rm.pattern)) {
+		selectidx <- which(grepl(iparams$chrname.rm.pattern,names(control.data$tags))==FALSE)
+		control.data$tags <- control.data$tags[selectidx]
+		control.data$quality <- control.data$quality[selectidx]
+	}
+	control.data$num.tags <- sum(unlist(lapply(control.data$tags,function(d) length(d))))
+}
+
+# Open multiple processes if required
+if (is.na(iparams$n.nodes)) {
+	cluster.nodes <- NULL
+} else {
+	library(snow)
+	cluster.nodes <- makeCluster(iparams$n.nodes)
+}
+
+# #################################
+# Calculate cross-correlation for various strand shifts
+# #################################
+cat("Calculating peak characteristics\n",file=stdout())
+# crosscorr
+# $cross.correlation : Cross-correlation profile as an $x/$y data.frame
+# $peak : Position ($x) and height ($y) of automatically detected cross-correlation peak.
+# $whs: Optimized window half-size for binding detection (based on the width of the cross-correlation peak)
+crosscorr <- get.binding.characteristics(chip.data,
+		srange=iparams$sep.range[c(1,3)],
+		bin=iparams$sep.range[2],
+		accept.all.tags=T,
+		cluster=cluster.nodes)
+if (!is.na(iparams$n.nodes)) {
+	stopCluster(cluster.nodes)
+}
+
+# Smooth the cross-correlation curve if required
+cc <- crosscorr$cross.correlation
+crosscorr$min.cc <- crosscorr$cross.correlation[ which.min(crosscorr$cross.correlation$y) , ] # minimum value and shift of cross-correlation
+cat("Minimum cross-correlation value", crosscorr$min.cc$y,"\n",file=stdout())
+cat("Minimum cross-correlation shift", crosscorr$min.cc$x,"\n",file=stdout())
+sbw <- 2*floor(ceiling(5/iparams$sep.range[2]) / 2) + 1 # smoothing bandwidth
+cc$y <- runmean(cc$y,sbw,alg="fast")
+
+# Compute cross-correlation peak
+bw <- ceiling(2/iparams$sep.range[2]) # crosscorr[i] is compared to crosscorr[i+/-bw] to find peaks
+peakidx <- (diff(cc$y,bw)>=0) # cc[i] > cc[i-bw]
+peakidx <- diff(peakidx,bw)
+peakidx <- which(peakidx==-1) + bw
+
+# exclude peaks from the excluded region
+if ( is.nan(iparams$ex.range[2]) ) {
+	iparams$ex.range[2] <- chip.data$read.length+10
+}
+peakidx <- peakidx[(cc$x[peakidx] < iparams$ex.range[1]) | (cc$x[peakidx] > iparams$ex.range[2])]
+cc <- cc[peakidx,]
+
+# Find max peak position and other peaks within 0.9*max_peakvalue that are further away from maxpeakposition
+maxpeakidx <- which.max(cc$y)
+maxpeakshift <- cc$x[maxpeakidx]
+maxpeakval <- cc$y[maxpeakidx]
+peakidx <-which((cc$y >= 0.9*maxpeakval) & (cc$x >= maxpeakshift))
+cc <- cc[peakidx,]
+
+# sort the peaks and get the top 3
+sortidx <- order(cc$y,decreasing=TRUE)
+sortidx <- sortidx[c(1:min(3,length(sortidx)))]
+cc.peak <- cc[sortidx,]
+
+# Override peak shift if user supplies peak shift
+if (! is.na(iparams$sep.peak)) {
+	cc.peak <- approx(crosscorr$cross.correlation$x,crosscorr$cross.correlation$y,iparams$sep.peak,rule=2)
+}
+cat("Peak cross-correlation value", paste(cc.peak$y,collapse=","),"\n",file=stdout())
+cat("Peak strand shift",paste(cc.peak$x,collapse=","),"\n",file=stdout())
+
+# Reset values in crosscorr
+crosscorr$peak$x <- cc.peak$x[1]
+crosscorr$peak$y <- cc.peak$y[1]
+
+# Compute window half size
+whs.thresh <- crosscorr$min.cc$y + (crosscorr$peak$y - crosscorr$min.cc$y)/3
+crosscorr$whs <- max(crosscorr$cross.correlation$x[crosscorr$cross.correlation$y >= whs.thresh])
+cat("Window half size",crosscorr$whs,"\n",file=stdout())
+
+# Compute phantom peak coefficient
+ph.peakidx <- which( ( crosscorr$cross.correlation$x >= ( chip.data$read.length - round(2*iparams$sep.range[2]) ) ) &
+                     ( crosscorr$cross.correlation$x <= ( chip.data$read.length + round(1.5*iparams$sep.range[2]) ) ) )
+ph.peakidx <- ph.peakidx[ which.max(crosscorr$cross.correlation$y[ph.peakidx]) ]
+crosscorr$phantom.cc <- crosscorr$cross.correlation[ph.peakidx,]
+cat("Phantom peak location",crosscorr$phantom.cc$x,"\n",file=stdout())
+cat("Phantom peak Correlation",crosscorr$phantom.cc$y,"\n",file=stdout())
+crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$phantom.cc$y
+crosscorr$phantom.coeff <- crosscorr$peak$y / crosscorr$min.cc$y
+cat("Normalized cross-correlation coefficient (NCCC)",crosscorr$phantom.coeff,"\n",file=stdout())
+crosscorr$rel.phantom.coeff <- (crosscorr$peak$y - crosscorr$min.cc$y) / (crosscorr$phantom.cc$y - crosscorr$min.cc$y)
+cat("Relative Cross correlation Coefficient (RCCC)",crosscorr$rel.phantom.coeff,"\n",file=stdout())
+crosscorr$phantom.quality.tag <- NA
+if ( (crosscorr$rel.phantom.coeff >= 0) & (crosscorr$rel.phantom.coeff < 0.25) ) {
+	crosscorr$phantom.quality.tag <- -2
+} else if ( (crosscorr$rel.phantom.coeff >= 0.25) & (crosscorr$rel.phantom.coeff < 0.5) ) {
+	crosscorr$phantom.quality.tag <- -1
+} else if ( (crosscorr$rel.phantom.coeff >= 0.5) & (crosscorr$rel.phantom.coeff < 1) ) {
+	crosscorr$phantom.quality.tag <- 0
+} else if ( (crosscorr$rel.phantom.coeff >= 1) & (crosscorr$rel.phantom.coeff < 1.5) ) {
+	crosscorr$phantom.quality.tag <- 1
+} else if ( (crosscorr$rel.phantom.coeff >= 1.5) ) {
+	crosscorr$phantom.quality.tag <- 2
+}
+cat("Phantom Peak Quality Tag",crosscorr$phantom.quality.tag,"\n",file=stdout())
+
+# Output result to result file if required
+#Filename\tnumReads\tPeak_shift\tPeak_Correlation\tRead_length\tPhantomPeak_Correlation\tMin_Correlation_Shift\tMin_Correlation\tNormalized_CrossCorrelation_Coefficient\tRelative_CrossCorrelation_Coefficient\tQualityTag)
+if (! is.na(iparams$output.result.file)) {
+	cat(get.file.parts(iparams$chip.file)$fullname,
+			chip.data$num.tags,
+			paste(cc.peak$x,collapse=","),
+			paste(cc.peak$y,collapse=","),
+			crosscorr$phantom.cc$x,
+			crosscorr$phantom.cc$y,
+			crosscorr$min.cc$x,
+			crosscorr$min.cc$y,
+			crosscorr$phantom.coeff,
+			crosscorr$rel.phantom.coeff,
+			crosscorr$phantom.quality.tag,
+			sep="\t",
+			file=iparams$output.result.file,
+			append=TRUE)
+	cat("\n",
+			file=iparams$output.result.file,
+			append=TRUE)
+}
+
+# Save figure if required
+if (! is.na(iparams$output.plot.file)) {
+	pdf(file=iparams$output.plot.file,width=5,height=5)
+	par(mar = c(4,3.5,2,0.5), mgp = c(1.5,0.5,0), cex = 0.8);
+	plot(crosscorr$cross.correlation,
+			type='l',
+			xlab=sprintf("strand-shift (%s)",paste(cc.peak$x,collapse=",")),
+			ylab="cross-correlation")
+	abline(v=cc.peak$x,lty=2,col=2)
+	abline(v=crosscorr$phantom.cc$x,lty=2,col=4)
+	title(main=get.file.parts(iparams$chip.file)$fullname,
+	      sub=sprintf("NSC=%g,RSC=%g,Qtag=%d",crosscorr$phantom.coeff,crosscorr$rel.phantom.coeff,crosscorr$phantom.quality.tag))
+	dev.off();
+}
+
+# Save RData file if required
+if (! is.na(iparams$output.rdata.file)) {
+	save(iparams,
+			crosscorr,
+			cc.peak,
+			file=iparams$output.rdata.file);
+}
+
+# #################################
+# Call peaks
+# #################################
+
+if ( !is.na(iparams$output.npeak.file) || !is.na(iparams$output.rpeak.file) ) {
+
+	# Remove local tag anomalies
+	cat('Removing read stacks\n',file=stdout())
+	chip.data <- remove.local.tag.anomalies(chip.data$tags)
+	control.data <- remove.local.tag.anomalies(control.data$tags)
+
+	# Open multiple processes if required
+	if (is.na(iparams$n.nodes)) {
+		cluster.nodes <- NULL
+	} else {
+		cluster.nodes <- makeCluster(iparams$n.nodes)
+	}
+
+	# Find peaks
+	cat('Finding peaks\n',file=stdout())
+	if (!is.na(iparams$npeak)) {
+		iparams$fdr <- 0.96
+	}
+	narrow.peaks <- find.binding.positions(signal.data=chip.data,control.data=control.data,fdr=iparams$fdr,method=tag.lwcc,whs=crosscorr$whs,cluster=cluster.nodes)
+	if (!is.na(iparams$n.nodes)) {
+		stopCluster(cluster.nodes)
+	}
+	cat(paste("Detected",sum(unlist(lapply(narrow.peaks$npl,function(d) length(d$x)))),"peaks"),"\n",file=stdout())
+
+	# Write to narrowPeak file
+	if (!is.na(iparams$output.npeak.file)) {
+		write.narrowpeak.binding(narrow.peaks,iparams$output.npeak.file,margin=round(crosscorr$whs/2))
+		#system(paste('gzip -f ',iparams$output.npeak.file))
+	}
+
+	# Compute and write regionPeak file
+	if (!is.na(iparams$output.rpeak.file)) {
+		region.peaks <- add.broad.peak.regions(chip.data,control.data,narrow.peaks,window.size=max(50,round(crosscorr$whs/4)),z.thr=10)
+		write.narrowpeak.binding(region.peaks,iparams$output.rpeak.file,margin=round(crosscorr$whs/2))
+		#system(paste('gzip -f ',iparams$output.rpeak.file))
+	}
+
+	# Save Rdata file
+	if (! is.na(iparams$output.rdata.file)) {
+		save(iparams,
+				crosscorr,
+				cc.peak,
+				narrow.peaks,
+				region.peaks,
+				file=iparams$output.rdata.file);
+	}
+
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp_wrapper.py	Thu Oct 17 12:39:45 2013 -0400
@@ -0,0 +1,102 @@
+#purpose: python wrapper to run spp
+#author: Ziru Zhou
+#Date: November 2012
+#####################
+
+import sys, subprocess, tempfile, shutil, glob, os, os.path, gzip
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "simplejson" )
+import simplejson
+
+CHUNK_SIZE = 1024
+
+def main():
+    options = simplejson.load( open( sys.argv[1] ) )
+    output_narrow_peak = sys.argv[2]
+    output_region_peak = sys.argv[3]
+    output_peakshift_file = sys.argv[4]
+    output_rdata_file = sys.argv[5]
+    output_plot_file = sys.argv[6]
+    output_default_file = sys.argv[7]
+    script_path = sys.argv[8]
+
+    #set file extensions and set mandatory options
+    #======================================================================================
+    experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name
+
+    chip_file = "%s.bam" % (options['chip_file'])
+    subprocess.call(["cp", options['chip_file'], chip_file])
+
+    cmdline = "Rscript %s/run_spp.R -c=%s" % (script_path, chip_file )
+    if 'input_file' in options:
+        input_file = "%s.bam" % (options['input_file'])
+        subprocess.call(["cp", options['input_file'], input_file])
+        cmdline = "%s -i=%s" % ( cmdline, input_file )
+
+    #set additional options
+    #========================================================================================
+    if (options['action'] == "cross_correlation"):
+        cmdline = "%s %s %s %s > default_output.txt" % ( cmdline, options['savp'], options['out'], options['rf'] )
+    elif (options['action'] == "peak_calling"):
+        cmdline = "%s -fdr=%s -npeak=%s %s %s %s %s %s > default_output.txt" % ( cmdline, options['fdr'], options['npeak'], options['savr'], options['savd'], options['savn'], options['savp'], options['rf'] )
+    elif (options['action'] == "idr"):
+        cmdline = "%s -npeak=%s %s %s %s %s > default_output.txt" % ( cmdline, options['npeak'], options['savr'], options['savp'], options['out'], options['rf'] )
+    elif (options['action'] == "custom"):
+        cmdline = "%s -s=%s %s -x=%s -fdr=%s -npeak=%s %s %s" % ( cmdline, options['s'], options['speak'], options['x'], options['fdr'], options['npeak'], options['filtchr'], options['rf'] )
+        cmdline = "%s %s  %s %s %s %s > default_output.txt" % ( cmdline, options['out'], options['savn'], options['savr'], options['savp'], options['savd'] )
+
+    #run cmdline
+    #========================================================================================
+    #tmp_dir = tempfile.mkdtemp()
+    tmp_dir = os.path.dirname(options['chip_file'])
+    stderr_name = tempfile.NamedTemporaryFile().name
+    proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
+    proc.wait()
+
+    #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup
+    #========================================================================================
+    if proc.returncode:
+        stderr_f = open( stderr_name )
+        while True:
+            chunk = stderr_f.read( CHUNK_SIZE )
+            if not chunk:
+                stderr_f.close()
+                break
+            sys.stderr.write( chunk )
+
+
+    #determine if the outputs are there, copy them to the appropriate dir and filename
+    #========================================================================================
+    chip_name = os.path.basename(options['chip_file'])
+    input_name = os.path.basename(options['input_file'])
+
+    created_default_file =  os.path.join( tmp_dir, "default_output.txt" )
+    if os.path.exists( created_default_file ):
+        shutil.move( created_default_file, output_default_file )
+
+    created_narrow_peak =  os.path.join( tmp_dir, "%s_VS_%s.narrowPeak" % (chip_name, input_name) )
+    if os.path.exists( created_narrow_peak ):
+        shutil.move( created_narrow_peak, output_narrow_peak )
+
+    created_region_peak =  os.path.join( tmp_dir, "%s_VS_%s.regionPeak" % (chip_name, input_name) )
+    if os.path.exists( created_region_peak ):
+        shutil.move( created_region_peak, output_region_peak )
+
+    created_peakshift_file =  os.path.join( tmp_dir, "peakshift.txt" )
+    if os.path.exists( created_peakshift_file ):
+        shutil.move( created_peakshift_file, output_peakshift_file )
+
+    created_rdata_file =  os.path.join( tmp_dir, "%s.Rdata" % chip_name )
+    if os.path.exists( created_rdata_file ):
+        shutil.move( created_rdata_file, output_rdata_file )
+
+    created_plot_file =  os.path.join( tmp_dir, "%s.pdf" % chip_name )
+    if os.path.exists( created_plot_file ):
+        shutil.move( created_plot_file, output_plot_file )
+
+
+    os.unlink( stderr_name )
+    #os.rmdir( tmp_dir )
+
+if __name__ == "__main__": main()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp_wrapper.xml	Thu Oct 17 12:39:45 2013 -0400
@@ -0,0 +1,204 @@
+<tool id="spp_peak_calling" name="SPP" version="1.11.0">
+  <requirements>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+  </requirements>
+  <description>SPP cross-correlation analysis package</description>
+  <command interpreter="python">spp_wrapper.py $options_file $output_narrow_peak $output_region_peak $output_peakshift_file $output_rdata_file $output_plot_file $output_default_file \$SCRIPT_PATH</command>
+  <inputs>
+    <!--experiment name and inputs-->
+    <param name="experiment_name" type="text" value="SPP in Galaxy" size="50" label="Experiment Name"/>
+
+    <!--select function to perform-->
+    <conditional name="major_command">
+      <param name="major_command_selector" type="select" label="Select action to be performed">
+	<option value="cross_correlation">Determine strand cross-correlation peak</option>
+	<option value="peak_calling">Peak calling</option>
+	<option value="idr">IDR analysis</option>
+	<option value="custom">Custom settings</option>
+      </param>
+      <when value="cross_correlation">
+	<!--cross correlation options-->
+    	<param name="input_chipseq_file1" type="data" format="bam" label="ChIP-Seq Tag File" />
+    	<param name="input_control_file1" type="data" format="bam" optional="True" label="ChIP-Seq Control File" />
+
+	<param name="replace" truevalue="-rf" falsevalue="" type="boolean" checked="True" label="Replace existing plot, Rdata, or narrowpeak file (execution aborts if file exists and option not enabled)" help="(-rf)"/>
+
+	<param name="save_peakshift_file" truevalue="-out=peakshift.txt" falsevalue="" type="boolean" checked="True" label="Save peakshift file" help="(-out)"/>
+	<param name="save_plot_file" truevalue="-savp" falsevalue="" type="boolean" checked="True" label="Save plot file" help="(-savp)"/>
+      </when>
+      <when value="peak_calling">
+	<!--peak calling options-->
+	<param name="input_chipseq_file1" type="data" format="bam" label="ChIP-Seq Tag File" />
+	<param name="input_control_file1" type="data" format="bam" label="ChIP-Seq Control File" />
+
+	<param name="fdr" type="text" label="False discovery rate threshold" value="0" help="default=0 (-fdr)"/>
+	<param name="num_peaks" type="text" label="Threshold on number of peaks to call" value="0" help="default=0 (-npeak)"/>
+	<param name="replace" truevalue="-rf" falsevalue="" type="boolean" checked="True" label="Replace existing plot, Rdata, or narrowpeak file (execution aborts if file exists and option not enabled)" help="(-rf)"/>
+
+	<param name="save_regionpeak_file" truevalue="-savr" falsevalue="" type="boolean" checked="True" label="Save regionpeak file " help="(-savr)"/>
+	<param name="save_rdata_file" truevalue="-savd" falsevalue="" type="boolean" checked="True" label="Save Rdata file" help="(-savd)"/>
+	<param name="save_narrowpeak_file" truevalue="-savn" falsevalue="" type="boolean" checked="True" label="Save narrowpeak file" help="(-savn)"/>
+	<param name="save_plot_file" truevalue="-savp" falsevalue="" type="boolean" checked="True" label="Save plot file" help="(-savp)"/>
+      </when>
+      <when value="idr">
+	<!--idr options-->
+    	<param name="input_chipseq_file1" type="data" format="bam" label="ChIP-Seq Tag File" />
+    	<param name="input_control_file1" type="data" format="bam" label="ChIP-Seq Control File" />
+
+	<param name="num_peaks" type="integer" label="Threshold on number of peaks to call" value="300000" help="default=300000 (-npeak)"/>
+	<param name="replace" truevalue="-rf" falsevalue="" type="boolean" checked="True" label="Replace existing plot, Rdata, or narrowpeak file (execution aborts if file exists and option not enabled)" help="(-rf)"/>
+
+	<param name="save_regionpeak_file" truevalue="-savr" falsevalue="" type="boolean" checked="True" label="Save regionpeak file" help="(-savr)"/>
+	<param name="save_peakshift_file" truevalue="-out=peakshift.txt" falsevalue="" type="boolean" checked="True" label="Save peakshift file" help="(-out)"/>
+	<param name="save_plot_file" truevalue="-savp" falsevalue="" type="boolean" checked="True" label="Save plot file" help="(-savp)"/>
+      </when>
+      <when value="custom">
+	<!--custom settings, includes all relevant options here-->
+    	<param name="input_chipseq_file1" type="data" format="bam" label="ChIP-Seq Tag File" />
+    	<param name="input_control_file1" type="data" format="bam" optional="True" label="ChIP-Seq Control File" />
+
+	<param name="strand_shift" type="text" label="Strand shifts at which cross-correlation is evaluated" size="30" value="-100:5:600" help="default=-100:5:600 (-s)"/>
+	<param name="excluded_strand_shift" type="text" label="Strand shifts to exclude" value="10:10" help="default=10:(readlen+10) (-x)"/>
+	<param name="user_defined_strpeak" type="text" label="User defined cross-correlation peak strand shift" help="(-speak)"/>
+	<param name="num_peaks" type="integer" label="Threshold on number of peaks to call" value="0" help="default=0 (-npeak)"/>
+	<param name="fdr" type="integer" label="False discovery rate threshold" value="0" help="default=0 (-fdr)"/>
+	<param name="filter_char" type="text" label="Pattern to use to remove tags that map to specific chromosomes" help="(-filtchr)"/>
+	<param name="replace" truevalue="-rf" falsevalue="" type="boolean" checked="False" label="Replace existing plot, Rdata, or narrowpeak file (execution aborts if file exists and option not enabled)" help="(-rf)"/>
+
+	<param name="save_regionpeak_file" truevalue="-savr" falsevalue="" type="boolean" checked="False" label="Save regionpeak file" help="(-savr)"/>
+	<param name="save_peakshift_file" truevalue="-out=peakshift.txt" falsevalue="" type="boolean" checked="False" label="Save peakshift file" help="(-out)"/>
+	<param name="save_rdata_file" truevalue="-savd" falsevalue="" type="boolean" checked="False" label="Save Rdata file" help="(-savd)"/>
+	<param name="save_narrowpeak_file" truevalue="-savn" falsevalue="" type="boolean" checked="False" label="Save narrowpeak file" help="(-savn)"/>
+	<param name="save_plot_file" truevalue="-savp" falsevalue="" type="boolean" checked="False" label="Save plot file" help="(-savp)"/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output_default_file" format="txt" label="${tool.name} on ${on_string}"/>
+    <data name="output_narrow_peak" format="txt" label="${tool.name} on ${on_string} (narrowpeaks)">
+	<filter>major_command['save_narrowpeak_file'] is True</filter>
+	<filter>major_command['major_command_selector'] == 'peak_calling' or major_command['major_command_selector'] == 'custom'</filter>
+    </data>
+    <data name="output_plot_file" format="pdf" label="${tool.name} on ${on_string} (plot)">
+	<filter>major_command['save_plot_file'] is True</filter>
+    </data>
+    <data name="output_region_peak" format="txt" label="${tool.name} on ${on_string} (regionpeaks)">
+	<filter>major_command['save_regionpeak_file'] is True</filter>
+    	<filter>major_command['major_command_selector'] == 'peak_calling' or major_command['major_command_selector'] == 'custom' or major_command['major_command_selector'] == 'idr' </filter>
+    </data>
+    <data name="output_peakshift_file" format="txt" label="${tool.name} on ${on_string} (peakshift/phantompeak)">
+	<filter>major_command['save_peakshift_file'] is True</filter>
+    	<filter>major_command['major_command_selector'] == 'cross_correlation' or major_command['major_command_selector'] == 'custom' or major_command['major_command_selector'] == 'idr' </filter>
+    </data>
+    <data name="output_rdata_file" format="txt" label="${tool.name} on ${on_string} (Rdata)">
+	<filter>major_command['save_rdata_file'] is True</filter>
+	<filter>major_command['major_command_selector'] == 'peak_calling' or major_command['major_command_selector'] == 'custom' </filter>
+    </data>
+  </outputs>
+
+  <configfiles>
+    <configfile name="options_file">&lt;%
+import simplejson
+%&gt;
+#set $__options ={ 'experiment_name':str($experiment_name), 'chip_file':str($major_command.input_chipseq_file1) }
+
+#if str( $major_command.input_control_file1 ) != 'None':
+	#set $__options['input_file'] = str( $major_command.input_control_file1 )
+#end if
+
+##=============================================================================
+#if str($major_command.major_command_selector) == 'cross_correlation':
+	#set $__options['action'] = str( "cross_correlation" )
+ 	#set $__options['rf'] = str( $major_command.replace )
+
+	#set $__options['out'] = str( $major_command.save_peakshift_file )
+	#set $__options['savp'] = str( $major_command.save_plot_file )
+#end if
+##=============================================================================
+#if str($major_command.major_command_selector) == 'peak_calling':
+	#set $__options['action'] = str( "peak_calling" )
+	#set $__options['fdr'] = str( $major_command.fdr )
+	#set $__options['npeak'] = str( $major_command.num_peaks )
+ 	#set $__options['rf'] = str( $major_command.replace )
+
+	#set $__options['savr'] = str( $major_command.save_regionpeak_file )
+	#set $__options['savd'] = str( $major_command.save_rdata_file )
+	#set $__options['savn'] = str( $major_command.save_narrowpeak_file )
+	#set $__options['savp'] = str( $major_command.save_plot_file )
+#end if
+##=============================================================================
+#if str($major_command.major_command_selector) == 'idr':
+	#set $__options['action'] = str( "idr" )
+	#set $__options['npeak'] = int( $major_command.num_peaks  )
+	#set $__options['rf'] = str( $major_command.replace )
+
+	#set $__options['savr'] = str( $major_command.save_regionpeak_file )
+	#set $__options['out'] = str( $major_command.save_peakshift_file )
+	#set $__options['savp'] = str( $major_command.save_plot_file )
+#end if
+##=============================================================================
+#if str($major_command.major_command_selector) == 'custom':
+	#set $__options['action'] = str( "custom" )
+        #set $__options['s'] = str( $major_command.strand_shift )
+        #set $__options['x'] = str( $major_command.excluded_strand_shift  )
+        #set $__options['npeak'] = int( $major_command.num_peaks )
+        #set $__options['fdr'] = int( $major_command.fdr  )
+        #set $__options['rf'] = str( $major_command.replace )
+
+	#if str($major_command.user_defined_strpeak) == '':
+        	#set $__options['speak'] = str( $major_command.user_defined_strpeak )
+	#else:
+	       	#set $__options['speak'] = "-speak=$major_command.user_defined_strpeak"
+	#end if
+
+	#if str($major_command.filter_char) == '':
+ 	       #set $__options['filtchr'] = str( $major_command.filter_char )
+	#else:
+ 	       #set $__options['filtchr'] = "-filtchr=$major_command.filter_char"
+	#end if
+
+	#set $__options['out'] = str( $major_command.save_peakshift_file )
+        #set $__options['savr'] = str( $major_command.save_regionpeak_file )
+        #set $__options['savd'] = str( $major_command.save_rdata_file )
+        #set $__options['savn'] = str( $major_command.save_narrowpeak_file )
+        #set $__options['savp'] = str( $major_command.save_plot_file )
+#end if
+
+${ simplejson.dumps( __options ) }
+    </configfile>
+  </configfiles>
+  <tests>
+	<!--none yet for spp-->
+  </tests>
+  <help>
+**What it does**
+
+This tool allows ChIP-seq peak calling using SPP
+
+This set of programs operate on mapped Illumina single-end read datasets in tagAlign or BAM format.
+
+View the modified SPP documentation: http://code.google.com/p/phantompeakqualtools/
+
+------
+
+**Usage**
+
+**Determine strand cross-correlation peak**: Compute the predominant insert-size (fragment length) based on strand cross-correlation peak.
+
+**Peak calling**: Call Peaks and regions for punctate binding datasets.
+
+**IDR analysis**: Compute Data quality measures based on relative phantom peak.
+
+**Custom settings**: Enables all options available to SPP for custom analysis.
+
+------
+
+**Citation**
+
+Anshul Kundaje, Computer Science Dept., Stanford University, ENCODE Consortium, Personal Communication, Oct 2010
+Kharchenko PK, Tolstorukov MY, Park PJ, Design and analysis of ChIP-seq experiments for DNA-binding proteins Nat Biotechnol. 2008 Dec;26(12):1351-9
+
+Integration of SPP with Galaxy performed by Ziru Zhou ( ziruzhou@gmail.com ). Please send your comments/questions to help@modencode.org.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Oct 17 12:39:45 2013 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <set_environment version="1.0">
+        <environment_variable name="SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+    </set_environment>
+</tool_dependency>
\ No newline at end of file