mtls_analysis: mtls_analyze/heatmap.R comparison

comparison mtls_analyze/heatmap.R @ 4:b465306d00ba draft default tip

Uploaded

author	kmace
date	Mon, 23 Jul 2012 13:00:15 -0400
parents
children

comparison

equal deleted inserted replaced

-:a0306edbf2f8
+:b465306d00ba
+GeneratePeakMatrix <- function(experiments, scores) {
+  # Builds the mtl-by-experiment peak score matrix.
+  #
+  # Args:
+  #   experiments: A character vector holding, for each mtl, an underscore
+  #                delimited list of experiment names,
+  #                e.g. experiments[1] = "expA_expD_expB".
+  #   scores: A character vector parallel to experiments holding, for each
+  #           mtl, the underscore delimited scores of those experiments,
+  #           e.g. scores[1] = "55_33_245".
+  #
+  # Returns:
+  #   A numeric matrix with one row per mtl and one column per distinct
+  #   experiment name (columns in order of first appearance). Cell [i, e] is
+  #   the score of experiment e in mtl i, or 0 when e is absent from mtl i.
+  stopifnot(length(scores) >= length(experiments))
+  experiments <- strsplit(as.character(experiments), split = "_", fixed = TRUE)
+  scores <- strsplit(as.character(scores), split = "_", fixed = TRUE)
+  unique.experiments <- unique(unlist(experiments))
+  peaks <- matrix(0,
+                  nrow = length(experiments),
+                  ncol = length(unique.experiments),
+                  dimnames = list(NULL, unique.experiments))
+  # seq_along() skips the loop entirely for empty input (1:0 would not).
+  for (i in seq_along(experiments)) {
+    mtl.experiments <- experiments[[i]]
+    # Indexing past the end yields NA, so a missing score becomes NA.
+    mtl.scores <- as.numeric(scores[[i]])[seq_along(mtl.experiments)]
+    # When an experiment is repeated within one mtl the last score wins.
+    peaks[i, mtl.experiments] <- mtl.scores
+  }
+  return(peaks)
+}
+GetChipData <- function(file.name,
+                        proximity = "distal",
+                        include.targetless = TRUE,
+                        column.order = NA) {
+  # Reads in, filters, and organizes mtls data.
+  #
+  # Args:
+  #   file.name: The path to the tab-delimited mtls file.
+  #   proximity: Currently unused. The proximity-specific target columns are
+  #              disabled and the single "trgt" column is read instead.
+  #   include.targetless: If FALSE, mtls whose target is empty are removed.
+  #                       Only applied when RNA files are configured.
+  #                       Default is TRUE.
+  #   column.order: Optional "::"-delimited string (or character vector) of
+  #                 experiment names. Selects and orders the peak columns.
+  #                 With the default (NA) all experiments are kept in the
+  #                 order in which they are first seen.
+  #
+  # Returns:
+  #   A list with the following elements:
+  #     $peaks - a matrix of peak scores, one row per mtl and one column per
+  #              experiment
+  #     $targets - the "trgt" column of the mtls file; only present when RNA
+  #                files are configured
+  #
+  # Reads param$debug and param$rna.files from the global param list.
+  if (param$debug) {
+    print("In GetChipData")
+  }
+  # Column names of the mtls file that are used here.
+  EXPERIMENTS <- "expt"
+  SCORES <- "score"
+  TARGETS <- "trgt"
+  cat("param$rna.files = ", param$rna.files, "\n")
+  # Targets are only needed when expression data will be mapped onto them.
+  use.targets <- !is.na(param$rna.files) && param$rna.files != "none"
+  keep.columns <- if (use.targets) {
+    c(EXPERIMENTS, SCORES, TARGETS)
+  } else {
+    c(EXPERIMENTS, SCORES)
+  }
+  mtls <- read.delim(file.name, header = TRUE, as.is = TRUE)[, keep.columns]
+  if (use.targets && !include.targetless) {
+    mtls <- mtls[which(as.character(mtls[, TARGETS]) != ""), ]
+  }
+  chip <- list()
+  chip$peaks <- GeneratePeakMatrix(mtls[, EXPERIMENTS], mtls[, SCORES])
+  if (length(column.order) > 0 && !all(is.na(column.order))) {
+    experiment.order <- unlist(strsplit(column.order, split = "::"))
+    # drop = FALSE keeps a matrix even when a single experiment is selected.
+    chip$peaks <- chip$peaks[, experiment.order, drop = FALSE]
+  }
+  if (use.targets) {
+    chip$targets <- mtls[, TARGETS]
+  }
+  return(chip)
+}
+GetRNADataFromOneFile <- function(file.name, fpkm = "avg") {
+  # Loads per-gene expression values from one cufflinks output file.
+  #
+  # Args:
+  #   file.name: The path to the tab-delimited cufflinks file.
+  #   fpkm: Which FPKM column to read: "avg" (FPKM), "low" (FPKM_conf_lo) or
+  #         "hi" (FPKM_conf_hi). Any other value raises an error.
+  #
+  # Returns:
+  #   The chosen FPKM column as a vector whose names are the values of the
+  #   gene_id column, e.g. names(data[1]) = JUND and data[1] = 31.
+  if (param$debug) {
+    cat("In GetRNADataFromOneFile for ", file.name)
+  }
+  gene.column <- "gene_id"
+  fpkm.column <- switch(fpkm,
+                        hi = "FPKM_conf_hi",
+                        avg = "FPKM",
+                        low = "FPKM_conf_lo",
+                        stop(" Bad fpkm quality argument supplied."))
+  cufflinks <- read.delim(file.name, header = TRUE, as.is = TRUE)
+  cufflinks <- cufflinks[, c(gene.column, fpkm.column)]
+  expression.values <- cufflinks[, fpkm.column]
+  names(expression.values) <- cufflinks[, gene.column]
+  return(expression.values)
+}
+GetRNAData <- function(file.names, file.lables = file.names, fpkm = "avg") {
+  # Reads several cufflinks files into one gene-by-run expression matrix.
+  #
+  # Args:
+  #   file.names: The paths to the cufflinks files.
+  #   file.lables: Column labels for the result, one per file. Defaults to
+  #                the file names.
+  #   fpkm: What fpkm is chosen, options are hi, low and avg. Passed on to
+  #         GetRNADataFromOneFile for every file.
+  #
+  # Returns:
+  #   A numeric matrix in which each column corresponds to an rnaseq run and
+  #   each row to a gene (the union of genes over all files). A gene missing
+  #   from a file scores 0 in that column.
+  if (param$debug) {
+    print("In GetRNAData")
+  }
+  # Forward fpkm so the requested FPKM column is the one actually read.
+  files <- lapply(file.names, GetRNADataFromOneFile, fpkm = fpkm)
+  genes <- unique(unlist(lapply(files, names)))
+  scores <- matrix(0,
+                   nrow = length(genes),
+                   ncol = length(file.names),
+                   dimnames = list(genes, NULL))
+  for (j in seq_along(files)) {
+    # Rows are addressed by gene name; a repeated gene keeps its last value.
+    scores[names(files[[j]]), j] <- files[[j]]
+  }
+  print("# of cols for scores is ")
+  print(ncol(scores))
+  print("file lables are ")
+  print(file.lables)
+  colnames(scores) <- file.lables
+  return(scores)
+}
+NormalizeRNA <- function(scores) {
+  # Converts expression scores into log2 fold changes.
+  #
+  # Args:
+  #   scores: A numeric matrix with an even number of columns. The first half
+  #           of the columns are the numerators and the second half the
+  #           denominators: column k is divided by column k + ncol(scores)/2.
+  #
+  # Returns:
+  #   A matrix of log2((numerator + 1) / (denominator + 1)) with one column
+  #   per numerator column.
+  n.cols <- ncol(scores)
+  if (n.cols %% 2 != 0) {
+    stop("NormalizeRNA needs an even number of columns, got ", n.cols)
+  }
+  half <- n.cols / 2
+  # Add a pseudocount so zero scores give neither log2(0) nor x/0.
+  scores <- scores + 1
+  # drop = FALSE keeps the matrix shape when there is a single column pair.
+  numerator <- scores[, seq_len(half), drop = FALSE]
+  denominator <- scores[, half + seq_len(half), drop = FALSE]
+  return(log2(numerator / denominator))
+}
+PrepareRNAforHeatmap <- function(new.scores.foldchange) {
+  # Splits every fold-change column into an "up" and a "down" column.
+  #
+  # Args:
+  #   new.scores.foldchange: A numeric matrix of fold changes, one column per
+  #                          sample and one row per gene.
+  #
+  # Returns:
+  #   A matrix with twice as many columns. For input column k, output column
+  #   2k - 1 ("<name>.up") holds the positive values (negatives set to 0) and
+  #   output column 2k ("<name>.down") holds the magnitude of the negative
+  #   values (positives set to 0). Row names are carried over.
+  fold.change <- as.matrix(new.scores.foldchange)
+  n.samples <- ncol(fold.change)
+  new.scores.split <- matrix(0, nrow = nrow(fold.change), ncol = 2 * n.samples)
+  rownames(new.scores.split) <- rownames(fold.change)
+  if (n.samples == 0) {
+    return(new.scores.split)
+  }
+  up.columns <- seq(1, by = 2, length.out = n.samples)
+  new.scores.split[, up.columns] <- pmax(fold.change, 0)
+  new.scores.split[, up.columns + 1] <- pmax(-fold.change, 0)
+  sample.names <- colnames(fold.change)
+  direction <- c("up", "down")
+  colnames(new.scores.split) <- if (is.null(sample.names)) {
+    # Without input names every column pair gets the same two labels.
+    rep(paste(sample.names, direction, sep = "."), times = n.samples)
+  } else {
+    paste(rep(sample.names, each = 2), direction, sep = ".")
+  }
+  return(new.scores.split)
+}
+#ConvertExpressionToPval = function(expression, threshold = 1)
+#{
+#    print("In ConvertExpressionToPval")
+#    #Generate Normal Stats
+#    # expression.mean = apply(expression, 2, mean)
+#    # expression.sd = apply(expression, 2, sd)
+#
+#    ix.above.threshold = which(expression > threshold)
+#    expression.mean = apply(expression, 2, function(i) mean(i[which(i > threshold)]))
+#    expression.sd = apply(expression, 2, function(i)     sd(i[which(i > threshold)]))
+#
+#    # CDF from zero to point
+#    # expression.pval = matrix(,nr=nrow(expression),nc=ncol(expression),dimnames=dimnames(expression))
+#    # ix.low = which(expression < expression.mean)
+#    # ix.upper = which(expression >= expression.mean)
+#    # expression.pval[ix.low] = (-10 * pnorm(expression[ix.low], mean = expression.mean, sd = expression.sd, log=T) *log10(exp(1)))
+#
+#    expression.pval = (-10 * log10(exp(1)) * pnorm(expression,
+#                                                    mean = expression.mean,
+#                                                    sd = expression.sd,
+#                                                    log=T,
+#                                                    lower.tail=F
+#    )
+#    )
+#    #squash those under threshold to zero
+#    expression.pval[-ix.above.threshold] = 0
+#    # correct for upper tail (ie from point to inf)
+#    #Still need to do
+#    return (expression.pval)
+#}
+MapExpressiontoChip2 <- function(chip.targets, expression) {
+  # Copies expression rows onto chip targets with a single forward walk.
+  #
+  # Args:
+  #   chip.targets: A character vector of target gene names, one per mtl.
+  #                 The walk only moves forward, so targets are expected in
+  #                 the same sort order as the row names of expression.
+  #   expression: A numeric matrix with gene names as row names.
+  #
+  # Returns:
+  #   A matrix with one row per chip target (row names are the targets) and
+  #   the columns of expression. A row holds the expression values of the
+  #   gene with the same name, or zeros when no such gene was reached.
+  mymatrix <- matrix(0, nrow = length(chip.targets), ncol = ncol(expression))
+  rownames(mymatrix) <- chip.targets
+  colnames(mymatrix) <- colnames(expression)
+  expression.genes <- rownames(expression)
+  n.genes <- length(expression.genes)
+  n.targets <- length(chip.targets)
+  # txtProgressBar needs max > min, so skip the walk when nothing is mapped.
+  if (n.targets > 0) {
+    pb <- txtProgressBar(min = 0, max = n.targets, style = 3)
+    j <- 1  # position in expression.genes
+    for (i in seq_len(n.targets)) {
+      current.target <- chip.targets[i]
+      setTxtProgressBar(pb, i)
+      # Test the bound first so we never compare against a gene past the end.
+      while (j <= n.genes && current.target > expression.genes[j]) {
+        j <- j + 1
+      }
+      if (j <= n.genes && current.target == expression.genes[j]) {
+        mymatrix[i, ] <- expression[j, ]
+      }
+    }
+    close(pb)
+  }
+  print("Leaving MapRNAtoChip")
+  return(mymatrix)
+}
+MapExpressiontoChip <- function(chip.targets, expression) {
+  # Copies the expression row of each chip target onto that target's mtl.
+  #
+  # Args:
+  #   chip.targets: A character vector of target gene names, one per mtl.
+  #                 Empty strings mark mtls without a target.
+  #   expression: A numeric matrix with gene names as row names.
+  #
+  # Returns:
+  #   A matrix with one row per chip target (row names are the targets) and
+  #   the columns of expression. Rows whose target is empty or has no
+  #   expression data are all zero.
+  print("In MapRNAtoChip")
+  expression.genes <- rownames(expression)
+  mymatrix <- matrix(0, nrow = length(chip.targets), ncol = ncol(expression))
+  rownames(mymatrix) <- chip.targets
+  colnames(mymatrix) <- colnames(expression)
+  has.target <- !is.na(chip.targets) & chip.targets != ""
+  # A targeted gene must name exactly one expression row; otherwise the
+  # mapping would be ambiguous.
+  matched.genes <- expression.genes[expression.genes %in% chip.targets[has.target]]
+  if (anyDuplicated(matched.genes) > 0) {
+    stop("We have a serious problem, chip is not alligned to expression")
+  }
+  expression.row <- match(chip.targets, expression.genes)
+  expression.row[!has.target] <- NA
+  mapped <- which(!is.na(expression.row))
+  # drop = FALSE keeps the matrix shape when a single gene is mapped.
+  mymatrix[mapped, ] <- expression[expression.row[mapped], , drop = FALSE]
+  print("Leaving MapRNAtoChip")
+  return(mymatrix)
+}
+GenerateKMOrder <- function(data, km) {
+  # Ranks the clusters by their overall mean score, highest first.
+  #
+  # Args:
+  #   data: A matrix of scores whose rows have already been clustered.
+  #   km: The clustering result. Only km$cluster is used: one cluster id per
+  #       row of data (as produced by kmeans, but any method that supplies a
+  #       $cluster vector works).
+  #
+  # Returns:
+  #   The distinct cluster ids, ordered from the cluster with the highest
+  #   mean over all its cells to the one with the lowest.
+  membership <- km$cluster
+  cluster.ids <- unique(membership)
+  cluster.means <- vapply(cluster.ids,
+                          function(id) mean(data[which(membership == id), ]),
+                          numeric(1),
+                          USE.NAMES = FALSE)
+  ascending <- order(cluster.means)
+  if (param$debug) {
+    print("straight clusters")
+    print(cluster.ids)
+    print("straigth average")
+    print(cluster.means)
+    print("ordered clusters")
+    print(cluster.ids[ascending])
+    print("ordered average")
+    print(cluster.means[ascending])
+  }
+  return(cluster.ids[rev(ascending)])
+}
+OrderMTL <- function(data, km, cluster.order) {
+  # Orders the rows of a matrix so that rows of the same cluster are adjacent.
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered.
+  #   km: The clustering result. Only km$cluster is used: one cluster id per
+  #       row of data.
+  #   cluster.order: The order in which the clusters should be displayed.
+  #                  for eg. cluster.order = c(2, 3, 1) would result in
+  #                  cluster 2 being on top, then cluster 3 and lastly
+  #                  cluster 1. A cluster without rows is allowed.
+  #
+  # Returns:
+  #   a list that contains 3 objects:
+  #     list$data: the reordered data (column names kept, row names dropped).
+  #     list$color.vector: the color assigned to each row; rows of the same
+  #                        cluster share one randomly chosen rainbow color.
+  #     list$cluster.index: the starting row of each cluster in list$data,
+  #                         followed by one extra entry: the row after the
+  #                         last cluster.
+  number.clusters <- length(cluster.order)
+  cluster.colors <- sample(rainbow(number.clusters))
+  cluster.rows <- lapply(cluster.order, function(id) which(km$cluster == id))
+  cluster.sizes <- vapply(cluster.rows, length, integer(1))
+  row.order <- unlist(cluster.rows)
+  filled <- seq_along(row.order)
+  # Rows that belong to no listed cluster stay NA at the bottom.
+  sorted.data <- matrix(NA, nrow = nrow(data), ncol = ncol(data))
+  colnames(sorted.data) <- colnames(data)
+  sorted.data[filled, ] <- data[row.order, , drop = FALSE]
+  cluster.color.vector <- vector(length = length(km$cluster))
+  cluster.color.vector[filled] <- rep(cluster.colors, times = cluster.sizes)
+  # An empty cluster simply repeats the start row of the next cluster.
+  cluster.start.row <- cumsum(c(1, cluster.sizes))
+  ret.list <- list()
+  ret.list$data <- sorted.data
+  ret.list$color.vector <- cluster.color.vector
+  ret.list$cluster.index <- cluster.start.row
+  return(ret.list)
+}
+CreateHeatMap <- function(data,
+                          km,
+                          cluster.order,
+                          document.name,
+                          document.type = "png",
+                          number.colors = 30) {
+  # Writes a heatmap image of a matrix, with rows grouped by cluster.
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered, the column
+  #         names of this matrix will become the column titles of the heatmap.
+  #   km: The clustering result; it must supply a $cluster vector.
+  #   cluster.order: The order in which the clusters should be displayed.
+  #                  for eg. cluster.order = c(2, 3, 1) would result in
+  #                  cluster 2 being on top, then cluster 3 and lastly
+  #                  cluster 1.
+  #   document.name: A name for the produced file. there is no need to
+  #                  supply the extension in your argument.
+  #   document.type: The type of file you want to produce: "png", "pdf" or
+  #                  "tiff". Any other value produces a ".bmp" file through
+  #                  bitmap(). Default is png.
+  #   number.colors: The number of steps in the color gradient. Default is 30.
+  #
+  # Returns:
+  #   The value of dev.off(). The useful result is the image file created at
+  #   the path specified.
+  if (param$debug) {
+    print("In CreateHeatMap")
+  }
+  data.ordered <- OrderMTL(data, km, cluster.order)
+  library(gplots)
+  color.ramp <- colorRampPalette(c("black",
+                                   "darkblue",
+                                   "blue",
+                                   "yellow",
+                                   "orange",
+                                   "red"))(number.colors)
+  switch(document.type,
+         png = png(paste(document.name, ".png", sep = ""),
+                   antialias = "gray"),
+         pdf = pdf(paste(document.name, ".pdf", sep = "")),
+         tiff = tiff(paste(document.name, ".tiff", sep = ""),
+                     res = 800,
+                     pointsize = 2,
+                     width = 1920,
+                     height = 1920),
+         bitmap(paste(document.name, ".bmp", sep = ""),
+                height = 5,
+                width = 5,
+                res = 500))
+  # Close the device even when plotting fails, so it is not left open.
+  device.open <- TRUE
+  on.exit(if (device.open) dev.off(), add = TRUE)
+  heatmap.2(
+    data.ordered$data,
+    rowsep = data.ordered$cluster.index[-1],
+    sepwidth = c(0.5, ncol(data) / 100),
+    dendrogram = "none",
+    Rowv = FALSE,
+    Colv = FALSE,
+    trace = "none",
+    labRow = FALSE,
+    labCol = colnames(data.ordered$data),
+    RowSideColors = data.ordered$color.vector,
+    keysize = 0.6,
+    key = FALSE,
+    col = color.ramp,
+    cexCol = 0.8,
+    cexRow = 0.8)
+  device.open <- FALSE
+  dev.off()
+}
+CreateIndividualHeatMap <- function(data,
+                                    km,
+                                    cluster.order,
+                                    color.ramp = colorRampPalette(c("black",
+                                                                    "darkblue",
+                                                                    "blue",
+                                                                    "yellow",
+                                                                    "orange",
+                                                                    "red"))(30)) {
+  # Draws a heatmap of a matrix with heatmap.3, rows grouped by cluster.
+  # Unlike CreateHeatMap it opens and closes no graphics device itself.
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered, the column
+  #         names of this matrix will become the column titles of the heatmap.
+  #   km: The clustering result; it must supply a $cluster vector.
+  #   cluster.order: The order in which the clusters should be displayed.
+  #                  for eg. cluster.order = c(2, 3, 1) would result in
+  #                  cluster 2 being on top, then cluster 3 and lastly
+  #                  cluster 1.
+  #   color.ramp: The colors used for the heatmap cells. Default is a 30 step
+  #               gradient from black through blue and yellow to red.
+  #
+  # Returns:
+  #   Whatever heatmap.3 returns.
+  if (param$debug) {
+    print("In CreateHeatMap")
+  }
+  ordered <- OrderMTL(data, km, cluster.order)
+  library(gplots)
+  score.matrix <- ordered$data
+  cluster.separators <- ordered$cluster.index[-1]
+  heatmap.3(score.matrix,
+            rowsep = cluster.separators,
+            sepwidth = c(0.5, nrow(score.matrix) / 100),
+            dendrogram = "none",
+            Rowv = FALSE,
+            Colv = FALSE,
+            trace = "none",
+            labRow = FALSE,
+            labCol = colnames(score.matrix),
+            keysize = 0.6,
+            key = FALSE,
+            col = color.ramp,
+            cexCol = 0.8,
+            cexRow = 0.8)
+}
+ReadCommadLineParameters <- function(argument.names, command.line.arguments, optional = FALSE) {
+  # Reads the parameters from the command line arguments.
+  #
+  # Args:
+  #   argument.names: A vector of expected argument names, e.g. "--mtls_file".
+  #   command.line.arguments: The received command line arguments. Each
+  #                           element may be a single token or several
+  #                           tokens separated by spaces ("--flag value").
+  #   optional: Are the arguments optional, or are they required, default is
+  #             required.
+  #
+  # Returns:
+  #   A character vector with the value that follows each name in
+  #   argument.names, in that order. A missing optional argument gives NA; a
+  #   missing required argument or a repeated argument is an error (the
+  #   usage text is printed first).
+  if (any(grepl("--version", command.line.arguments, fixed = TRUE))) {
+    cat("version", script.version, "\n")
+    q()
+  }
+  # Always flatten to one token vector, whatever the tokens per element.
+  args <- unlist(strsplit(command.line.arguments, " ", fixed = TRUE))
+  vals <- character(length(argument.names))
+  for (i in seq_along(argument.names)) {
+    # Exact comparison: a flag must not match inside another token.
+    ix <- which(args == argument.names[i])
+    if (length(ix) > 1) {
+      PrintParamError()
+      stop("arg ",
+           argument.names[i],
+           " used more than once.  Bailing out...\n")
+    } else if (length(ix) == 0) {
+      if (!optional) {
+        PrintParamError()
+        stop("could not find ",
+             argument.names[i],
+             ". Bailing out...\n")
+      }
+      vals[i] <- NA
+    } else {
+      # The value is the token right after the flag (NA if the flag is last).
+      vals[i] <- args[ix + 1]
+    }
+  }
+  return(vals)
+}
+PrintParamError <- function(){
+# Prints the usage text of heatmap.R (description, accepted arguments and an
+# example run) to standard output; takes no arguments.
+#
+# Called by the parameter readers right before they stop() on a bad argument.
+#
+# Returns:
+#   NULL, invisibly (the value of the cat() call).
+cat("
+DESCRIPTIION:
+heatmap.R takes a ...
+INPUT:
+1.--mtls_file: path to mtls file.\n
+2.--cluster_file: the destination path for the output cluster file.\n
+3.--chip_experiment_order: The order of desired chip experiments (optional).\n
+4.--heatmap_file: path for output heatmap image (no extension).\n
+5.--heatmap_type: choice of image format, currently support png, pdf, tiff and bmp (optional)\n
+6.--expression_file: list of expression files to be included in analysis (optional).\n
+7.--expression_name: lables for the expression files (optional).\n
+8.--normalization_file: a list of files to be used for normalization,
+they can be the same file, however the number of expression nominated
+normalization files must match the number of expression files (optional)\n
+9.--n_clusters: number of clusters\n
+10.--filter_percentage: percentage of mtls that will be analysed. Eg: if
+we make filter_percentage 30, we will take the union of the top mtls in
+mean, non-zero mean and variance (optional).\n
+EXAMPLE RUN:
+Rscript heatmap.R
+--mtls_file path/to/mtls.xls
+--cluster_file path/to/output/cluster
+--chip_experiment_order tf1::tf2::tf5::tf3
+--heatmap_file path/to/output/heatmap
+--heatmap_type png
+--expression_file path/to/exp1::path/to/exp2
+--expression_name myexp1::myexp2
+--normalization_file path/to/exp3::path/to/exp3
+--n_clusters 13
+--filter_percentage 100
+--include_targetless yes
+--number_bins 30
+")
+}
+#LoadDefaultParams <- function(param) {
+#  # Loads default parameters for the heatmap application
+#  #
+#  # Args:
+#  #   param: A list that contains all the previous paramaters.
+#  #
+#  # Returns:
+#  #   A modified version of the param list, with the default values loaded.
+#script.version=0.1
+#param$debug = F
+## RNA data:
+#param$rna.files = ""
+#param$rna.normalization = "none"
+## Filter:
+#param$filter.percentage <- 100
+## Clustering:
+#param$clustering.number.of.clusters <- 13
+## Heatmap:
+#param$heatmap.document.name <- "heatmap"
+#param$heatmap.document.type <- "png"
+##Cluster Groups:
+#param$cluster.groups.document.name <- "clusters"
+#return(param)
+#}
+LoadParams <- function(cmd.args, args.nms, n.start.numeric, optional = FALSE) {
+  # Loads user defined parameters for the heatmap application.
+  #
+  # Args:
+  #   cmd.args: The command line arguments given.
+  #   args.nms: A vector of possible command arguments.
+  #   n.start.numeric: the position in args.nms of the first argument that
+  #                    should be numeric (always put these last).
+  #   optional: This specifies if the params in cmd.args are optional or
+  #             required. The numeric check is only done for required params.
+  #
+  # Returns:
+  #   A character vector with the value assigned to each argument, in the
+  #   order of args.nms.
+  vals <- ReadCommadLineParameters(argument.names = args.nms,
+                                   command.line.arguments = cmd.args,
+                                   optional = optional)
+  # Guard the range: n:m counts downwards when n is larger than m.
+  if (!optional && n.start.numeric <= length(vals)) {
+    for (i in n.start.numeric:length(vals)) {
+      # The coercion warning is expected here; the NA is reported below.
+      numeric.value <- suppressWarnings(as.numeric(vals[i]))
+      if (is.na(numeric.value)) {
+        PrintParamError()
+        stop("arg ", args.nms[i], " is not numeric.  Bailing out...\n")
+      }
+    }
+  }
+  return(vals)
+}
+#ValidateParams <- function(params) {
+#    return(T)
+#}
+##########
+LoadDebugParams <- function(param) {
+  # Fills param from a fixed, built-in command line, so the script can be
+  # run without real command line arguments.
+  #
+  # Args:
+  #   param: The parameter list to add the settings to.
+  #
+  # Returns:
+  #   param with the chip, rna, filter, clustering, heatmap and cluster group
+  #   settings assigned from the built-in arguments.
+  args.nms <- c("--mtls_file",
+                "--cluster_file",
+                "--chip_experiment_order",
+                "--heatmap_file",
+                "--heatmap_type",
+                "--expression_file",
+                "--expression_name",
+                "--normalization_file",
+                "--include_targetless",
+                "--n_clusters",
+                "--filter_percentage",
+                "--number_bins")
+  cmd.args <- c(
+    "--mtls_file data/test/mtls.xls",
+    "--cluster_file data/test/cluster",
+    "--heatmap_file data/test/heatmap",
+    "--heatmap_type bmp",
+    "--n_clusters 13",
+    "--filter_percentage 100",
+    "--expression_file /home/kieran/code/scorchR-heatmap/data/expression/rna1.tabular::/home/kieran/code/scorchR-heatmap/data/expression/rna2.tabular",
+    "--expression_name batf.ko.0::batf.ko.17",
+    "--normalization_file mean",
+    "--chip_experiment_order ac::bc::cc::dc::ec",
+    "--include_targetless yes",
+    "--number_bins 20"
+  )
+  # The numeric arguments start at position 10 of args.nms.
+  vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 10, optional = FALSE)
+  # vals is parallel to args.nms, so look values up by flag name.
+  value.of <- function(flag) vals[match(flag, args.nms)]
+  # ChIP data:
+  param$annotated.macs.file <- value.of("--mtls_file")
+  param$chip.order <- value.of("--chip_experiment_order")
+  # RNA data:
+  param$rna.files <- value.of("--expression_file")
+  param$rna.names <- value.of("--expression_name")
+  param$rna.normalization.file <- value.of("--normalization_file")
+  param$include.targetless <- value.of("--include_targetless")
+  # Filter:
+  param$filter.percentage <- as.numeric(value.of("--filter_percentage"))
+  # Clustering:
+  param$number.bins <- as.numeric(value.of("--number_bins"))
+  param$clustering.number.of.clusters <- as.numeric(value.of("--n_clusters"))
+  # Heatmap:
+  param$heatmap.document.name <- value.of("--heatmap_file")
+  param$heatmap.document.type <- value.of("--heatmap_type")
+  # Cluster groups:
+  param$cluster.groups.document.name <- value.of("--cluster_file")
+  return(param)
+}
+LoadOptionalParams <- function(param) {
+  # Reads the optional command line arguments into param, using a default
+  # for every argument that was not supplied.
+  #
+  # Args:
+  #   param: The parameter list to add the settings to.
+  #
+  # Returns:
+  #   param with chip.order, the rna settings, include.targetless,
+  #   filter.percentage, number.bins and heatmap.document.type assigned.
+  cmd.args <- commandArgs(trailingOnly = TRUE)
+  args.nms <- c("--chip_experiment_order",
+                "--expression_file",
+                "--expression_name",
+                "--normalization_file",
+                "--heatmap_type",
+                "--include_targetless",
+                "--filter_percentage",
+                "--number_bins")
+  # vals is parallel to args.nms; arguments that were not given are NA.
+  vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 7, optional = TRUE)
+  # Returns the supplied value of a flag, or fallback when it was not given.
+  value.or <- function(flag, fallback, convert = identity) {
+    supplied <- vals[match(flag, args.nms)]
+    if (!is.na(supplied)) {
+      convert(supplied)
+    } else {
+      fallback
+    }
+  }
+  # ChIP data:
+  param$chip.order <- value.or("--chip_experiment_order", NA)
+  # RNA data:
+  param$rna.files <- value.or("--expression_file", "none")
+  param$rna.names <- value.or("--expression_name", "none")
+  param$rna.normalization.file <- value.or("--normalization_file", "no")
+  param$include.targetless <- value.or("--include_targetless", "yes")
+  # Filter:
+  param$filter.percentage <- value.or("--filter_percentage", 100, as.numeric)
+  param$number.bins <- value.or("--number_bins", 30, as.numeric)
+  # Heatmap file (output):
+  param$heatmap.document.type <- value.or("--heatmap_type", "none")
+  return(param)
+}
+LoadReqiredParams <- function(param) {
+  # Reads the required command line arguments into param.
+  #
+  # Args:
+  #   param: The parameter list to add the settings to.
+  #
+  # Returns:
+  #   param with the mtls file, the number of clusters and the two output
+  #   document names assigned.
+  args.nms <- c("--mtls_file",
+                "--cluster_file",
+                "--heatmap_file",
+                "--n_clusters")
+  # Only the last argument (position 4) has to be numeric.
+  vals <- LoadParams(commandArgs(trailingOnly = TRUE),
+                     args.nms,
+                     n.start.numeric = 4,
+                     optional = FALSE)
+  # vals is parallel to args.nms, so look values up by flag name.
+  value.of <- function(flag) vals[match(flag, args.nms)]
+  # ChIP data:
+  param$annotated.macs.file <- value.of("--mtls_file")
+  # Clustering:
+  param$clustering.number.of.clusters <- as.numeric(value.of("--n_clusters"))
+  # Cluster file (output):
+  param$cluster.groups.document.name <- value.of("--cluster_file")
+  # Heatmap file (output):
+  param$heatmap.document.name <- value.of("--heatmap_file")
+  return(param)
+}
+###########
+# here we output the fasta file of the targets of each kmeans cluster
+CreateClusterGroups <- function(trgts, k.ix, f.nm = "output/clust.to.trgts", km.order) {
+  # Writes the targets of every cluster to a fasta-style text file.
+  #
+  # Args:
+  #   trgts: A character vector with the underscore delimited gene targets of
+  #          each mtl.
+  #   k.ix: The cluster id of each mtl, parallel to trgts.
+  #   f.nm: The output path without extension; ".fasta" is appended.
+  #   km.order: The cluster ids in the order in which they are written.
+  #
+  # Returns:
+  #   NULL, invisibly. The file holds, for the i-th cluster of km.order, a
+  #   ">cluster_i" header line followed by one line per target gene.
+  f.nm <- paste(f.nm, ".fasta", sep = "")
+  records <- lapply(seq_along(km.order), function(i) {
+    cluster.targets <- trgts[which(k.ix == km.order[i])]
+    genes <- unlist(strsplit(as.character(cluster.targets), "_", fixed = TRUE))
+    # The header carries the display position i, not the cluster id.
+    c(paste(">cluster_", i, sep = ""), genes)
+  })
+  # writeLines terminates every line, so each header starts on its own line.
+  writeLines(as.character(unlist(records)), f.nm)
+}
+PrintClusters <- function(trgts, k.ix, f.nm = "output/clust.to.trgts", km.order) {
+  # Writes a tab separated table with the cluster of every mtl.
+  #
+  # Args:
+  #   trgts: A character vector with the target of each mtl; an empty target
+  #          is written as "no_target".
+  #   k.ix: The cluster id of each mtl, parallel to trgts.
+  #   f.nm: The output path without extension; ".tsv" is appended.
+  #   km.order: Not used by this function.
+  #
+  # Returns:
+  #   NULL, invisibly. The file holds a header line followed by one
+  #   "<target><TAB>cluster_<id>" line per mtl.
+  f.nm <- paste(f.nm, ".tsv", sep = "")
+  trgts[which(trgts == "")] <- "no_target"
+  # The header keeps its trailing tab so the file format stays unchanged.
+  header <- paste("row_number/target", "cluster", "", sep = "\t")
+  # Index k.ix by position so a short k.ix gives NA instead of recycling.
+  rows <- paste0(trgts, "\t", "cluster_", k.ix[seq_along(trgts)])
+  # Written in one call instead of reopening the file for every row.
+  writeLines(c(header, rows), f.nm)
+}
+GetTopRowsFromMatrix <- function(mtrx, percentage = 10) {
+  # Ranks the rows of a matrix and picks the top rows by three statistics.
+  #
+  # Args:
+  #   mtrx: A numeric matrix.
+  #   percentage: The percentage of rows to take from the top of each
+  #               ranking. Default is 10.
+  #
+  # Returns:
+  #   A list of row indexes of mtrx:
+  #     $mean: all rows ordered by decreasing row mean.
+  #     $sd: all rows ordered by decreasing row standard deviation.
+  #     $nonzero.mean: all rows ordered by decreasing mean of their non-zero
+  #                    values; rows without non-zero values come last.
+  #     $union: the union of the top percentage of rows of the three
+  #             rankings.
+  if (param$debug) {
+    print("In GetTopRowsFromMatrix")
+  }
+  stats <- list()
+  stats$mean <- apply(mtrx, 1, mean)
+  stats$sd <- apply(mtrx, 1, sd)
+  stats$nonzero.mean <- apply(mtrx, 1, function(x) mean(x[which(x != 0)]))
+  # order() keeps undefined statistics (NA/NaN) at the end instead of
+  # dropping them, so every index still refers to a row of mtrx.
+  index <- lapply(stats, order, decreasing = TRUE, na.last = TRUE)
+  cutoff <- floor(nrow(mtrx) * (percentage / 100))
+  # seq_len() selects nothing for a cutoff of 0 (1:0 would select row 1).
+  top.rows <- function(ranking) {
+    ranking[seq_len(max(0, min(cutoff, length(ranking))))]
+  }
+  index$union <- union(top.rows(index$mean),
+                       union(top.rows(index$sd), top.rows(index$nonzero.mean)))
+  return(index)
+}
+#GetUniq = function(targets.cluster)
+#{
+#    #The problem with this function is that it aggregates the list handed to it. After this point you can't maintain order.
+#
+#    return(unique(unlist(lapply(targets.cluster, function(i) strsplit(i, "_")))))
+#    #return(unlist(lapply(targets.cluster, function(i) strsplit(i, "_"))))
+#
+#}
+bindata <- function(d, qunts = seq(.4, .9, .1)) {
+  # Replaces the values of every column by the bin they fall in, with bin
+  # edges taken from the quantiles of that column.
+  #
+  # Args:
+  #   d: A numeric matrix.
+  #   qunts: The quantile probabilities that define the bin edges.
+  #
+  # Returns:
+  #   A matrix with the dimensions and dimnames of d. A cell holds the
+  #   position in qunts of the last quantile that its value is strictly
+  #   greater than, or 0 when it exceeds none of them.
+  tmp <- matrix(0, nrow = nrow(d), ncol = ncol(d), dimnames = dimnames(d))
+  # seq_len() skips the loop for a matrix without columns (1:0 would not).
+  for (j in seq_len(ncol(d))) {
+    bins <- quantile(d[, j], qunts)
+    for (i in seq_along(bins)) {
+      tmp[which(d[, j] > bins[i]), j] <- i
+    }
+  }
+  return(tmp)
+}
+bindata.non.zero <- function(d, qunts = seq(0, 0.9, 0.1)) {
+  # Replaces the values of every column by the bin they fall in, with bin
+  # edges taken from the quantiles of the non-zero values of that column.
+  #
+  # Args:
+  #   d: A numeric matrix.
+  #   qunts: The quantile probabilities that define the bin edges.
+  #
+  # Returns:
+  #   A matrix with the dimensions of d. A cell holds the position in qunts
+  #   of the last quantile that its value is strictly greater than, or 0
+  #   when it exceeds none of them.
+  #   NOTE(review): unlike bindata, the result carries no dimnames - confirm
+  #   whether callers rely on that.
+  tmp <- matrix(0, nrow = nrow(d), ncol = ncol(d))
+  # seq_len() skips the loop for a matrix without columns (1:0 would not).
+  for (j in seq_len(ncol(d))) {
+    ix.non.zero <- which(d[, j] != 0)
+    # A column without non-zero values gives NA edges, so nothing is binned.
+    bins <- quantile(d[ix.non.zero, j], qunts)
+    for (i in seq_along(bins)) {
+      tmp[which(d[, j] > bins[i]), j] <- i
+    }
+  }
+  return(tmp)
+}
+bindata.matrix <- function(d, qunts = seq(0, 0.9, 0.1)) {
+  # Replaces every value of a matrix by the bin it falls in, with bin edges
+  # taken from the quantiles of all values of the matrix together.
+  #
+  # Args:
+  #   d: A numeric matrix.
+  #   qunts: The quantile probabilities that define the bin edges.
+  #
+  # Returns:
+  #   A matrix with the dimensions and dimnames of d. A cell holds the
+  #   position in qunts of the last quantile that its value is strictly
+  #   greater than, or 0 when it exceeds none of them.
+  edges <- quantile(d[], qunts)
+  binned <- matrix(0, nrow = nrow(d), ncol = ncol(d), dimnames = dimnames(d))
+  for (level in seq_along(edges)) {
+    above <- which(d > edges[level])
+    binned[above] <- level
+  }
+  return(binned)
+}
+bindata.non.zero.matrix <- function(d, qunts = seq(0, 0.9, 0.1)) {
+  # Replaces every value of a matrix by the bin it falls in, with bin edges
+  # taken from the quantiles of all non-zero values of the matrix together.
+  #
+  # Args:
+  #   d: A numeric matrix.
+  #   qunts: The quantile probabilities that define the bin edges.
+  #
+  # Returns:
+  #   A matrix with the dimensions and dimnames of d. A cell holds the
+  #   position in qunts of the last quantile that its value is strictly
+  #   greater than, or 0 when it exceeds none of them.
+  non.zero.cells <- which(d != 0)
+  edges <- quantile(d[non.zero.cells], qunts)
+  binned <- matrix(0, nrow = nrow(d), ncol = ncol(d), dimnames = dimnames(d))
+  for (level in seq_along(edges)) {
+    above <- which(d > edges[level])
+    binned[above] <- level
+  }
+  return(binned)
+}
+heatmap.3 <- function (x, Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
+distfun = dist, hclustfun = hclust, dendrogram = c("both",
+"row", "column", "none"), symm = FALSE, scale = c("none",
+"row", "column"), na.rm = TRUE, revC = identical(Colv,
+"Rowv"), add.expr, breaks, symbreaks = min(x < 0, na.rm = TRUE) ||
+scale != "none", col = "heat.colors", colsep, rowsep,
+sepcolor = "white", sepwidth = c(0.05, 0.05), cellnote, notecex = 1,
+notecol = "cyan", na.color = par("bg"), trace = c("column",
+"row", "both", "none"), tracecol = "cyan", hline = median(breaks),
+vline = median(breaks), linecol = tracecol, margins = c(5,
+5), ColSideColors, RowSideColors, cexRow = 0.2 + 1/log10(nr),
+cexCol = 0.2 + 1/log10(nc), labRow = NULL, labCol = NULL,
+key = TRUE, keysize = 1.5, density.info = c("histogram",
+"density", "none"), denscol = tracecol, symkey = min(x <
+0, na.rm = TRUE) || symbreaks, densadj = 0.25, main = NULL,
+xlab = NULL, ylab = NULL, lmat = NULL, lhei = NULL, lwid = NULL,
+...)
+{
+scale01 <- function(x, low = min(x), high = max(x)) {
+x <- (x - low)/(high - low)
+x
+}
+retval <- list()
+scale <- if (symm && missing(scale))
+"none"
+else match.arg(scale)
+dendrogram <- match.arg(dendrogram)
+trace <- match.arg(trace)
+density.info <- match.arg(density.info)
+if (length(col) == 1 && is.character(col))
+col <- get(col, mode = "function")
+if (!missing(breaks) && (scale != "none"))
+warning("Using scale=\"row\" or scale=\"column\" when breaks are",
+"specified can produce unpredictable results.", "Please consider using only one or the other.")
+if (is.null(Rowv) || is.na(Rowv))
+Rowv <- FALSE
+if (is.null(Colv) || is.na(Colv))
+Colv <- FALSE
+else if (Colv == "Rowv" && !isTRUE(Rowv))
+Colv <- FALSE
+if (length(di <- dim(x)) != 2 || !is.numeric(x))
+stop("`x' must be a numeric matrix")
+nr <- di[1]
+nc <- di[2]
+if (nr <= 1 || nc <= 1)
+stop("`x' must have at least 2 rows and 2 columns")
+if (!is.numeric(margins) || length(margins) != 2)
+stop("`margins' must be a numeric vector of length 2")
+if (missing(cellnote))
+cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
+if (!inherits(Rowv, "dendrogram")) {
+if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
+c("both", "row"))) {
+if (is.logical(Colv) && (Colv))
+dendrogram <- "column"
+else dedrogram <- "none"
+warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
+dendrogram, "'. Omitting row dendogram.")
+}
+}
+if (!inherits(Colv, "dendrogram")) {
+if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
+c("both", "column"))) {
+if (is.logical(Rowv) && (Rowv))
+dendrogram <- "row"
+else dendrogram <- "none"
+warning("Discrepancy: Colv is FALSE, while dendrogram is `",
+dendrogram, "'. Omitting column dendogram.")
+}
+}
+if (inherits(Rowv, "dendrogram")) {
+ddr <- Rowv
+rowInd <- order.dendrogram(ddr)
+}
+else if (is.integer(Rowv)) {
+hcr <- hclustfun(distfun(x))
+ddr <- as.dendrogram(hcr)
+ddr <- reorder(ddr, Rowv)
+rowInd <- order.dendrogram(ddr)
+if (nr != length(rowInd))
+stop("row dendrogram ordering gave index of wrong length")
+}
+else if (isTRUE(Rowv)) {
+Rowv <- rowMeans(x, na.rm = na.rm)
+hcr <- hclustfun(distfun(x))
+ddr <- as.dendrogram(hcr)
+ddr <- reorder(ddr, Rowv)
+rowInd <- order.dendrogram(ddr)
+if (nr != length(rowInd))
+stop("row dendrogram ordering gave index of wrong length")
+}
+else {
+rowInd <- nr:1
+}
+if (inherits(Colv, "dendrogram")) {
+ddc <- Colv
+colInd <- order.dendrogram(ddc)
+}
+else if (identical(Colv, "Rowv")) {
+if (nr != nc)
+stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
+if (exists("ddr")) {
+ddc <- ddr
+colInd <- order.dendrogram(ddc)
+}
+else colInd <- rowInd
+}
+else if (is.integer(Colv)) {
+hcc <- hclustfun(distfun(if (symm)
+x
+else t(x)))
+ddc <- as.dendrogram(hcc)
+ddc <- reorder(ddc, Colv)
+colInd <- order.dendrogram(ddc)
+if (nc != length(colInd))
+stop("column dendrogram ordering gave index of wrong length")
+}
+else if (isTRUE(Colv)) {
+Colv <- colMeans(x, na.rm = na.rm)
+hcc <- hclustfun(distfun(if (symm)
+x
+else t(x)))
+ddc <- as.dendrogram(hcc)
+ddc <- reorder(ddc, Colv)
+colInd <- order.dendrogram(ddc)
+if (nc != length(colInd))
+stop("column dendrogram ordering gave index of wrong length")
+}
+else {
+colInd <- 1:nc
+}
+retval$rowInd <- rowInd
+retval$colInd <- colInd
+retval$call <- match.call()
+x <- x[rowInd, colInd]
+x.unscaled <- x
+cellnote <- cellnote[rowInd, colInd]
+if (is.null(labRow))
+labRow <- if (is.null(rownames(x)))
+(1:nr)[rowInd]
+else rownames(x)
+else labRow <- labRow[rowInd]
+if (is.null(labCol))
+labCol <- if (is.null(colnames(x)))
+(1:nc)[colInd]
+else colnames(x)
+else labCol <- labCol[colInd]
+if (scale == "row") {
+retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
+x <- sweep(x, 1, rm)
+retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
+x <- sweep(x, 1, sx, "/")
+}
+else if (scale == "column") {
+retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
+x <- sweep(x, 2, rm)
+retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
+x <- sweep(x, 2, sx, "/")
+}
+if (missing(breaks) || is.null(breaks) || length(breaks) <
+1) {
+if (missing(col) || is.function(col))
+breaks <- 16
+else breaks <- length(col) + 1
+}
+if (length(breaks) == 1) {
+if (!symbreaks)
+breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
+length = breaks)
+else {
+extreme <- max(abs(x), na.rm = TRUE)
+breaks <- seq(-extreme, extreme, length = breaks)
+}
+}
+nbr <- length(breaks)
+ncol <- length(breaks) - 1
+if (class(col) == "function")
+col <- col(ncol)
+min.breaks <- min(breaks)
+max.breaks <- max(breaks)
+x[x < min.breaks] <- min.breaks
+x[x > max.breaks] <- max.breaks
+# if (missing(lhei) || is.null(lhei))
+#     lhei <- c(keysize, 4)
+# if (missing(lwid) || is.null(lwid))
+#     lwid <- c(keysize, 4)
+# if (missing(lmat) || is.null(lmat)) {
+#     lmat <- rbind(4:3, 2:1)
+#     if (!missing(ColSideColors)) {
+#         if (!is.character(ColSideColors) || length(ColSideColors) !=
+#             nc)
+#             stop("'ColSideColors' must be a character vector of length ncol(x)")
+#         lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] +
+#             1)
+#         lhei <- c(lhei[1], 0.2, lhei[2])
+#     }
+#     if (!missing(RowSideColors)) {
+#         if (!is.character(RowSideColors) || length(RowSideColors) !=
+#             nr)
+#             stop("'RowSideColors' must be a character vector of length nrow(x)")
+#         lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) -
+#             1), 1), lmat[, 2] + 1)
+#         lwid <- c(lwid[1], 0.2, lwid[2])
+#     }
+#     lmat[is.na(lmat)] <- 0
+# }
+# if (length(lhei) != nrow(lmat))
+#     stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
+# if (length(lwid) != ncol(lmat))
+#     stop("lwid must have length = ncol(lmat) =", ncol(lmat))
+# op <- par(no.readonly = TRUE)
+# on.exit(par(op))
+# layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
+if (!missing(RowSideColors)) {
+par(mar = c(margins[1], 0, 0, 0.5))
+image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
+}
+if (!missing(ColSideColors)) {
+par(mar = c(0.5, 0, 0, margins[2]))
+image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
+}
+par(mar = c(margins[1], 0, 0, margins[2]))
+x <- t(x)
+cellnote <- t(cellnote)
+if (revC) {
+iy <- nr:1
+if (exists("ddr"))
+ddr <- rev(ddr)
+x <- x[, iy]
+cellnote <- cellnote[, iy]
+}
+else iy <- 1:nr
+image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 +
+c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col,
+breaks = breaks, ...)
+retval$carpet <- x
+if (exists("ddr"))
+retval$rowDendrogram <- ddr
+if (exists("ddc"))
+retval$colDendrogram <- ddc
+retval$breaks <- breaks
+retval$col <- col
+if (!invalid(na.color) & any(is.na(x))) {
+mmat <- ifelse(is.na(x), 1, NA)
+image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
+col = na.color, add = TRUE)
+}
+axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
+cex.axis = cexCol)
+if (!is.null(xlab))
+mtext(xlab, side = 1, line = margins[1] - 1.25)
+axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
+cex.axis = cexRow)
+if (!is.null(ylab))
+mtext(ylab, side = 4, line = margins[2] - 1.25)
+if (!missing(add.expr))
+eval(substitute(add.expr))
+if (!missing(colsep))
+for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0,
+length(csep)), xright = csep + 0.5 + sepwidth[1],
+ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1,
+col = sepcolor, border = sepcolor)
+if (!missing(rowsep))
+for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) +
+1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) +
+1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1,
+col = sepcolor, border = sepcolor)
+min.scale <- min(breaks)
+max.scale <- max(breaks)
+x.scaled <- scale01(t(x), min.scale, max.scale)
+if (trace %in% c("both", "column")) {
+retval$vline <- vline
+vline.vals <- scale01(vline, min.scale, max.scale)
+for (i in colInd) {
+if (!is.null(vline)) {
+abline(v = i - 0.5 + vline.vals, col = linecol,
+lty = 2)
+}
+xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
+xv <- c(xv[1], xv)
+yv <- 1:length(xv) - 0.5
+lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+}
+}
+if (trace %in% c("both", "row")) {
+retval$hline <- hline
+hline.vals <- scale01(hline, min.scale, max.scale)
+for (i in rowInd) {
+if (!is.null(hline)) {
+abline(h = i + hline, col = linecol, lty = 2)
+}
+yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
+yv <- rev(c(yv[1], yv))
+xv <- length(yv):1 - 0.5
+lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+}
+}
+if (!missing(cellnote))
+text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
+col = notecol, cex = notecex)
+par(mar = c(margins[1], 0, 0, 0))
+# if (dendrogram %in% c("both", "row")) {
+#     plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
+# }
+# else plot.new()
+par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
+# if (dendrogram %in% c("both", "column")) {
+#     plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
+# }
+# else plot.new()
+if (!is.null(main))
+title(main, cex.main = 1.5 * op[["cex.main"]])
+# if (key) {
+#     par(mar = c(5, 4, 2, 1), cex = 0.75)
+#     tmpbreaks <- breaks
+#     if (symkey) {
+#         max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
+#         min.raw <- -max.raw
+#         tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
+#         tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
+#     }
+#     else {
+#         min.raw <- min(x, na.rm = TRUE)
+#         max.raw <- max(x, na.rm = TRUE)
+#     }
+#     z <- seq(min.raw, max.raw, length = length(col))
+#     image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
+#         xaxt = "n", yaxt = "n")
+#     par(usr = c(0, 1, 0, 1))
+#     lv <- pretty(breaks)
+#     xv <- scale01(as.numeric(lv), min.raw, max.raw)
+#     axis(1, at = xv, labels = lv)
+#     if (scale == "row")
+#         mtext(side = 1, "Row Z-Score", line = 2)
+#     else if (scale == "column")
+#         mtext(side = 1, "Column Z-Score", line = 2)
+#     else mtext(side = 1, "Value", line = 2)
+#     if (density.info == "density") {
+#         dens <- density(x, adjust = densadj, na.rm = TRUE)
+#         omit <- dens$x < min(breaks) | dens$x > max(breaks)
+#         dens$x <- dens$x[-omit]
+#         dens$y <- dens$y[-omit]
+#         dens$x <- scale01(dens$x, min.raw, max.raw)
+#         lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
+#             lwd = 1)
+#         axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
+#         title("Color Key\nand Density Plot")
+#         par(cex = 0.5)
+#         mtext(side = 2, "Density", line = 2)
+#     }
+#     else if (density.info == "histogram") {
+#         h <- hist(x, plot = FALSE, breaks = breaks)
+#         hx <- scale01(breaks, min.raw, max.raw)
+#         hy <- c(h$counts, h$counts[length(h$counts)])
+#         lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
+#             col = denscol)
+#         axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
+#         title("Color Key\nand Histogram")
+#         par(cex = 0.5)
+#         mtext(side = 2, "Count", line = 2)
+#     }
+#     else title("Color Key")
+# }
+# else plot.new()
+retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
+high = retval$breaks[-1], color = retval$col)
+invisible(retval)
+}
+#rm(list=ls())
+# ---- Driver script --------------------------------------------------------
+# Loads the run parameters, reads and bins the ChIP peak scores, optionally
+# reads / normalizes / bins RNA expression, k-means clusters the combined
+# matrix, writes the cluster membership file and renders the heatmap(s) into
+# a .bmp file named after param$heatmap.document.name.
+param <- list()
+param$debug <- FALSE #T
+print("Finished loading functions")
+if (param$debug) {
+  param <- LoadDebugParams(param)
+} else {
+  param <- LoadReqiredParams(param)
+  param <- LoadOptionalParams(param)
+}
+print(param)
+Rprof() #Remove me
+# Quantile break points shared by every binning call below.
+bin.quantiles <- seq(0, 0.9, length.out = param$number.bins + 1)
+# Load Chip data, already in pval format.
+# column.order needs to be a vector of strings that corresponds to the order
+# (and inclusion) of chip experiments.
+chip <- GetChipData(param$annotated.macs.file, column.order = param$chip.order)
+#Get the top percentages on different criteria
+#ix.top <- GetTopRowsFromMatrix(chip$peaks, percentage = param$filter.percentage)
+#chip$peaks <- chip$peaks[ix.top$union, ]
+#chip$targets <- chip$targets[ix.top$union]
+#Bin data:
+chip$peaks <- bindata.non.zero.matrix(chip$peaks, qunts = bin.quantiles)
+# RNA file names and their labels arrive as "::"-delimited strings.
+file.names <- unlist(strsplit(param$rna.files, split = "::"))
+print(file.names)
+file.lables <- unlist(strsplit(param$rna.names, split = "::"))
+print(file.lables)
+library("RColorBrewer")
+color.2 <- rev(colorRampPalette(c("darkblue", "steelblue",
+                                  "lightgrey"))(param$number.bins))
+# Scalar flag: TRUE when at least one RNA file was supplied. Only the first
+# entry is inspected, so several files no longer feed a vector into `&&`
+# (an error in R >= 4.3) and an empty list falls through to the chip-only path.
+has.rna <- length(file.names) > 0 &&
+  !is.na(file.names[1]) &&
+  file.names[1] != "none"
+if (has.rna) {
+  #rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
+  #do differential expression if the user specifies such:
+  if (param$rna.normalization != "no") {
+    if (param$rna.normalization != "mean") {
+      # Normalize against the user-supplied normalization file(s).
+      norm.file.names <- unlist(strsplit(param$rna.normalization.file,
+                                         split = "::"))
+      all.file.names <- c(file.names, norm.file.names)
+      # NOTE(review): normalization files are labelled by their file names;
+      # confirm this is what NormalizeRNA expects.
+      all.file.lables <- c(file.lables, norm.file.names)
+      print(all.file.names)
+      print(all.file.lables)
+      rna.scores <- GetRNAData(all.file.names, file.lables = all.file.lables,
+                               fpkm = "avg")
+      rna.scores <- NormalizeRNA(scores = rna.scores)
+      # Keep the sign separately, bin the magnitudes, then restore the sign.
+      # 0/0 yields NaN, which is mapped to a sign of 0.
+      rna.scores.sign <- rna.scores / abs(rna.scores)
+      rna.scores.sign[is.nan(rna.scores.sign)] <- 0
+      rna.scores <- bindata.non.zero.matrix(abs(rna.scores),
+                                            qunts = bin.quantiles)
+      rna.scores <- rna.scores.sign * rna.scores
+      color.2 <- colorRampPalette(c("darkblue", "steelblue", "lightgrey",
+                                    "pink", "darkred"))(param$number.bins)
+    } else {
+      print("we are normalizing by average")
+      print("file lables are:")
+      print(file.lables)
+      rna.scores <- GetRNAData(file.names, file.lables = file.lables,
+                               fpkm = "avg")
+      # Divide every row by its own mean; rows whose mean is 0 are left as is.
+      rna.scores <- t(apply(rna.scores, 1, function(x) {
+        if (mean(x) != 0) {
+          return(x / mean(x))
+        } else {
+          return(x)
+        }
+      }))
+      rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = bin.quantiles)
+    }
+  } else {
+    rna.scores <- GetRNAData(file.names, file.lables = file.lables,
+                             fpkm = "avg")
+    rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = bin.quantiles)
+  }
+  # chip$peaks <- chip$peaks[order(chip$targets), ]
+  # rna.scores <- rna.scores[order(rownames(rna.scores)), ]
+  # chip$targets <- chip$targets[order(chip$targets)]
+  rna.scores.mapped <- MapExpressiontoChip(chip.targets = chip$targets,
+                                           expression = rna.scores)
+  all.data <- cbind(chip$peaks, rna.scores.mapped)
+  if (param$include.targetless != "yes") {
+    # Logical mask instead of x[-which(...), ]: when no target is empty,
+    # -which(...) is integer(0) and would silently drop EVERY row.
+    # drop = FALSE keeps one-column matrices as matrices for ncol() below.
+    has.target <- !(chip$targets %in% "")
+    all.data <- all.data[has.target, , drop = FALSE]
+    chip$peaks <- chip$peaks[has.target, , drop = FALSE]
+    rna.scores.mapped <- rna.scores.mapped[has.target, , drop = FALSE]
+    chip$targets <- chip$targets[has.target]
+  }
+} else {
+  all.data <- chip$peaks
+}
+print("Clustering")
+set.seed(1234)
+km <- kmeans(all.data, param$clustering.number.of.clusters, iter.max = 50)
+print("Ordering")
+km.order <- GenerateKMOrder(all.data, km)
+##AM edits##
+# Relabel the clusters so that cluster i is the i-th cluster in km.order;
+# afterwards the display order is simply 1..k.
+km.new.order <- numeric(length(km$cluster))
+for (i in seq_along(km.order)) {
+  cur.clst <- km.order[i]
+  ix <- which(km$cluster == cur.clst)
+  km.new.order[ix] <- i
+}
+km.order <- seq_along(km.order)
+km$cluster <- km.new.order
+## Done AM ##
+print("Creating image")
+#bmp(param$heatmap.document.name, 640, 480)#1280, 960)
+bmp(paste0(param$heatmap.document.name, ".bmp"), 5120, 3840)#2560, 1920)
+#pdf(paste(param$heatmap.document.name,".pdf", sep = ""))
+color.1 <- c("#000000",
+             colorRampPalette(c("blue", "yellow", "orange",
+                                "red"))(param$number.bins))
+if (has.rna) {
+  PrintClusters(trgts = chip$targets,
+                k.ix = km$cluster,
+                f.nm = param$cluster.groups.document.name,
+                km.order = NA)
+  #if(param$rna.normalization != "no") {
+  #    data.split <- cbind(chip$peaks, PrepareRNAforHeatmap(rna.scores.mapped))
+  #} else {
+  #    data.split <- cbind(chip$peaks, rna.scores.mapped)
+  #}
+  expression.width.multiplier <- 2
+  if (ncol(rna.scores.mapped) == 1) {
+    # A single expression column is duplicated before plotting and the
+    # width multiplier is reduced to compensate.
+    rna.scores.mapped <- cbind(rna.scores.mapped, rna.scores.mapped)
+    expression.width.multiplier <- 1
+  }
+  # Panels: 1 left margin, 2 top margin, 3 chip heatmap, 4 rna heatmap,
+  # 5-9 right column (spacers and the two colour legends).
+  layout(matrix(c(1, 2, 2, 5,
+                  1, 3, 4, 6,
+                  1, 3, 4, 7,
+                  1, 3, 4, 8,
+                  1, 3, 4, 9),
+                5, 4,
+                byrow = TRUE),
+         widths = c(1,
+                    2 * ncol(chip$peaks),
+                    expression.width.multiplier * ncol(rna.scores.mapped),
+                    1),
+         heights = c(1, 10, 2, 10, 2))
+  #1
+  plot.new()
+  #2
+  plot.new()
+  #3
+  print("Creating peak image")
+  CreateIndividualHeatMap(chip$peaks,
+                          km,
+                          km.order, color.ramp = color.1)
+  #4
+  print("Creating rna image")
+  CreateIndividualHeatMap(rna.scores.mapped,
+                          km,
+                          km.order, color.ramp = color.2)
+  #5
+  plot.new()
+  #6
+  image(t(matrix(1:param$number.bins, ncol = 1)), col = color.1)
+  #7
+  plot.new()
+  #8
+  image(t(matrix(1:param$number.bins, ncol = 1)), col = color.2)
+  #9
+  plot.new()
+} else {
+  PrintClusters(trgts = 1:nrow(chip$peaks),
+                k.ix = km$cluster,
+                f.nm = param$cluster.groups.document.name,
+                km.order = NA)
+  # Panels: 1 left margin, 2 top margin, 3 chip heatmap, 4 legend, 5 spacer.
+  layout(matrix(c(1, 2, 2,
+                  1, 3, 4,
+                  1, 3, 5), 3, 3, byrow = TRUE),
+         widths = c(1, 3 * ncol(chip$peaks), 1),
+         heights = c(1, 8, 1))
+  #1
+  plot.new()
+  #2
+  plot.new()
+  #3
+  CreateIndividualHeatMap(chip$peaks,
+                          km,
+                          km.order, color.ramp = color.1)
+  #4
+  image(t(matrix(1:param$number.bins, ncol = 1)), col = color.1)
+  #5
+  plot.new()
+}
+dev.off()
+# Stop profiling (started by Rprof() above) and print the summary.
+Rprof(NULL)
+profile <- summaryRprof()
+print(str(profile))
+print(profile)
+#save.image("test/testData.RData")

Mercurial > repos > kmace > mtls_analysis

comparison mtls_analyze/heatmap.R @ 4:b465306d00ba draft default tip