Mercurial > repos > kmace > mtls_analysis

diff mtls_analyze/heatmap.R @ 4:b465306d00ba draft default tip
Uploaded
author: kmace
date: Mon, 23 Jul 2012 13:00:15 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mtls_analyze/heatmap.R	Mon Jul 23 13:00:15 2012 -0400
@@ -0,0 +1,1524 @@
+GeneratePeakMatrix <- function(experiments, scores) {
+  # Generates a score matrix for all mtls.
+  #
+  # Args:
+  #   experiments: A list of underscore deliminated experiments for each mtl.
+  #                There should never be a completely empty item in this list.
+  #                for eg. experiments[1] = expA_expD_expB.
+  #   scores: A list of underscore deliminated scores for each mtl, the length
+  #           of these scores should be identicle to the length of the
+  #           experiments. eg. scores[1] = 55_33_245.
+  #
+  # Returns:
+  #   The peak score matrix for all mtls.
+  experiments <- sapply(experiments, function(x) strsplit(x, split = "_"))
+  scores <- sapply(scores, function(x) strsplit(x, split = "_"))
+  unique.experiments <- unique(unlist(experiments))
+
+  peaks=matrix(0,nr=length(experiments),nc=length(unique.experiments))
+  colnames(peaks) <- unique.experiments
+
+  for(i in 1:length(experiments)){
+    for(j in 1:length(experiments[[i]])){
+      peaks[i,experiments[[i]][j]]=as.numeric(scores[[i]][j])
+    }
+  }
+  return(peaks)
+##################################################################
+}
+GetChipData <- function(file.name,
+                        proximity = "distal",
+                        include.targetless = TRUE,
+                        column.order = NA) {
+  # Reads in, filters, and organizes mtls data
+  #
+  # Args:
+  #   file.name: The path to the mtls file (including the file name).
+  #   proximity: Either "distal" or "proximal", defines the gene target distance
+  #              from the mtl. Default is "distal".
+  #   include.targetless: If TRUE, includes mtls with no targets (applied after
+  #                       the proximity filter); if not, mtls with no target
+  #                       will be exclided. Default is TRUE.
+  #   column.order: An optional vector of column names in the order in which
+  #                 they will be presented. If this is left to default (NA) the
+  #                 presented order of the chip columns will be the order in
+  #                 which they are seen.
+  #
+  # Returns:
+  #   An organized list of mtls data with the following elements:
+  #     $peaks - a matrix of peak p-values
+  #     $targets - a list of underscore deliminated gene targets for each mtl
+  if(param$debug) {
+    print("In GetChipData")
+  }
+  # Set Constants for the mtls file type:
+  MTLNUM <- "mtl.id"
+  CHROMOSOME <- "chr"
+  EXPERIMENTS <- "expt"
+  EXPERIMENTS.SORTED <- "expt.alphanum.sorted"
+  START <- "start"
+  END <- "end"
+  SUMMIT <- "summit"
+  SCORES <- "score"
+  SCORE.MEAN <- "score.mean"
+  SPANS <- "span.tfs"
+  SPAN.TOTAL <- "span.l"
+  PEAK.IDS <- "peak.ids"
+  TARGETS <- "trgt"
+#  PROXIMAL.TARGETS <- "trgt.prox"
+#  DISTAL.TARGETS <- "trgt.dist"
+#  PROXIMAL.TARGETS.TSS.DISTANCE <- "dtss.prox"
+#  DISTAL.TARGETS.TSS.DISTANCE <- "dtss.dist"
+
+
+  #Get chip data in from files:
+  #TARGETS <- switch(proximity,
+  #                   distal = DISTAL.TARGETS,
+  #                   proximal = PROXIMAL.TARGETS,
+  #                   stop(" Bad proximity argument supplied."))  # Default
+cat("param$rna.files = ", param$rna.files, "\n")
+if(!is.na(param$rna.files) && param$rna.files != "none") {
+  keep.columns <- c(EXPERIMENTS, SCORES, TARGETS)
+} else {
+  keep.columns <- c(EXPERIMENTS, SCORES)
+}
+  file <- read.delim(file.name, header=T, as.is=T)[, keep.columns]
+if(!is.na(param$rna.files) && param$rna.files != "none") {
+  if (!include.targetless) {
+    ix.has.trgt <- which(as.character(file[, TARGETS])!="")
+    file <- file[ix.has.trgt, ]
+  }
+}
+  chip = list()
+
+  chip$peaks <- GeneratePeakMatrix(file[, EXPERIMENTS], file[, SCORES])
+  if(!is.na(column.order)) {
+  #If you specify an order, or a subset of experiments to include, this is where
+  #that gets done.
+  order <- unlist(strsplit(column.order, split="::"))
+  chip$peaks <- chip$peaks[,order]
+  }
+  if(!is.na(param$rna.files) && param$rna.files != "none") {
+  chip$targets <- file[ , TARGETS]
+}
+  
+  return (chip)
+}
+GetRNADataFromOneFile <- function(file.name, fpkm = "avg") {
+  # Reads in rna expression data from cufflinks
+  #
+  # Args:
+  #   file.name: The path to the cufflinks file (including the file name).
+  #   fpkm: What fpkm is chosen, options are hi, low and avg
+  # Returns:
+  #   An organized vector of rnaseq data with the following elements:
+  #     names(data) - the genes from the file. eg names(data[1]) = JUND
+  #     data - the fpkm score for that gene data[1] = 31 
+  if(param$debug) {
+    cat("In GetRNADataFromOneFile for ",file.name)
+  }
+
+ # Set Constants for the cufflinks file type:
+  GENE <- "gene_id"
+  BUNDLE <- "bundle_id"
+  CHROMOSOME <- "chr"
+  LEFT <- "left"
+  RIGHT <- "right"
+  FPKM_AVG <- "FPKM"
+  FPKM_LOW <- "FPKM_conf_lo"
+  FPKM_HIGH <- "FPKM_conf_hi"
+  STATUS <- "status"
+
+  
+  #Get chip data in from files:
+   FPKM <- switch(fpkm,
+                  hi = FPKM_HIGH,
+                  avg = FPKM_AVG,
+                  low = FPKM_LOW,
+                  stop(" Bad fpkm quality argument supplied."))  # Default
+  
+  keep.columns <- c(GENE, FPKM) 
+  
+  file <- read.delim(file.name, header=T, as.is=T)[, keep.columns]
+  
+  rna = vector(mode = "numeric", length = nrow(file))
+  
+  rna <- file[, FPKM]
+  names(rna) <- file[ , GENE] 
+  return (rna)
+}
+
+GetRNAData <- function(file.names, file.lables = file.names, fpkm = "avg") {
+  # Reads in rna expression data from cufflinks
+  #
+  # Args:
+  #   file.names: The list of paths to the cufflinks files (including the file name).
+  #   fpkm: What fpkm is chosen, options are hi, low and avg
+  # Returns:
+  #   A matrix of rnaseq data with the following description:
+  #     each col corresponds to an rnaseq run
+  #     eacg row corresponds to a gene 
+  if(param$debug) {
+    print("In GetRNAData")
+  }
+  files <- list()
+  for(i in 1:length(file.names)) {
+  files[[i]] <-GetRNADataFromOneFile(file.names[i])
+  }
+  genes <- unique(names(unlist(files)))
+  scores <- matrix(0,nrow=length(genes),ncol=length(file.names))
+  rownames(scores) <- genes
+  colnames(scores) <- file.names
+
+  for(j in 1:length(file.names)) {
+    scores[names(files[[j]]),j] <- files[[j]]
+  }
+print("# of cols for scores is ")
+print(ncol(scores))
+print ("file lables are ")
+print (file.lables)
+  colnames(scores) <- file.lables
+  return(scores)
+  #scores[genes,file.names] <- files[[]]
+}
+
+NormalizeRNA <- function(scores) {
+
+#add psudocount
+scores <- scores+1
+
+numerator <- scores[,1:(ncol(scores)/2)]
+denominator <- scores[,(ncol(scores)/2 + 1):ncol(scores)]
+
+#new.scores <- scores[,-which(colnames(scores) == norm.exp)]
+#norm.score <- scores[,which(colnames(scores) == norm.exp)]
+#return(log2(new.scores/norm.score))
+
+return(log2(numerator/denominator))
+  
+}
+
+PrepareRNAforHeatmap <- function(new.scores.foldchange){
+new.scores.split <- matrix(0, nr=nrow(new.scores.foldchange), nc=2*ncol(new.scores.foldchange))
+is.even <- function(x){ x %% 2 == 0 }
+corresponding.col <- function(x){ceiling(x/2)}
+new.col.names <- vector(length = ncol(new.scores.split))
+rownames(new.scores.split) <- rownames(new.scores.foldchange)
+  for(i in 1:ncol(new.scores.split)) {
+	if(is.even(i)) {sign <- "down"} else {sign <- "up"}
+  new.col.names[i] <- paste(colnames(new.scores.foldchange)[corresponding.col(i)], 
+					sign, sep =".")
+	new.scores.split[,i] <- new.scores.foldchange[,corresponding.col(i)]
+	if(is.even(i)) {
+		new.scores.split[which(new.scores.split[,i]>0),i] <- 0
+		new.scores.split[,i] <- -1*new.scores.split[,i]
+	} else {
+		new.scores.split[which(new.scores.split[,i]<0),i] <- 0
+	}
+	colnames(new.scores.split) <- new.col.names
+}
+return(new.scores.split)}
+
+#ConvertExpressionToPval = function(expression, threshold = 1)
+#{
+#    print("In ConvertExpressionToPval")
+#    #Generate Normal Stats
+#    # expression.mean = apply(expression, 2, mean)
+#    # expression.sd = apply(expression, 2, sd)
+#    
+#    ix.above.threshold = which(expression > threshold)
+#    expression.mean = apply(expression, 2, function(i) mean(i[which(i > threshold)]))
+#    expression.sd = apply(expression, 2, function(i)     sd(i[which(i > threshold)]))
+#    
+
+#    # CDF from zero to point
+#    # expression.pval = matrix(,nr=nrow(expression),nc=ncol(expression),dimnames=dimnames(expression))
+#    # ix.low = which(expression < expression.mean)
+#    # ix.upper = which(expression >= expression.mean)
+#    # expression.pval[ix.low] = (-10 * pnorm(expression[ix.low], mean = expression.mean, sd = expression.sd, log=T) *log10(exp(1)))
+#    
+#    expression.pval = (-10 * log10(exp(1)) * pnorm(expression, 
+#                                                    mean = expression.mean, 
+#                                                    sd = expression.sd, 
+#                                                    log=T, 
+#                                                    lower.tail=F
+#    ) 
+#    )
+#    #squash those under threshold to zero
+#    expression.pval[-ix.above.threshold] = 0
+
+#    # correct for upper tail (ie from point to inf)
+#    #Still need to do
+#    return (expression.pval)
+#}
+MapExpressiontoChip2 = function(chip.targets, expression)
+{
+mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression))
+rownames(mymatrix) <- chip.targets
+colnames(mymatrix) <- colnames(expression)
+
+
+j <- 1 #expression counter
+i <- 1 #mymatrix counter
+expression.genes <- rownames(expression)
+previous.expression.gene <- expression.genes[j]
+
+# Progress bar:
+total <- length(chip.targets)
+# create progress bar
+pb <- txtProgressBar(min = 0, max = total, style = 3)
+
+for ( i in 1:length(chip.targets)) { 
+  current.target <- chip.targets[i]
+  setTxtProgressBar(pb, i)
+  while (current.target > previous.expression.gene && j<=length(expression.genes)) {
+    j <- j + 1
+    previous.expression.gene <- expression.genes[j]
+  }
+  if (current.target == previous.expression.gene){
+    mymatrix[i,] <- expression[j,]
+  }
+}
+
+close(pb)
+
+           print("Leaving MapRNAtoChip")
+           return(mymatrix)
+}
+
+
+MapExpressiontoChip = function(chip.targets, expression)
+{
+    print("In MapRNAtoChip")
+    # targets = unlist(strsplit(chip$targets[i], "_"))
+    #   exp = matrix(rna$all.pval[targets, ])   
+    #   return (apply(exp, 2, mean))
+    
+    #rna col 1 = th0 overexpressed  
+    #rna col 2 = th17 overexpressed
+    #rna col 3 = true zscores
+
+
+
+
+
+
+    ####################################################################
+    chip.targets.notnull.unique <- unique(chip.targets[which(chip.targets!="")])
+    gene.intersect <- intersect(rownames(expression), chip.targets.notnull.unique)
+    chip.targets.notnull.unique.with.data <- chip.targets.notnull.unique[which(chip.targets.notnull.unique %in% gene.intersect)]
+    expression.useful.data <- expression[which(rownames(expression) %in% gene.intersect),]
+    chip.targets.notnull.unique.with.data.sorted <- chip.targets.notnull.unique.with.data[order(chip.targets.notnull.unique.with.data)]
+    expression.useful.data.sorted <- expression.useful.data[order(rownames(expression.useful.data)),]
+    ####################################################################
+    
+    mymatrix = matrix(0, nr=length(chip.targets), nc=ncol(expression))
+       rownames(mymatrix) <- chip.targets
+       colnames(mymatrix) <- colnames(expression)
+       head(chip.targets.notnull.unique.with.data.sorted)
+       head(rownames(expression.useful.data.sorted))
+       if(!identical(chip.targets.notnull.unique.with.data.sorted, rownames(expression.useful.data.sorted))){
+        stop("We have a serious problem, chip is not alligned to expression")
+      }
+       for(i in 1:length(chip.targets.notnull.unique.with.data.sorted))
+          {
+            mtls.ix <- which(chip.targets == chip.targets.notnull.unique.with.data.sorted[i])
+            for(j in 1:length(mtls.ix)){
+                mymatrix[mtls.ix[j],] <- expression.useful.data.sorted[i,]
+              }
+          }
+           print("Leaving MapRNAtoChip")
+           return(mymatrix)
+    
+   #return( sapply(chip$targets, function(x) apply(rna$all.pval[unlist(strsplit(chip$targets[3], "_")), ], 2, mean)) )
+}
+
+
+
+
+
+GenerateKMOrder <- function(data, km) {
+  # generates the order of clusters from high to low mean values
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered.
+  #   km: The K-means object generated from running kmeans on data. Another
+  #       method could be used so long as it supplies a (km)$cluser list. Must
+  #       have the same length as the number of rows in data
+  #
+  # Returns:
+  #   cluster.order: The order in which the clusters should be displayed.
+
+    km.cluster = km$cluster
+    clusters = unique(km.cluster)
+    clusters.avg = numeric()
+    for(i in clusters) {
+        clusters.avg = c(clusters.avg, mean(data[which(km.cluster == i), ]))
+    }
+    if(param$debug) {
+      print ("straight clusters")
+      print (clusters)
+      print ("straigth average")
+      print (clusters.avg)
+      print ("ordered clusters")
+      print (clusters[order(clusters.avg)])
+      print("ordered average")
+      print (clusters.avg[order(clusters.avg)])
+    }
+    return(clusters[rev(order(clusters.avg))])
+}
+
+OrderMTL <- function(data, km, cluster.order) {
+  # Orders a matrix of data according to a clustering algorithm
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered.
+  #   km: The K-means object generated from running kmeans on data. Another
+  #       method could be used so long as it supplies a (km)$cluser list. Must
+  #       have the same length as the number of rows in data
+  #   cluster.order: The order in which the clusters should be displayed. 
+  #                  for eg. km.order = c(2, 3, 1) would result in cluster 2 
+  #                  being on top, then cluster 3 and lastly cluster 1.
+  #
+  # Returns:
+  #   a list that contains 3 objects:
+  #     list$data: the ordered version of the data.
+  #     list$color.vector: a list of colors that should be assigned to each row.
+  #     list$start.row: the starting row of each cluster in data.
+  number.clusters <- length(cluster.order)
+  cluster.colors <- sample(rainbow(number.clusters))
+  
+  # Set up return objects
+  sorted.data <- matrix(,nr=nrow(data), nc=ncol(data))
+  colnames(sorted.data) <- colnames(data)
+  cluster.color.vector = vector(length=length(km$cluster))
+  cluster.start.row = numeric(number.clusters)
+  cluster.start.row[1]=1
+  
+  for (i in 1:number.clusters)
+  {
+      current.cluster = cluster.order[i]
+      ix = which(km$cluster == current.cluster)
+      current.cluster.range <- cluster.start.row[i]:(cluster.start.row[i]+length(ix)-1)
+      sorted.data[current.cluster.range, ] = data[ix, ]
+      cluster.color.vector[current.cluster.range] = cluster.colors[i]
+      cluster.start.row[i+1] = (cluster.start.row[i]+length(ix))  
+  }
+  ret.list = list()
+  ret.list$data = sorted.data
+  ret.list$color.vector = cluster.color.vector
+  ret.list$cluster.index = cluster.start.row
+  return(ret.list)
+}    
+
+
+CreateHeatMap <- function(data,
+                         km,
+                         cluster.order, 
+                         document.name,
+                         document.type = "png",
+                         number.colors = 30) {
+  # Generates a heatmap image based for a matrix based on a clustering 
+  # algorithm.
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered, the column
+  #         names of this matrix will become the column titels of the heatmap.
+  #   km: The K-means object generated from running kmeans on data. Another
+  #       method could be used so long as it supplies a (km)$cluser list.
+  #   cluster.order: The order in which the clusters should be displayed. 
+  #                  for eg. km.order = c(2, 3, 1) would result in cluster 2 
+  #                  being on top, then cluster 3 and lastly cluster 1.
+  #   document.name: A name for the produced file. there is no need to
+  #                  supply the .png/.pdf in your argument.
+  #   document.type: The type of file you want to produce. current options are
+  #                  png and pdf. Default is pdf.
+  #
+  # Returns:
+  #   Nothing to the script that calls it, however it creates an image at the 
+  #   path specified.
+  if(param$debug) {
+    print("In CreateHeatMap")    
+  }
+  
+  data.ordered <- OrderMTL(data, km, cluster.order)
+    
+	
+	#Load Lib
+	library(gplots)
+	
+	#Set Color gradient
+	color.ramp = colorRampPalette(c("black",
+	                                "darkblue",
+	                                "blue",
+	                                "yellow",
+	                                "orange",
+	                                "red"))(number.colors) #7
+	
+	if(document.type == "png") {
+          png(paste(document.name,".png", sep = ""),
+              #width=15360,#1920,
+              #height=204080,#2560,
+              #res=500,
+              antialias="gray")
+        }
+        else if(document.type == "pdf") {
+        pdf(paste(document.name,".pdf", sep = ""))
+        }
+        else if(document.type == "tiff") {
+        tiff(paste(document.name,".tiff", sep = ""),
+        res=800,
+        pointsize=2,
+        width=1920,
+        height=1920)
+        }
+        else {
+        bitmap(paste(document.name,".bmp", sep = ""),
+        height = 5,
+        width = 5,
+        res = 500)
+        }
+#op <- par(mar = rep(0, 4))
+	
+	heatmap.2(
+		data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))],
+		rowsep = data.ordered$cluster.index[-1],
+		sepwidth = c(0.5, ncol(data)/100), 
+		dendrogram = "none",
+		Rowv = F,
+		Colv = F,
+		trace = "none",
+		labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString),
+		labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)), 
+		RowSideColors = data.ordered$color.vector, 
+		keysize=0.6,
+		key=F,
+		col = color.ramp,
+		cexCol = 0.8,
+		cexRow = 0.8)
+  
+	dev.off()  
+
+}
+
+CreateIndividualHeatMap <- function(data,
+                         km,
+                         cluster.order,
+                         color.ramp = colorRampPalette(c("black",
+                                  "darkblue",
+                                  "blue",
+                                  "yellow",
+                                  "orange",
+                                  "red"))(30)) {
+  # Generates a heatmap image based for a matrix based on a clustering 
+  # algorithm.
+  #
+  # Args:
+  #   data: A matrix of scores that have already been clustered, the column
+  #         names of this matrix will become the column titels of the heatmap.
+  #   km: The K-means object generated from running kmeans on data. Another
+  #       method could be used so long as it supplies a (km)$cluser list.
+  #   cluster.order: The order in which the clusters should be displayed. 
+  #                  for eg. km.order = c(2, 3, 1) would result in cluster 2 
+  #                  being on top, then cluster 3 and lastly cluster 1.
+  #   document.name: A name for the produced file. there is no need to
+  #                  supply the .png/.pdf in your argument.
+  #   document.type: The type of file you want to produce. current options are
+  #                  png and pdf. Default is pdf.
+  #
+  # Returns:
+  #   Nothing to the script that calls it, however it creates an image at the 
+  #   path specified.
+  if(param$debug) {
+    print("In CreateHeatMap")    
+  }
+  
+  data.ordered <- OrderMTL(data, km, cluster.order)
+    
+  
+  #Load Lib
+  library(gplots)
+  
+
+  
+
+  heatmap.3(
+    data.ordered$data, #data.ordered$data[,sort(colnames(data.ordered$data))],
+    rowsep = data.ordered$cluster.index[-1],
+    sepwidth = c(0.5, nrow(data.ordered$data)/100), 
+    dendrogram = "none",
+    Rowv = F,
+    Colv = F,
+    trace = "none",
+    labRow = F, #sapply(seq(1:length(data.ordered$cluster.index)), toString),
+    labCol = colnames(data.ordered$data), #sort(colnames(data.ordered$data)), 
+    #sourcRowSideColors = data.ordered$color.vector, 
+    keysize=0.6,
+    key=F,
+    col = color.ramp,
+    cexCol = 0.8,
+    cexRow = 0.8)
+   
+
+}
+
+ReadCommadLineParameters <- function(argument.names, command.line.arguments, optional = F) {
+  # Reads the parameters from the command line arguments.
+  #
+  # Args:
+  #   argument.names: A list of expected argument names.
+  #   command.line.arguments: The list of recieved command line arguments
+  #   optional: Are the areguments optional, or are they required, default is required
+  #
+  # Returns:
+  #   The arguments for argument.names. As strings In that order.
+
+    if(length(grep("--version",command.line.arguments))) {
+        cat("version",script.version,"\n")
+        q()
+    }
+    # Split command line arguments
+    args <- sapply(strsplit(command.line.arguments, " "),function(i) i)
+    vals <- character(length(argument.names))
+    # split cmd.line to key and value pairs
+    for(i in 1:length(argument.names)) {
+        ix <- grep(argument.names[i], args)
+        if(length(ix)>1) {
+            stop("arg ",
+                 argument.names[i],
+                 " used more than once.  Bailing out...\n",
+                 PrintParamError())
+        }
+        else if (length(ix)==0 && !optional) {
+            stop("could not find ",
+                 argument.names[i],
+                 ". Bailing out...\n",
+                 PrintParamError())
+        }
+        else if (length(ix)==0 && optional) {
+        vals[i] <- NA
+        }
+        else {
+        vals[i] <- args[ix+1]
+        }
+    }
+    return(vals)
+}
+
+PrintParamError <- function(){
+  # Prints the usage of the function, shows users what arguments they can use
+  #
+  # Args:
+  #   param: A list that contains all the paramaters.
+  #
+  # Returns:
+  #   A modified version of the param list, with the default values loaded.
+    cat("
+DESCRIPTIION:
+    heatmap.R takes a ...
+
+INPUT:
+    1.--mtls_file: path to mtls file.\n
+    2.--cluster_file: the destination path for the output cluster file.\n
+    3.--chip_experiment_order: The order of desired chip experiments (optional).\n
+    4.--heatmap_file: path for output heatmap image (no extension).\n
+    5.--heatmap_type: choice of image format, currently support png, pdf, tiff and bmp (optional)\n
+    6.--expression_file: list of expression files to be included in analysis (optional).\n
+    7.--expression_name: lables for the expression files (optional).\n
+    8.--normalization_file: a list of files to be used for normalization,
+    they can be the same file, however the number of expression nominated
+    normalization files must match the number of expression files (optional)\n
+    9.--n_clusters: number of clusters\n
+    10.--filter_percentage: percentage of mtls that will be analysed. Eg: if
+    we make filter_percentage 30, we will take the union of the top mtls in
+    mean, non-zero mean and variance (optional).\n
+
+EXAMPLE RUN:
+    Rscript heatmap.R
+        --mtls_file path/to/mtls.xls
+        --cluster_file path/to/output/cluster
+        --chip_experiment_order tf1::tf2::tf5::tf3
+        --heatmap_file path/to/output/heatmap
+        --heatmap_type png
+        --expression_file path/to/exp1::path/to/exp2
+        --expression_name myexp1::myexp2
+        --normalization_file path/to/exp3::path/to/exp3
+        --n_clusters 13
+        --filter_percentage 100
+        --include_targetless yes
+        --number_bins 30
+
+    ")
+}
+
+#LoadDefaultParams <- function(param) {
+#  # Loads default paramaters for the heatmap application
+#  #
+#  # Args:
+#  #   param: A list that contains all the previous paramaters.
+#  #
+#  # Returns:
+#  #   A modified version of the param list, with the default values loaded.
+
+#script.version=0.1
+#param$debug = F
+
+## RNA data:
+#param$rna.files = ""
+#param$rna.normalization = "none"
+
+## Filter:
+#param$filter.percentage <- 100
+
+## Clustering:
+#param$clustering.number.of.clusters <- 13
+
+## Heatmap:
+#param$heatmap.document.name <- "heatmap"
+#param$heatmap.document.type <- "png"
+##Cluster Groups:
+#param$cluster.groups.document.name <- "clusters"
+
+#return(param)
+
+#}
+
+
+
+LoadParams <- function(cmd.args, args.nms, n.start.numeric, optional = F) {
+  # Loads user defined paramaters for the heatmap application
+  #
+  # Args:
+  #   cmd.args: The command line arguments given.
+  #   arg.nms: A list of possible command arguments.
+  #   n.start.numeric: the first argument that should be numeric (alway put
+  #                        these last).
+  #   optional: This specifies if the params in cmd.args are optional or
+  #             required.
+  # Returns:
+  #   A list of values assigned to each argument.
+
+
+  vals <- ReadCommadLineParameters(argument.names = args.nms,
+                                 command.line.arguments = cmd.args,
+                                 optional = optional)
+
+   #check if numeric params are indeed numeric
+  if(!optional) {
+  for(i in n.start.numeric:length(vals)){
+    if(is.na(as.numeric(vals[i]))){
+      stop("arg ",args.nms[i]," is not numeric.  Bailing out...\n",print.error())
+    }
+  }
+}
+  return (vals)
+
+}
+
+#ValidateParams <- function(params) {
+#    return(T)
+#}
+
+
+##########
+
+LoadDebugParams <- function(param) {
+
+    cmd.args <- c(
+      "--mtls_file data/test/mtls.xls",
+      "--cluster_file data/test/cluster",
+      "--heatmap_file data/test/heatmap",
+      "--heatmap_type bmp",
+      "--n_clusters 13",
+      "--filter_percentage 100",
+      "--expression_file /home/kieran/code/scorchR-heatmap/data/expression/rna1.tabular::/home/kieran/code/scorchR-heatmap/data/expression/rna2.tabular",
+      "--expression_name batf.ko.0::batf.ko.17", 
+      "--normalization_file mean",
+      "--chip_experiment_order ac::bc::cc::dc::ec",
+      "--include_targetless yes",
+      "--number_bins 20"
+    )
+
+args.nms <- c(            "--mtls_file",        #1
+                "--cluster_file",     #2
+                "--chip_experiment_order",    #3
+                "--heatmap_file",     #4
+                "--heatmap_type",         #5
+                "--expression_file",    #6
+                "--expression_name",    #7
+                "--normalization_file",    #8
+                "--include_targetless",    #9
+                 "--n_clusters",     #10
+                "--filter_percentage", #11
+                "--number_bins")    #12
+
+
+# vals has the same order as args.nms
+vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 10, optional = F)
+
+
+# ChIP data:
+param$annotated.macs.file <- vals[1]
+param$chip.order <- vals[3]
+# RNA data:
+param$rna.files = vals[6]
+param$rna.names = vals[7]
+param$rna.normalization.file = vals[8]
+param$include.targetless = vals[9]
+
+# Filter:
+param$filter.percentage <- as.numeric(vals[11])
+
+# Clustering:
+param$number.bins <- as.numeric(vals[12])
+param$clustering.number.of.clusters <- as.numeric(vals[10])
+
+# Heatmap:
+param$heatmap.document.name <- vals[4]
+param$heatmap.document.type <- vals[5]
+#Cluster Groups:
+param$cluster.groups.document.name <- vals[2]
+
+return(param)
+
+}
+
+
+
+LoadOptionalParams <- function(param) {
+
+cmd.args <- commandArgs(trailingOnly = T)
+
+args.nms <- c(  "--chip_experiment_order",  #1
+                "--expression_file",        #2
+                "--expression_name",        #3
+                "--normalization_file",     #4
+                "--heatmap_type",           #5
+                "--include_targetless",     #6
+                "--filter_percentage",      #7
+                "--number_bins")            #8
+
+
+# vals has the same order as args.nms
+vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 7, optional = T)
+
+# ChIP data:
+param$chip.order <- if(!is.na(vals[1])){vals[1]}else{NA}
+
+# RNA data:
+param$rna.files <- if(!is.na(vals[2])){vals[2]}else{"none"}
+param$rna.names <- if(!is.na(vals[3])){vals[3]}else{"none"}
+param$rna.normalization.file <- if(!is.na(vals[4])){vals[4]}else{"no"}
+param$include.targetless <- if(!is.na(vals[6])){vals[6]}else{"yes"}
+# Filter:
+param$filter.percentage <- if(!is.na(vals[7])){as.numeric(vals[7])}else{100}
+param$number.bins <- if(!is.na(vals[8])){as.numeric(vals[8])}else{30}
+# Heatmap file (output)
+param$heatmap.document.type <- if(!is.na(vals[5])){vals[5]}else{"none"}
+
+return(param)
+}
+
+
+LoadReqiredParams <- function(param){
+
+cmd.args <- commandArgs(trailingOnly = T)
+
+args.nms <- c(  "--mtls_file",          #1
+                "--cluster_file",       #2
+                "--heatmap_file",       #3
+                 "--n_clusters")        #4
+
+
+# vals has the same order as args.nms
+vals <- LoadParams(cmd.args, args.nms, n.start.numeric = 4, optional = F)
+
+# ChIP data:
+param$annotated.macs.file <- vals[1]
+
+# Clustering
+param$clustering.number.of.clusters <- as.numeric(vals[4])
+
+# Cluster file (output):
+param$cluster.groups.document.name <- vals[2]
+
+
+# Heatmap file (output):
+param$heatmap.document.name <- vals[3]
+
+return(param)
+
+}
+
+###########
+# here we output the fasta file of the targets of each kmeans cluster
+CreateClusterGroups <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){
+  f.nm = paste(f.nm, ".fasta", sep="")
+    clusters = km.order
+    for(i in 1:length(clusters)){
+        v=trgts[which(k.ix==clusters[i])]
+        v.split = unlist(sapply(v,strsplit, "_"))
+        if(i == 1){
+            cat(sep="",file=f.nm,">cluster_",i,"\n")#clusters[i],"\n")    
+        } else {
+            cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE)
+        }
+        cat(sep="\n",file=f.nm,v.split,append=TRUE)
+    }
+}
+
+
+PrintClusters <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){
+	f.nm = paste(f.nm, ".tsv", sep="")
+	cat(sep="\t",file=f.nm,"row_number/target","cluster","\n")
+	trgts[which(trgts=="")] <- "no_target"
+    for(i in 1:length(trgts)){
+		cat(sep="",file=f.nm,trgts[i],"\t","cluster_",k.ix[i],"\n",append=TRUE)
+			}
+#  	  if(i == 1){
+#  	    cat(sep="",file=f.nm,"cluster_",i,"\n")#clusters[i],"\n")    
+#  	  } else {
+#  	    cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE)
+#  	  }
+      #cat(sep="\n",file=f.nm,v.split,append=TRUE)
+  	}
+GetTopRowsFromMatrix = function(mtrx, percentage = 10)
+{
+  if (param$debug) {
+    print("In GetTopRowsFromMatrix")
+	}
+	#Store the stats for the mtrx
+	stats = list()
+	stats$mean=apply(mtrx,1,mean)
+	stats$sd=apply(mtrx,1,sd)
+	stats$nonzero.mean=apply(mtrx, 1, function(x) mean(x[which(x != 0)]))
+    
+	
+	#Store the indexes for the mtrx
+	index = list()
+	index$mean = sort(stats$mean, decreasing=T, index.return=T)$ix
+	index$sd   = sort(stats$sd, decreasing=T, index.return=T)$ix
+	index$nonzero.mean = sort(stats$nonzero.mean, decreasing=T, index.return=T)$ix
+	
+	#Calculate how many data points we want to take
+	cutoff = floor(length(mtrx[ ,1])*(percentage/100))
+	
+	#Extract Indexes and return to caller
+	index$union = union(index$mean[1:cutoff], union(index$sd[1:cutoff], index$nonzero.mean[1:cutoff]))
+	
+	return(index)
+}
+
+#GetUniq = function(targets.cluster)
+#{
+#    #problem with this function is that it agrigates the list handed to it. after this point you cant maintain order
+#    
+#    return(unique(unlist(lapply(targets.cluster, function(i) strsplit(i, "_")))))
+#    #return(unlist(lapply(targets.cluster, function(i) strsplit(i, "_"))))
+#    
+#}
+
+bindata = function(d,qunts=seq(.4,.9,.1))
+{
+    tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
+    for(j in 1:ncol(d))
+    {
+        bins=quantile(d[,j],qunts)
+            for(i in 1:length(bins))
+            {
+                tmp[which(d[,j]>bins[i]),j] = i
+         }
+    }
+    return(tmp)
+}
+
+bindata.non.zero = function(d,qunts=seq(0,0.9,0.1))
+{
+    tmp=matrix(0,nr=nrow(d),nc=ncol(d))
+    for(j in 1:ncol(d))
+    {
+        ix.non.zero=which(d[,j]!=0)
+        bins=quantile(d[ix.non.zero,j],qunts)
+            for(i in 1:length(bins))
+            {
+                tmp[which(d[,j]>bins[i]),j] = i
+         }
+    }
+    
+    return(tmp)
+}
+bindata.matrix = function(d,qunts=seq(0,0.9,0.1))
+{
+    tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
+    #ix.non.zero=which(d!=0)
+    bins=quantile(d[],qunts)
+    for(i in 1:length(bins))
+    {
+        tmp[which(d>bins[i])] = i
+    }
+    return(tmp)
+}
+bindata.non.zero.matrix = function(d,qunts=seq(0,0.9,0.1))
+{
+    tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
+    ix.non.zero=which(d!=0)
+    bins=quantile(d[ix.non.zero],qunts)
+    for(i in 1:length(bins))
+    {
+        tmp[which(d>bins[i])] = i
+    }
+    return(tmp)
+}
+heatmap.3 <- function (x, Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE, 
+    distfun = dist, hclustfun = hclust, dendrogram = c("both", 
+        "row", "column", "none"), symm = FALSE, scale = c("none", 
+        "row", "column"), na.rm = TRUE, revC = identical(Colv, 
+        "Rowv"), add.expr, breaks, symbreaks = min(x < 0, na.rm = TRUE) || 
+        scale != "none", col = "heat.colors", colsep, rowsep, 
+    sepcolor = "white", sepwidth = c(0.05, 0.05), cellnote, notecex = 1, 
+    notecol = "cyan", na.color = par("bg"), trace = c("column", 
+        "row", "both", "none"), tracecol = "cyan", hline = median(breaks), 
+    vline = median(breaks), linecol = tracecol, margins = c(5, 
+        5), ColSideColors, RowSideColors, cexRow = 0.2 + 1/log10(nr), 
+    cexCol = 0.2 + 1/log10(nc), labRow = NULL, labCol = NULL, 
+    key = TRUE, keysize = 1.5, density.info = c("histogram", 
+        "density", "none"), denscol = tracecol, symkey = min(x < 
+        0, na.rm = TRUE) || symbreaks, densadj = 0.25, main = NULL, 
+    xlab = NULL, ylab = NULL, lmat = NULL, lhei = NULL, lwid = NULL, 
+    ...) 
+{
+    scale01 <- function(x, low = min(x), high = max(x)) {
+        x <- (x - low)/(high - low)
+        x
+    }
+    retval <- list()
+    scale <- if (symm && missing(scale)) 
+        "none"
+    else match.arg(scale)
+    dendrogram <- match.arg(dendrogram)
+    trace <- match.arg(trace)
+    density.info <- match.arg(density.info)
+    if (length(col) == 1 && is.character(col)) 
+        col <- get(col, mode = "function")
+    if (!missing(breaks) && (scale != "none")) 
+        warning("Using scale=\"row\" or scale=\"column\" when breaks are", 
+            "specified can produce unpredictable results.", "Please consider using only one or the other.")
+    if (is.null(Rowv) || is.na(Rowv)) 
+        Rowv <- FALSE
+    if (is.null(Colv) || is.na(Colv)) 
+        Colv <- FALSE
+    else if (Colv == "Rowv" && !isTRUE(Rowv)) 
+        Colv <- FALSE
+    if (length(di <- dim(x)) != 2 || !is.numeric(x)) 
+        stop("`x' must be a numeric matrix")
+    nr <- di[1]
+    nc <- di[2]
+    if (nr <= 1 || nc <= 1) 
+        stop("`x' must have at least 2 rows and 2 columns")
+    if (!is.numeric(margins) || length(margins) != 2) 
+        stop("`margins' must be a numeric vector of length 2")
+    if (missing(cellnote)) 
+        cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
+    if (!inherits(Rowv, "dendrogram")) {
+        if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in% 
+            c("both", "row"))) {
+            if (is.logical(Colv) && (Colv)) 
+                dendrogram <- "column"
+            else dedrogram <- "none"
+            warning("Discrepancy: Rowv is FALSE, while dendrogram is `", 
+                dendrogram, "'. Omitting row dendogram.")
+        }
+    }
+    if (!inherits(Colv, "dendrogram")) {
+        if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in% 
+            c("both", "column"))) {
+            if (is.logical(Rowv) && (Rowv)) 
+                dendrogram <- "row"
+            else dendrogram <- "none"
+            warning("Discrepancy: Colv is FALSE, while dendrogram is `", 
+                dendrogram, "'. Omitting column dendogram.")
+        }
+    }
+    if (inherits(Rowv, "dendrogram")) {
+        ddr <- Rowv
+        rowInd <- order.dendrogram(ddr)
+    }
+    else if (is.integer(Rowv)) {
+        hcr <- hclustfun(distfun(x))
+        ddr <- as.dendrogram(hcr)
+        ddr <- reorder(ddr, Rowv)
+        rowInd <- order.dendrogram(ddr)
+        if (nr != length(rowInd)) 
+            stop("row dendrogram ordering gave index of wrong length")
+    }
+    else if (isTRUE(Rowv)) {
+        Rowv <- rowMeans(x, na.rm = na.rm)
+        hcr <- hclustfun(distfun(x))
+        ddr <- as.dendrogram(hcr)
+        ddr <- reorder(ddr, Rowv)
+        rowInd <- order.dendrogram(ddr)
+        if (nr != length(rowInd)) 
+            stop("row dendrogram ordering gave index of wrong length")
+    }
+    else {
+        rowInd <- nr:1
+    }
+    if (inherits(Colv, "dendrogram")) {
+        ddc <- Colv
+        colInd <- order.dendrogram(ddc)
+    }
+    else if (identical(Colv, "Rowv")) {
+        if (nr != nc) 
+            stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
+        if (exists("ddr")) {
+            ddc <- ddr
+            colInd <- order.dendrogram(ddc)
+        }
+        else colInd <- rowInd
+    }
+    else if (is.integer(Colv)) {
+        hcc <- hclustfun(distfun(if (symm) 
+            x
+        else t(x)))
+        ddc <- as.dendrogram(hcc)
+        ddc <- reorder(ddc, Colv)
+        colInd <- order.dendrogram(ddc)
+        if (nc != length(colInd)) 
+            stop("column dendrogram ordering gave index of wrong length")
+    }
+    else if (isTRUE(Colv)) {
+        Colv <- colMeans(x, na.rm = na.rm)
+        hcc <- hclustfun(distfun(if (symm) 
+            x
+        else t(x)))
+        ddc <- as.dendrogram(hcc)
+        ddc <- reorder(ddc, Colv)
+        colInd <- order.dendrogram(ddc)
+        if (nc != length(colInd)) 
+            stop("column dendrogram ordering gave index of wrong length")
+    }
+    else {
+        colInd <- 1:nc
+    }
+    retval$rowInd <- rowInd
+    retval$colInd <- colInd
+    retval$call <- match.call()
+    x <- x[rowInd, colInd]
+    x.unscaled <- x
+    cellnote <- cellnote[rowInd, colInd]
+    if (is.null(labRow)) 
+        labRow <- if (is.null(rownames(x))) 
+            (1:nr)[rowInd]
+        else rownames(x)
+    else labRow <- labRow[rowInd]
+    if (is.null(labCol)) 
+        labCol <- if (is.null(colnames(x))) 
+            (1:nc)[colInd]
+        else colnames(x)
+    else labCol <- labCol[colInd]
+    if (scale == "row") {
+        retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
+        x <- sweep(x, 1, rm)
+        retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
+        x <- sweep(x, 1, sx, "/")
+    }
+    else if (scale == "column") {
+        retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
+        x <- sweep(x, 2, rm)
+        retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
+        x <- sweep(x, 2, sx, "/")
+    }
+    if (missing(breaks) || is.null(breaks) || length(breaks) < 
+        1) {
+        if (missing(col) || is.function(col)) 
+            breaks <- 16
+        else breaks <- length(col) + 1
+    }
+    if (length(breaks) == 1) {
+        if (!symbreaks) 
+            breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm), 
+                length = breaks)
+        else {
+            extreme <- max(abs(x), na.rm = TRUE)
+            breaks <- seq(-extreme, extreme, length = breaks)
+        }
+    }
+    nbr <- length(breaks)
+    ncol <- length(breaks) - 1
+    if (class(col) == "function") 
+        col <- col(ncol)
+    min.breaks <- min(breaks)
+    max.breaks <- max(breaks)
+    x[x < min.breaks] <- min.breaks
+    x[x > max.breaks] <- max.breaks
+    # if (missing(lhei) || is.null(lhei)) 
+    #     lhei <- c(keysize, 4)
+    # if (missing(lwid) || is.null(lwid)) 
+    #     lwid <- c(keysize, 4)
+    # if (missing(lmat) || is.null(lmat)) {
+    #     lmat <- rbind(4:3, 2:1)
+    #     if (!missing(ColSideColors)) {
+    #         if (!is.character(ColSideColors) || length(ColSideColors) != 
+    #             nc) 
+    #             stop("'ColSideColors' must be a character vector of length ncol(x)")
+    #         lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 
+    #             1)
+    #         lhei <- c(lhei[1], 0.2, lhei[2])
+    #     }
+    #     if (!missing(RowSideColors)) {
+    #         if (!is.character(RowSideColors) || length(RowSideColors) != 
+    #             nr) 
+    #             stop("'RowSideColors' must be a character vector of length nrow(x)")
+    #         lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 
+    #             1), 1), lmat[, 2] + 1)
+    #         lwid <- c(lwid[1], 0.2, lwid[2])
+    #     }
+    #     lmat[is.na(lmat)] <- 0
+    # }
+    # if (length(lhei) != nrow(lmat)) 
+    #     stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
+    # if (length(lwid) != ncol(lmat)) 
+    #     stop("lwid must have length = ncol(lmat) =", ncol(lmat))
+    # op <- par(no.readonly = TRUE)
+    # on.exit(par(op))
+    # layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
+    if (!missing(RowSideColors)) {
+        par(mar = c(margins[1], 0, 0, 0.5))
+        image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
+    }
+    if (!missing(ColSideColors)) {
+        par(mar = c(0.5, 0, 0, margins[2]))
+        image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
+    }
+    par(mar = c(margins[1], 0, 0, margins[2]))
+    x <- t(x)
+    cellnote <- t(cellnote)
+    if (revC) {
+        iy <- nr:1
+        if (exists("ddr")) 
+            ddr <- rev(ddr)
+        x <- x[, iy]
+        cellnote <- cellnote[, iy]
+    }
+    else iy <- 1:nr
+    image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + 
+        c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, 
+        breaks = breaks, ...)
+    retval$carpet <- x
+    if (exists("ddr")) 
+        retval$rowDendrogram <- ddr
+    if (exists("ddc")) 
+        retval$colDendrogram <- ddc
+    retval$breaks <- breaks
+    retval$col <- col
+    if (!invalid(na.color) & any(is.na(x))) {
+        mmat <- ifelse(is.na(x), 1, NA)
+        image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "", 
+            col = na.color, add = TRUE)
+    }
+    axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0, 
+        cex.axis = cexCol)
+    if (!is.null(xlab)) 
+        mtext(xlab, side = 1, line = margins[1] - 1.25)
+    axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0, 
+        cex.axis = cexRow)
+    if (!is.null(ylab)) 
+        mtext(ylab, side = 4, line = margins[2] - 1.25)
+    if (!missing(add.expr)) 
+        eval(substitute(add.expr))
+    if (!missing(colsep)) 
+        for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, 
+            length(csep)), xright = csep + 0.5 + sepwidth[1], 
+            ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, 
+            col = sepcolor, border = sepcolor)
+    if (!missing(rowsep)) 
+        for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 
+            1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 
+            1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, 
+            col = sepcolor, border = sepcolor)
+    min.scale <- min(breaks)
+    max.scale <- max(breaks)
+    x.scaled <- scale01(t(x), min.scale, max.scale)
+    if (trace %in% c("both", "column")) {
+        retval$vline <- vline
+        vline.vals <- scale01(vline, min.scale, max.scale)
+        for (i in colInd) {
+            if (!is.null(vline)) {
+                abline(v = i - 0.5 + vline.vals, col = linecol, 
+                  lty = 2)
+            }
+            xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
+            xv <- c(xv[1], xv)
+            yv <- 1:length(xv) - 0.5
+            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+        }
+    }
+    if (trace %in% c("both", "row")) {
+        retval$hline <- hline
+        hline.vals <- scale01(hline, min.scale, max.scale)
+        for (i in rowInd) {
+            if (!is.null(hline)) {
+                abline(h = i + hline, col = linecol, lty = 2)
+            }
+            yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
+            yv <- rev(c(yv[1], yv))
+            xv <- length(yv):1 - 0.5
+            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+        }
+    }
+    if (!missing(cellnote)) 
+        text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote), 
+            col = notecol, cex = notecex)
+    par(mar = c(margins[1], 0, 0, 0))
+    # if (dendrogram %in% c("both", "row")) {
+    #     plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
+    # }
+    # else plot.new()
+    par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
+    # if (dendrogram %in% c("both", "column")) {
+    #     plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
+    # }
+    # else plot.new()
+    if (!is.null(main)) 
+        title(main, cex.main = 1.5 * op[["cex.main"]])
+    # if (key) {
+    #     par(mar = c(5, 4, 2, 1), cex = 0.75)
+    #     tmpbreaks <- breaks
+    #     if (symkey) {
+    #         max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
+    #         min.raw <- -max.raw
+    #         tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
+    #         tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
+    #     }
+    #     else {
+    #         min.raw <- min(x, na.rm = TRUE)
+    #         max.raw <- max(x, na.rm = TRUE)
+    #     }
+    #     z <- seq(min.raw, max.raw, length = length(col))
+    #     image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks, 
+    #         xaxt = "n", yaxt = "n")
+    #     par(usr = c(0, 1, 0, 1))
+    #     lv <- pretty(breaks)
+    #     xv <- scale01(as.numeric(lv), min.raw, max.raw)
+    #     axis(1, at = xv, labels = lv)
+    #     if (scale == "row") 
+    #         mtext(side = 1, "Row Z-Score", line = 2)
+    #     else if (scale == "column") 
+    #         mtext(side = 1, "Column Z-Score", line = 2)
+    #     else mtext(side = 1, "Value", line = 2)
+    #     if (density.info == "density") {
+    #         dens <- density(x, adjust = densadj, na.rm = TRUE)
+    #         omit <- dens$x < min(breaks) | dens$x > max(breaks)
+    #         dens$x <- dens$x[-omit]
+    #         dens$y <- dens$y[-omit]
+    #         dens$x <- scale01(dens$x, min.raw, max.raw)
+    #         lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol, 
+    #             lwd = 1)
+    #         axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
+    #         title("Color Key\nand Density Plot")
+    #         par(cex = 0.5)
+    #         mtext(side = 2, "Density", line = 2)
+    #     }
+    #     else if (density.info == "histogram") {
+    #         h <- hist(x, plot = FALSE, breaks = breaks)
+    #         hx <- scale01(breaks, min.raw, max.raw)
+    #         hy <- c(h$counts, h$counts[length(h$counts)])
+    #         lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s", 
+    #             col = denscol)
+    #         axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
+    #         title("Color Key\nand Histogram")
+    #         par(cex = 0.5)
+    #         mtext(side = 2, "Count", line = 2)
+    #     }
+    #     else title("Color Key")
+    # }
+    # else plot.new()
+    retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)], 
+        high = retval$breaks[-1], color = retval$col)
+    invisible(retval)
+}
+
+#rm(list=ls())
+param <- list()
+param$debug <- FALSE #T
+
+print("Finished loading functions")
+
+
+if(param$debug) {
+    param <- LoadDebugParams(param)
+} else {
+    param <- LoadReqiredParams(param)
+    param <- LoadOptionalParams(param)
+}
+print (param)
+Rprof() #Remove me
+# Load Chip data, alread in pval format
+# Column.order need to be a vecor of strings, that correspond to the order (and
+# inclustion) of chip experiments.
+chip <- GetChipData(param$annotated.macs.file, column.order = param$chip.order)
+
+#Get the top percentages on different criteria
+#ix.top <- GetTopRowsFromMatrix(chip$peaks, percentage = param$filter.percentage)
+#chip$peaks <- chip$peaks[ix.top$union, ]
+#chip$targets <- chip$targets[ix.top$union]
+#Bin data:
+chip$peaks <- bindata.non.zero.matrix(chip$peaks, qunts = seq(0, 0.9, length=(param$number.bins+1)))
+
+file.names <- unlist(strsplit(param$rna.files, split="::"))
+print(file.names)
+file.lables <- unlist(strsplit(param$rna.names, split="::"))
+print(file.lables)
+
+library("RColorBrewer")
+color.2 =  rev(colorRampPalette(c("darkblue", "steelblue", "lightgrey"))(param$number.bins))
+if(!is.na(file.names) && file.names != "none") {
+    #rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
+    #do differential expression if the user specifies such:
+    if(param$rna.normalization != "no") {
+        if(param$rna.normalization != "mean"){
+            norm.file.names <- unlist(strsplit(param$rna.normalization.file, split="::"))
+            all.file.names <- c(file.names, norm.file.names)
+            all.file.lables <- c(file.lables, norm.file.names)
+            print (all.file.names)
+            print (all.file.lables)
+            rna.scores <- GetRNAData(all.file.names, file.lables = all.file.lables, fpkm = "avg")
+            rna.scores <- NormalizeRNA(scores = rna.scores)
+            rna.scores.sign <- rna.scores/abs(rna.scores)
+            rna.scores.sign[which(is.nan(rna.scores.sign))] <- 0
+            rna.scores <- bindata.non.zero.matrix(abs(rna.scores), qunts = seq(0, 0.9, length=(param$number.bins+1)))
+            rna.scores <- rna.scores.sign * rna.scores
+            color.2 = colorRampPalette(c("darkblue", "steelblue", "lightgrey", "pink", "darkred"))(param$number.bins)
+        } else {
+            print("we are normalizing by average")
+            print("file lables are:")
+            print(file.lables)
+            rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
+            rna.scores <- t(apply(rna.scores,1, function(x) { if(mean(x)!=0){ return(x/mean(x)) } else { return(x) }  }))
+            rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1)))
+        }
+    } else {
+        rna.scores <- GetRNAData(file.names, file.lables = file.lables, fpkm = "avg")
+        rna.scores <- bindata.non.zero.matrix(rna.scores, qunts = seq(0, 0.9, length=(param$number.bins+1)))
+    }
+    
+    # chip$peaks <- chip$peaks[order(chip$targets), ]
+    # rna.scores <- rna.scores[order(rownames(rna.scores)), ]
+    # chip$targets <- chip$targets[order(chip$targets)]
+
+    rna.scores.mapped <- MapExpressiontoChip(chip.targets = chip$targets, expression=rna.scores)
+    all.data <- cbind(chip$peaks, rna.scores.mapped)
+    if(param$include.targetless!="yes"){
+        all.data <- all.data[-which(chip$targets==""),]
+        chip$peaks <- chip$peaks[-which(chip$targets==""),]
+        rna.scores.mapped <- rna.scores.mapped[-which(chip$targets==""),]
+        chip$targets <- chip$targets[-which(chip$targets=="")]
+    }
+} else {
+    all.data <- chip$peaks
+}
+
+print("Clustering")
+set.seed(1234)
+km <- kmeans(all.data, param$clustering.number.of.clusters, iter.max = 50)
+print("Ordering")
+km.order <- GenerateKMOrder(all.data, km)
+
+##AM edits##
+km.new.order <- numeric(length(km$cluster))
+for(i in 1:length(km.order)){
+    cur.clst <- km.order[i]
+    ix <- which(km$cluster==cur.clst)
+    km.new.order[ix] <- i
+}
+km.order <- 1:length(km.order)
+km$cluster <- km.new.order
+## Done AM ##
+print("Creating image")
+#bmp(param$heatmap.document.name, 640, 480)#1280, 960)
+bmp(paste(param$heatmap.document.name,".bmp", sep = ""), 5120, 3840)#2560, 1920)
+#pdf(paste(param$heatmap.document.name,".pdf", sep = ""))
+color.1 <- c("#000000", colorRampPalette(c("blue", "yellow","orange","red"))(param$number.bins))
+if(!is.na(file.names) && file.names != "none") {
+    PrintClusters(trgts=chip$targets,
+        k.ix=km$cluster,
+        f.nm = param$cluster.groups.document.name,
+        km.order = NA)
+
+    #if(param$rna.normalization != "no") {
+    #    data.split <- cbind(chip$peaks, PrepareRNAforHeatmap(rna.scores.mapped))
+    #} else {
+    #    data.split <- cbind(chip$peaks, rna.scores.mapped)
+    #}
+    expression.width.multiplier <- 2
+    if(ncol(rna.scores.mapped)==1) {
+        rna.scores.mapped <- cbind(rna.scores.mapped, rna.scores.mapped)
+        expression.width.multiplier <- 1
+    }
+
+    layout(matrix(c(1,2,2,5,
+        1,3,4,6,
+        1,3,4,7,
+        1,3,4,8,
+        1,3,4,9),
+    5,4,
+    byrow=T),
+    widths=c(1,2*ncol(chip$peaks),expression.width.multiplier*ncol(rna.scores.mapped),1),
+    heights=c(1,10,2,10,2))
+    #1
+    plot.new()
+    #2
+    plot.new()
+    #3
+    print("Creating peak image")
+    CreateIndividualHeatMap(chip$peaks,
+        km,
+        km.order, color.ramp=color.1)
+    #4
+    print("Creating rna image")
+    CreateIndividualHeatMap(rna.scores.mapped,
+        km,
+        km.order, color.ramp=color.2)
+    #5
+    plot.new()
+    #6
+    image(t(matrix(1:param$number.bins,nc=1)), col=color.1)
+    #7
+    plot.new()
+    #8
+    image(t(matrix(1:param$number.bins,nc=1)), col=color.2)
+    #9
+    plot.new()
+} else {
+    PrintClusters(trgts=1:nrow(chip$peaks),
+        k.ix=km$cluster,
+        f.nm = param$cluster.groups.document.name,
+        km.order = NA)
+
+
+    layout(matrix(c(1,2,2,
+        1,3,4,
+        1,3,5),3,3,byrow=T),
+        widths=c(1,3*ncol(chip$peaks),1),
+        heights=c(1,8,1))
+    #1
+    plot.new()
+    #2
+    plot.new()
+    #3
+    CreateIndividualHeatMap(chip$peaks,
+        km,
+        km.order, color.ramp=color.1)
+    #4
+    image(t(matrix(1:param$number.bins,nc=1)), col=color.1)
+    #5
+    plot.new()
+
+}
+
+dev.off()
+Rprof(NULL)
+profile <- summaryRprof()
+print(str(profile))
+print(profile)
+#save.image("test/testData.RData")
author	kmace
date	Mon, 23 Jul 2012 13:00:15 -0400
parents
children