annotate mtls_analyze/heatmap.R @ 0:a903c48f05b4

Added non-integrated scripts to galaxy
author kmace
date Thu, 22 Mar 2012 15:21:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
1 rm(list = ls())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
2
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
3 GeneratePeakMatrix <- function(experiments, scores) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
4 # Generates a score matrix for all mtls.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
5 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
6 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
7 # experiments: A list of underscore deliminated experiments for each mtl.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
8 # There should never be a completely empty item in this list.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
9 # for eg. experiments[1] = expA_expD_expB.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
10 # scores: A list of underscore deliminated scores for each mtl, the length
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
11 # of these scores should be identicle to the length of the
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
12 # experiments. eg. scores[1] = 55_33_245.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
13 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
14 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
15 # The peak score matrix for all mtls.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
16 experiments <- sapply(experiments, function(x) strsplit(x, split = "_"))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
17 scores <- sapply(scores, function(x) strsplit(x, split = "_"))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
18 unique.experiments <- unique(unlist(experiments))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
19
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
20 peaks=matrix(0,nr=length(experiments),nc=length(unique.experiments))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
21 colnames(peaks) <- unique.experiments
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
22
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
23 for(i in 1:length(experiments)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
24 for(j in 1:length(experiments[[i]])){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
25 peaks[i,experiments[[i]][j]]=as.numeric(scores[[i]][j])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
26 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
27 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
28 return(peaks)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
29 ##################################################################
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
30 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
31 GetChipData <- function(file.name,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
32 proximity = "distal",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
33 include.targetless = TRUE) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
34 # Reads in, filters, and organizes mtls data
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
35 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
36 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
37 # file.name: The path to the mtls file (including the file name).
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
38 # proximity: Either "distal" or "proximal", defines the gene target distance
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
39 # from the mtl. Default is "distal".
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
40 # include.targetless: If TRUE, includes mtls with no targets (applied after
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
41 # the proximity filter); if not, mtls with no target
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
42 # will be exclided. Default is TRUE.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
43 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
44 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
45 # An organized list of mtls data with the following elements:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
46 # $peaks - a matrix of peak p-values
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
47 # $targets - a list of underscore deliminated gene targets for each mtl
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
48 if(param$debug) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
49 print("In GetChipData")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
50 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
51 # Set Constants for the mtls file type:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
52 MTLNUM <- "l"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
53 CHROMOSOME <- "chr"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
54 EXPERIMENTS <- "expt"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
55 EXPERIMENTS.SORTED <- "expt.alphanum.sorted"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
56 START <- "start"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
57 END <- "end"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
58 SUMMIT <- "s"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
59 PVALS <- "pval"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
60 PVAL.MEAN <- "pval.mean"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
61 SPANS <- "span.tfs"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
62 SPAN.TOTAL <- "span.l"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
63 PEAK.IDS <- "peak.ids" # TODO(kieran): Find out what this is
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
64 PROXIMAL.TARGETS <- "trgt.prox"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
65 DISTAL.TARGETS <- "trgt.dist"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
66 PROXIMAL.TARGETS.TSS.DISTANCE <- "dtss.prox"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
67 DISTAL.TARGETS.TSS.DISTANCE <- "dtss.dist"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
68
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
69
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
70 #Get chip data in from files:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
71 TARGETS <- switch(proximity,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
72 distal = DISTAL.TARGETS,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
73 proximal = PROXIMAL.TARGETS,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
74 stop(" Bad proximity argument supplied.")) # Default
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
75
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
76 keep.columns <- c(EXPERIMENTS, PVALS, TARGETS)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
77
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
78 file <- read.delim(file.name, header=T, as.is=T)[, keep.columns]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
79
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
80 if (!include.targetless) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
81 ix.has.trgt <- which(as.character(file[, TARGETS])!="")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
82 file <- file[ix.has.trgt, ]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
83 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
84
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
85 chip = list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
86
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
87 chip$peaks = GeneratePeakMatrix(file[, EXPERIMENTS], file[, PVALS])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
88 chip$targets = file[ , TARGETS]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
89 return (chip)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
90 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
91
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
92
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
93
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
94 GenerateKMOrder <- function(data, km) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
95 # generates the order of clusters from high to low mean values
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
96 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
97 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
98 # data: A matrix of scores that have already been clustered.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
99 # km: The K-means object generated from running kmeans on data. Another
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
100 # method could be used so long as it supplies a (km)$cluser list. Must
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
101 # have the same length as the number of rows in data
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
102 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
103 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
104 # cluster.order: The order in which the clusters should be displayed.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
105
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
106 km.cluster = km$cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
107 clusters = unique(km.cluster)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
108 clusters.avg = numeric()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
109 for(i in clusters) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
110 clusters.avg = c(clusters.avg, mean(data[which(km.cluster == i), ]))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
111 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
112 if(param$debug) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
113 print ("straight clusters")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
114 print (clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
115 print ("straigth average")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
116 print (clusters.avg)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
117 print ("ordered clusters")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
118 print (clusters[order(clusters.avg)])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
119 print("ordered average")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
120 print (clusters.avg[order(clusters.avg)])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
121 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
122 return(clusters[rev(order(clusters.avg))])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
123 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
124
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
125 OrderMTL <- function(data, km, cluster.order) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
126 # Orders a matrix of data according to a clustering algorithm
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
127 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
128 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
129 # data: A matrix of scores that have already been clustered.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
130 # km: The K-means object generated from running kmeans on data. Another
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
131 # method could be used so long as it supplies a (km)$cluser list. Must
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
132 # have the same length as the number of rows in data
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
133 # cluster.order: The order in which the clusters should be displayed.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
134 # for eg. km.order = c(2, 3, 1) would result in cluster 2
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
135 # being on top, then cluster 3 and lastly cluster 1.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
136 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
137 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
138 # a list that contains 3 objects:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
139 # list$data: the ordered version of the data.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
140 # list$color.vector: a list of colors that should be assigned to each row.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
141 # list$start.row: the starting row of each cluster in data.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
142 number.clusters <- length(cluster.order)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
143 cluster.colors <- sample(rainbow(number.clusters))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
144
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
145 # Set up return objects
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
146 sorted.data <- matrix(,nr=nrow(data), nc=ncol(data))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
147 colnames(sorted.data) <- colnames(data)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
148 cluster.color.vector = vector(length=length(km$cluster))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
149 cluster.start.row = numeric(number.clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
150 cluster.start.row[1]=1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
151
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
152 for (i in 1:number.clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
153 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
154 current.cluster = cluster.order[i]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
155 ix = which(km$cluster == current.cluster)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
156 current.cluster.range <- cluster.start.row[i]:(cluster.start.row[i]+length(ix)-1)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
157 sorted.data[current.cluster.range, ] = data[ix, ]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
158 cluster.color.vector[current.cluster.range] = cluster.colors[i]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
159 cluster.start.row[i+1] = (cluster.start.row[i]+length(ix))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
160 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
161 ret.list = list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
162 ret.list$data = sorted.data
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
163 ret.list$color.vector = cluster.color.vector
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
164 ret.list$cluster.index = cluster.start.row
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
165 return(ret.list)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
166 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
167
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
168
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
169 CreateHeatMap <- function(data,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
170 km,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
171 cluster.order,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
172 document.name,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
173 document.type = "png",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
174 number.colors = 30) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
175 # Generates a heatmap image based for a matrix based on a clustering
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
176 # algorithm.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
177 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
178 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
179 # data: A matrix of scores that have already been clustered, the column
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
180 # names of this matrix will become the column titels of the heatmap.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
181 # km: The K-means object generated from running kmeans on data. Another
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
182 # method could be used so long as it supplies a (km)$cluser list.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
183 # cluster.order: The order in which the clusters should be displayed.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
184 # for eg. km.order = c(2, 3, 1) would result in cluster 2
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
185 # being on top, then cluster 3 and lastly cluster 1.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
186 # document.name: A name for the produced file. there is no need to
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
187 # supply the .png/.pdf in your argument.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
188 # document.type: The type of file you want to produce. current options are
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
189 # png and pdf. Default is pdf.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
190 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
191 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
192 # Nothing to the script that calls it, however it creates an image at the
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
193 # path specified.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
194 if(param$debug) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
195 print("In CreateHeatMap")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
196 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
197
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
198 data.ordered <- OrderMTL(data, km, cluster.order)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
199
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
200
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
201 #Load Lib
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
202 library(gplots)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
203
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
204 #Set Color gradient
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
205 color.ramp = colorRampPalette(c("black",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
206 "darkblue",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
207 "blue",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
208 "yellow",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
209 "orange",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
210 "red"))(number.colors) #7
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
211
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
212 if(document.type == "png") {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
213 png(paste(document.name,".png", sep = ""),
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
214 width=1920,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
215 height=2560,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
216 res=100,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
217 antialias="gray")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
218 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
219 else {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
220 pdf(paste(document.name,".pdf", sep = ""))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
221 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
222
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
223 heatmap.2(
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
224 data.ordered$data[,sort(colnames(data.ordered$data))],
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
225 rowsep = data.ordered$cluster.index[-1],
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
226 sepwidth = c(0.5, 60),
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
227 dendrogram = "none",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
228 Rowv = F,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
229 Colv = F,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
230 trace = "none",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
231 labRow = F,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
232 labCol = sort(colnames(data.ordered$data)),
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
233 RowSideColors = data.ordered$color.vector,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
234 col = color.ramp,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
235 cexCol = 1.8,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
236 cexRow = 0.8,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
237 useRaster=F)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
238
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
239 dev.off()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
240
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
241 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
242
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
243
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
244
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
245
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
246 ReadCommadLineParameters <- function(argument.names, command.line.arguments) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
247 # Reads the parameters from the command line arguments.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
248 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
249 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
250 # argument.names: A list of expected argument names.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
251 # command.line.arguments: The list of recieved command line arguments
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
252 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
253 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
254 # The arguments for argument.names. As strings In that order.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
255
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
256 if(length(grep("--version",command.line.arguments))){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
257 cat("version",script.version,"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
258 q()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
259 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
260 # Split command line arguments
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
261 args <- sapply(strsplit(command.line.arguments, " "),function(i) i)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
262 vals <- character(length(argument.names))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
263 # split cmd.line to key and value pairs
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
264 for(i in 1:length(argument.names)) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
265 ix <- grep(argument.names[i], args)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
266 if(length(ix)>1) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
267 stop("arg ",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
268 argument.names[i],
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
269 " used more than once. Bailing out...\n",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
270 PrintParamError())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
271 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
272 else if (length(ix)==0) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
273 stop("could not find --",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
274 argument.names[i],
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
275 ". Bailing out...\n",
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
276 PrintParamError())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
277 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
278 vals[i] <- args[ix+1]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
279 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
280 return(vals)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
281 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
282
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
283 PrintParamError <- function(){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
284 # Prints the usage of the function, shows users what arguments they can use
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
285 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
286 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
287 # param: A list that contains all the paramaters.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
288 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
289 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
290 # A modified version of the param list, with the default values loaded.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
291 cat("
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
292 DESCRIPTIION:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
293 heatmap.R takes a ...
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
294
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
295 INPUT:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
296 1.--mtls_file path to mtls file.\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
297 2.--cluster_file the destination path for the cluster file.\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
298 3.--heatmap_file the destination path for heatmap image (no extension).\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
299 4.--heatmap_type choice of image type, currently support png and pdf.\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
300 5.--n_clusters number of clusters in the heatmap\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
301 6.--filter_percentage percentage of mtls that will be analysed. for eg. if
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
302 we make filter_percentage 30, we will take the union of the top mtls in
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
303 mean, non-zero mean and variance.\n
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
304
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
305 EXAMPLE RUN:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
306 Rscript heatmap.R
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
307 --mtls_file mtls.xls
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
308 --cluster_file output/cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
309 --heatmap_file output/heatmap
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
310 --heatmap_type png
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
311 --n_clusters 13
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
312 --filter_percentage 60
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
313
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
314 ")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
315 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
316
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
317 LoadDefaultParams <- function(param) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
318 # Loads default paramaters for the heatmap application
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
319 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
320 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
321 # param: A list that contains all the previous paramaters.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
322 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
323 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
324 # A modified version of the param list, with the default values loaded.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
325
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
326 script.version=0.1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
327 param$debug = F
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
328
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
329 # ChIP data:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
330 param$annotated.macs.file = "data/mtls/mtls.xls"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
331
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
332 # Filter:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
333 param$filter.percentage <- 100
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
334
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
335 # Analyze Clustering:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
336 param$analyze.number.of.clusters = F
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
337 param$analyze.number.of.clusters.sample.size = 10000
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
338 param$analyze.number.of.clusters.min = 4
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
339 param$analyze.number.of.clusters.max = 9
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
340 param$analyze.number.of.clusters.document.name <- "data/analysis/cluster_analysis"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
341 param$analyze.number.of.clusters.itterations = 25
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
342
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
343 # Clustering:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
344 param$clustering.number.of.clusters <- 13
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
345
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
346 # Heatmap:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
347 param$heatmap.document.name <- "data/heatmap/heatmap"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
348 param$heatmap.document.type <- "png"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
349 #Cluster Groups:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
350 param$cluster.groups.document.name <- "data/clusters/clusters"
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
351
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
352 return(param)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
353
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
354 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
355
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
356
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
357
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
358 LoadDefinedParams <- function(param) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
359 # Loads user defined paramaters for the heatmap application
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
360 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
361 # Args:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
362 # param: A list that contains all the previous paramaters.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
363 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
364 # Returns:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
365 # A modified version of the param list, with the user defined values loaded.
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
366
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
367 cmd.args <- commandArgs(trailingOnly = T);
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
368
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
369
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
370 # put the numeric params at the end!
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
371 n.start.numeric <- 5
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
372 args.nms <- c( "--mtls_file", #1
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
373 "--cluster_file", #2
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
374 "--heatmap_file", #3
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
375 "--heatmap_type", #4
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
376 "--n_clusters", #5
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
377 "--filter_percentage") #6
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
378
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
379 # get parameters
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
380 vals <- ReadCommadLineParameters(argument.names = args.nms,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
381 command.line.arguments = cmd.args)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
382
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
383 # check if numeric params are indeed numeric
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
384 for(i in n.start.numeric:length(vals)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
385 if(is.na(as.numeric(vals[i]))){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
386 stop("arg ",args.nms[i]," is not numeric. Bailing out...\n",print.error())
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
387 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
388 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
389
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
390 # vals has the same order as args.nms
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
391
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
392 # ChIP data:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
393 param$annotated.macs.file <- vals[1]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
394
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
395 # Filter:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
396 param$filter.percentage <- as.numeric(vals[6])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
397
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
398 # Clustering:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
399 param$clustering.number.of.clusters <- as.numeric(vals[5])
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
400
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
401 # Heatmap:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
402 param$heatmap.document.name <- vals[3]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
403 param$heatmap.document.type <- vals[4]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
404 #Cluster Groups:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
405 param$cluster.groups.document.name <- vals[2]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
406
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
407 return(param)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
408 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
409
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
410
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
411
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
412
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
413 # here we output the fasta file of the targets of each kmeans cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
414 CreateClusterGroups <- function(trgts,k.ix,f.nm="output/clust.to.trgts", km.order){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
415 f.nm = paste(f.nm, ".fasta", sep="")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
416 clusters = km.order
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
417 for(i in 1:length(clusters)){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
418 v=trgts[which(k.ix==clusters[i])]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
419 v.split = unlist(sapply(v,strsplit, "_"))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
420 if(i == 1){
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
421 cat(sep="",file=f.nm,">cluster_",i,"\n")#clusters[i],"\n")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
422 } else {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
423 cat(sep="",file=f.nm,">cluster_",i,"\n",append=TRUE)#clusters[i],"\n",append=TRUE)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
424 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
425 cat(sep="\n",file=f.nm,v.split,append=TRUE)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
426 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
427 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
428
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
429
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
430 GetTopRowsFromMatrix = function(mtrx, percentage = 10)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
431 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
432 if (param$debug) {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
433 print("In GetTopRowsFromMatrix")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
434 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
435 #Store the stats for the mtrx
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
436 stats = list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
437 stats$mean=apply(mtrx,1,mean)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
438 stats$sd=apply(mtrx,1,sd)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
439 stats$nonzero.mean=apply(mtrx, 1, function(x) mean(x[which(x != 0)]))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
440
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
441
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
442 #Store the indexes for the mtrx
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
443 index = list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
444 index$mean = sort(stats$mean, decreasing=T, index.return=T)$ix
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
445 index$sd = sort(stats$sd, decreasing=T, index.return=T)$ix
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
446 index$nonzero.mean = sort(stats$nonzero.mean, decreasing=T, index.return=T)$ix
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
447
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
448 #Calculate how many data points we want to take
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
449 cutoff = floor(length(mtrx[ ,1])*(percentage/100))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
450
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
451 #Extract Indexes and return to caller
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
452 index$union = union(index$mean[1:cutoff], union(index$sd[1:cutoff], index$nonzero.mean[1:cutoff]))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
453
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
454 return(index)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
455 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
456
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
457 #GetUniq = function(targets.cluster)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
458 #{
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
459 # #problem with this function is that it agrigates the list handed to it. after this point you cant maintain order
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
460 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
461 # return(unique(unlist(lapply(targets.cluster, function(i) strsplit(i, "_")))))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
462 # #return(unlist(lapply(targets.cluster, function(i) strsplit(i, "_"))))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
463 #
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
464 #}
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
465
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
466 bindata = function(d,qunts=seq(.4,.9,.1))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
467 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
468 tmp=matrix(0,nr=nrow(d),nc=ncol(d))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
469 for(j in 1:ncol(d))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
470 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
471 bins=quantile(d[,j],qunts)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
472 for(i in 1:length(bins))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
473 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
474 tmp[which(d[,j]>bins[i]),j] = i
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
475 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
476 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
477 return(tmp)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
478 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
479
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
480 bindata.non.zero = function(d,qunts=seq(0,0.9,0.1))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
481 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
482 tmp=matrix(0,nr=nrow(d),nc=ncol(d))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
483 for(j in 1:ncol(d))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
484 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
485 ix.non.zero=which(d[,j]!=0)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
486 bins=quantile(d[ix.non.zero,j],qunts)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
487 for(i in 1:length(bins))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
488 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
489 tmp[which(d[,j]>bins[i]),j] = i
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
490 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
491 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
492
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
493 return(tmp)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
494 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
495
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
496 bindata.non.zero.matrix = function(d,qunts=seq(0,0.9,0.1))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
497 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
498 tmp=matrix(0,nr=nrow(d),nc=ncol(d),dimnames=dimnames(d))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
499 ix.non.zero=which(d!=0)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
500 bins=quantile(d[ix.non.zero],qunts)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
501 for(i in 1:length(bins))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
502 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
503 tmp[which(d>bins[i])] = i
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
504 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
505 return(tmp)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
506 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
507
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
508 print("Finished loading functions")
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
509
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
510 param <- list()
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
511 param <- LoadDefaultParams(param)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
512 param <- LoadDefinedParams(param)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
513
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
514 print (param)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
515
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
516 #Load Chip data, alread in pval format
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
517 chip <- GetChipData(param$annotated.macs.file)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
518
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
519 #Get the top percentages on different criteria
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
520 ix.top <- GetTopRowsFromMatrix(chip$peaks, percentage = param$filter.percentage)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
521 chip$peaks <- chip$peaks[ix.top$union, ]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
522 chip$targets <- chip$targets[ix.top$union]
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
523
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
524 #Bin data:
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
525 chip$peaks = bindata.non.zero.matrix(chip$peaks, qunts = seq(0, 0.9, length=30))
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
526
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
527
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
528 #Analyze-cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
529 if(param$analyze.number.of.clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
530 {
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
531 AnalyzeCluster(data,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
532 sample.size = param$analyze.number.of.clusters.sample.size,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
533 clusters.min = param$analyze.number.of.clusters.min,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
534 clusters.max = param$analyze.number.of.clusters.max,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
535 document.name = param$analyze.number.of.clusters.document.name,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
536 itterations = param$analyze.number.of.clusters.itterations)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
537
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
538 }
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
539
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
540 #Cluster
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
541 set.seed(1234)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
542 km <- kmeans(chip$peaks, param$clustering.number.of.clusters)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
543 km.order <- GenerateKMOrder(chip$peaks, km)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
544
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
545 CreateHeatMap(chip$peaks,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
546 km,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
547 km.order,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
548 document.name = param$heatmap.document.name,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
549 document.type = param$heatmap.document.type)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
550
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
551 CreateClusterGroups(trgts=chip$targets,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
552 k.ix=km$cluster,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
553 f.nm = param$cluster.groups.document.name,
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
554 km.order = km.order)
a903c48f05b4 Added non-integrated scripts to galaxy
kmace
parents:
diff changeset
555