Mercurial > repos > md-anderson-bioinformatics > heat_map_creation_advanced
comparison CHM_Advanced.R @ 0:8893ea2915cc draft
Initial Version of Advanced Heat Map Tool
author | insilico-bob |
---|---|
date | Tue, 08 Aug 2017 14:01:05 -0400 |
parents | |
children | 1f13d304ddbd |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8893ea2915cc |
---|---|
### Reads a tab-delimited matrix file, computes a row ordering and a column
### ordering, and writes the hierarchical clustering results to files.
###
### dataFile - path of the tab-delimited input matrix. The first line is the header and
###      the first column holds the row names. "NA", "N/A", "-" and "?" are read as missing.
### rowOrderMethod - Hierarchical, Original, Random
### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
###      'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
### colOrderMethod - same choices as rowOrderMethod, applied to columns
### colDistanceMeasure - same choices as rowDistanceMeasure, applied to columns
### colAgglomerationMethod - same choices as rowAgglomerationMethod, applied to columns
### rowOrderFile - output file of order of rows
### colOrderFile - output file of order of cols
### rowDendroFile - output file of row dendrogram
### colDendroFile - output file of col dendrogram
### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
###
### Files are written only for a dimension whose order method is "Hierarchical".
### The classification file is named after the order file with ".cut" appended.

performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut = 0, colCut = 0)
{
  # Cut counts normally arrive as command-line strings. An absent value (NULL or NA)
  # means "turned off"; anything that is not a non-negative whole number is reported
  # clearly instead of failing later inside if() or cutree().
  normalizeCut <- function(value, label) {
    if (length(value) != 1 || is.na(value)) {
      return(0L)
    }
    count <- suppressWarnings(as.integer(value))
    if (is.na(count) || count < 0L) {
      stop(paste0("performDataOrdering -- ", label, " must be a non-negative integer, got '", value, "'"))
    }
    count
  }
  rowCutCount <- normalizeCut(rowCut, "rowCut")
  colCutCount <- normalizeCut(colCut, "colCut")

  # check.names = FALSE keeps the column labels exactly as they appear in the file;
  # the default would rewrite them with make.names() (e.g. "TCGA-01" -> "TCGA.01"),
  # and the rewritten labels would end up in the column order and cut files.
  dataMatrix <- read.table(dataFile, header=TRUE, sep = "\t", row.names = 1, as.is=TRUE, check.names = FALSE, na.strings=c("NA","N/A","-","?"))

  rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
  if (rowOrderMethod == "Hierarchical") {
    writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
    if (rowCutCount != 0) {
      writeHCCut(rowOrder, rowCutCount, paste0(rowOrderFile, ".cut"))
    }
  }

  colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
  if (colOrderMethod == "Hierarchical") {
    writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
    if (colCutCount != 0) {
      writeHCCut(colOrder, colCutCount, paste0(colOrderFile, ".cut"))
    }
  }
}
37 | |
# Writes the two tab-separated files that describe an hclust result.
#
# uDend - clustering result; its merge, height, labels and order components are used
# outputHCDataFileName - dendrogram file: columns A and B (the two merge columns) and
#      Height, one row per merge step
# outputHCOrderFileName - order file: columns Id and Order, one row per label, where
#      Order is the position at which that item appears in uDend$order
writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
{
  mergeTable <- cbind(uDend$merge, uDend$height, deparse.level=0)
  colnames(mergeTable) <- c("A", "B", "Height")
  write.table(mergeTable, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)

  itemLabels <- uDend$labels
  # Position of item i within the ordering, i.e. the inverse of the order permutation.
  itemPositions <- match(seq_along(itemLabels), uDend$order)
  orderTable <- cbind(itemLabels, as.character(itemPositions), deparse.level=0)
  colnames(orderTable) <- c("Id", "Order")
  write.table(orderTable, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
}
53 | |
# Creates a classification file from a user specified cut of the dendrogram.
#
# uDend - clustering result handed to cutree()
# cutNum - number of groups requested from cutree()
# outputCutFileName - tab-separated output without a header line; each line holds
#      an item name followed by "Cluster <group number>"
writeHCCut<-function(uDend, cutNum, outputCutFileName)
{
  print (paste("Writing cut file ", outputCutFileName))
  membership <- cutree(uDend, cutNum)
  clusterNames <- sprintf("Cluster %d", membership)
  cutTable <- cbind(names(membership), clusterNames, deparse.level=0)

  write.table(cutTable, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE)
}
68 | |
69 | |
# Computes the ordering for one dimension of the data.
#
# matrixData - the data whose rows or columns are to be ordered
# orderMethod - "Hierarchical", "Random" or "Original"; any other value stops with an error
# direction - "row" orders the rows; any other value orders the columns
# distanceMeasure - "correlation", or a method name handed to dist() (Hierarchical only)
# agglomerationMethod - method name handed to hclust() (Hierarchical only)
#
# Returns the hclust result for "Hierarchical"; otherwise the row or column names,
# shuffled for "Random" and unchanged for "Original".
createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
{
  if (orderMethod == "Hierarchical")
  {
    # Correlation distance is (1 - r) / 2; cor() correlates columns, so the data is
    # transposed for rows. dist() measures between rows, so it is transposed for columns.
    if (direction == "row") {
      distVals <- if (distanceMeasure == "correlation") {
        as.dist((1 - cor(t(matrixData), use="pairwise")) / 2)
      } else {
        dist(matrixData, method=distanceMeasure)
      }
    } else {
      distVals <- if (distanceMeasure == "correlation") {
        as.dist((1 - cor(matrixData, use="pairwise")) / 2)
      } else {
        dist(t(matrixData), method=distanceMeasure)
      }
    }

    # The distances go to hclust() unchanged, whatever the agglomeration method.
    return(hclust(distVals, method=agglomerationMethod))
  }

  if (orderMethod != "Random" && orderMethod != "Original") {
    stop("createOrdering -- failed to find ordering method")
  }

  dimLabels <- if (direction == "row") rownames(matrixData) else colnames(matrixData)
  if (orderMethod == "Random") {
    return(sample(dimLabels, length(dimLabels)))
  }
  return(dimLabels)
}
### Initialize command line arguments and call performDataOrdering
###
### Positional arguments: 1 dataFile, 2 rowOrderMethod, 3 rowDistanceMeasure,
### 4 rowAgglomerationMethod, 5 colOrderMethod, 6 colDistanceMeasure,
### 7 colAgglomerationMethod, 8 rowOrderFile, 9 colOrderFile, 10 rowDendroFile,
### 11 colDendroFile, 12 rowCut (optional), 13 colCut (optional).

# Warnings are silenced for the whole run.
options(warn=-1)

args <- commandArgs(TRUE)

# Fail with a clear message rather than passing NA file names and methods along.
if (length(args) < 11) {
  stop(paste0("Expected at least 11 command line arguments (13 with rowCut and colCut), got ", length(args)), call. = FALSE)
}

# Indexing past the end of args yields NA; an omitted cut count means "turned off".
rowCutArg <- if (is.na(args[12])) "0" else args[12]
colCutArg <- if (is.na(args[13])) "0" else args[13]

performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=rowCutArg, colCut=colCutArg)

# Earlier invocation without the cut arguments, kept for reference:
#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))