comparison CHM.R @ 40:8f8ab332a050 draft

Uploaded
author insilico-bob
date Thu, 20 Jun 2019 11:39:46 -0400
parents
children
comparison
equal deleted inserted replaced
39:436f03b71cf6 40:8f8ab332a050
1 ### This method generates a row and column ordering given an input matrix and ordering methods.
2 ###
3 ### matrixData - numeric matrix
4 ### rowOrderMethod - Hierarchical, Original, Random
5 ### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
6 ### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
7 ### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
8 ### colOrderMethod
9 ### colDistanceMeasure
10 ### colAgglomerationMethod
11 ### rowOrderFile - output file of order of rows
12 ### rowDendroFile - output file of row dendrogram
13 ### colOrderFile - output file of order of cols
14 ### colDendroFile - output file of col dendrogram
15 ### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
16 ### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
17
18 performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut)
19 {
20 dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?"))
21 rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
22 if (rowOrderMethod == "Hierarchical") {
23 writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
24 }
25
26 colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
27 if (colOrderMethod == "Hierarchical") {
28 writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
29 writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep=""))
30 }
31 }
32
33 #creates output files for hclust ordering
34 writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
35 {
36 data<-cbind(uDend$merge, uDend$height, deparse.level=0)
37 colnames(data)<-c("A", "B", "Height")
38 write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
39
40 data=matrix(,length(uDend$labels),2);
41 for (i in 1:length(uDend$labels)) {
42 print(uDend$labels[i])
43 data[i,1] = uDend$labels[i];
44 data[i,2] = which(uDend$order==i);
45 }
46 colnames(data)<-c("Id", "Order")
47 write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
48 }
49
50 #creates a classification file based on user specified cut of dendrogram
51 writeHCCut<-function(uDend, cutNum, outputCutFileName)
52 {
53 if (cutNum < 2) {
54 return()
55 }
56 print (paste("Writing cut file ", outputCutFileName))
57 cut <- cutree(uDend, cutNum);
58 id <- names(cut);
59 data=matrix(,length(cut),2);
60 for (i in 1:length(cut)) {
61 data[i,1] = id[i];
62 data[i,2] = sprintf("Cluster %d", cut[i]);
63 }
64
65 write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE);
66 }
67
68
69 createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
70 {
71 ordering <- NULL
72
73 if (orderMethod == "Hierarchical")
74 {
75
76 # Compute dendrogram for "Distance Metric"
77 distVals <- NULL
78 if(direction=="row") {
79 if (distanceMeasure == "correlation") {
80 geneGeneCor <- cor(t(matrixData), use="pairwise")
81 distVals <- as.dist((1-geneGeneCor)/2)
82 } else {
83 distVals <- dist(matrixData, method=distanceMeasure)
84 }
85 } else { #column
86 if (distanceMeasure == "correlation") {
87 geneGeneCor <- cor(matrixData, use="pairwise")
88 distVals <- as.dist((1-geneGeneCor)/2)
89 } else {
90 distVals <- dist(t(matrixData), method=distanceMeasure)
91 }
92 }
93
94 # if (agglomerationMethod == "ward") {
95 # ordering <- hclust(distVals * distVals, method="ward.D2")
96 # } else {
97 ordering <- hclust(distVals, method=agglomerationMethod)
98 # }
99 }
100 else if (orderMethod == "Random")
101 {
102 if(direction=="row") {
103 headerList <- rownames(matrixData)
104 ordering <- sample(headerList, length(headerList))
105 } else {
106 headerList <- colnames(matrixData)
107 ordering <- sample(headerList, length(headerList))
108 }
109 }
110 else if (orderMethod == "Original")
111 {
112 if(direction=="row") {
113 ordering <- rownames(matrixData)
114 } else {
115 ordering <- colnames(matrixData)
116 }
117 } else {
118 stop("createOrdering -- failed to find ordering method")
119 }
120 return(ordering)
121 }
122 ### Initialize command line arguments and call performDataOrdering
123
124 options(warn=-1)
125
126 args = commandArgs(TRUE)
127
128 performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13])
129
130 #suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))