Mercurial > repos > md-anderson-bioinformatics > heat_map_creation
changeset 14:dfc86e786db4 draft
Deleted selected files
| author | insilico-bob | 
|---|---|
| date | Fri, 27 Jan 2017 11:25:52 -0500 | 
| parents | 7258044eda47 | 
| children | 2c7d75d58ca7 | 
| files | CHM.R GalaxyMapGen.jar heatmap.sh | 
| diffstat | 3 files changed, 0 insertions(+), 193 deletions(-) [+] | 
line wrap: on
 line diff
--- a/CHM.R Thu Jan 26 10:18:00 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ -### This method generates a row and column ordering given an input matrix and ordering methods. -### -### matrixData - numeric matrix -### rowOrderMethod - Hierarchical, Original, Random -### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation. -### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage, -### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'. -### colOrderMethod -### colDistanceMeasure -### colAgglomerationMethod -### rowOrderFile - output file of order of rows -### rowDendroFile - output file of row dendrogram -### colOrderFile - output file of order of cols -### colDendroFile - output file of col dendrogram -### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. -### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. - -performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut) -{ - dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) - rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) - if (rowOrderMethod == "Hierarchical") { - writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) - writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep="")) - } else { - writeOrderTSV(rowOrder, rownames(dataMatrix), rowOrderFile) - } - - colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod) - if (colOrderMethod == "Hierarchical") { - writeHCDataTSVs(colOrder, colDendroFile, colOrderFile) - writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep="")) - } else { - writeOrderTSV(colOrder, colnames(dataMatrix), colOrderFile) - } -} - -#creates output files for hclust ordering -writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName) -{ - data<-cbind(uDend$merge, uDend$height, deparse.level=0) - colnames(data)<-c("A", "B", "Height") - write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) - - data=matrix(,length(uDend$labels),2); - for (i in 1:length(uDend$labels)) { - data[i,1] = uDend$labels[i]; - data[i,2] = which(uDend$order==i); - } - colnames(data)<-c("Id", "Order") - write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) -} - -#creates order file for non-clustering methods -writeOrderTSV<-function(newOrder, originalOrder, outputHCOrderFileName) -{ - data=matrix(,length(originalOrder),2); - for (i in 1:length(originalOrder)) { - data[i,1] = originalOrder[i]; - data[i,2] = which(newOrder==originalOrder[i]); - } - colnames(data)<-c("Id", "Order") - write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) -} - -#creates a classification file based on user specified cut of dendrogram -writeHCCut<-function(uDend, cutNum, outputCutFileName) -{ - if (cutNum < 2) { - return() - } - print (paste("Writing cut file ", outputCutFileName)) - cut <- cutree(uDend, cutNum); - id <- names(cut); - data=matrix(,length(cut),2); - for (i in 1:length(cut)) { - data[i,1] = id[i]; - data[i,2] = sprintf("Cluster %d", cut[i]); - } - - write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE); -} - - -createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod) -{ - ordering <- NULL - - if (orderMethod == "Hierarchical") - { - - # Compute dendrogram for "Distance Metric" - distVals <- NULL - if(direction=="row") { - if (distanceMeasure == "correlation") { - geneGeneCor <- cor(t(matrixData), use="pairwise") - distVals <- as.dist((1-geneGeneCor)/2) - } else { - distVals <- dist(matrixData, method=distanceMeasure) - } - } else { #column - if (distanceMeasure == "correlation") { - geneGeneCor <- cor(matrixData, use="pairwise") - distVals <- as.dist((1-geneGeneCor)/2) - } else { - distVals <- dist(t(matrixData), method=distanceMeasure) - } - } - -# if (agglomerationMethod == "ward") { -# ordering <- hclust(distVals * distVals, method="ward.D2") -# } else { - ordering <- hclust(distVals, method=agglomerationMethod) -# } - } - else if (orderMethod == "Random") - { - if(direction=="row") { - headerList <- rownames(matrixData) - ordering <- sample(headerList, length(headerList)) - } else { - headerList <- colnames(matrixData) - ordering <- sample(headerList, length(headerList)) - } - } - else if (orderMethod == "Original") - { - if(direction=="row") { - ordering <- rownames(matrixData) - } else { - ordering <- colnames(matrixData) - } - } else { - stop("createOrdering -- failed to find ordering method") - } - return(ordering) -} -### Initialize command line arguments and call performDataOrdering - -options(warn=-1) - -args = commandArgs(TRUE) - -performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13]) - -#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
--- a/heatmap.sh Thu Jan 26 10:18:00 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -echo $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} -#create temp directory for row and col order and dendro files. -tdir=${11}/$(date +%y%m%d%M%S) -echo $tdir -mkdir $tdir -#run R to cluster matrix -output="$(R --slave --vanilla --file=${11}/CHM.R --args $3 $4 $5 $6 $7 $8 $9 $tdir/ROfile.txt $tdir/COfile.txt $tdir/RDfile.txt $tdir/CDfile.txt ${12} ${13} ${14} ${15} 2>&1)" -rc=$?; -if [ $rc != 0 ] -then - echo $output; - if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ] - then - echo ""; - echo "Note: This error can occur when there is no variation in a row or column. Try a different distance measure or remove rows/columns without variation."; - fi - exit $rc; -fi - -#there are a variable number of triplicate parameters for classification bars -count=0 -classifications='' - -#if row cut was done, add that autogenerated classification -if [ ${12} -gt 1 ] -then - classifications="Class $tdir/ROfile.txt.cut row_categorical" -fi - -#if col cut was done, add that autogenerated classification -if [ ${13} -gt 1 ] -then - classifications="$classifications Class $tdir/COfile.txt.cut col_categorical" -fi - -#now add the user provided classification files -for i in "$@"; do - if [ $count -gt 14 ] - then - classifications=$classifications' '$i - fi - count=$((count+1)) -done -#call java program to generate NGCHM viewer files. -java -jar ${11}/GalaxyMapGen.jar "${1}" "${2}" DataLayer1 $3 linear ${14} ${15} $4 $5 $6 $tdir/ROfile.txt $tdir/RDfile.txt $7 $8 $9 $tdir/COfile.txt $tdir/CDfile.txt ${10} $classifications -#clean up tempdir -rm -rf $tdir
