# HG changeset patch
# User insilico-bob
# Date 1561045186 14400
# Node ID 8f8ab332a05078219e95caec1e4f0378dbbc252a
# Parent 436f03b71cf636556fdc01fa9b4beb96399fb0a3
Uploaded
diff -r 436f03b71cf6 -r 8f8ab332a050 CHM.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHM.R Thu Jun 20 11:39:46 2019 -0400
@@ -0,0 +1,130 @@
+### This script generates a row and column ordering for an input matrix using the specified ordering methods.
+###
+### matrixData - numeric matrix
+### rowOrderMethod - Hierarchical, Original, Random
+### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
+### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
+### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
+### colOrderMethod
+### colDistanceMeasure
+### colAgglomerationMethod
+### rowOrderFile - output file of order of rows
+### rowDendroFile - output file of row dendrogram
+### colOrderFile - output file of order of cols
+### colDendroFile - output file of col dendrogram
+### rowCut - For rows, the number of classifications to generate automatically from the dendrogram into a classification file. 0 turns this off.
+### colCut - For columns, the number of classifications to generate automatically from the dendrogram into a classification file. 0 turns this off.
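+###
+### Illustrative call (hypothetical file paths), mirroring how the command-line wrapper at the
+### bottom of this script invokes it:
+###   performDataOrdering("matrix.txt", "Hierarchical", "euclidean", "average",
+###                       "Hierarchical", "correlation", "complete",
+###                       "ROfile.txt", "COfile.txt", "RDfile.txt", "CDfile.txt", 0, 0)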
+
+performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut)
+{
+ dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?"))
+ rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
+ if (rowOrderMethod == "Hierarchical") {
+   writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
+   writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep=""))
+ }
+
+ colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
+ if (colOrderMethod == "Hierarchical") {
+ writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
+ writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep=""))
+ }
+}
+
+#creates output files for hclust ordering
+writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
+{
+ data<-cbind(uDend$merge, uDend$height, deparse.level=0)
+ colnames(data)<-c("A", "B", "Height")
+ write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
+
+ data=matrix(,length(uDend$labels),2);
+ for (i in 1:length(uDend$labels)) {
+ print(uDend$labels[i])
+ data[i,1] = uDend$labels[i];
+ data[i,2] = which(uDend$order==i);
+ }
+ colnames(data)<-c("Id", "Order")
+ write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
+}
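+# Illustrative output shapes (values hypothetical): the dendrogram file has one row per hclust
+# merge with columns A, B, Height (negative values are leaf indices, positive values refer to
+# earlier merges, as in hclust's merge matrix); the order file maps each label to its position
+# in the ordering, e.g. "TP53<TAB>3".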
+
+#creates a classification file based on a user-specified cut of the dendrogram
+writeHCCut<-function(uDend, cutNum, outputCutFileName)
+{
+ cutNum <- as.numeric(cutNum)  # command-line arguments arrive as character strings
+ if (is.na(cutNum) || cutNum < 2) {
+   return()
+ }
+ print (paste("Writing cut file ", outputCutFileName))
+ cut <- cutree(uDend, cutNum);
+ id <- names(cut);
+ data=matrix(,length(cut),2);
+ for (i in 1:length(cut)) {
+ data[i,1] = id[i];
+ data[i,2] = sprintf("Cluster %d", cut[i]);
+ }
+
+ write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE);
+}
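+# Illustrative cut file content (labels hypothetical): one tab-separated line per item, no header, e.g.
+#   Sample01<TAB>Cluster 1
+#   Sample02<TAB>Cluster 3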
+
+
+createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
+{
+ ordering <- NULL
+
+ if (orderMethod == "Hierarchical")
+ {
+
+ # Compute dendrogram for "Distance Metric"
+ distVals <- NULL
+ if(direction=="row") {
+ if (distanceMeasure == "correlation") {
+ geneGeneCor <- cor(t(matrixData), use="pairwise")
+ distVals <- as.dist((1-geneGeneCor)/2)
+ } else {
+ distVals <- dist(matrixData, method=distanceMeasure)
+ }
+ } else { #column
+ if (distanceMeasure == "correlation") {
+ geneGeneCor <- cor(matrixData, use="pairwise")
+ distVals <- as.dist((1-geneGeneCor)/2)
+ } else {
+ distVals <- dist(t(matrixData), method=distanceMeasure)
+ }
+ }
+
+# if (agglomerationMethod == "ward") {
+# ordering <- hclust(distVals * distVals, method="ward.D2")
+# } else {
+ ordering <- hclust(distVals, method=agglomerationMethod)
+# }
+ }
+ else if (orderMethod == "Random")
+ {
+ if(direction=="row") {
+ headerList <- rownames(matrixData)
+ ordering <- sample(headerList, length(headerList))
+ } else {
+ headerList <- colnames(matrixData)
+ ordering <- sample(headerList, length(headerList))
+ }
+ }
+ else if (orderMethod == "Original")
+ {
+ if(direction=="row") {
+ ordering <- rownames(matrixData)
+ } else {
+ ordering <- colnames(matrixData)
+ }
+ } else {
+ stop(paste("createOrdering -- unknown ordering method:", orderMethod))
+ }
+ return(ordering)
+}
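+# Sketch of the correlation-based distance used above (hypothetical data): rows with correlation 1
+# get distance 0 and rows with correlation -1 get distance 1.
+#   m <- matrix(rnorm(20), nrow=4)
+#   d <- as.dist((1 - cor(t(m), use="pairwise")) / 2)
+#   hc <- hclust(d, method="average")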
+### Initialize command line arguments and call performDataOrdering
+
+options(warn=-1)
+
+args = commandArgs(TRUE)
+
+performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13])
+
+#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
diff -r 436f03b71cf6 -r 8f8ab332a050 CHM_Advanced.R
--- a/CHM_Advanced.R Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-### This method generates a row and column ordering given an input matrix and ordering methods.
-###
-### matrixData - numeric matrix
-### rowOrderMethod - Hierarchical, Original, Random
-### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
-### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
-### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
-### colOrderMethod
-### colDistanceMeasure
-### colAgglomerationMethod
-### rowOrderFile - output file of order of rows
-### rowDendroFile - output file of row dendrogram
-### colOrderFile - output file of order of cols
-### colDendroFile - output file of col dendrogram
-### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
-### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off.
-
-performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut)
-{
- dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?"))
- rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
- if (rowOrderMethod == "Hierarchical") {
- writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
- if (rowCut != 0) {
- writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep=""))
- }
- }
-
- colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
- if (colOrderMethod == "Hierarchical") {
- writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
- if (colCut != 0) {
- writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep=""))
- }
- }
-}
-
-#creates output files for hclust ordering
-writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
-{
- data<-cbind(uDend$merge, uDend$height, deparse.level=0)
- colnames(data)<-c("A", "B", "Height")
- write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
-
- data=matrix(,length(uDend$labels),2);
- for (i in 1:length(uDend$labels)) {
- data[i,1] = uDend$labels[i];
- data[i,2] = which(uDend$order==i);
- }
- colnames(data)<-c("Id", "Order")
- write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
-}
-
-#creates a classification file based on user specified cut of dendrogram
-writeHCCut<-function(uDend, cutNum, outputCutFileName)
-{
- print (paste("Writing cut file ", outputCutFileName))
- cut <- cutree(uDend, cutNum);
- id <- names(cut);
- data=matrix(,length(cut),2);
- for (i in 1:length(cut)) {
- data[i,1] = id[i];
- data[i,2] = sprintf("Cluster %d", cut[i]);
- }
-
- write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE);
-}
-
-
-createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
-{
- ordering <- NULL
-
- if (orderMethod == "Hierarchical")
- {
-
- # Compute dendrogram for "Distance Metric"
- distVals <- NULL
- if(direction=="row") {
- if (distanceMeasure == "correlation") {
- geneGeneCor <- cor(t(matrixData), use="pairwise")
- distVals <- as.dist((1-geneGeneCor)/2)
- } else {
- distVals <- dist(matrixData, method=distanceMeasure)
- }
- } else { #column
- if (distanceMeasure == "correlation") {
- geneGeneCor <- cor(matrixData, use="pairwise")
- distVals <- as.dist((1-geneGeneCor)/2)
- } else {
- distVals <- dist(t(matrixData), method=distanceMeasure)
- }
- }
-
-# if (agglomerationMethod == "ward") {
-# ordering <- hclust(distVals * distVals, method="ward.D2")
-# } else {
- ordering <- hclust(distVals, method=agglomerationMethod)
-# }
- }
- else if (orderMethod == "Random")
- {
- if(direction=="row") {
- headerList <- rownames(matrixData)
- ordering <- sample(headerList, length(headerList))
- } else {
- headerList <- colnames(matrixData)
- ordering <- sample(headerList, length(headerList))
- }
- }
- else if (orderMethod == "Original")
- {
- if(direction=="row") {
- ordering <- rownames(matrixData)
- } else {
- ordering <- colnames(matrixData)
- }
- } else {
- stop("createOrdering -- failed to find ordering method")
- }
- return(ordering)
-}
-### Initialize command line arguments and call performDataOrdering
-
-options(warn=-1)
-
-args = commandArgs(TRUE)
-
-performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13])
-
-#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
diff -r 436f03b71cf6 -r 8f8ab332a050 heatmap.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/heatmap.sh Thu Jun 20 11:39:46 2019 -0400
@@ -0,0 +1,147 @@
+#echo "1: " $1" 2: " $2" 3: " $3" 4: "$4" 5: "$5 " 6: "$6 "7: "$7" 8: "$8 " 9: "$9" 10: "${10}" 11: "${11} "12: "${12}
+#echo " 13: "${13}" 14: "${14}" 15: "${15}" 16: "${16} "17: "${17}" 18: "${18}" 19: "${19}" 20: "${20}" 21: "${21} " 22: "${22}" 23:" ${23}
+
+#Count total number of parameters and classification parameters
+parmSize=0
+classSize=0
+matrixSize=0
+for i in "$@"; do
+ currParm=$(cut -d'|' -f1 <<< $i)
+ parmSize=$((parmSize+1))
+ if [ $currParm = "classification" ]
+ then
+ classSize=$((classSize+1))
+ fi
+done
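+
+#Each positional parameter after the first two is a pipe-delimited string built by the Galaxy
+#tool XML, e.g. (illustrative values):
+#  "matrix_files|path|/data/matrix.txt|name|datalayer|summary_method|average"
+#  "classification|name|Subtype|path|/data/subtype.txt|category|row_categorical"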
+
+#Get tool data and tool install directories
+tooldir=$1
+tooldata=$2
+#create temp directory for row and col order and dendro files.
+tdir=$tooldata/$(date +%y%m%d%M%S)
+mkdir $tdir
+#echo "tdir: "$tdir
+
+#Define temp file paths and JSON fragments for the row and column order and dendrogram files
+rowOrderFile=$tdir/ROfile.txt
+rowDendroFile=$tdir/RDfile.txt
+colOrderFile=$tdir/COfile.txt
+colDendroFile=$tdir/CDfile.txt
+rowOrderJson='"order_file": "'$rowOrderFile'",'
+rowDendroJson='"dendro_file": "'$rowDendroFile'",'
+colOrderJson='"order_file": "'$colOrderFile'",'
+colDendroJson='"dendro_file": "'$colDendroFile'",'
+
+#BEGIN: Construct JSON for all non-repeating parameters
+parmJson='{'
+rowConfigJson='"row_configuration": {'
+colConfigJson='"col_configuration": {'
+
+ctr=0
+for i in "$@"; do
+ if [ $ctr -gt 1 ]
+ then
+ currParm=$(cut -d'|' -f1 <<< $i)
+ if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ]
+ then
+ #Parse pipe-delimited parameter
+ parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",'
+ fi
+ if [ $currParm = "row_configuration" ]
+ then
+ rowOrder=$(cut -d'|' -f3 <<< $i)
+ rowDistance=$(cut -d'|' -f5 <<< $i)
+ rowAgglomeration=$(cut -d'|' -f7 <<< $i)
+ rowCuts=$(cut -d'|' -f9 <<< $i)
+ rowLabels=$(cut -d'|' -f11 <<< $i)
+ dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"]'
+ if [ $rowOrder = 'Hierarchical' ]
+ then
+ rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson
+ fi
+ rowConfigJson=$rowConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},'
+ fi
+ if [ $currParm = "col_configuration" ]
+ then
+ colOrder=$(cut -d'|' -f3 <<< $i)
+ colDistance=$(cut -d'|' -f5 <<< $i)
+ colAgglomeration=$(cut -d'|' -f7 <<< $i)
+ colCuts=$(cut -d'|' -f9 <<< $i)
+ colLabels=$(cut -d'|' -f11 <<< $i)
+ dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"]'
+ if [ $colOrder = 'Hierarchical' ]
+ then
+ colConfigJson=$colConfigJson$colOrderJson$colDendroJson
+ fi
+ colConfigJson=$colConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},'
+ fi
+ fi
+ ctr=$((ctr+1))
+done
+#END: Construct JSON for all non-repeating parameters
+#echo "rowCuts: "$rowCuts
+#echo "colCuts: "$colCuts
+#echo "ROW CONFIG JSON: "$rowConfigJson
+#echo "COL CONFIG JSON: "$colConfigJson
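+#With the field names from the basic tool XML, rowConfigJson ends up roughly as (illustrative values):
+#  "row_configuration": {"order_file": "$tdir/ROfile.txt","dendro_file": "$tdir/RDfile.txt",
+#  "order_method":"Hierarchical","distance_metric":"euclidean","agglomeration_method":"average","data_type":["labels"]},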
+
+#BEGIN: Construct JSON for data layers
+matrixJson='"matrix_files": [ '
+inputMatrix=''
+for i in "$@"; do
+ currParm=$(cut -d'|' -f1 <<< $i)
+ if [ $currParm = "matrix_files" ]
+ then
+ #Parse pipe-delimited matrix_files parameter
+ matrixJson=$matrixJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'"}'
+ inputMatrix=$(cut -d'|' -f3 <<< $i)
+ fi
+done
+matrixJson=$matrixJson"],"
+#END: Construct JSON for data layers
+
+#BEGIN: Construct JSON for classification files
+classJson='"classification_files": [ '
+classIter=0
+for i in "$@"; do
+ currParm=$(cut -d'|' -f1 <<< $i)
+ if [ $currParm = "classification" ]
+ then
+ classIter=$((classIter+1))
+ #Parse pipe-delimited 3-part classification bar parameter
+ classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'"'
+ classCat=$(cut -d'|' -f7 <<< $i)
+ classColorType=$(cut -d'_' -f2 <<< $classCat)
+ classJson=$classJson','
+ classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}'
+ if [ $classIter -lt $classSize ]
+ then
+ classJson=$classJson','
+ fi
+ fi
+done
+classJson=$classJson']'
+#END: Construct JSON for classification files
+
+parmJson=$parmJson$matrixJson$rowConfigJson$colConfigJson$classJson
+parmJson=$parmJson'}'
+#echo "HEATMAP PARAMETERS JSON: "$parmJson
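+#Rough shape of the assembled JSON handed to GalaxyMapGen.jar (illustrative):
+#  { "chm_name":"Heat_Map_x", "chm_description":"desc", "output_location":"/path/out",
+#    "matrix_files":[ {"path":"/data/matrix.txt","name":"datalayer","summary_method":"average"} ],
+#    "row_configuration":{ ... }, "col_configuration":{ ... },
+#    "classification_files":[ ... ] }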
+
+#run R to cluster matrix
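+#The arguments below map onto CHM.R's commandArgs in order: dataFile, rowOrderMethod, rowDistanceMeasure,
+#rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod, rowOrderFile,
+#colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut; the trailing label arguments are
+#currently unused by CHM.R.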
+output="$(R --slave --vanilla --file=$tooldir/CHM.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)"
+rc=$?;
+if [ $rc != 0 ]
+then
+ echo $output;
+ if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ]
+ then
+ echo "";
+ echo "Note: This error can occur when there is no variation in a row or column. Try a different distance measure or remove rows/columns without variation.";
+ echo "This error may also be caused when a covariate file has inadvertently been selected as an Input Matrix. Check your Input Matrix entry.";
+ fi
+ exit $rc;
+fi
+
+#call java program to generate NGCHM viewer files.
+java -jar $tooldir/GalaxyMapGen.jar "$parmJson"
+#clean up tempdir
+rm -rf $tdir
diff -r 436f03b71cf6 -r 8f8ab332a050 heatmap_advanced.sh
--- a/heatmap_advanced.sh Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,369 +0,0 @@
-#echo "1: " $1 " 2: " $2 " 3: " $3 " 4: " $4 " 5: " $5 " 6: " $6 " 7: " $7 " 8: " $8 " 9: " $9 " 10: " ${10}
-#echo "11: " ${11} " 12: " ${12} 13: " ${13} 14: " ${14} " 15: " ${15} " 16: " ${16} " 17: " ${17} " 18: " ${18} " 19: " ${19} " 20: " ${20}
-#echo "21: "${21}" 22: "${22}" 23: "${23}" 24: "${24}" 25: "${25}" 26: "${26}" 27: "${27}" 28: "${28}" 29: "${29}" 30: "${30}
-
-#Count total number of parameters, dataLayer parameters, and classification parameters
-parmSize=0
-classSize=0
-dataLayerSize=0
-attribSize=0
-for i in "$@"; do
- currParm=$(cut -d'|' -f1 <<< $i)
- parmSize=$((parmSize+1))
- if [ $currParm = "classification" ]
- then
- classSize=$((classSize+1))
- fi
- if [ $currParm = "matrix_files" ]
- then
- dataLayerSize=$((dataLayerSize+1))
- fi
- if [ $currParm = "attribute" ]
- then
- attribSize=$((attribSize+1))
- fi
-done
-
-if [ $dataLayerSize -lt 1 ]
-then
- noDataLayer="ERROR: No Heat Map Matrices provided. Please add at least one Heat Map Matrix to your request and try again."
- echo $noDataLayer
- exit $noDataLayer
-fi
-
-#Get tool data and tool install directories
-tooldir=$1
-tooldata=$2
-#create temp directory for row and col order and dendro files.
-tdir=$tooldata/$(date +%y%m%d%M%S)
-mkdir $tdir
-#echo "tdir: "$tdir
-
-#Extract parameters for row and column order and dendro files
-rowOrderFile=$tdir/ROfile.txt
-rowDendroFile=$tdir/RDfile.txt
-colOrderFile=$tdir/COfile.txt
-colDendroFile=$tdir/CDfile.txt
-rowOrderJson='"order_file": "'$rowOrderFile'",'
-rowDendroJson='"dendro_file": "'$rowDendroFile'",'
-colOrderJson='"order_file": "'$colOrderFile'",'
-colDendroJson='"dendro_file": "'$colDendroFile'",'
-
-#BEGIN: Construct JSON for all non-repeating parameters
-parmJson='{'
-rowConfigJson='"row_configuration": {'
-colConfigJson='"col_configuration": {'
-
-ctr=0
-for i in "$@"; do
- if [ $ctr -gt 1 ]
- then
- currParm=$(cut -d'|' -f1 <<< $i)
- if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] && [ $currParm != "attribute" ]
- then
- #Parse pipe-delimited parameter parameter
- parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",'
- fi
- if [ $currParm = "row_configuration" ]
- then
- rowOrder=$(cut -d'|' -f3 <<< $i)
- rowDistance=$(cut -d'|' -f5 <<< $i)
- rowAgglomeration=$(cut -d'|' -f7 <<< $i)
- rowCuts=$(cut -d'|' -f9 <<< $i)
- rowLabels=$(cut -d'|' -f11 <<< $i)
- rowDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"],'
- rowCutType=$(cut -d'|' -f16 <<< $i)
- rowTopItemsJson=''
- rowTopItems=$(cut -d'|' -f13 <<< $i)
- if [ $rowTopItems != "None" ] && [ $rowTopItems != "" ]
- then
- rowTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": ['
- rowTopItems=${rowTopItems//,/'","'}
- rowTopItemsJson=$rowTopItemsJson'"'$rowTopItems'"],'
- fi
- rowCutsJson=''
- if [ $rowCutType != "none" ]
- then
- cutValues=$(cut -d'|' -f15 <<< $i)
- if [ $cutValues != "None" ] && [ $cutValues != "0" ]
- then
- if [ $rowCutType = "treecuts" ]
- then
- rowCutsJson=$rowCutsJson'"tree_cuts": "'$cutValues'",'
- rowCutsJson=$rowCutsJson'"cut_width": "5",'
- fi
- if [ $rowCutType = "positional" ]
- then
- rowCutErrorVal=0
- [[ $cutValues != ?(-)+([0-9,]) ]] && rowCutErrorVal=$((rowCutErrorVal+1))
- if [ $rowCutErrorVal -gt 0 ]
- then
- echo "GALAXY PARAMETER WARNING: Non-numeric values found for Row Fixed Gap Locations. Ignoring parameter value: "$cutValues
- else
- rowCutsJson=$rowCutsJson'"cut_locations": ['$cutValues'],'
- rowCutsJson=$rowCutsJson'"cut_width": "5",'
- fi
- fi
- fi
- fi
- rowConfigJson=$rowConfigJson$rowDataTypeJson$rowCutsJson$rowTopItemsJson
- if [ $rowOrder = 'Hierarchical' ]
- then
- rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson
- fi
- rowConfigJson=$rowConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},'
- fi
- if [ $currParm = "col_configuration" ]
- then
- colOrder=$(cut -d'|' -f3 <<< $i)
- colDistance=$(cut -d'|' -f5 <<< $i)
- colAgglomeration=$(cut -d'|' -f7 <<< $i)
- colCuts=$(cut -d'|' -f9 <<< $i)
- colLabels=$(cut -d'|' -f11 <<< $i)
- colDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"],'
- colCutType=$(cut -d'|' -f16 <<< $i)
- colTopItemsJson=''
- colTopItems=$(cut -d'|' -f13 <<< $i)
- if [ $colTopItems != "None" ] && [ $colTopItems != "" ]
- then
- colTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": ['
- colTopItems=${colTopItems//,/'","'}
- colTopItemsJson=$colTopItemsJson'"'$colTopItems'"],'
- fi
- colCutsJson=''
- if [ $colCutType != "none" ]
- then
- cutValues=$(cut -d'|' -f15 <<< $i)
- if [ $cutValues != "None" ] && [ $cutValues != "0" ]
- then
- if [ $colCutType = "treecuts" ]
- then
- colCutsJson=$colCutsJson'"tree_cuts": "'$cutValues'",'
- colCutsJson=$colCutsJson'"cut_width": "5",'
- fi
- if [ $colCutType = "positional" ]
- then
- colCutErrorVal=0
- [[ $cutValues != ?(-)+([0-9,]) ]] && colCutErrorVal=$((colCutErrorVal+1))
- if [ $colCutErrorVal -gt 0 ]
- then
- echo "GALAXY PARAMETER WARNING: Non-numeric values found for Column Fixed Gap Locations. Ignoring parameter value: "$cutValues
- else
- colCutsJson=$colCutsJson'"cut_locations": ['$cutValues'],'
- colCutsJson=$colCutsJson'"cut_width": "5",'
- fi
- fi
- fi
- fi
- colConfigJson=$colConfigJson$colDataTypeJson$colCutsJson$colTopItemsJson
- if [ $colOrder = 'Hierarchical' ]
- then
- colConfigJson=$colConfigJson$colOrderJson$colDendroJson
- fi
- colConfigJson=$colConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},'
- fi
- fi
- ctr=$((ctr+1))
-done
-
-#END: Construct JSON for all non-repeating parameters
-#echo "rowOrder: "$rowOrder
-#echo "rowDistance: "$rowDistance
-#echo "rowAgglomeration: "$rowAgglomeration
-#echo "rowCuts: "$rowCuts
-#echo "rowLabels: "$rowLabels
-#echo "ROW CONFIG JSON: "$rowConfigJson
-#echo "colOrder: "$colOrder
-#echo "colDistance: "$colDistance
-#echo "colAgglomeration: "$colAgglomeration
-#echo "colCuts: "$colCuts
-#echo "colLabels: "$colLabels
-#echo "COL CONFIG JSON: "$colConfigJson
-
-#BEGIN: Construct JSON for data layers
-matrixJson='"matrix_files": [ '
-inputMatrix=''
-dataLayerIter=0
-dataLayerNames=''
-for i in "$@"; do
- currParm=$(cut -d'|' -f1 <<< $i)
- if [ $currParm = "matrix_files" ]
- then
- if [ $dataLayerIter -lt 1 ]
- then
- inputMatrix=$(cut -d'|' -f3 <<< $i)
- fi
- currMatrixName=$(cut -d'|' -f5 <<< $i)
- dataLayerIter=$((dataLayerIter+1))
- if [[ $dataLayerNames =~ $currMatrixName ]]
- then
- currMatrixName=$currMatrixName$dataLayerIter
- fi
- dataLayerNames=$dataLayerNames$currMatrixName
- colorPref=$(cut -d'|' -f16 <<< $i)
- colorMapJson=''
- if [ $colorPref = "defined" ]
- then
- #validations to place leading zero on first breakpoint (if necessary)
- b1=$(cut -d'|' -f20 <<< $i)
- b1first=$(cut -d'.' -f1 <<< $b1)
- if [ $b1first = "-" ]
- then
- b1="-0."$(cut -d'.' -f2 <<< $b1)
- fi
- if [ "$b1first" = "" ]
- then
- b1="0"$b1
- fi
- #validations to place leading zero on second breakpoint (if necessary)
- b2=$(cut -d'|' -f21 <<< $i)
- b2first=$(cut -d'.' -f1 <<< $b2)
- if [ $b2first = "-" ]
- then
- b2="-0."$(cut -d'.' -f2 <<< $b2)
- fi
- if [ "$b2first" = "" ]
- then
- b2="0"$b2
- fi
- #validations to place leading zero on third breakpoint (if necessary)
- b3=$(cut -d'|' -f22 <<< $i)
- b3first=$(cut -d'.' -f1 <<< $b3)
- if [ $b3first = "-" ]
- then
- b3="-0."$(cut -d'.' -f2 <<< $b3)
- fi
- if [ "$b3first" = "" ]
- then
- b3="0"$b3
- fi
- #validation to ensure that all entered breakpoints are numeric values
- regExp='^[+-]?([0-9]+\.?|[0-9]*\.[0-9]+)$'
- if [[ $b1 =~ $regExp ]] && [[ $b2 =~ $regExp ]] && [[ $b3 =~ $regExp ]]
- then
- colorMapJson=$colorMapJson'"color_map": {"colors": ["'$(cut -d'|' -f17 <<< $i)'","'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'"],'
- colorMapJson=$colorMapJson'"thresholds": ['$b1','$b2','$b3'],'
- colorMapJson=$colorMapJson'"missing":"'$(cut -d'|' -f23 <<< $i)'"},'
- else
- echo "GALAXY PARAMETER WARNING: Not all user-defined breakpoints are numbers. Defined breakpoints and colors will be ignored."
- fi
- fi
- #Parse pipe-delimited parameter parameter
- matrixJson=$matrixJson' {'$colorMapJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$currMatrixName'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f10 <<< $i)'":"'$(cut -d'|' -f11 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"}'
- if [ $dataLayerIter -lt $dataLayerSize ]
- then
- matrixJson=$matrixJson','
- fi
- fi
-done
-matrixJson=$matrixJson"],"
-#END: Construct JSON for data layers
-#echo "DATA LAYER JSON: "$matrixJson
-#echo "INPUT MATRIX: "$inputMatrix
-
-#BEGIN: Construct JSON for attributes
-attribJson='"chm_attributes": [ '
-attribIter=0
-for i in "$@"; do
- currParm=$(cut -d'|' -f1 <<< $i)
- if [ $currParm = "attribute" ]
- then
- attribIter=$((attribIter+1))
- attribParam=$(cut -d'|' -f2 <<< $i)
- #Parse pipe-delimited 2-part data layer parameter
- attribJson=$attribJson' {"'$(cut -d':' -f1 <<< $attribParam)'":"'$(cut -d':' -f2 <<< $attribParam)'"}'
- if [ $attribIter -lt $attribSize ]
- then
- attribJson=$attribJson','
- fi
- fi
-done
-attribJson=$attribJson'],'
-#END: Construct JSON for attributes
-#echo "ATTRIB JSON: "$attribJson
-
-#BEGIN: Construct JSON for classification files
-classJson='"classification_files": [ '
-colCutClass=''
-rowCutClass=''
-if [ $rowCuts -gt 1 ]
-then
- rowCutClass='{"name": "Class", "path": "'$tdir'/ROfile.txt.cut","position": "row", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
-fi
-
-if [ $colCuts -gt 1 ]
-then
- if [ $rowCuts -gt 1 ]
- then
- rowCutClass=$rowCutClass','
- fi
- colCutClass='{"name": "Class", "path": "'$tdir'/COfile.txt.cut","position": "column", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
- if [ $classSize -gt 0 ]
- then
- colCutClass=$colCutClass','
- fi
-else
- if [ $rowCuts -gt 1 ] && [ $classSize -gt 0 ]
- then
- rowCutClass=$rowCutClass','
- fi
-fi
-
-classJson=$classJson$rowCutClass$colCutClass
-classIter=0
-for i in "$@"; do
- currParm=$(cut -d'|' -f1 <<< $i)
- if [ $currParm = "classification" ]
- then
- classIter=$((classIter+1))
- className=$(cut -d'|' -f3 <<< $i)
- #Parse pipe-delimited 3-part classification bar parameter
- classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"'
- classCat=$(cut -d'|' -f7 <<< $i)
- classColorType=$(cut -d'_' -f2 <<< $classCat)
- classJson=$classJson','
- classHeight=$(cut -d'|' -f11 <<< $i)
- heightErrorVal=0
- [[ $classHeight != ?(-)+([0-9]) ]] && heightErrorVal=$((heightErrorVal+1))
- if [ $heightErrorVal -gt 0 ]
- then
- echo 'GALAXY PARAMETER WARNING: Non-numeric values found for covariate bar ('$className') height. Height value ignored and default of 15 used: '$classHeight
- else
- classJson=$classJson'"height": "'$classHeight'",'
- fi
- classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}'
- if [ $classIter -lt $classSize ]
- then
- classJson=$classJson','
- fi
- fi
-done
-classJson=$classJson']'
-#END: Construct JSON for classification files
-#echo "CLASSIFICATION JSON: "$classJson
-
-#Complete construction of Parameter JSON file by adding all JSON sections created above
-parmJson=$parmJson$rowConfigJson$colConfigJson$attribJson$matrixJson$classJson
-parmJson=$parmJson'}'
-#echo "COMPLETED PARAMETER JSON: "$parmJson
-
-#run R to cluster matrix
-output="$(R --slave --vanilla --file=$tooldir/CHM_Advanced.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)"
-# Check for errors from R step, log them if found, and exit script
-rc=$?;
-if [ $rc != 0 ]
-then
- echo $output;
- if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ]
- then
- echo "";
- echo "NOTE 1: This error can occur when a covariate file has inadvertently been selected as an Input Matrix. Check your Input Matrix entry.";
- echo "NOTE 2: This error can occur when there is no variation in a data rows or columns in the input matrix. Try a different distance measure or remove rows/columns without variation.";
- fi
- exit $rc;
-fi
-
-#Call java program to generate NGCHM viewer files.
-java -jar $tooldir/GalaxyMapGen.jar "$parmJson"
-#clean up tempdir
-rm -rf $tdir
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_advanced_heatmap_gen.xml
--- a/mda_advanced_heatmap_gen.xml Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,503 +0,0 @@
-
-
-
- r-base
- openjdk
-
- Create Clustered Heat Maps with Advanced Options
-$__tool_directory__/heatmap_advanced.sh "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc" "summary_width|$summaryDisplayWidth"
- "row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|${d_rows.rowDendroCut}|data_type|$rowDataType|top_items|$rowTopItems|tree_cuts|${d_rows.rcutrows.rowDendroTreeCut}|${d_rows.rcutrows.raddcuts}|dendro_show|${d_rows.rowDendroShow}|dendro_height|${d_rows.rowDendroHeight}"
- "col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|${d_cols.colDendroCut}|data_type|$colDataType|top_items|$colTopItems|tree_cuts|${d_cols.ccutrows.colDendroTreeCut}|${d_cols.ccutrows.caddcuts}|dendro_show|${d_cols.columnDendroShow}|dendro_height|${d_cols.columnDendroHeight}"
- #for $attr in $hm_attribute
- 'attribute|${attr.attrbute_key}':'${attr.attrbute_value}'
- #end for
- #for $mx in $matrices
- 'matrix_files|path|$mx.dataLayer|name|${mx.dataLayerName}|summary_method|${mx.summarymethod}|selection_color|${mx.dataLayerSelectionColor}|cuts_color|${mx.dataLayerCutsColor}|grid_color|${mx.dataLayerGridColor}|grid_show|${mx.dataLayerGridShow}|${mx.colorsBreaks.setColorsBreaks}|${mx.colorsBreaks.matrixColor1}|${mx.colorsBreaks.matrixColor2}|${mx.colorsBreaks.matrixColor3}|${mx.colorsBreaks.matrixBreak1}|${mx.colorsBreaks.matrixBreak2}|${mx.colorsBreaks.matrixBreak3}|${mx.colorsBreaks.missingColor}'
- #end for
- #for $op in $operations
- 'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cattype.cat}|bar_type|${op.cattype.scatbar.bartype}|height|${op.classHeight}|fg_color|${op.cattype.scatbar.fg_color}|bg_color|${op.cattype.scatbar.bg_color}'
- #end for
- 'output_location|$output'
-
-
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_heatmap_gen.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mda_heatmap_gen.py Thu Jun 20 11:39:46 2019 -0400
@@ -0,0 +1,250 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Python program to validate the NG-CHM heat map input matrix and covariate file formats before the heat map generation shell step -- bob brown
+
+import subprocess #retained for the (currently commented-out) call to heatmap.sh
+import sys
+import os
+import re
+#import config
+import traceback
+#import commons
+
+#ConfigVals = config.Config("../rppaConf.txt")
+
+def main():
+
+ try:
+ print '\nStarting Heat Map file validation ......'
+ #print "\nheat map sys args len and values = ",len(sys.argv), str(sys.argv) #, '++',argvals
+
+
+ error= False
+ endCovarParam= len(sys.argv)-2 # last argument index to scan for covariate triplet info (if any)
+ startCovarParam= 17 # beginning loc for covar triplet info
+ inMatrix= sys.argv[3]
+
+ for i in range( endCovarParam, 15, -3):
+ if len(sys.argv[i]) > 6:
+ if sys.argv[i][0:4].find('row_') == 0 or sys.argv[i][0:7].find('column_') == 0: # 0 is match start position
+ startCovarParam= i-2
+ #print "\nHeat map arg 3 and start covariate index on = " ,str(sys.argv[3]),' - ', startCovarParam, ' covar name= ',str(sys.argv[startCovarParam:])
+ #else: print '\nCovariate param row or column not found at i', i, str(sys.argv[i])
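+ # Covariate arguments arrive in triplets (illustrative values):
+ #   ... 'Subtype' '/path/to/subtype.txt' 'column_categorical' ...
+ # i.e. a label, a file path, and "<row|column>_<categorical|continuous>".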
+
+ #test inMatrix= "/Users/bobbrown/Desktop/NGCHM-Galaxy-Test-Files/400x400firstRowShift.txt"
+ #test covarFN= '/Users/bobbrown/Desktop/400x400-column-covariate-continuous-TestingErrors.txt'
+ #test row_col_cat_contin= 'column_continuous'
+ #test row_col_cat_contin= 'column_categorical'
+ #test covarLabel = 'bob test'
+ #test numCovariates= 1
+
+ errorInMatrix,inMatrixRowLabels,inMatrixColLabels= ValidateHMInputMatrix(inMatrix) # verify input matrix
+
+ print "\nFirst & last Row labels ", inMatrixRowLabels[0],inMatrixRowLabels[-1]," and Columns ", inMatrixColLabels[0],inMatrixColLabels[-1], " number Rows= ",len(inMatrixRowLabels)," number Columns= ",len(inMatrixColLabels)
+
+ # continue reviewing covariates to catch any errors in any of the input info
+ if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5:
+ errorInMatrix = True
+ print '\n----ERROR Input matrix has too few rows or columns; skipping covariate file validation'
+
+ elif not errorInMatrix:
+ print "\n++++ SUCCESS the Input Matrix looks good\n\n"
+
+ i= startCovarParam
+ while i < (len(sys.argv)-2): # TODO: verify this works with the advanced tool; there is one other 0..n parameter after this
+ covarLabel= sys.argv[i]
+ covarLabel= covarLabel.replace(' ','')
+ covarFN= sys.argv[i+1]
+ covarFN= covarFN.replace(' ','')
+ row_col_cat_contin= sys.argv[i+2]
+ row_col_cat_contin= row_col_cat_contin.replace(' ','')
+ i +=3
+
+ print "\nSTART Validating covariate file with label= ", covarLabel, " and type= ",row_col_cat_contin
+
+ error= ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin,inMatrixRowLabels,inMatrixColLabels) # check covariate files
+
+ if error or errorInMatrix:
+ print"\n---ERROR issues found in input or covariate files\n "
+ sys.stderr.write( "\nERROR issues found in input or covariate files see errors in Standard Output\n\n ")
+ sys.exit(3)
+
+
+ print"\n FINISHED -- Validation of the Input Matrix and Covariate files (if any)\n\n"
+
+ #print" next running the clustered heat map generator \n",str(sys.argv[11])+"/heatmap.sh "+ str(sys.argv[1:])
+ # p = subprocess.Popen([str(sys.argv[1])+"/heatmap.sh "+ argvals], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ #p = subprocess.Popen([str(sys.argv[11])+"/heatmap.sh "+ str(sys.argv[1:])], shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+ #retval = p.wait()
+ #print ' Cluster and Viewer returned\n'
+ #for line in p.stdout.readlines():
+ # print line
+
+# else:
+# sys.stderr.write("\nERROR -- The Heat Map Generator encountered the above errors with the input file(s)\n\n")
+# sys.exit(3) # this will error it out :)
+# except:
+# sys.stderr.write(str(traceback.format_exc()))
+# sys.exit(3) # this will error it out :)
+ except Exception, err:
+ sys.stderr.write('ERROR: %s\n' % str(err))
+
+ return
+
+#+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+
+def ValidateHMInputMatrix(inputMatrixPath): # Validates the heat map input matrix: column headers, row labels, and consistent row lengths
+
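+ # Expected input matrix layout (tab-separated; values illustrative):
+ #            SampleA  SampleB  SampleC
+ #   GeneX    1.2      0.4      -0.7
+ #   GeneY    0.1      2.3      0.9
+ # The header row may have one field fewer than the data rows when the row-label column
+ # has no header; a blank leading header cell is dropped below.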
+ try:
+ error= True
+
+ inputMatrixPath= inputMatrixPath.replace(' ','')
+
+ inMatrixFH= open( inputMatrixPath, 'rU')
+
+ #print "\nInput matrix path and name ", inputMatrixPath,"\n"
+ error= False
+
+ countRow= 0
+ lenRow1= 0
+ lenAllRows= 0
+ inMatrixRowLabels= []
+ inMatrixColLabels= []
+
+ for rawRow in inMatrixFH:
+ countRow +=1
+
+ rawRow= rawRow.replace('\n','')
+ eachRow= rawRow.split('\t')
+ if countRow < 2: print 'Input Matrix start 1 to 10= ',eachRow[:10], '\n'
+
+ if countRow == 1:
+ lenRow1= len(eachRow)
+ inMatrixColLabels= eachRow
+ for j in range(1,lenRow1):
+ tmp= re.search('[abcdefghijklmnopqrstuvwxyz]',eachRow[j].lower())
+ try:
+ if tmp.group(0) == '': # if doesn't exist then error
+ tmp= tmp
+ except Exception as e:
+ print("\n--+-+- ERROR Column Headers at position "+str(j+1)+" value appears to be non-alphanumeric --"+str(eachRow[j])+"--")
+ sys.stderr.write("\n--+-+- ERROR Column Headers at position "+str(j+1)+" value appears to be non-alphanumeric --"+str(eachRow[j])+"--")
+ error= True
+
+ if lenRow1 < 3: # likely is covariate file not input matrix
+ print"----WARNING Input number of columns= " , str(lenRow1)," is too few; the input matrix may actually be a covariate file"
+ sys.stderr.write("----WARNING Input number of columns= " + str(lenRow1)+" is too few; the input matrix may actually be a covariate file")
+ #error= True
+ #sys.err= 2
+ elif countRow == 2:
+ lenAllRows= len(eachRow)
+ if (lenAllRows == lenRow1) or (lenAllRows == lenRow1+1): #or (lenAllRows- lenRow1 == 0 or 1):
+ print"Validating Input matrix, number of Labeled Columns = ", str(lenAllRows)
+ inMatrixRowLabels.append(eachRow[0])
+
+# allow other error to occur first
+# tmp= re.search('[abcdefghijklmnopqrstuvwxyz]',eachRow[0].lower())
+# try:
+# if tmp.group(0) == '': # if doesn't exist then error
+# tmp= tmp
+# except Exception as e:
+# print("\n--+-+- WARNING Row Label at row 2 value appears to be non-alphanumeric --"+str(eachRow[j])+"--")
+# sys.stderr.write("\n--+-+- WARNING Row Label at row 2 value appears to be non-alphanumeric --"+str(eachRow[j])+"--")
+# #error= True
+ if (lenAllRows == lenRow1) and (inMatrixColLabels[0]==''): inMatrixColLabels.pop(0) #remove blank first cell
+
+ else:
+ print( "\n--ERROR Input matrix column count mismatch: the first row has "+str(lenRow1)+" columns and the second row has "+str(lenAllRows))
+ sys.stderr.write( "\n--ERROR Input matrix column count mismatch: the first row has "+str(lenRow1)+" columns and the second row has "+str(lenAllRows))
+ error= True
+ sys.err= 6
+ elif (lenRow1 != len(eachRow) and lenRow1+1 != len(eachRow)):
+ print ("\n--ERROR Input Row "+ str(countRow)+" number of columns= "+str(len(eachRow))+" is a length mismatch with row 2 length "+str( lenAllRows))
+ sys.stderr.write ("\n--ERROR Input Row "+ str(countRow)+" number of columns= "+str(len(eachRow))+" is a length mismatch with row 2 length "+str( lenAllRows))
+ error= True
+ sys.err= 7
+ else:
+ inMatrixRowLabels.append(eachRow[0])
+ tmp= re.search('[abcdefghijklmnopqrstuvwxyz]',eachRow[0].lower())
+ try:
+ if tmp.group(0) == '': # if doesn't exist then error
+ tmp= tmp
+ except Exception as e:
+ print"-+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[0])
+ sys.stderr.write("\n--+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric "+str(eachRow[0]))
+
+
+ if len(inMatrixColLabels) > 0:
+ if (inMatrixColLabels[-1] =='') or (inMatrixColLabels[-1] =='\n'): inMatrixColLabels.pop()
+
+ inMatrixFH.close()
+
+ #print error, lenAllRows, len(eachRow), eachRow[0]
+ except:
+ #inMatrixFH.close()
+ sys.stderr.write(str(traceback.format_exc()))
+ error= True
+
+ return error,inMatrixRowLabels,inMatrixColLabels
+
+ #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+
+def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels,inMatrixColLabels): # Validates a covariate file: labels present and, for continuous covariates, numeric-looking values
+
+# verify
+# 1 that the covariate file labels match the matrix column or row labels 1 to 1
+# 2 that, for a continuous covariate file, the 2nd field is not all text (hard to tell with '-' or an 'e' exponent)
+# 3 that the length of the covariate file matches the row or column length of the input matrix
+# (checks 1 and 3 are not yet implemented here; the matrix label lists are passed in but unused)
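+# Illustrative covariate file layout (tab-separated, one line per matrix row or column label):
+#   SampleA<TAB>Basal   (categorical)   or   SampleA<TAB>0.53   (continuous)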
+
+ error= True
+ try:
+
+ covFH= open( covariateFilePath, 'rU')
+ countRow= 0
+
+ error= False
+
+ for rawRow in covFH:
+ countRow +=1
+ rawRow= rawRow.replace('\n','')
+ eachRow= rawRow.split('\t')
+ if countRow== 1: print "\nCovariate file info - label ",str(covarLabel)," row/col categorical or continuous ",row_col_cat_contin," first row ",str(eachRow)
+
+ if len(eachRow) < 2 and countRow > 1:
+ print("----ERROR Input Row "+str(countRow)+" does not have a label and/or value ")
+ sys.stderr.write("----ERROR Input Row "+str(countRow)+" does not have a label and/or value")
+ error= True
+ sys.err= 8
+ #return error
+ elif len(eachRow) > 1:
+ tmp= re.search('[abcdefghijklmnopqrstuvwxyz]',eachRow[0].lower())
+ try:
+ if tmp.group(0) == '': # if doesn't exist then error
+ tmp= tmp
+ except Exception as e:
+ print"\n-+-+- WARNING Covariate Label at row "+str(countRow)+" value appears to be non-alphanumeric --", eachRow[0],"--"
+ sys.stderr.write("\n--+-+- WARNING Row Headers at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[0])+"--")
+
+ if not error:
+ if row_col_cat_contin[-4:] == 'uous': # verify continuous is number-ish
+ tmp= re.search('[+-.0123456789eE]',eachRow[1])
+ try:
+ if tmp.group(0) == '':
+ tmp= tmp
+ except Exception as e:
+ print("\n-+-+-WARNING Input Row "+str(countRow)+" covariate continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")
+ sys.stderr.write("\n-+-+-WARNING Input Row "+str(countRow)+" covariate continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")
+ #error= True
+ except:
+ sys.stderr.write(str(traceback.format_exc()))
+
+ covFH.close()
+
+ return error
+
+
+if __name__ == "__main__":
+ main()
+
+
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_heatmap_gen.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mda_heatmap_gen.xml Thu Jun 20 11:39:46 2019 -0400
@@ -0,0 +1,159 @@
+
+
+
+ r-base
+ openjdk
+
+ Create Clustered Heat Maps
+ $__tool_directory__/heatmap.sh "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc"
+ "matrix_files|path|$inputmatrix|name|datalayer|summary_method|$summarymethod"
+ "row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|0|data_type|labels"
+ "col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|0|data_type|labels"
+ #for $op in $operations
+ 'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cat}'
+ #end for
+ 'output_location|$output'
+