Repository 'heat_map_creation'
hg clone https://toolshed.g2.bx.psu.edu/repos/md-anderson-bioinformatics/heat_map_creation

Changeset 40:8f8ab332a050 (2019-06-20)
Previous changeset 39:436f03b71cf6 (2019-06-20) Next changeset 41:8acca16f3921 (2020-01-28)
Commit message:
Uploaded
added:
CHM.R
heatmap.sh
mda_heatmap_gen.py
mda_heatmap_gen.xml
removed:
CHM_Advanced.R
heatmap_advanced.sh
mda_advanced_heatmap_gen.xml
b
diff -r 436f03b71cf6 -r 8f8ab332a050 CHM.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/CHM.R Thu Jun 20 11:39:46 2019 -0400
[
@@ -0,0 +1,130 @@
+### Reads a tab-delimited data matrix, computes a row ordering and a column ordering for it,
+### and writes the clustering results for every axis whose order method is Hierarchical.
+###
+### dataFile - tab-delimited input file: header line with the column labels, row labels in the
+###            first column; the strings NA, N/A, - and ? are read as missing values
+### rowOrderMethod - Hierarchical, Original, Random
+### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
+### rowAgglomerationMethod - For clustering, agglomeration method.  May be:  'average' for Average Linkage, 'complete' for Complete Linkage,
+###                                                                          'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
+### colOrderMethod, colDistanceMeasure, colAgglomerationMethod - same meaning as the row settings, applied to the columns
+### rowOrderFile - output file of order of rows
+### rowDendroFile - output file of row dendrogram
+### colOrderFile - output file of order of cols
+### colDendroFile - output file of col dendrogram
+### rowCut - number of row classifications; accepted but not referenced by this function
+### colCut - number of column classifications; handed to writeHCCut together with the
+###          file name <colOrderFile>.cut
+
+performDataOrdering <- function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut)
+{
+   inputTable <- read.table(
+      dataFile,
+      header = TRUE,
+      sep = "\t",
+      check.names = FALSE,
+      row.names = 1,
+      as.is = TRUE,
+      na.strings = c("NA", "N/A", "-", "?")
+   )
+
+   # Rows are handled completely before the columns are looked at.
+   rowResult <- createOrdering(inputTable, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
+   if (rowOrderMethod == "Hierarchical") {
+      writeHCDataTSVs(rowResult, rowDendroFile, rowOrderFile)
+   }
+
+   colResult <- createOrdering(inputTable, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
+   if (colOrderMethod == "Hierarchical") {
+      writeHCDataTSVs(colResult, colDendroFile, colOrderFile)
+      colCutFile <- paste0(colOrderFile, ".cut")
+      writeHCCut(colResult, colCut, colCutFile)
+   }
+}
+
+#creates output files for hclust ordering
+#
+# uDend                 - clustering result as returned by hclust; its merge, height, labels
+#                         and order components are used
+# outputHCDataFileName  - receives the dendrogram: one line per merge step, columns A, B, Height
+# outputHCOrderFileName - receives one line per item, columns Id (the item label) and
+#                         Order (1-based position of that item in uDend$order)
+writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
+{
+   dendroData <- cbind(uDend$merge, uDend$height, deparse.level=0)
+   colnames(dendroData) <- c("A", "B", "Height")
+   write.table(dendroData, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
+
+   # hclust leaves the labels NULL for unlabelled input; fall back to the item indices.
+   itemIds <- uDend$labels
+   if (is.null(itemIds)) {
+      itemIds <- as.character(seq_along(uDend$order))
+   }
+   # uDend$order is a permutation, so order() of it is the inverse permutation:
+   # element i is the position at which item i appears in uDend$order.
+   orderData <- cbind(Id = itemIds, Order = order(uDend$order))
+   write.table(orderData, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
+}
+
+#creates a classification file based on user specified cut of dendrogram
+#
+# uDend             - clustering result as returned by hclust
+# cutNum            - requested number of clusters, as a number or a numeric string
+#                     (command-line arguments are strings); anything below 2, or a value
+#                     that is missing or not numeric, turns the cut off
+# outputCutFileName - receives one line per item: item label, tab, "Cluster <n>"; no header line
+writeHCCut<-function(uDend, cutNum, outputCutFileName)
+{
+   # Convert before comparing: with a character cutNum the test "10" < 2 is a string
+   # comparison and would be TRUE, and an NA would make the if() fail.
+   numClusters <- suppressWarnings(as.integer(cutNum))
+   if (length(numClusters) != 1 || is.na(numClusters) || numClusters < 2) {
+      return(invisible(NULL))
+   }
+   print (paste("Writing cut file ", outputCutFileName))
+   membership <- cutree(uDend, k = numClusters)
+   itemIds <- names(membership)
+   if (is.null(itemIds)) {
+      # unlabelled clustering: identify the items by index so both columns are still written
+      itemIds <- as.character(seq_along(membership))
+   }
+   cutData <- cbind(itemIds, sprintf("Cluster %d", membership))
+
+   write.table(cutData, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE)
+}
+
+
+### Computes the ordering of one axis of the data.
+###
+### matrixData          - data with row names and column names
+### orderMethod         - "Hierarchical", "Random" or "Original"; anything else stops with an error
+### direction           - "row" orders the rows, any other value orders the columns
+### distanceMeasure     - "correlation", or a method name passed on to dist(); Hierarchical only
+### agglomerationMethod - method name passed on to hclust(); Hierarchical only
+###
+### Returns the hclust result for "Hierarchical", the axis labels in shuffled order for
+### "Random", and the axis labels as they are for "Original".
+createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
+{
+  if (orderMethod == "Hierarchical") {
+    # Distances between the rows, or between the columns, of the data.
+    # "correlation" maps a correlation r in [-1, 1] to the distance (1 - r) / 2.
+    itemDistances <- if (direction == "row") {
+      if (distanceMeasure == "correlation") {
+        as.dist((1 - cor(t(matrixData), use = "pairwise")) / 2)
+      } else {
+        dist(matrixData, method = distanceMeasure)
+      }
+    } else {
+      if (distanceMeasure == "correlation") {
+        as.dist((1 - cor(matrixData, use = "pairwise")) / 2)
+      } else {
+        dist(t(matrixData), method = distanceMeasure)
+      }
+    }
+    # The agglomeration method name is handed to hclust exactly as supplied.
+    return(hclust(itemDistances, method = agglomerationMethod))
+  }
+
+  if (orderMethod == "Random") {
+    axisLabels <- if (direction == "row") rownames(matrixData) else colnames(matrixData)
+    return(sample(axisLabels, length(axisLabels)))
+  }
+
+  if (orderMethod == "Original") {
+    if (direction == "row") {
+      return(rownames(matrixData))
+    }
+    return(colnames(matrixData))
+  }
+
+  stop("createOrdering -- failed to find ordering method")
+}
+### Script entry point: map the positional command line arguments onto the parameters of
+### performDataOrdering, in the order listed below, and run it.
+
+# Warnings are switched off for the whole run.
+options(warn = -1)
+
+args <- commandArgs(trailingOnly = TRUE)
+
+performDataOrdering(
+   dataFile = args[1],
+   rowOrderMethod = args[2],
+   rowDistanceMeasure = args[3],
+   rowAgglomerationMethod = args[4],
+   colOrderMethod = args[5],
+   colDistanceMeasure = args[6],
+   colAgglomerationMethod = args[7],
+   rowOrderFile = args[8],
+   colOrderFile = args[9],
+   rowDendroFile = args[10],
+   colDendroFile = args[11],
+   rowCut = args[12],
+   colCut = args[13]
+)
b
diff -r 436f03b71cf6 -r 8f8ab332a050 CHM_Advanced.R
--- a/CHM_Advanced.R Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,131 +0,0 @@
-### This method generates a row and column ordering given an input matrix and ordering methods.
-###
-### matrixData - numeric matrix 
-### rowOrderMethod - Hierarchical, Original, Random
-### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
-### rowAgglomerationMethod - For clustering, agglomeration method.  May be:  'average' for Average Linkage, 'complete' for Complete Linkage,
-###                                                                          'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
-### colOrderMethod 
-### colDistanceMeasure
-### colAgglomerationMethod
-### rowOrderFile - output file of order of rows 
-### rowDendroFile - output file of row dendrogram  
-### colOrderFile - output file of order of cols
-### colDendroFile - output file of col dendrogram
-### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file.  0 for turned off.
-### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file.  0 for turned off.
-
-performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut)
-{ 
-   dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?"))
-   rowOrder <-  createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)  
-   if (rowOrderMethod == "Hierarchical") {
-      writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
-    if (rowCut != 0) {
-       writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep=""))
-    }
-   }
-
-   colOrder <-  createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)  
-   if (colOrderMethod == "Hierarchical") {
-      writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
-    if (colCut != 0) {
-        writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep=""))
-    }
-   }
-}
-
-#creates output files for hclust ordering
-writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName)
-{
-   data<-cbind(uDend$merge, uDend$height, deparse.level=0)
-   colnames(data)<-c("A", "B", "Height")
-   write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)

-   data=matrix(,length(uDend$labels),2);
-   for (i in 1:length(uDend$labels)) {
-      data[i,1] = uDend$labels[i];
-      data[i,2] = which(uDend$order==i);
-   }
-   colnames(data)<-c("Id", "Order")
-   write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
-}
-
-#creates a classification file based on user specified cut of dendrogram
-writeHCCut<-function(uDend, cutNum, outputCutFileName)
-{
-   print (paste("Writing cut file ", outputCutFileName))
-   cut <- cutree(uDend, cutNum);
-   id <- names(cut);
-   data=matrix(,length(cut),2);
-   for (i in 1:length(cut)) {
-      data[i,1] = id[i];
-      data[i,2] = sprintf("Cluster %d", cut[i]);
-   }
-
-   write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE);
-}
-
-
-createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod)
-{
-  ordering <- NULL
-
-  if (orderMethod == "Hierarchical")
-  {
-
-    # Compute dendrogram for "Distance Metric"
-    distVals <- NULL
-    if(direction=="row") {
-      if (distanceMeasure == "correlation") {
-        geneGeneCor <- cor(t(matrixData), use="pairwise")
-        distVals <- as.dist((1-geneGeneCor)/2)
-      } else {
-        distVals <- dist(matrixData, method=distanceMeasure)
-      }
-    } else { #column
-      if (distanceMeasure == "correlation") {
-        geneGeneCor <- cor(matrixData, use="pairwise")
-        distVals <- as.dist((1-geneGeneCor)/2)
-      } else {
-        distVals <- dist(t(matrixData), method=distanceMeasure)
-      }
-    }
-
-#    if (agglomerationMethod == "ward") {
-#      ordering <- hclust(distVals * distVals, method="ward.D2")
-#    } else {
-      ordering <- hclust(distVals, method=agglomerationMethod)
-#    }
-  }
-  else if (orderMethod == "Random")
-  {
-    if(direction=="row") {
-       headerList <- rownames(matrixData)
-       ordering <- sample(headerList, length(headerList)) 
-    } else {
-       headerList <- colnames(matrixData)
-       ordering <- sample(headerList, length(headerList)) 
-    }
-  }
-  else if (orderMethod == "Original")
-  {
-    if(direction=="row") {
-       ordering <- rownames(matrixData) 
-    } else {
-       ordering <- colnames(matrixData) 
-    }
-  } else {
-    stop("createOrdering -- failed to find ordering method")
-  }
-  return(ordering)
-}
-### Initialize command line arguments and call performDataOrdering
-
-options(warn=-1)
-
-args = commandArgs(TRUE)
-
-performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13])
-
-#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
b
diff -r 436f03b71cf6 -r 8f8ab332a050 heatmap.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/heatmap.sh Thu Jun 20 11:39:46 2019 -0400
[
@@ -0,0 +1,147 @@
+#Debugging aid (disabled): dump the positional parameters
+#echo "1: " $1" 2: " $2" 3: " $3" 4: "$4" 5: "$5 " 6: "$6 "7: "$7" 8: "$8 " 9: "$9" 10: "${10}" 11: "${11} "12: "${12}
+#echo " 13: "${13}" 14: "${14}" 15: "${15}" 16: "${16} "17: "${17}" 18: "${18}" 19: "${19}" 20: "${20}" 21: "${21} " 22: "${22}" 23:" ${23}
+
+#Count total number of parameters and classification parameters.
+#The text before the first '|' of a parameter names its kind.
+parmSize=0
+classSize=0
+matrixSize=0
+for i in "$@"; do
+  #Quoted throughout: an empty parameter, or one containing spaces, would otherwise
+  #make the [ ] test fail with a syntax error.
+  currParm=$(cut -d'|' -f1 <<< "$i")
+  parmSize=$((parmSize+1))
+  if [ "$currParm" = "classification" ]
+  then
+    classSize=$((classSize+1))
+  fi
+done
+
+#Get tool data and tool install directories
+tooldir=$1
+tooldata=$2
+#create temp directory for row and col order and dendro files.
+#mktemp picks a name that is not in use yet. A name built from the date alone
+#(year, month, day, minute, second - no hour) can be shared by two jobs, and the
+#first job to finish would then delete the files of the other.
+if ! tdir=$(mktemp -d "${tooldata%/}/chm.XXXXXX")
+then
+  echo "ERROR: Unable to create a temporary directory in $tooldata" >&2
+  exit 1
+fi
+#echo "tdir: "$tdir
+
+#Extract parameters for row and column order and dendro files
+rowOrderFile=$tdir/ROfile.txt
+rowDendroFile=$tdir/RDfile.txt
+colOrderFile=$tdir/COfile.txt
+colDendroFile=$tdir/CDfile.txt
+#JSON fragments naming those files; each ends with a comma so further members can follow
+rowOrderJson='"order_file": "'$rowOrderFile'",'
+rowDendroJson='"dendro_file": "'$rowDendroFile'",'
+colOrderJson='"order_file": "'$colOrderFile'",'
+colDendroJson='"dendro_file": "'$colDendroFile'",'
+
+#BEGIN: Construct JSON for all non-repeating parameters
+#Every parameter is a pipe-delimited string; its first field names the parameter.
+parmJson='{'
+rowConfigJson='"row_configuration": {'
+colConfigJson='"col_configuration": {'
+
+ctr=0
+for i in "$@"; do
+  #Parameters 1 and 2 are the tool and tool data directories; skip them
+  if [ "$ctr" -gt 1 ]
+  then
+    #Values are quoted in tests and here-strings so that empty values or values
+    #with spaces cannot break the [ ] syntax.
+    currParm=$(cut -d'|' -f1 <<< "$i")
+    case "$currParm" in
+      '')
+        #Nothing usable in an empty parameter
+        ;;
+      matrix_files|classification)
+        #Repeating parameters; the data layer and classification loops handle these
+        ;;
+      row_configuration)
+        #Fields come in name|value pairs: 2|3 order method, 4|5 distance, 6|7 agglomeration,
+        #field 9 cuts, 10|11 data type
+        rowOrder=$(cut -d'|' -f3 <<< "$i")
+        rowDistance=$(cut -d'|' -f5 <<< "$i")
+        rowAgglomeration=$(cut -d'|' -f7 <<< "$i")
+        rowCuts=$(cut -d'|' -f9 <<< "$i")
+        rowLabels=$(cut -d'|' -f11 <<< "$i")
+        dataTypeJson='"'$(cut -d'|' -f10 <<< "$i")'":["'$rowLabels'"]'
+        #CHM.R writes the order and dendro files only for hierarchical ordering
+        if [ "$rowOrder" = 'Hierarchical' ]
+        then
+          rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson
+        fi
+        rowConfigJson=$rowConfigJson'"'$(cut -d'|' -f2 <<< "$i")'":"'$rowOrder'","'$(cut -d'|' -f4 <<< "$i")'":"'$rowDistance'","'$(cut -d'|' -f6 <<< "$i")'":"'$rowAgglomeration'",'$dataTypeJson'},'
+        ;;
+      col_configuration)
+        #Same field layout as row_configuration
+        colOrder=$(cut -d'|' -f3 <<< "$i")
+        colDistance=$(cut -d'|' -f5 <<< "$i")
+        colAgglomeration=$(cut -d'|' -f7 <<< "$i")
+        colCuts=$(cut -d'|' -f9 <<< "$i")
+        colLabels=$(cut -d'|' -f11 <<< "$i")
+        dataTypeJson='"'$(cut -d'|' -f10 <<< "$i")'":["'$colLabels'"]'
+        if [ "$colOrder" = 'Hierarchical' ]
+        then
+          colConfigJson=$colConfigJson$colOrderJson$colDendroJson
+        fi
+        colConfigJson=$colConfigJson'"'$(cut -d'|' -f2 <<< "$i")'":"'$colOrder'","'$(cut -d'|' -f4 <<< "$i")'":"'$colDistance'","'$(cut -d'|' -f6 <<< "$i")'":"'$colAgglomeration'",'$dataTypeJson'},'
+        ;;
+      *)
+        #Parse pipe-delimited name|value parameter
+        parmJson=$parmJson' "'$currParm'":"'$(cut -d'|' -f2 <<< "$i")'",'
+        ;;
+    esac
+  fi
+  ctr=$((ctr+1))
+done
+#END: Construct JSON for all non-repeating parameters
+#echo "rowCuts: "$rowCuts
+#echo "colCuts: "$colCuts
+#echo "ROW CONFIG JSON: "$rowConfigJson
+#echo "COL CONFIG JSON: "$colConfigJson
+
+#BEGIN: Construct JSON for data layers
+matrixJson='"matrix_files": [ '
+inputMatrix=''
+#Empty before the first entry, a comma afterwards, so that several entries form a valid JSON array
+matrixSep=''
+for i in "$@"; do
+  currParm=$(cut -d'|' -f1 <<< "$i")
+  if [ "$currParm" = "matrix_files" ]
+  then
+    #Parse pipe-delimited parameter: fields 2-7 are three name|value pairs
+    matrixJson=$matrixJson$matrixSep' {"'$(cut -d'|' -f2 <<< "$i")'":"'$(cut -d'|' -f3 <<< "$i")'","'$(cut -d'|' -f4 <<< "$i")'":"'$(cut -d'|' -f5 <<< "$i")'","'$(cut -d'|' -f6 <<< "$i")'":"'$(cut -d'|' -f7 <<< "$i")'"}'
+    matrixSep=','
+    #Field 3 is also the matrix handed to the R clustering step; with several
+    #matrix_files parameters the last one is used
+    inputMatrix=$(cut -d'|' -f3 <<< "$i")
+  fi
+done
+matrixJson=$matrixJson"],"
+#END: Construct JSON for data layers
+
+#BEGIN: Construct JSON for classification files
+classJson='"classification_files": [ '
+#Empty before the first entry, a comma afterwards; no separate count of entries is needed
+classSep=''
+for i in "$@"; do
+  currParm=$(cut -d'|' -f1 <<< "$i")
+  if [ "$currParm" = "classification" ]
+  then
+    #Parse pipe-delimited classification bar parameter:
+    #fields 2|3 and 4|5 are name|value pairs, field 7 is <position>_<color type>
+    classCat=$(cut -d'|' -f7 <<< "$i")
+    classPosition=$(cut -d'_' -f1 <<< "$classCat")
+    classColorType=$(cut -d'_' -f2 <<< "$classCat")
+    classJson=$classJson$classSep' {"'$(cut -d'|' -f2 <<< "$i")'":"'$(cut -d'|' -f3 <<< "$i")'","'$(cut -d'|' -f4 <<< "$i")'":"'$(cut -d'|' -f5 <<< "$i")'",'
+    classJson=$classJson' "position":"'$classPosition'","color_map": {"type":"'$classColorType'"}}'
+    classSep=','
+  fi
+done
+classJson=$classJson']'
+#END: Construct JSON for classification files
+
+#Assemble the complete parameter JSON from the sections built above
+parmJson=$parmJson$matrixJson$rowConfigJson$colConfigJson$classJson
+parmJson=$parmJson'}'
+#echo "HEATMAP PARAMETERS JSON: "$parmJson
+
+#run R to cluster matrix
+#Every argument is quoted so that an empty value keeps its position instead of
+#shifting all the arguments after it.
+output="$(R --slave --vanilla --file="$tooldir/CHM.R" --args "$inputMatrix" "$rowOrder" "$rowDistance" "$rowAgglomeration" "$colOrder" "$colDistance" "$colAgglomeration" "$rowOrderFile" "$colOrderFile" "$rowDendroFile" "$colDendroFile" "$rowCuts" "$colCuts" "$rowLabels" "$colLabels" 2>&1)"
+rc=$?
+if [ $rc != 0 ]
+then
+  #Show what R printed, line breaks included
+  echo "$output"
+  if grep -q "Inf in foreign function call" <<< "$output"
+  then
+    echo ""
+    echo "Note: This error can occur when there is no variation in a row or column.  Try a different distance measure or remove rows/columns without variation."
+    echo "This error may also be caused when a covariate file has inadvertently been selected as an Input Matrix.  Check your Input Matrix entry."
+  fi
+  #clean up tempdir before giving up
+  [ -n "$tdir" ] && rm -rf "$tdir"
+  exit $rc
+fi
+
+#call java program to generate NGCHM viewer files.
+java -jar "$tooldir/GalaxyMapGen.jar" "$parmJson"
+rc=$?
+#clean up tempdir
+[ -n "$tdir" ] && rm -rf "$tdir"
+#Report the status of the java step; without this the script would end with the status of the clean up
+exit $rc
b
diff -r 436f03b71cf6 -r 8f8ab332a050 heatmap_advanced.sh
--- a/heatmap_advanced.sh Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,369 +0,0 @@\n-#echo "1: " $1  " 2: " $2 " 3: " $3 " 4: " $4  " 5: " $5 " 6: " $6 " 7: " $7 " 8: " $8 " 9: " $9 " 10: " ${10} \n-#echo "11: " ${11} " 12: " ${12} 13: " ${13} 14: " ${14} " 15: " ${15} " 16: " ${16} " 17: " ${17} " 18: " ${18} " 19: " ${19} " 20: " ${20} \n-#echo "21: "${21}" 22: "${22}" 23: "${23}" 24: "${24}" 25: "${25}" 26: "${26}" 27: "${27}" 28: "${28}" 29: "${29}" 30: "${30}\n-\n-#Count total number of parameters, dataLayer parameters, and classification parameters\n-parmSize=0\n-classSize=0\n-dataLayerSize=0\n-attribSize=0\n-for i in "$@"; do\n-\tcurrParm=$(cut -d\'|\' -f1 <<< $i)\n-\tparmSize=$((parmSize+1))\n-\tif [ $currParm = "classification" ]\n-\tthen\n-\t\tclassSize=$((classSize+1))\n-  \tfi\n-\tif [ $currParm = "matrix_files" ]\n-\tthen\n-\t\tdataLayerSize=$((dataLayerSize+1))\n-  \tfi\n-\tif [ $currParm = "attribute" ]\n-\tthen\n-\t\tattribSize=$((attribSize+1))\n-  \tfi\n-done\n-\n-if [ $dataLayerSize -lt 1 ]\n-then\n-\tnoDataLayer="ERROR: No Heat Map Matrices provided.  
Please add at least one Heat Map Matrix to your request and try again."\n-   \techo $noDataLayer\n-   \texit $noDataLayer\n-fi\n-\n-#Get tool data and tool install directories\n-tooldir=$1\n-tooldata=$2\n-#create temp directory for row and col order and dendro files.\n-tdir=$tooldata/$(date +%y%m%d%M%S)\n-mkdir $tdir\n-#echo "tdir: "$tdir\n-\n-#Extract parameters for row and column order and dendro files\n-rowOrderFile=$tdir/ROfile.txt\n-rowDendroFile=$tdir/RDfile.txt\n-colOrderFile=$tdir/COfile.txt\n-colDendroFile=$tdir/CDfile.txt\n-rowOrderJson=\'"order_file": "\'$rowOrderFile\'",\'\n-rowDendroJson=\'"dendro_file": "\'$rowDendroFile\'",\'\n-colOrderJson=\'"order_file": "\'$colOrderFile\'",\'\n-colDendroJson=\'"dendro_file": "\'$colDendroFile\'",\'\n-\n-#BEGIN: Construct JSON for all non-repeating parameters\n-parmJson=\'{\'\n-rowConfigJson=\'"row_configuration": {\'\n-colConfigJson=\'"col_configuration": {\'\n-\n-ctr=0\n-for i in "$@"; do\n-\tif [ $ctr -gt 1 ]\n-\tthen\n-\t\tcurrParm=$(cut -d\'|\' -f1 <<< $i)\n-\t\tif [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] && [ $currParm != "attribute" ]\n-\t\tthen\n-\t\t\t#Parse pipe-delimited parameter parameter\n-\t\t\tparmJson=$parmJson\' "\'$(cut -d\'|\' -f1 <<< $i)\'":"\'$(cut -d\'|\' -f2 <<< $i)\'",\'\n-\t  \tfi\n-\t\tif [ $currParm = "row_configuration" ]\n-\t\tthen\n-\t\t\trowOrder=$(cut -d\'|\' -f3 <<< $i)\n-\t\t\trowDistance=$(cut -d\'|\' -f5 <<< $i)\n-\t\t\trowAgglomeration=$(cut -d\'|\' -f7 <<< $i)\n-\t\t\trowCuts=$(cut -d\'|\' -f9 <<< $i)\n-\t\t\trowLabels=$(cut -d\'|\' -f11 <<< $i)\n-\t\t\trowDataTypeJson=\'"\'$(cut -d\'|\' -f10 <<< $i)\'":["\'$rowLabels\'"],\'\n-\t\t\trowCutType=$(cut -d\'|\' -f16 <<< $i)\n-\t\t\trowTopItemsJson=\'\'\n-\t\t\trowTopItems=$(cut -d\'|\' -f13 <<< $i)\n-\t\t\tif [ $rowTopItems != "None" ] && [ $rowTopItems != "" ]\n-\t\t\tthen\n-\t\t\t\trowTopItemsJson=\'"\'$(cut -d\'|\' -f12 
<<< $i)\'": [\'\n-\t\t\t\trowTopItems=${rowTopItems//,/\'","\'}\n-\t\t\t\trowTopItemsJson=$rowTopItemsJson\'"\'$rowTopItems\'"],\'\n-\t\t\tfi\n-\t\t\trowCutsJson=\'\'\n-\t\t\tif [ $rowCutType != "none" ]\n-\t\t\tthen\n-\t\t\t\tcutValues=$(cut -d\'|\' -f15 <<< $i) \n-\t\t\t\tif [ $cutValues != "None" ] && [ $cutValues != "0" ]\n-\t\t\t\tthen\n-\t\t\t\t\tif [ $rowCutType = "treecuts" ]\n-\t\t\t\t\tthen\n-\t\t\t\t\t\trowCutsJson=$rowCutsJson\'"tree_cuts": "\'$cutValues\'",\' \n-\t\t\t\t\t\trowCutsJson=$rowCutsJson\'"cut_width": "5",\' \n-\t\t\t\t\tfi\n-\t\t\t\t\tif [ $rowCutType = "positional" ]\n-\t\t\t\t\tthen\n-\t\t\t\t\t\trowCutErrorVal=0\n-\t\t\t\t\t\t[[ $cutValues != ?(-)+([0-9,]) ]] && rowCutErrorVal=$((rowCutErrorVal+1))\n-\t\t\t\t\t\tif [ $rowCutErrorVal -gt 0 ]\n-\t\t\t\t\t\tthen\n-\t   \t\t\t\t\t\techo "GALAXY PARAMETER WARNING: Non-numeric values found for Row Fixed Gap Locations. Ignoring parameter value: "$cutValues\n-\t\t\t\t\t\telse\n-\t\t\t\t\t\t\trowCutsJson=$rowCutsJson\'"cut_locations": [\'$cutValues\'],\' \n-\t\t\t\t\t\t\trowCutsJson=$rowCutsJson\'"cut_width": "5",\' \n-\t\t\t\t\t\tfi\n-\t\t\t\t\tfi\n-\t\t\t\tfi\n-\t\t\tfi\n-\t\t\trowConfigJson=$rowConfigJson$rowDataTypeJson$rowCutsJson$rowTopItemsJson\n-\t\t\tif [ $rowOrder = \'Hierarchical\' ]\n-\t\t\tthen\n-\t\t\t\trowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson\n-\t\t\tfi\n-\t\t\trowConfigJson=$rowConfigJson\' "\'$('..b'N for attributes\n-attribJson=\'"chm_attributes": [ \'\n-attribIter=0\n-for i in "$@"; do\n-\tcurrParm=$(cut -d\'|\' -f1 <<< $i)\n-\tif [ $currParm = "attribute" ]\n-\tthen\n-\t\tattribIter=$((attribIter+1))\n-\t\tattribParam=$(cut -d\'|\' -f2 <<< $i)\n-\t\t#Parse pipe-delimited 2-part data layer parameter\n-\t\tattribJson=$attribJson\' {"\'$(cut -d\':\' -f1 <<< $attribParam)\'":"\'$(cut -d\':\' -f2 <<< $attribParam)\'"}\'\n-\t\tif [ $attribIter -lt $attribSize ]\t\t\n-\t\tthen\n-\t\t\tattribJson=$attribJson\',\'\n-\t\tfi\n-  
\tfi\n-done\n-attribJson=$attribJson\'],\'\n-#END: Construct JSON for attributes\n-#echo "ATTRIB JSON: "$attribJson\n-\n-#BEGIN: Construct JSON for classification files\n-classJson=\'"classification_files": [ \'\n-colCutClass=\'\'\n-rowCutClass=\'\'\n-if [ $rowCuts -gt 1 ]\n-then\n-\trowCutClass=\'{"name": "Class", "path": "\'$tdir\'/ROfile.txt.cut","position": "row", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}\'\n-fi\n-\n-if [ $colCuts -gt 1 ]\n-then\n-\tif [ $rowCuts -gt 1 ] \n-\tthen\n-\t\trowCutClass=$rowCutClass\',\'\n-\tfi\n-\tcolCutClass=\'{"name": "Class", "path": "\'$tdir\'/COfile.txt.cut","position": "column", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}\'\n-\tif [ $classSize -gt 0 ] \n-\tthen\n-\t\tcolCutClass=$colCutClass\',\'\n-\tfi\n-else\n-\tif [ $rowCuts -gt 1 ] && [ $classSize -gt 0 ] \n-\tthen\n-\t\trowCutClass=$rowCutClass\',\'\n-\tfi\n-fi\n-\n-classJson=$classJson$rowCutClass$colCutClass\n-classIter=0\n-for i in "$@"; do\n-\tcurrParm=$(cut -d\'|\' -f1 <<< $i)\n-\tif [ $currParm = "classification" ]\n-\tthen\n-\t\tclassIter=$((classIter+1))\n-\t\tclassName=$(cut -d\'|\' -f3 <<< $i)\n-\t\t#Parse pipe-delimited 3-part classification bar parameter\n-\t\tclassJson=$classJson\' {"\'$(cut -d\'|\' -f2 <<< $i)\'":"\'$(cut -d\'|\' -f3 <<< $i)\'","\'$(cut -d\'|\' -f4 <<< $i)\'":"\'$(cut -d\'|\' -f5 <<< $i)\'","\'$(cut -d\'|\' -f8 <<< $i)\'":"\'$(cut -d\'|\' -f9 <<< $i)\'","\'$(cut -d\'|\' -f12 <<< $i)\'":"\'$(cut -d\'|\' -f13 <<< $i)\'","\'$(cut -d\'|\' -f14 <<< $i)\'":"\'$(cut -d\'|\' -f15 <<< $i)\'"\'\n-\t\tclassCat=$(cut -d\'|\' -f7 <<< $i)\n-\t\tclassColorType=$(cut -d\'_\' -f2 <<< $classCat)\n-\t\tclassJson=$classJson\',\'\n-\t\tclassHeight=$(cut -d\'|\' -f11 <<< $i)\n-\t\theightErrorVal=0\n-\t\t[[ $classHeight != ?(-)+([0-9]) ]] && heightErrorVal=$((heightErrorVal+1))\n-\t\tif [ $heightErrorVal -gt 0 ]\n-\t\tthen\n-\t\t\techo \'GALAXY PARAMETER WARNING: Non-numeric values found for covariate bar (\'$className\') 
height.  Height value ignored and default of 15 used: \'$classHeight\n-\t\telse\n-\t\t\tclassJson=$classJson\'"height": "\'$classHeight\'",\' \n-\t\tfi\n-\t\tclassJson=$classJson\' "position":"\'$(cut -d\'_\' -f1 <<< $classCat)\'","color_map": {"type":"\'$classColorType\'"}}\'\n-\t\tif [ $classIter -lt $classSize ]\t\t\n-\t\tthen\n-\t\t\tclassJson=$classJson\',\'\n-\t\tfi\n-  \tfi\n-done\n-classJson=$classJson\']\'\n-#END: Construct JSON for classification files\n-#echo "CLASSIFICATION JSON: "$classJson\n-\n-#Complete construction of Parameter JSON file by adding all JSON sections created above\n-parmJson=$parmJson$rowConfigJson$colConfigJson$attribJson$matrixJson$classJson\n-parmJson=$parmJson\'}\'\n-#echo "COMPLETED PARAMETER JSON: "$parmJson\n-\n-#run R to cluster matrix\n-output="$(R --slave --vanilla --file=$tooldir/CHM_Advanced.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)"\n-# Check for errors from R step, log them if found, and exit script\n-rc=$?;\n-if [ $rc != 0 ]\n-then\n-  echo $output;\n-  if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ]\n-  then\n-    echo "";\n-    echo "NOTE 1: This error can occur when a covariate file has inadvertently been selected as an Input Matrix.  Check your Input Matrix entry.";\n-    echo "NOTE 2: This error can occur when there is no variation in a data rows or columns in the input matrix.  Try a different distance measure or remove rows/columns without variation.";\n-  fi\n-  exit $rc;\n-fi\n- \n-#Call java program to generate NGCHM viewer files.\n-java -jar $tooldir/GalaxyMapGen.jar "$parmJson"\n-#clean up tempdir\n-rm -rf $tdir\n'
b
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_advanced_heatmap_gen.xml
--- a/mda_advanced_heatmap_gen.xml Thu Jun 20 11:31:24 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,503 +0,0 @@\n-<?xml version="1.0" encoding="UTF-8" ?>\r\n-<tool id="mda_advanced_heatmap_gen" name="Advanced NG-CHM Generator" version="2.3">\r\n-  <requirements>\r\n-       <requirement type="package" version="3.4.1">r-base</requirement> \r\n-\t\t<requirement type="package" version="8.0.144">openjdk</requirement>\r\n-  </requirements>\r\n-  <description> Create Clustered Heat Maps with Advanced Options</description>\r\n-<command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap_advanced.sh  "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc" "summary_width|$summaryDisplayWidth"\r\n-\t"row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|${d_rows.rowDendroCut}|data_type|$rowDataType|top_items|$rowTopItems|tree_cuts|${d_rows.rcutrows.rowDendroTreeCut}|${d_rows.rcutrows.raddcuts}|dendro_show|${d_rows.rowDendroShow}|dendro_height|${d_rows.rowDendroHeight}"      \r\n-\t"col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|${d_cols.colDendroCut}|data_type|$colDataType|top_items|$colTopItems|tree_cuts|${d_cols.ccutrows.colDendroTreeCut}|${d_cols.ccutrows.caddcuts}|dendro_show|${d_cols.columnDendroShow}|dendro_height|${d_cols.columnDendroHeight}"    \r\n-    #for $attr in $hm_attribute\r\n-      \'attribute|${attr.attrbute_key}\':\'${attr.attrbute_value}\'\r\n-    #end for\r\n-    #for $mx in $matrices\r\n-\t   
\'matrix_files|path|$mx.dataLayer|name|${mx.dataLayerName}|summary_method|${mx.summarymethod}|selection_color|${mx.dataLayerSelectionColor}|cuts_color|${mx.dataLayerCutsColor}|grid_color|${mx.dataLayerGridColor}|grid_show|${mx.dataLayerGridShow}|${mx.colorsBreaks.setColorsBreaks}|${mx.colorsBreaks.matrixColor1}|${mx.colorsBreaks.matrixColor2}|${mx.colorsBreaks.matrixColor3}|${mx.colorsBreaks.matrixBreak1}|${mx.colorsBreaks.matrixBreak2}|${mx.colorsBreaks.matrixBreak3}|${mx.colorsBreaks.missingColor}\'\r\n-    #end for\r\n-    #for $op in $operations\r\n-       \'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cattype.cat}|bar_type|${op.cattype.scatbar.bartype}|height|${op.classHeight}|fg_color|${op.cattype.scatbar.fg_color}|bg_color|${op.cattype.scatbar.bg_color}\'\r\n-    #end for\r\n- \t\'output_location|$output\' \r\n- </command>\r\n-\t<stdio>\r\n-      <exit_code range="1:" level="fatal" />\r\n-\t</stdio>\r\n-  <inputs>\r\n-    <repeat name="matrices" title="Heat Map Matrices">\r\n-    \t<param name="dataLayer" type="data" format="Tabular" label="Input Data Matrix" help="Tab delimited text file with row labels, column labels, and data."/>\r\n-    \t<param name="dataLayerName" size="40" type="text" value="Data_Layer_name"  label="Data Layer Name" help="Name for data layer (no spaces).">\r\n-           <sanitizer>\r\n-              <valid>\r\n-                <add preset="string.printable"/>\r\n-            \t<remove value="&quot;"/>\r\n-            \t<remove value="&apos;"/>\r\n-                <remove value=" "/> \r\n-              </valid>\r\n-           </sanitizer>\r\n-        </param>   \r\n-\t    <param name="summarymethod" type="select"  label="Data Summarization Method" help="For large matrices, the selected method is used to aggregate data values in the summary view.">\r\n-\t\t\t<option value="average">Average</option>\r\n-\t\t\t<option value="sample">Sample</option>\r\n-\t\t\t<option value="mode">Mode</option>\r\n-\t 
   </param>\r\n-  \t\t<conditional name="colorsBreaks">\r\n-\t\t\t<param name="setColorsBreaks" type="select" label="Colors and Breakpoints" help="Select whether to set your own colors and breakpoints or use default values.">\r\n-\t\t\t\t<option value="none">Use System Generated Colors and Breakpoints</option>\r\n-\t\t\t\t<option value="defined">Define Your Own Colors and Breakpoints</option>\t    \r\n-\t\t\t</param>\r\n-\t       \t<when value="none">\r\n-\t \t\t    \t<param name="matrixColor1" type'..b'-\t\t\t\t\t  <option value="color_plot" >Standard</option>\r\n-\t\t\t\t\t  <option value="bar_plot" >Bar Plot</option>\r\n-\t\t\t\t\t  <option value="scatter_plot" >Scatter Plot</option>\r\n-\t\t\t\t\t</param>\r\n-\t        \t\t<when value="color_plot">\r\n- \t\t\t\t\t\t<param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>\r\n- \t\t\t\t\t\t<param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>\r\n-\t        \t\t</when>\r\n-\t        \t\t<when value="bar_plot">\r\n- \t\t\t\t\t\t<param name="bg_color" type="color" label="Color for Bar Plot Background" value="#ffffff"/>\r\n- \t\t\t\t\t\t<param name="fg_color" type="color" label="Color for Bar Plot Foreground" value="#000000"/>\r\n-\t        \t\t</when>\r\n-\t        \t\t<when value="scatter_plot">\r\n- \t\t\t\t\t\t<param name="bg_color" type="color" label="Color for Scatter Plot Background" value="#ffffff"/>\r\n- \t\t\t\t\t\t<param name="fg_color" type="color" label="Color for Scatter Plot Foreground" value="#000000"/>\r\n-\t        \t\t</when>\r\n-\t        \t</conditional>\r\n-\t        </when>\r\n-\t        <when value="column_discrete">\r\n-         \t\t<conditional name="scatbar">\r\n-\t\t\t\t\t<param name="bartype" type="select" hidden="true">\r\n-\t\t\t\t\t  <option value="color_plot" >Standard</option>\r\n-\t\t\t\t\t</param>\r\n- \t        \t\t<when value="color_plot">\r\n-\t\t\t\t\t\t<param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>\r\n- 
\t\t\t\t\t\t<param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>\r\n- \t\t\t\t\t</when>\r\n-\t        \t</conditional>\r\n-\t        </when>\r\n-\t        <when value="row_discrete">\r\n-         \t\t<conditional name="scatbar">\r\n-\t\t\t\t\t<param name="bartype" type="select" hidden="true">\r\n-\t\t\t\t\t  <option value="color_plot" >Standard</option>\r\n-\t\t\t\t\t</param>\r\n- \t        \t\t<when value="color_plot">\r\n-\t\t\t\t\t\t<param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>\r\n- \t\t\t\t\t\t<param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>\r\n- \t\t\t\t\t</when>\r\n-\t        \t</conditional>\r\n-\t        </when>\r\n-\t \t</conditional>\r\n-     </repeat>       \r\n-     <repeat name="hm_attribute" title="Heat Map Attributes">\r\n-        <param name="attrbute_key" size="50" type="text" value="" label="Heat Map Attribute Key" help="For map level attributes. Enter the key (no spaces).">\r\n-           <sanitizer invalid_char="_">\r\n-              <valid initial="">\r\n-                <add preset="string.letters"/>\r\n-                <add preset="string.digits"/>\r\n-              </valid>\r\n-              <mapping initial="">\r\n-              </mapping>\r\n-           </sanitizer>\r\n-        </param>\r\n-        <param name="attrbute_value" size="50" type="text" label="Heat Map Attributes Value" help="For map level attributes. 
Enter the value (no spaces).">\r\n-           <sanitizer invalid_char="_">\r\n-              <valid initial="">\r\n-                <add preset="string.letters"/>\r\n-                <add preset="string.digits"/>\r\n-              </valid>\r\n-              <mapping initial="">\r\n-              </mapping>\r\n-           </sanitizer>\r\n-        </param>\r\n-    </repeat>       \r\n-  </inputs>\r\n-  <outputs>\r\n-    <data name="output" label=\'Heat_Map_$hmname\' format="ngchm"/>\r\n-  </outputs>\r\n- <tests>\r\n-    <test>\r\n-      <param name="inputmatrix" value="400x400.txt" />\r\n-      <param name="hmname" value="testRun" />\r\n-      <param name="$hmdesc" value="validateTool" />\r\n-      <param name="summarymethod" value="Average" />\r\n-      <param name="rowOrderMethod" value="Hierarchical" />\r\n-      <param name="rowDistanceMeasure" value="Manhattan" />\r\n-      <param name="rowAgglomerationMethod" value="Ward" />\r\n-      <param name="columnOrderMethod" value="Hierarchical" />\r\n-      <param name="columnDistanceMeasure" value="Manhattan" />\r\n-      <param name="columnAgglomerationMethod" value="Ward" />\r\n-      <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" />     \r\n-\r\n-    </test>\r\n-<!--   galaxy/test-data/    dir where the input and output file that should match tool output will be copied -->\r\n-  </tests>\r\n- </tool>\r\n'
b
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_heatmap_gen.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mda_heatmap_gen.py Thu Jun 20 11:39:46 2019 -0400
[
b'@@ -0,0 +1,250 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+# python shell program to validate ng-chm heat map input matrix file and covariate file formats before calling java shell -- bob brown\n+\n+import subprocess           #you must import subprocess so that python can talk to the command line\n+import sys\n+import os\n+import re\n+#import config\n+import traceback\n+#import commons\n+\n+#ConfigVals = config.Config("../rppaConf.txt")\n+\n+def main():\n+    \n+    try:\n+        print \'\\nStarting Heat Map file validation ......\' \n+        #print "\\nheat map sys args len and values = ",len(sys.argv), str(sys.argv)   #, \'++\',argvals\n+\n+      \n+        error= False\n+        endCovarParam=  len(sys.argv)-2 # IF any ending of loc for covar triplet info \n+        startCovarParam=    17 # beginning loc for covar triplet info\n+        inMatrix=           sys.argv[3]\n+\n+        for i in range( endCovarParam, 15, -3):\n+            if len(sys.argv[i]) > 6:\n+                if sys.argv[i][0:4].find(\'row_\') == 0 or sys.argv[i][0:7].find(\'column_\') == 0:  # 0 is match start position\n+                    startCovarParam= i-2                \n+                    #print "\\nHeat map arg 3 and start covariate index on = " ,str(sys.argv[3]),\' - \', startCovarParam, \' covar name= \',str(sys.argv[startCovarParam:])\n+                #else: print \'\\nCovariate param row or column not found at i\', i, str(sys.argv[i])\n+\n+    #test        inMatrix= "/Users/bobbrown/Desktop/NGCHM-Galaxy-Test-Files/400x400firstRowShift.txt"\n+    #test        covarFN= \'/Users/bobbrown/Desktop/400x400-column-covariate-continuous-TestingErrors.txt\'\n+    #test        row_col_cat_contin= \'column_continuous\'\n+    #test        row_col_cat_contin= \'column_categorical\'  \n+    #test        covarLabel = \'bob test\'\n+    #test        numCovariates= 1\n+        \n+        errorInMatrix,inMatrixRowLabels,inMatrixColLabels= ValidateHMInputMatrix(inMatrix)   # verify 
input matrix\n+        \n+        print "\\nFirst & last Row labels ", inMatrixRowLabels[0],inMatrixRowLabels[-1]," and Columns ", inMatrixColLabels[0],inMatrixColLabels[-1], " number Rows= ",len(inMatrixRowLabels)," number Columns= ",len(inMatrixColLabels)\n+            \n+    # continue reviewing covariates to catch any errors in any of the input info\n+        if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5: \n+            errorInMatrix = True\n+            print \'\\n----ERROR Input matrix has too few columns and rows need to ignore validating covariate files for now\'\n+            \n+        elif not errorInMatrix: \n+            print "\\n++++ SUCCESS the Input Matrix looks good\\n\\n"\n+            \n+            i= startCovarParam\n+            while i < (len(sys.argv)-2):  # todo verify this works with advances tool is one other 0->n param after this\n+                covarLabel=         sys.argv[i]\n+                covarLabel=         covarLabel.replace(\' \',\'\')\n+                covarFN=            sys.argv[i+1]\n+                covarFN=            covarFN.replace(\' \',\'\')\n+                row_col_cat_contin=  sys.argv[i+2]\n+                row_col_cat_contin=  row_col_cat_contin.replace(\' \',\'\')\n+                i +=3\n+                                             \n+                print "\\nSTART Validating covariate file with label= ", covarLabel, " and type= ",row_col_cat_contin\n+        \n+                error= ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin,inMatrixRowLabels,inMatrixColLabels)  # check covariate files\n+    \n+            if error or errorInMatrix:\n+                print"\\n---ERROR issues found in input or covariate files\\n "\n+                sys.stderr.write( "\\nERROR issues found in input or covariate files see errors in Standard Output\\n\\n ") \n+                sys.exit(3)\n+            \n+                \n+        print"\\n FINISHED -- Validation of the Input Matrix and 
Covariate files (if any)\\n\\n"\n+        \n+        #print" next running the clustered heat map generator \\n",str(sys.argv[11])+"/heatmap.'..b'ue\n+                    sys.err= 7\n+            else:\n+                inMatrixRowLabels.append(eachRow[0])\n+                tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+                try:\n+                    if tmp.group(0) == \'\':  # if doesn\'t exist then error\n+                        tmp= tmp\n+                except Exception as e:\n+                    print"-+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[j])\n+                    sys.stderr.write("\\n--+-+- WARNING Row Label at row "+str(countRow)+"  value appears to be non-alphanumeric "+str(eachRow[j]))\n+                \n+            \n+            if len(inMatrixColLabels) > 0: \n+                if (inMatrixColLabels[-1] ==\'\') or (inMatrixColLabels[-1] ==\'\\n\'): inMatrixColLabels.pop()\n+     \n+        inMatrixFH.close()\n+\n+            #print error, lenAllRows, len(eachRow), eachRow[0]\n+     except:\n+        #inMatrixFH.close()\n+        sys.stderr.write(str(traceback.format_exc()))\n+        error= True\n+    \n+     return error,inMatrixRowLabels,inMatrixColLabels\n+\n+ #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n+\n+def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels,inMatrixColLabels):           # This sub routine ensures that the slide design input by the user matches a slide design on record\n+\n+# verify \n+# 1 That covar file labels match the col or row labels 1 to 1\n+# 2 That if a continuous covar file that the 2nd field is not all text hard to tell if \'-\' or \'e exponent\'\n+# 3 That the length of the covar file matches the row or col length of the input matrix \n+\n+    error= True\n+    try:\n+    \n+        covFH= open( covariateFilePath, \'rU\')\n+        countRow= 0\n+\n+     
   error= False\n+        \n+        for rawRow in covFH:\n+            countRow +=1\n+            rawRow= rawRow.replace(\'\\n\',\'\')\n+            eachRow=  rawRow.split(\'\\t\')\n+            if countRow== 0: print "\\nCovariance file info - label ",str(covarLabel)," row/col categorical or continous",row_col_cat_contin," first row ",str(eachrow)\n+    \n+            if len(eachRow) < 2 and countRow > 1:\n+                print("----ERROR Input Row "+str(countRow)+" does not have a label and/or value ")\n+                sys.stderr.write("----ERROR Input Row "+str(countRow)+" does not have a label/or and value")\n+                error= True\n+                sys.err= 8\n+                #return error\n+            elif len(eachRow) > 1:\n+                tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+                try:\n+                    if tmp.group(0) == \'\':  # if doesn\'t exist then error\n+                        tmp= tmp\n+                except Exception as e:\n+                    print"\\n-+-+- WARNING Covariate Label at row "+str(countRow)+" value appears to be non-alphanumeric --", eachRow[0],"--"\n+                    sys.stderr.write("\\n--+-+- WARNING Row Headers at  row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[0])+"--")\n+                    \n+                if not error:\n+                    if row_col_cat_contin[-4:] == \'uous\':  # verify continuous is number-ish\n+                        tmp= re.search(\'[+-.0123456789eE]\',eachRow[1])\n+                        try:\n+                            if tmp.group(0) == \'\':\n+                                tmp= tmp\n+                        except Exception as e:\n+                            print("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+                            sys.stderr.write("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous 
value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+                            #error= True\n+    except:\n+        sys.stderr.write(str(traceback.format_exc()))\n+\n+    covFH.close()\n+\n+    return error\n+\n+\n+if __name__ == "__main__":\n+    main()\n+\n+\n'
b
diff -r 436f03b71cf6 -r 8f8ab332a050 mda_heatmap_gen.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mda_heatmap_gen.xml Thu Jun 20 11:39:46 2019 -0400
b
b'@@ -0,0 +1,159 @@\n+<?xml version="1.0" encoding="UTF-8" ?>\n+<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.3">\n+   <requirements>\n+       <requirement type="package" version="3.4.1">r-base</requirement> \n+\t\t<requirement type="package" version="8.0.144">openjdk</requirement>\n+   </requirements>\n+   <description>Create Clustered Heat Maps</description>\n+   <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh  "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc" \n+\t"matrix_files|path|$inputmatrix|name|datalayer|summary_method|$summarymethod"\n+\t"row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|0|data_type|labels"    \n+\t"col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|0|data_type|labels"    \n+    #for $op in $operations\n+       \'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cat}\'\n+    #end for\n+ \t\'output_location|$output\' \n+ </command>\n+\t<stdio>\n+      <exit_code range="1:" level="fatal" />\n+\t</stdio>\n+  <inputs>\n+    <param name="inputmatrix" type="data" format="Tabular" label="Input Data Matrix" help="Tab delimited text file with row labels, column labels, and data."  
/>\n+    <param name="hmname" size="40" type="text" value="Heat_Map_name"  label="Heat Map Name" help="Short Name for heat map (no spaces)."/>\n+           <sanitizer>\n+              <valid>\n+                <add preset="string.printable"/>\n+            \t<remove value="&quot;"/>\n+            \t<remove value="&apos;"/>\n+                <remove value=" "/> \n+              </valid>\n+           </sanitizer>\n+    <param name="hmdesc" size="100" optional="true" type="text" value="Heat_Map_description" label="Heat Map Description" help="Longer description of the heat map contents."/>\n+           <sanitizer>\n+              <valid>\n+                <add preset="string.printable"/>\n+                <add value="string.letters"/>\n+                <add value="string.digits"/>\n+                <add value="-"/>\n+                <add value="_"/>\n+            \t<remove value="&quot;"/>\n+            \t<remove value="&apos;"/>\n+                <remove value=" "/> \n+              </valid>\n+           </sanitizer>\n+    <param name="summarymethod" \ttype="select"  label="Data Summarization Method" help="For large matrices, the selected method is used to aggregate data values in the summary view.">\n+\t\t<option value="average">Average</option>\n+\t\t<option value="sample">Sample</option>\n+\t\t<option value="mode">Mode</option>\n+    </param>\n+    <conditional name="d_rows">\n+\t<param name="rowOrderMethod" type="select" label="Row ordering method" help="Determine if rows should be clustered, randomized, or remain as is.">\n+\t\t<option value="Hierarchical">Hierarchical Clustering</option>\n+\t\t<option value="Original">Original Order</option>\t    \n+\t\t<option value="Random">Random</option>\t    \n+\t</param>\n+        <when value="Hierarchical">\n+\t\t\t<param name="rowDistanceMeasure" type="select"  label="Row Distance Metric" help="For clustering, select the method of determining distance between rows">\n+\t\t\t\t<option 
value="euclidean">Euclidean</option>\n+\t\t\t\t<!-- <option value="binary">Binary</option> ** breaks dendrogram  -->\n+\t\t\t\t<option value="manhattan">Manhattan</option>\n+\t\t\t\t<option value="maximum">Maximum</option>\n+\t\t\t\t<!-- <option value="canberra">Canberra</option> ** breaks dendrogram  -->\n+\t\t\t\t<option value="minkowski">Minkowski</option>\t    \n+\t\t\t\t<!-- <option value="correlation">Correlation</option>\t** breaks dendrogram -->    \n+\t\t\t</param>\n+\t\t\t<param name="rowAgglomerationMethod" type="select"  label="Row Clustering Method" help="For clustering, select algorithm for building clusters.">\n+\t\t\t\t<option value="average">Average Lin'..b'="Hierarchical">Hierarchical Clustering</option>\n+\t\t<option value="Original">Original Order</option>\t    \n+\t\t<option value="Random">Random</option>\t    \n+\t</param>\n+        <when value="Hierarchical">\n+\t\t\t<param name="columnDistanceMeasure" type="select"  label="Column Distance Metric" help="For clustering, select the method of determining distance between columns">\n+\t\t\t\t<option value="euclidean">Euclidean</option>\n+\t\t\t\t<!-- <option value="binary">Binary</option> ** breaks dendrogram  -->\n+\t\t\t\t<option value="manhattan">Manhattan</option>\n+\t\t\t\t<option value="maximum">Maximum</option>\n+\t\t\t\t<!-- <option value="canberra">Canberra</option> ** breaks dendrogram  -->\t    \n+\t\t\t\t<option value="minkowski">Minkowski</option>\t    \n+\t\t\t\t<!-- <option value="correlation">Correlation</option>\t** breaks dendrogram     -->\n+\t\t\t</param>\n+\t\t\t<param name="columnAgglomerationMethod" type="select"  label="Column Clustering Method" help="For clustering, select algorithm for building clusters.">\n+\t\t\t\t<option value="average">Average Linkage</option>\n+\t\t\t\t<option value="complete">Complete Linkage</option>\n+\t\t\t\t<option value="single">Single Linkage</option>\n+\t\t\t\t<option value="ward" selected="true">Ward</option>\n+\t\t\t\t<option 
value="mcquitty">Mcquitty</option>\t    \n+\t\t\t\t<!-- <option value="median">Median</option>\t  ** breaks dendrogram  \n+\t\t\t\t<option value="centroid">Centroid</option>\t  ** breaks dendrogram   -->\n+\t\t\t</param>\n+        </when>\n+        <when value="Original">\n+\t\t    <param name="columnDistanceMeasure" type="text" size="0" hidden="true"    value="n/a"/>\n+\t\t    <param name="columnAgglomerationMethod" type="text" size="0"   hidden="true"  value="n/a"/>\n+        </when>\n+        <when value="Random">\n+\t\t    <param name="columnDistanceMeasure" type="text" size="0"  hidden="true"   value="n/a"/>\n+\t\t    <param name="columnAgglomerationMethod" type="text" size="0" hidden="true"    value="n/a"/>\n+        </when>\n+    </conditional>\n+    <repeat name="operations" title="Covariate Bars">\n+        <param name="class_name" size="25" type="text" value="" label="Covariate Name" help="Covariate heat map display label.">\n+           <sanitizer>\n+              <valid>\n+                <add preset="string.printable"/>\n+            \t<remove value="&quot;"/>\n+            \t<remove value="&apos;"/>\n+                <remove value=" "/> \n+              </valid>\n+           </sanitizer>\n+        </param>\n+        <param name="repeatinput" type="data" format="Tabular" label="Covariate File" help="Tab delimited text file with row or column label and covariate value on each line."/>\n+\t<param name="cat" type="select" label="Axis Covariate Type" help="Identify the covariate as belonging to rows or columns and containing categorical or continuous values.">\n+\t  <option value="row_discrete" >Row Categorical</option>\n+\t  <option value="row_continuous" >Row Continuous</option>\n+\t  <option value="column_discrete" >Column Categorical</option>\n+\t  <option value="column_continuous" >Column Continuous</option>\n+\t</param>\n+    </repeat>       \n+  </inputs>\n+  <outputs>\n+    <data name="output" label=\'Heat_Map_$hmname\' format="ngchm"/>\n+  
</outputs>\n+ <tests>\n+    <test>\n+      <param name="inputmatrix" value="400x400.txt" />\n+      <param name="hmname" value="testRun" />\n+      <param name="$hmdesc" value="validateTool" />\n+      <param name="summarymethod" value="Average" />\n+      <param name="rowOrderMethod" value="Hierarchical" />\n+      <param name="rowDistanceMeasure" value="Manhattan" />\n+      <param name="rowAgglomerationMethod" value="Ward" />\n+      <param name="columnOrderMethod" value="Hierarchical" />\n+      <param name="columnDistanceMeasure" value="Manhattan" />\n+      <param name="columnAgglomerationMethod" value="Ward" />\n+      <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" />     \n+\n+    </test>\n+<!--   galaxy/test-data/    dir where the input and output file that should match tool output will be copied -->\n+  </tests>\n+ </tool>\n'