Repository 'heat_map_creation'
hg clone https://toolshed.g2.bx.psu.edu/repos/md-anderson-bioinformatics/heat_map_creation

Changeset 32:16593e40c2cd (2017-07-20)
Previous changeset 31:e01b833f5d43 (2017-02-03) Next changeset 33:0097750ad7ad (2017-12-05)
Commit message:
Version 2.0.5
modified:
CHM.R
GalaxyMapGen.jar
heatmap.sh
mda_heatmap_gen.xml
mda_heatmap_viz.zip
ngchm-matrix-functional-test-data/400x400.txt
added:
mda_heatmap_gen.py
removed:
ngchm-matrix-functional-test-data/._.DS_Store
ngchm-matrix-functional-test-data/._400x400.txt
b
diff -r e01b833f5d43 -r 16593e40c2cd CHM.R
--- a/CHM.R Fri Feb 03 13:20:50 2017 -0500
+++ b/CHM.R Thu Jul 20 15:31:06 2017 -0400
[
@@ -21,17 +21,12 @@
    rowOrder <-  createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)  
    if (rowOrderMethod == "Hierarchical") {
       writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
-      writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep=""))
-   } else {
-      writeOrderTSV(rowOrder, rownames(dataMatrix), rowOrderFile)
    }
 
    colOrder <-  createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)  
    if (colOrderMethod == "Hierarchical") {
       writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
       writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep=""))
-   } else {
-      writeOrderTSV(colOrder, colnames(dataMatrix), colOrderFile)
    }
 }
 
@@ -51,18 +46,6 @@
    write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
 }
 
-#creates order file for non-clustering methods
-writeOrderTSV<-function(newOrder, originalOrder, outputHCOrderFileName)
-{
-   data=matrix(,length(originalOrder),2);
-   for (i in 1:length(originalOrder)) {
-      data[i,1] = originalOrder[i];
-      data[i,2] = which(newOrder==originalOrder[i]);
-   }
-   colnames(data)<-c("Id", "Order")
-   write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE)
-}
-
 #creates a classification file based on user specified cut of dendrogram
 writeHCCut<-function(uDend, cutNum, outputCutFileName)
 {
b
diff -r e01b833f5d43 -r 16593e40c2cd GalaxyMapGen.jar
b
Binary file GalaxyMapGen.jar has changed
b
diff -r e01b833f5d43 -r 16593e40c2cd heatmap.sh
--- a/heatmap.sh Fri Feb 03 13:20:50 2017 -0500
+++ b/heatmap.sh Thu Jul 20 15:31:06 2017 -0400
[
@@ -1,8 +1,21 @@
 echo $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17}
+
+#run python to validate the input matrix and covariate files (if any)
+#output="$(python  ${11}/mda_heatmap_gen.py $@)"
+output=$(python  ${11}/mda_heatmap_gen.py "$@")
+rc=$?;
+echo $output;
+if [ $rc != 0 ]
+then
+  exit $rc;
+fi
+
+
 #create temp directory for row and col order and dendro files.
 tdir=${11}/$(date +%y%m%d%M%S)
 echo $tdir
 mkdir $tdir
+
 #run R to cluster matrix
 output="$(R --slave --vanilla --file=${11}/CHM.R --args $3 $4 $5 $6 $7 $8 $9 $tdir/ROfile.txt $tdir/COfile.txt $tdir/RDfile.txt $tdir/CDfile.txt ${12} ${13} ${14} ${15} 2>&1)"
 rc=$?;
b
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_gen.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mda_heatmap_gen.py Thu Jul 20 15:31:06 2017 -0400
[
b'@@ -0,0 +1,250 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+# python shell program to validate ng-chm heat map input matrix file and covariate file formats before calling java shell -- bob brown\n+\n+import subprocess           #you must import subprocess so that python can talk to the command line\n+import sys\n+import os\n+import re\n+#import config\n+import traceback\n+#import commons\n+\n+#ConfigVals = config.Config("../rppaConf.txt")\n+\n+def main():\n+    \n+    try:\n+        print \'\\nStarting Heat Map file validation ......\' \n+        #print "\\nheat map sys args len and values = ",len(sys.argv), str(sys.argv)   #, \'++\',argvals\n+\n+      \n+        error= False\n+        endCovarParam=  len(sys.argv)-2 # IF any ending of loc for covar triplet info \n+        startCovarParam=    17 # beginning loc for covar triplet info\n+        inMatrix=           sys.argv[3]\n+\n+        for i in range( endCovarParam, 15, -3):\n+            if len(sys.argv[i]) > 6:\n+                if sys.argv[i][0:4].find(\'row_\') == 0 or sys.argv[i][0:7].find(\'column_\') == 0:  # 0 is match start position\n+                    startCovarParam= i-2                \n+                    #print "\\nHeat map arg 3 and start covariate index on = " ,str(sys.argv[3]),\' - \', startCovarParam, \' covar name= \',str(sys.argv[startCovarParam:])\n+                #else: print \'\\nCovariate param row or column not found at i\', i, str(sys.argv[i])\n+\n+    #test        inMatrix= "/Users/bobbrown/Desktop/NGCHM-Galaxy-Test-Files/400x400firstRowShift.txt"\n+    #test        covarFN= \'/Users/bobbrown/Desktop/400x400-column-covariate-continuous-TestingErrors.txt\'\n+    #test        row_col_cat_contin= \'column_continuous\'\n+    #test        row_col_cat_contin= \'column_categorical\'  \n+    #test        covarLabel = \'bob test\'\n+    #test        numCovariates= 1\n+        \n+        errorInMatrix,inMatrixRowLabels,inMatrixColLabels= ValidateHMInputMatrix(inMatrix)   # verify input matrix\n+        \n+        print "\\nFirst & last Row labels ", inMatrixRowLabels[0],inMatrixRowLabels[-1]," and Columns ", inMatrixColLabels[0],inMatrixColLabels[-1], " number Rows= ",len(inMatrixRowLabels)," number Columns= ",len(inMatrixColLabels)\n+            \n+    # continue reviewing covariates to catch any errors in any of the input info\n+        if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5: \n+            errorInMatrix = True\n+            print \'\\n----ERROR Input matrix has too few columns and rows need to ignore validating covariate files for now\'\n+            \n+        elif not errorInMatrix: \n+            print "\\n++++ SUCCESS the Input Matrix looks good\\n\\n"\n+            \n+            i= startCovarParam\n+            while i < (len(sys.argv)-2):  # todo verify this works with advances tool is one other 0->n param after this\n+                covarLabel=         sys.argv[i]\n+                covarLabel=         covarLabel.replace(\' \',\'\')\n+                covarFN=            sys.argv[i+1]\n+                covarFN=            covarFN.replace(\' \',\'\')\n+                row_col_cat_contin=  sys.argv[i+2]\n+                row_col_cat_contin=  row_col_cat_contin.replace(\' \',\'\')\n+                i +=3\n+                                             \n+                print "\\nSTART Validating covariate file with label= ", covarLabel, " and type= ",row_col_cat_contin\n+        \n+                error= ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin,inMatrixRowLabels,inMatrixColLabels)  # check covariate files\n+    \n+            if error or errorInMatrix:\n+                print"\\n---ERROR issues found in input or covariate files\\n "\n+                sys.stderr.write( "\\nERROR issues found in input or covariate files see errors in Standard Output\\n\\n ") \n+                sys.exit(3)\n+            \n+                \n+        print"\\n FINISHED -- Validation of the Input Matrix and Covariate files (if any)\\n\\n"\n+        \n+        #print" next running the clustered heat map generator \\n",str(sys.argv[11])+"/heatmap.'..b'ue\n+                    sys.err= 7\n+            else:\n+                inMatrixRowLabels.append(eachRow[0])\n+                tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+                try:\n+                    if tmp.group(0) == \'\':  # if doesn\'t exist then error\n+                        tmp= tmp\n+                except Exception as e:\n+                    print"-+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[j])\n+                    sys.stderr.write("\\n--+-+- WARNING Row Label at row "+str(countRow)+"  value appears to be non-alphanumeric "+str(eachRow[j]))\n+                \n+            \n+            if len(inMatrixColLabels) > 0: \n+                if (inMatrixColLabels[-1] ==\'\') or (inMatrixColLabels[-1] ==\'\\n\'): inMatrixColLabels.pop()\n+     \n+        inMatrixFH.close()\n+\n+            #print error, lenAllRows, len(eachRow), eachRow[0]\n+     except:\n+        #inMatrixFH.close()\n+        sys.stderr.write(str(traceback.format_exc()))\n+        error= True\n+    \n+     return error,inMatrixRowLabels,inMatrixColLabels\n+\n+ #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n+\n+def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels,inMatrixColLabels):           # This sub routine ensures that the slide design input by the user matches a slide design on record\n+\n+# verify \n+# 1 That covar file labels match the col or row labels 1 to 1\n+# 2 That if a continuous covar file that the 2nd field is not all text hard to tell if \'-\' or \'e exponent\'\n+# 3 That the length of the covar file matches the row or col length of the input matrix \n+\n+    error= True\n+    try:\n+    \n+        covFH= open( covariateFilePath, \'rU\')\n+        countRow= 0\n+\n+        error= False\n+        \n+        for rawRow in covFH:\n+            countRow +=1\n+            rawRow= rawRow.replace(\'\\n\',\'\')\n+            eachRow=  rawRow.split(\'\\t\')\n+            if countRow== 0: print "\\nCovariance file info - label ",str(covarLabel)," row/col categorical or continous",row_col_cat_contin," first row ",str(eachrow)\n+    \n+            if len(eachRow) < 2 and countRow > 1:\n+                print("----ERROR Input Row "+str(countRow)+" does not have a label and/or value ")\n+                sys.stderr.write("----ERROR Input Row "+str(countRow)+" does not have a label/or and value")\n+                error= True\n+                sys.err= 8\n+                #return error\n+            elif len(eachRow) > 1:\n+                tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+                try:\n+                    if tmp.group(0) == \'\':  # if doesn\'t exist then error\n+                        tmp= tmp\n+                except Exception as e:\n+                    print"\\n-+-+- WARNING Covariate Label at row "+str(countRow)+" value appears to be non-alphanumeric --", eachRow[0],"--"\n+                    sys.stderr.write("\\n--+-+- WARNING Row Headers at  row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[0])+"--")\n+                    \n+                if not error:\n+                    if row_col_cat_contin[-4:] == \'uous\':  # verify continuous is number-ish\n+                        tmp= re.search(\'[+-.0123456789eE]\',eachRow[1])\n+                        try:\n+                            if tmp.group(0) == \'\':\n+                                tmp= tmp\n+                        except Exception as e:\n+                            print("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+                            sys.stderr.write("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+                            #error= True\n+    except:\n+        sys.stderr.write(str(traceback.format_exc()))\n+\n+    covFH.close()\n+\n+    return error\n+\n+\n+if __name__ == "__main__":\n+    main()\n+\n+\n'
b
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_gen.xml
--- a/mda_heatmap_gen.xml Fri Feb 03 13:20:50 2017 -0500
+++ b/mda_heatmap_gen.xml Thu Jul 20 15:31:06 2017 -0400
b
b'@@ -1,7 +1,8 @@\n <?xml version="1.0" encoding="UTF-8" ?>\n-<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.0.2">\n+<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.0.5">\n   <description>Create Clustered Heat Maps</description>\n-<command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh  \'$hmname\' \'$hmdesc\' \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__\' ${d_rows.rowDendroCut} ${d_cols.colDendroCut} $rowDataType $colDataType\n+<!-- <command interpreter="python" detect_errors="aggressive">$__tool_directory__/mda_heatmap_gen.py  \'Heat_Map_$hmname\' \'$hmdesc\' \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__\' 0 0 labels labels \'None\'-->\n+   <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh  "Heat_Map_$hmname" "$hmdesc" \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__/\' 0 0 labels labels \'None\'\n     #for $op in $operations\n        ${op.class_name}\n        ${op.repeatinput.file_name}\n@@ -13,13 +14,13 @@\n       <exit_code range="1:" level="fatal" />\n \t</stdio>\n   <inputs>\n-    <param name="inputmatrix" type="data" format="text" label="Input Matrix" />\n-    <param name="hmname" size="20" type="text" value="Heat_Map_name" label="User Defined Heat Map Name"/>\n+    <param name="inputmatrix" type="data" format="Tabular" label="Input Matrix" />\n+    <param name="hmname" size="20" type="text" value="Heat_Map_name"  label="User Defined Heat Map Name"/>\n     <param name="hmdesc" size="100" optional="true" type="text" value="Heat_Map_description" label="User Defined Heat Map Description"/>\n     <param name="summarymethod" \ttype="select"  label="Data Summarization Method">\n-\t\t<option value="average">average</option>\n-\t\t<option value="sample">sample</option>\n-\t\t<option value="mode">mode</option>\n+\t\t<option value="average">Average</option>\n+\t\t<option value="sample">Sample</option>\n+\t\t<option value="mode">Mode</option>\n     </param>\n       <conditional name="d_rows">\n \t<param name="rowOrderMethod" type="select" label="Row ordering method" help="Choices -- Hierarchical Clustering, Original Order, Random">\n@@ -28,69 +29,34 @@\n \t\t<option value="Random">Random</option>\t    \n \t</param>\n         <when value="Hierarchical">\n-\t<param name="rowDistanceMeasure" type="select"  label="Row Distance Metric" help="euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation">\n-\t\t<option value="euclidean">Euclidean</option>\n-\t\t<option value="binary">Binary</option>\n-\t\t<option value="manhattan">Manhattan</option>\n-\t\t<option value="maximum">Maximum</option>\n-\t\t<option value="canberra">Canberra</option>\t    \n-\t\t<option value="minkowski">Minkowski</option>\t    \n-\t\t<option value="correlation">Correlation</option>\t    \n-\t</param>\n-\t<param name="rowAgglomerationMethod" type="select"  label="Row Clustering Method" help="Choices: \'average\' for Average Linkage, \'complete\' for Complete Linkage, \'single\' for Single Linkage, \'ward\', \'mcquitty\', \'median\', or \'centroid\'.">\n-\t\t<option value="average">Average Linkage</option>\n-\t\t<option value="complete">Complete Linkage</option>\n-\t\t<option value="single">Single Linkage</option>\n-\t\t<option value="ward" selected="true">Ward</option>\n-\t\t<option value="mcquitty">Mcquitty</option>\t    \n-\t\t<option value="median">Median</option>\t    \n-\t\t<option value="centroid">Centroid</option>\t    \n-\t</param>\n-            <param name="rowDendroCut" type="select" label="Create row categorical covariate bar b'..b'     <when value="Random">\n-\t\t    <param name="columnDistanceMeasure" type="text" size="0"     value="n/a"/>\n-\t\t    <param name="columnAgglomerationMethod" type="text" size="0"     value="n/a"/>\n-\t\t    <param name="colDendroCut" type="text" size="0"     value="0"/>\n+\t\t    <param name="columnDistanceMeasure" type="text" size="0"  hidden="true"   value="n/a"/>\n+\t\t    <param name="columnAgglomerationMethod" type="text" size="0" hidden="true"    value="n/a"/>\n         </when>\n     </conditional>\n-    <param name="colDataType" type="select" label="Linkouts to column data type info" >\n-        <option value="labels" selected="true" >None</option>\n-        <option value="bio.probe.affymetrix" >Affymetrix Probe Id</option>\n-        <option value="bio.feature.agilent" >Agilent Id</option>\n-        <option value="bio.sample.cbioportal" >cBioPortal sample Id</option>\n-        <option value="bio.transcript.ensemble" >Ensemble transcript Id</option>\n-        <option value="bio.gene.entrez" >Gene Entrez Id</option>\n-        <option value="bio.gene.hugo" >Gene HUGO symbol</option>\n-        <option value="bio.go" >Gene Ontology (GO) Id</option>\n-        <option value="bio.geo.acc" >GEO Accession Id</option>\n-        <option value="bio.probe.illumina" >Illumina Probe Id</option>\n-        <option value="bio.probe.infinium" >Infinium Probe Id</option>\n-        <option value="bio.pathway.mdanderson" >MD Anderson pathway Id</option>\n-        <option value="bio.mirna" >miRNA Id</option>\n-        <option value="bio.mirna.mimat" >miRNA MIMAT Id</option>\n-        <option value="bio.pubmed" >Pubmed Id</option>\n-        <option value="bio.pubmed.search" >Pubmed Search Term</option>\n-        <option value="scholar" >Scholarly term</option>\n-        <option value="bio.gene.unigene" >Unigene CId</option>\n-        <option value="bio.protein.uniprot" >UniProt Id</option>\n-    </param>    \n     <repeat name="operations" title="Covariate Bars">\n         <param name="class_name" size="20" type="text" value="" label="Axis Covariate Name">\n            <sanitizer invalid_char="_">\n@@ -174,28 +105,28 @@\n         </param>\n         <param name="repeatinput" type="data" format="text" label="Axis Covariate File"/>\n \t<param name="cat" type="select" label="Axis Covariate Type">\n-\t  <option value="row_categorical" >row categorical</option>\n-\t  <option value="row_continuous" >row continuous</option>\n-\t  <option value="column_categorical" >column categorical</option>\n-\t  <option value="column_continuous" >column continuous</option>\n+\t  <option value="row_categorical" >Row Categorical</option>\n+\t  <option value="row_continuous" >Row Continuous</option>\n+\t  <option value="column_categorical" >Column Categorical</option>\n+\t  <option value="column_continuous" >Column Continuous</option>\n \t</param>\n     </repeat>       \n   </inputs>\n   <outputs>\n-    <data name="output" label=\'${hmname}\' format="ngchm"/>\n+    <data name="output" label=\'Heat_Map_$hmname\' format="ngchm"/>\n   </outputs>\n  <tests>\n     <test>\n       <param name="inputmatrix" value="400x400.txt" />\n       <param name="hmname" value="testRun" />\n       <param name="$hmdesc" value="validateTool" />\n-      <param name="summarymethod" value="average" />\n+      <param name="summarymethod" value="Average" />\n       <param name="rowOrderMethod" value="Hierarchical" />\n-      <param name="rowDistanceMeasure" value="manhattan" />\n-      <param name="rowAgglomerationMethod" value="ward" />\n+      <param name="rowDistanceMeasure" value="Manhattan" />\n+      <param name="rowAgglomerationMethod" value="Ward" />\n       <param name="columnOrderMethod" value="Hierarchical" />\n-      <param name="columnDistanceMeasure" value="manhattan" />\n-      <param name="columnAgglomerationMethod" value="ward" />\n+      <param name="columnDistanceMeasure" value="Manhattan" />\n+      <param name="columnAgglomerationMethod" value="Ward" />\n       <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" />     \n \n     </test>\n'
b
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_viz.zip
b
Binary file mda_heatmap_viz.zip has changed
b
diff -r e01b833f5d43 -r 16593e40c2cd ngchm-matrix-functional-test-data/._.DS_Store
b
Binary file ngchm-matrix-functional-test-data/._.DS_Store has changed
b
diff -r e01b833f5d43 -r 16593e40c2cd ngchm-matrix-functional-test-data/._400x400.txt
b
Binary file ngchm-matrix-functional-test-data/._400x400.txt has changed