Previous changeset 31:e01b833f5d43 (2017-02-03) Next changeset 33:0097750ad7ad (2017-12-05) |
Commit message:
Version 2.0.5 |
modified:
CHM.R GalaxyMapGen.jar heatmap.sh mda_heatmap_gen.xml mda_heatmap_viz.zip ngchm-matrix-functional-test-data/400x400.txt |
added:
mda_heatmap_gen.py |
removed:
ngchm-matrix-functional-test-data/._.DS_Store ngchm-matrix-functional-test-data/._400x400.txt |
b |
diff -r e01b833f5d43 -r 16593e40c2cd CHM.R --- a/CHM.R Fri Feb 03 13:20:50 2017 -0500 +++ b/CHM.R Thu Jul 20 15:31:06 2017 -0400 |
[ |
@@ -21,17 +21,12 @@ rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) if (rowOrderMethod == "Hierarchical") { writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) - writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep="")) - } else { - writeOrderTSV(rowOrder, rownames(dataMatrix), rowOrderFile) } colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod) if (colOrderMethod == "Hierarchical") { writeHCDataTSVs(colOrder, colDendroFile, colOrderFile) writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep="")) - } else { - writeOrderTSV(colOrder, colnames(dataMatrix), colOrderFile) } } @@ -51,18 +46,6 @@ write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) } -#creates order file for non-clustering methods -writeOrderTSV<-function(newOrder, originalOrder, outputHCOrderFileName) -{ - data=matrix(,length(originalOrder),2); - for (i in 1:length(originalOrder)) { - data[i,1] = originalOrder[i]; - data[i,2] = which(newOrder==originalOrder[i]); - } - colnames(data)<-c("Id", "Order") - write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) -} - #creates a classification file based on user specified cut of dendrogram writeHCCut<-function(uDend, cutNum, outputCutFileName) { |
b |
diff -r e01b833f5d43 -r 16593e40c2cd GalaxyMapGen.jar |
b |
Binary file GalaxyMapGen.jar has changed |
b |
diff -r e01b833f5d43 -r 16593e40c2cd heatmap.sh --- a/heatmap.sh Fri Feb 03 13:20:50 2017 -0500 +++ b/heatmap.sh Thu Jul 20 15:31:06 2017 -0400 |
[ |
@@ -1,8 +1,21 @@ echo $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} + +#run python to validate the input matrix and covariate files (if any) +#output="$(python ${11}/mda_heatmap_gen.py $@)" +output=$(python ${11}/mda_heatmap_gen.py "$@") +rc=$?; +echo $output; +if [ $rc != 0 ] +then + exit $rc; +fi + + #create temp directory for row and col order and dendro files. tdir=${11}/$(date +%y%m%d%M%S) echo $tdir mkdir $tdir + #run R to cluster matrix output="$(R --slave --vanilla --file=${11}/CHM.R --args $3 $4 $5 $6 $7 $8 $9 $tdir/ROfile.txt $tdir/COfile.txt $tdir/RDfile.txt $tdir/CDfile.txt ${12} ${13} ${14} ${15} 2>&1)" rc=$?; |
b |
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_gen.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mda_heatmap_gen.py Thu Jul 20 15:31:06 2017 -0400 |
[ |
b'@@ -0,0 +1,250 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+# python shell program to validate ng-chm heat map input matrix file and covariate file formats before calling java shell -- bob brown\n+\n+import subprocess #you must import subprocess so that python can talk to the command line\n+import sys\n+import os\n+import re\n+#import config\n+import traceback\n+#import commons\n+\n+#ConfigVals = config.Config("../rppaConf.txt")\n+\n+def main():\n+ \n+ try:\n+ print \'\\nStarting Heat Map file validation ......\' \n+ #print "\\nheat map sys args len and values = ",len(sys.argv), str(sys.argv) #, \'++\',argvals\n+\n+ \n+ error= False\n+ endCovarParam= len(sys.argv)-2 # IF any ending of loc for covar triplet info \n+ startCovarParam= 17 # beginning loc for covar triplet info\n+ inMatrix= sys.argv[3]\n+\n+ for i in range( endCovarParam, 15, -3):\n+ if len(sys.argv[i]) > 6:\n+ if sys.argv[i][0:4].find(\'row_\') == 0 or sys.argv[i][0:7].find(\'column_\') == 0: # 0 is match start position\n+ startCovarParam= i-2 \n+ #print "\\nHeat map arg 3 and start covariate index on = " ,str(sys.argv[3]),\' - \', startCovarParam, \' covar name= \',str(sys.argv[startCovarParam:])\n+ #else: print \'\\nCovariate param row or column not found at i\', i, str(sys.argv[i])\n+\n+ #test inMatrix= "/Users/bobbrown/Desktop/NGCHM-Galaxy-Test-Files/400x400firstRowShift.txt"\n+ #test covarFN= \'/Users/bobbrown/Desktop/400x400-column-covariate-continuous-TestingErrors.txt\'\n+ #test row_col_cat_contin= \'column_continuous\'\n+ #test row_col_cat_contin= \'column_categorical\' \n+ #test covarLabel = \'bob test\'\n+ #test numCovariates= 1\n+ \n+ errorInMatrix,inMatrixRowLabels,inMatrixColLabels= ValidateHMInputMatrix(inMatrix) # verify input matrix\n+ \n+ print "\\nFirst & last Row labels ", inMatrixRowLabels[0],inMatrixRowLabels[-1]," and Columns ", inMatrixColLabels[0],inMatrixColLabels[-1], " number Rows= ",len(inMatrixRowLabels)," number Columns= ",len(inMatrixColLabels)\n+ \n+ # continue reviewing covariates to catch any errors in any of the input info\n+ if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5: \n+ errorInMatrix = True\n+ print \'\\n----ERROR Input matrix has too few columns and rows need to ignore validating covariate files for now\'\n+ \n+ elif not errorInMatrix: \n+ print "\\n++++ SUCCESS the Input Matrix looks good\\n\\n"\n+ \n+ i= startCovarParam\n+ while i < (len(sys.argv)-2): # todo verify this works with advances tool is one other 0->n param after this\n+ covarLabel= sys.argv[i]\n+ covarLabel= covarLabel.replace(\' \',\'\')\n+ covarFN= sys.argv[i+1]\n+ covarFN= covarFN.replace(\' \',\'\')\n+ row_col_cat_contin= sys.argv[i+2]\n+ row_col_cat_contin= row_col_cat_contin.replace(\' \',\'\')\n+ i +=3\n+ \n+ print "\\nSTART Validating covariate file with label= ", covarLabel, " and type= ",row_col_cat_contin\n+ \n+ error= ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin,inMatrixRowLabels,inMatrixColLabels) # check covariate files\n+ \n+ if error or errorInMatrix:\n+ print"\\n---ERROR issues found in input or covariate files\\n "\n+ sys.stderr.write( "\\nERROR issues found in input or covariate files see errors in Standard Output\\n\\n ") \n+ sys.exit(3)\n+ \n+ \n+ print"\\n FINISHED -- Validation of the Input Matrix and Covariate files (if any)\\n\\n"\n+ \n+ #print" next running the clustered heat map generator \\n",str(sys.argv[11])+"/heatmap.'..b'ue\n+ sys.err= 7\n+ else:\n+ inMatrixRowLabels.append(eachRow[0])\n+ tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+ try:\n+ if tmp.group(0) == \'\': # if doesn\'t exist then error\n+ tmp= tmp\n+ except Exception as e:\n+ print"-+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[j])\n+ sys.stderr.write("\\n--+-+- WARNING Row Label at row "+str(countRow)+" value appears to be non-alphanumeric "+str(eachRow[j]))\n+ \n+ \n+ if len(inMatrixColLabels) > 0: \n+ if (inMatrixColLabels[-1] ==\'\') or (inMatrixColLabels[-1] ==\'\\n\'): inMatrixColLabels.pop()\n+ \n+ inMatrixFH.close()\n+\n+ #print error, lenAllRows, len(eachRow), eachRow[0]\n+ except:\n+ #inMatrixFH.close()\n+ sys.stderr.write(str(traceback.format_exc()))\n+ error= True\n+ \n+ return error,inMatrixRowLabels,inMatrixColLabels\n+\n+ #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n+\n+def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels,inMatrixColLabels): # This sub routine ensures that the slide design input by the user matches a slide design on record\n+\n+# verify \n+# 1 That covar file labels match the col or row labels 1 to 1\n+# 2 That if a continuous covar file that the 2nd field is not all text hard to tell if \'-\' or \'e exponent\'\n+# 3 That the length of the covar file matches the row or col length of the input matrix \n+\n+ error= True\n+ try:\n+ \n+ covFH= open( covariateFilePath, \'rU\')\n+ countRow= 0\n+\n+ error= False\n+ \n+ for rawRow in covFH:\n+ countRow +=1\n+ rawRow= rawRow.replace(\'\\n\',\'\')\n+ eachRow= rawRow.split(\'\\t\')\n+ if countRow== 0: print "\\nCovariance file info - label ",str(covarLabel)," row/col categorical or continous",row_col_cat_contin," first row ",str(eachrow)\n+ \n+ if len(eachRow) < 2 and countRow > 1:\n+ print("----ERROR Input Row "+str(countRow)+" does not have a label and/or value ")\n+ sys.stderr.write("----ERROR Input Row "+str(countRow)+" does not have a label/or and value")\n+ error= True\n+ sys.err= 8\n+ #return error\n+ elif len(eachRow) > 1:\n+ tmp= re.search(\'[abcdefghijklmnopqrstuvwxyz]\',eachRow[0].lower())\n+ try:\n+ if tmp.group(0) == \'\': # if doesn\'t exist then error\n+ tmp= tmp\n+ except Exception as e:\n+ print"\\n-+-+- WARNING Covariate Label at row "+str(countRow)+" value appears to be non-alphanumeric --", eachRow[0],"--"\n+ sys.stderr.write("\\n--+-+- WARNING Row Headers at row "+str(countRow)+" value appears to be non-alphanumeric --"+str(eachRow[0])+"--")\n+ \n+ if not error:\n+ if row_col_cat_contin[-4:] == \'uous\': # verify continuous is number-ish\n+ tmp= re.search(\'[+-.0123456789eE]\',eachRow[1])\n+ try:\n+ if tmp.group(0) == \'\':\n+ tmp= tmp\n+ except Exception as e:\n+ print("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+ sys.stderr.write("\\n-+-+-WARNING Input Row "+str(countRow)+" covariance continuous value appears to be non-numeric --"+ str(eachRow[1])+"--")\n+ #error= True\n+ except:\n+ sys.stderr.write(str(traceback.format_exc()))\n+\n+ covFH.close()\n+\n+ return error\n+\n+\n+if __name__ == "__main__":\n+ main()\n+\n+\n' |
b |
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_gen.xml --- a/mda_heatmap_gen.xml Fri Feb 03 13:20:50 2017 -0500 +++ b/mda_heatmap_gen.xml Thu Jul 20 15:31:06 2017 -0400 |
b |
b'@@ -1,7 +1,8 @@\n <?xml version="1.0" encoding="UTF-8" ?>\n-<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.0.2">\n+<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.0.5">\n <description>Create Clustered Heat Maps</description>\n-<command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh \'$hmname\' \'$hmdesc\' \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__\' ${d_rows.rowDendroCut} ${d_cols.colDendroCut} $rowDataType $colDataType\n+<!-- <command interpreter="python" detect_errors="aggressive">$__tool_directory__/mda_heatmap_gen.py \'Heat_Map_$hmname\' \'$hmdesc\' \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__\' 0 0 labels labels \'None\'-->\n+ <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh "Heat_Map_$hmname" "$hmdesc" \'$inputmatrix\' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod \'$__tool_directory__/\' 0 0 labels labels \'None\'\n #for $op in $operations\n ${op.class_name}\n ${op.repeatinput.file_name}\n@@ -13,13 +14,13 @@\n <exit_code range="1:" level="fatal" />\n \t</stdio>\n <inputs>\n- <param name="inputmatrix" type="data" format="text" label="Input Matrix" />\n- <param name="hmname" size="20" type="text" value="Heat_Map_name" label="User Defined Heat Map Name"/>\n+ <param name="inputmatrix" type="data" format="Tabular" label="Input Matrix" />\n+ <param name="hmname" size="20" type="text" value="Heat_Map_name" label="User Defined Heat Map Name"/>\n <param name="hmdesc" size="100" optional="true" type="text" value="Heat_Map_description" label="User Defined Heat Map Description"/>\n <param name="summarymethod" \ttype="select" label="Data Summarization Method">\n-\t\t<option value="average">average</option>\n-\t\t<option value="sample">sample</option>\n-\t\t<option value="mode">mode</option>\n+\t\t<option value="average">Average</option>\n+\t\t<option value="sample">Sample</option>\n+\t\t<option value="mode">Mode</option>\n </param>\n <conditional name="d_rows">\n \t<param name="rowOrderMethod" type="select" label="Row ordering method" help="Choices -- Hierarchical Clustering, Original Order, Random">\n@@ -28,69 +29,34 @@\n \t\t<option value="Random">Random</option>\t \n \t</param>\n <when value="Hierarchical">\n-\t<param name="rowDistanceMeasure" type="select" label="Row Distance Metric" help="euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation">\n-\t\t<option value="euclidean">Euclidean</option>\n-\t\t<option value="binary">Binary</option>\n-\t\t<option value="manhattan">Manhattan</option>\n-\t\t<option value="maximum">Maximum</option>\n-\t\t<option value="canberra">Canberra</option>\t \n-\t\t<option value="minkowski">Minkowski</option>\t \n-\t\t<option value="correlation">Correlation</option>\t \n-\t</param>\n-\t<param name="rowAgglomerationMethod" type="select" label="Row Clustering Method" help="Choices: \'average\' for Average Linkage, \'complete\' for Complete Linkage, \'single\' for Single Linkage, \'ward\', \'mcquitty\', \'median\', or \'centroid\'.">\n-\t\t<option value="average">Average Linkage</option>\n-\t\t<option value="complete">Complete Linkage</option>\n-\t\t<option value="single">Single Linkage</option>\n-\t\t<option value="ward" selected="true">Ward</option>\n-\t\t<option value="mcquitty">Mcquitty</option>\t \n-\t\t<option value="median">Median</option>\t \n-\t\t<option value="centroid">Centroid</option>\t \n-\t</param>\n- <param name="rowDendroCut" type="select" label="Create row categorical covariate bar b'..b' <when value="Random">\n-\t\t <param name="columnDistanceMeasure" type="text" size="0" value="n/a"/>\n-\t\t <param name="columnAgglomerationMethod" type="text" size="0" value="n/a"/>\n-\t\t <param name="colDendroCut" type="text" size="0" value="0"/>\n+\t\t <param name="columnDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/>\n+\t\t <param name="columnAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/>\n </when>\n </conditional>\n- <param name="colDataType" type="select" label="Linkouts to column data type info" >\n- <option value="labels" selected="true" >None</option>\n- <option value="bio.probe.affymetrix" >Affymetrix Probe Id</option>\n- <option value="bio.feature.agilent" >Agilent Id</option>\n- <option value="bio.sample.cbioportal" >cBioPortal sample Id</option>\n- <option value="bio.transcript.ensemble" >Ensemble transcript Id</option>\n- <option value="bio.gene.entrez" >Gene Entrez Id</option>\n- <option value="bio.gene.hugo" >Gene HUGO symbol</option>\n- <option value="bio.go" >Gene Ontology (GO) Id</option>\n- <option value="bio.geo.acc" >GEO Accession Id</option>\n- <option value="bio.probe.illumina" >Illumina Probe Id</option>\n- <option value="bio.probe.infinium" >Infinium Probe Id</option>\n- <option value="bio.pathway.mdanderson" >MD Anderson pathway Id</option>\n- <option value="bio.mirna" >miRNA Id</option>\n- <option value="bio.mirna.mimat" >miRNA MIMAT Id</option>\n- <option value="bio.pubmed" >Pubmed Id</option>\n- <option value="bio.pubmed.search" >Pubmed Search Term</option>\n- <option value="scholar" >Scholarly term</option>\n- <option value="bio.gene.unigene" >Unigene CId</option>\n- <option value="bio.protein.uniprot" >UniProt Id</option>\n- </param> \n <repeat name="operations" title="Covariate Bars">\n <param name="class_name" size="20" type="text" value="" label="Axis Covariate Name">\n <sanitizer invalid_char="_">\n@@ -174,28 +105,28 @@\n </param>\n <param name="repeatinput" type="data" format="text" label="Axis Covariate File"/>\n \t<param name="cat" type="select" label="Axis Covariate Type">\n-\t <option value="row_categorical" >row categorical</option>\n-\t <option value="row_continuous" >row continuous</option>\n-\t <option value="column_categorical" >column categorical</option>\n-\t <option value="column_continuous" >column continuous</option>\n+\t <option value="row_categorical" >Row Categorical</option>\n+\t <option value="row_continuous" >Row Continuous</option>\n+\t <option value="column_categorical" >Column Categorical</option>\n+\t <option value="column_continuous" >Column Continuous</option>\n \t</param>\n </repeat> \n </inputs>\n <outputs>\n- <data name="output" label=\'${hmname}\' format="ngchm"/>\n+ <data name="output" label=\'Heat_Map_$hmname\' format="ngchm"/>\n </outputs>\n <tests>\n <test>\n <param name="inputmatrix" value="400x400.txt" />\n <param name="hmname" value="testRun" />\n <param name="$hmdesc" value="validateTool" />\n- <param name="summarymethod" value="average" />\n+ <param name="summarymethod" value="Average" />\n <param name="rowOrderMethod" value="Hierarchical" />\n- <param name="rowDistanceMeasure" value="manhattan" />\n- <param name="rowAgglomerationMethod" value="ward" />\n+ <param name="rowDistanceMeasure" value="Manhattan" />\n+ <param name="rowAgglomerationMethod" value="Ward" />\n <param name="columnOrderMethod" value="Hierarchical" />\n- <param name="columnDistanceMeasure" value="manhattan" />\n- <param name="columnAgglomerationMethod" value="ward" />\n+ <param name="columnDistanceMeasure" value="Manhattan" />\n+ <param name="columnAgglomerationMethod" value="Ward" />\n <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" /> \n \n </test>\n' |
b |
diff -r e01b833f5d43 -r 16593e40c2cd mda_heatmap_viz.zip |
b |
Binary file mda_heatmap_viz.zip has changed |
b |
diff -r e01b833f5d43 -r 16593e40c2cd ngchm-matrix-functional-test-data/._.DS_Store |
b |
Binary file ngchm-matrix-functional-test-data/._.DS_Store has changed |
b |
diff -r e01b833f5d43 -r 16593e40c2cd ngchm-matrix-functional-test-data/._400x400.txt |
b |
Binary file ngchm-matrix-functional-test-data/._400x400.txt has changed |