# HG changeset patch # User insilico-bob # Date 1543346440 18000 # Node ID f1bcd79cd923a76ce97dcc843ad28f1c8835283e # Parent 7f12c81e20833740569fbd555d2a83e35f38496a Uploaded diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Filters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Filters.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,694 @@ +''' +Created on Jun 7, 2017 updated Feb2018 + +@author: rbrown and cjacoby +''' + +import sys, traceback, argparse +import numpy as np +from Matrix_Validate_import import reader, Labeler +import math +#import matplotlib.pyplot as plt + +#Define argparse Function +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file_txt', help='tab delimited text file input matrix(include .txt in name)') + parser.add_argument('choice',type=str, help='Variance Filter Method (Variance or Range)') + parser.add_argument('thresh', help='Thershold for Variance Filtering') + parser.add_argument('axes', help='Axes to Filter on (Either Row or Column') + parser.add_argument('output_file_txt', help='tab delimited text file output name (include .txt in name)') + args = parser.parse_args() + return args + +def Range_Filter_Row(matrix,thresh,row_header_list,column_header_list): + #Create Null Set of Filtered Row(Populated Later) + deletes = [] + minVal = +9999999 + maxVal = -99999 + #Loop to Determine Which Rows have sub-Threshold Range + for i in range(0,len(matrix)): + temp_range = np.max(matrix[i][0::]) - np.min(matrix[i][0::]) + + if temp_range < minVal: minVal = temp_range + elif temp_range > maxVal: maxVal = temp_range + + if temp_range <= float(thresh): + deletes = np.append(deletes,[i],0) + + #Delete Rows sub-Threshold Rows + matrix = np.delete(matrix,deletes,0) + filter_rows = np.delete(row_header_list,deletes,0) + filter_cols = column_header_list + return matrix, filter_rows, filter_cols,len(deletes),minVal,maxVal + +def Range_Filter_Col(matrix,thresh,row_header_list,column_header_list): + #Create Null Set of 
Filtered Row(Populated Later) + deletes = [] + minVal = +9999999 + maxVal = -99999 + #Loop to Determine Which Rows have sub-Threshold Variance + for i in range(0,len(matrix[0])): + + temp_range = np.max([row[i] for row in matrix]) - np.min([row[i] for row in matrix]) + + if temp_range < minVal: minVal = temp_range + elif temp_range > maxVal: maxVal = temp_range + + #print(temp_stdev) + if temp_range <= float(thresh): + deletes = np.append(deletes,[i],0) + print(deletes) + + #Delete Rows sub-Threshold Rows + matrix = np.delete(matrix,deletes,1) + filter_rows = row_header_list + filter_cols = np.delete(column_header_list,deletes,0) + #np.savetxt('testtest.txt',matrix,delimiter='\t') + + return matrix, filter_rows, filter_cols,len(deletes),minVal,maxVal + +#Define Function Which Deletes Sub-Threshold Rows +def Variance_Percent_Filter_row(matrix,cutoff,row_header_list,column_header_list, create_plot= False): +# if create a plot then DO NOT remove DATA only print diagram of variance ranges !!! 
+ +# temp_stdev = np.var(matrix[i][1::]) + #cutoff is the percentile rank of the variance values + cutoff= int(cutoff)/100.0 + if cutoff > 0.99 or cutoff < .01: + sys.stderr.write( "ERROR illegal cutoff value= "+str(cutoff*100)+" allowed values 1 to 99") + sys.exit(-8) + + deletes = [] + varianceDict = {} + minVal = +9999999 + maxVal = -99999 + + #Loop to Determine Which Rows have sub-Threshold Variance + for i in range(len(matrix)): + vector = [] + for p in range(len(matrix[0])): + if not math.isnan(matrix[i][p]): + vector.append(matrix[i][p]) + + #temp_stdev = np.var(matrix[:,i]) + if len(vector) > 1: + temp_stdev = np.var(vector) + else: + temp_stdev = 0.0 + + if temp_stdev < minVal: + minVal = temp_stdev + elif temp_stdev > maxVal: + maxVal = temp_stdev + + if temp_stdev not in varianceDict: + varianceDict[temp_stdev] = [i] + else: + tmp= varianceDict[temp_stdev] + tmp.append(i) + varianceDict[temp_stdev] = tmp + + + #calc how many rows to remove + lowerLimit = int(cutoff*len(matrix) +1) + limit = False + cnt = 0 + + for key in sorted(varianceDict.items()): + #rows = varianceDict[key] + rows= key[1] + cnt += len(rows) + if cnt < lowerLimit: #remove rows below percentile cutoff + for j in rows: + deletes = np.append(deletes,[j],0) + #print(deletes) + else: + limit = True + + print( "Dataset Lowest Variance= %.2f" % minVal+" Highest Variance= %.2f" % maxVal+" and Percentile cutoff row = "+str(lowerLimit)+" of "+str(len(matrix))+" rows") + + + #Delete Rows sub-Threshold Rows + matrix = np.delete(matrix,deletes,0) + filter_rows = np.delete(row_header_list,deletes,0) + filter_cols = column_header_list + #np.savetxt('testtest.txt',matrix,delimiter='\t') + + """ + if create_plot: + numBins = 10 + binWidth = 1 + binCat = [] + binData = [] + counted = False + incrmnt= (maxVal-minVal)/(numBins-1) + current_bin_max = minVal + incrmnt/2 + cnt = 0 + for key, val in sorted(varianceDict.items()): + if key < current_bin_max: + cnt += len(val) # add all rows having that 
variance value + counted = False + else: + binData.append(cnt) + cnt= len(val) + binCat.append(str("%0.2f" % (current_bin_max - incrmnt/2.0))) + current_bin_max += incrmnt + counted = True + + if not counted: + binData.append(cnt) + binCat.append(str("%0.2f" % (current_bin_max - incrmnt/2.0))) + + tot = sum(binData) + bins = [] + for j in range(numBins): + bins.append(j*binWidth) + + #ttps://pythonspot.com/matplotlib-bar-chart/ + y_pos = np.arange(numBins) + plt.xticks(y_pos, binCat) + plt.title("Distribution of Variance Values by Row") + plt.ylabel('Variance Bin Totals') + plt.xlabel('Variance Value Bins') + #plt.legend() + plt.bar(y_pos, binData, align='center', alpha=0.5) + + fig, ax = plt.subplots(num=1, figsize=(8,3)) + + plt.show() + """ + + + + return matrix,filter_rows,filter_cols ,len(deletes), minVal,maxVal + +def Variance_Percent_Filter_col(matrix,cutoff,row_header_list,column_header_list, create_plot=False): + #cutoff is the percentile rank of the variance values + cutoff= int(cutoff)/100.0 + if cutoff > 0.99 or cutoff < .01: + sys.stderr.write( "ERROR illegal cutoff value= "+str(cutoff*100)+" allowed values 1 to 99") + sys.exit(-8) + + deletes = [] + varianceDict = {} + minVal = +9999999 + maxVal = -99999 + lenCol = len(matrix[0]) + + #Loop to Determine Which Rows have sub-Threshold Variance + for i in range(lenCol): + vector = [] + for p in range(len(matrix)): + if not math.isnan(matrix[p][i]): + vector.append(matrix[p][i]) + + #temp_stdev = np.var(matrix[:,i]) + if len(vector) > 1: + temp_stdev = np.var(vector) + else: + temp_stdev = 0.0 + + if temp_stdev < minVal: + minVal = temp_stdev + elif temp_stdev > maxVal: + maxVal = temp_stdev + + if temp_stdev not in varianceDict: + varianceDict[temp_stdev] = [i] + else: + tmp= varianceDict[temp_stdev] + tmp.append(i) + varianceDict[temp_stdev] = tmp + + #print(temp_stdev) + #if temp_stdev <= float(cutoff): + + #calc how many rows to remove + lowerLimit = int(cutoff*lenCol +1) + limit = False + cnt = 0 + + 
# --- tail of Variance_Percent_Filter_col: rank grouped variances, delete low ones ---
    # walk variances in ascending order, deleting columns until the percentile
    # cutoff count is reached
    for key in sorted(varianceDict.items()):
        #rows = varianceDict[key]
        cols= key[1]
        cnt += len(cols)
        if cnt < lowerLimit: #remove rows below percentile cutoff
            for j in cols:
                deletes = np.append(deletes,[j],0)
            #print(deletes)
        else:
            limit = True

    print( "Dataset Lowest Variance= %.2f" % minVal+" Highest Variance= %.2f" % maxVal+" and Percentile cutoff column= "+str(lowerLimit)+" of "+str(lenCol)+" columns")

    matrix = np.delete(matrix,deletes,1)
    filter_rows = row_header_list
    filter_cols = np.delete(column_header_list,deletes,0)
    #np.savetxt('testtest.txt',matrix,delimiter='\t')

    """
    if create_plot:
        numBins = 10
        binWidth = 1
        binCat = []
        binData = []
        counted = False
        incrmnt= (maxVal-minVal)/(numBins-1)
        current_bin_max = minVal + incrmnt/2
        cnt = 0
        for key, val in sorted(varianceDict.items()):
            if key < current_bin_max:
                cnt += len(val) # add all rows having that variance value
                counted = False
            else:
                binData.append(cnt)
                cnt= len(val)
                binCat.append(str("%0.2f" % (current_bin_max - incrmnt/2.0)))
                current_bin_max += incrmnt
                counted = True

        if not counted:
            binData.append(cnt)
            binCat.append(str("%0.2f" % (current_bin_max - incrmnt/2.0)))

        tot = sum(binData)
        bins = []

        for j in range(numBins):
            bins.append(j*binWidth)
        #https://pythonspot.com/matplotlib-bar-chart/
        y_pos = np.arange(numBins)

        plt.xticks(y_pos, binCat)
        plt.title("Distribution of Variance Values by Column")
        plt.ylabel('Variance Bin Totals')
        plt.xlabel('Variance Value Bins')
        #plt.legend()
        plt.bar(y_pos, binData, align='center', alpha=0.5)

        fig, ax = plt.subplots(num=1, figsize=(8,3))
        plt.show()
    """

    return matrix, filter_rows, filter_cols,len(deletes),minVal,maxVal

def UpperLowerLimit_Filter_Row(upperLower, matrix,cutoff,row_header_list,column_header_list):
    """Delete every row containing a non-NaN value <= cutoff ('lower') or >= cutoff ('upper').

    min/max are tracked only over values that did NOT trigger removal.
    Returns (matrix, kept row labels, column labels, delete count, minVal, maxVal).
    """
    #Create Null Set of Filtered Row(Populated Later)
    deletes = []
    minVal = +9999999
    maxVal = -99999
    #Loop to Determine Which Rows have sub-Threshold Range
    for i in range(0,len(matrix)):
        removeRow = False

        for j in range(len(matrix[0])):
            val= matrix[i][j]
            if not math.isnan(val):
                if val <= cutoff and upperLower == 'lower':
                    removeRow = True
                elif val >= cutoff and upperLower == 'upper':
                    removeRow = True
                else:
                    if val < minVal: minVal = val
                    if val > maxVal: maxVal = val

        #print(temp_stdev)
        if removeRow:
            deletes = np.append(deletes,[i],0)

    #Delete Rows sub-Threshold Rows
    matrix = np.delete(matrix,deletes,0)
    filter_rows = np.delete(row_header_list,deletes,0)
    filter_cols = column_header_list

    return matrix, filter_rows, filter_cols,len(deletes),minVal,maxVal

def UpperLowerLimit_Filter_Col(upperLower,matrix,cutoff,row_header_list,column_header_list):
    """Column-axis twin of UpperLowerLimit_Filter_Row: delete every column with a
    non-NaN value beyond the cutoff in the direction given by upperLower."""
    #Create Null Set of Filtered Row(Populated Later)
    deletes = []
    minVal = +9999999
    maxVal = -99999
    #Loop to Determine Which Rows have sub-Threshold Variance

    for i in range(0,len(matrix[0])):
        removeRow = False

        for j in range(len(matrix)):
            val= matrix[j][i]
            if not math.isnan(val):
                if val <= cutoff and upperLower == 'lower':
                    removeRow = True
                elif val >= cutoff and upperLower == 'upper':
                    removeRow = True
                else:
                    if val < minVal: minVal = val
                    if val > maxVal: maxVal = val

        #print(temp_stdev)
        if removeRow: deletes = np.append(deletes,[i],0)

    #Delete Rows sub-Threshold Rows
    matrix = np.delete(matrix,deletes,1)
    filter_rows = row_header_list
    filter_cols = np.delete(column_header_list,deletes,0)
    #np.savetxt('testtest.txt',matrix,delimiter='\t')

    return matrix, filter_rows, filter_cols,len(deletes),maxVal

#========= remove rows with too many NANs in cells
def NAN_Filter_Row(matrix,nanList,maxAllowedNANs,row_header_list,column_header_list):
    """Delete every row containing >= maxAllowedNANs cells whose string form is in nanList.

    Returns (matrix, kept row labels, column labels, delete count, largest NaN count seen).
    Any unexpected error aborts the process with exit code -4.
    """

    try:
        #Create Null Set of Filtered Row(Populated Later)
        maxFoundNANs = 0
        deletes = []
        #Loop to Determine Which Rows have sub-Threshold Range
        for i in range(0,len(matrix)):
            #matches= [s for s in matrix[i][0::] if any(nan == s.upper() for nan in nanList)]
            #matches= [s for s in matrix[i][:] if s in nanList]
            matches= []
            for s in matrix[i]:
                # compare the string form so numeric nan and text markers both match
                if str(s) in nanList: matches.append(s)


            lenMatches = len(matches)
            if lenMatches > maxFoundNANs: maxFoundNANs = lenMatches

            if lenMatches >= maxAllowedNANs:
                deletes = np.append(deletes,[i],0)

        #Delete Rows sub-Threshold Rows
        matrix = np.delete(matrix,deletes,0)
        filter_rows = np.delete(row_header_list,deletes,0)
        filter_cols = column_header_list

    except Exception as err:
        traceback.print_exc()
        sys.exit(-4)

    return matrix, filter_rows, filter_cols,len(deletes),maxFoundNANs

#========= remove Cols with too many NANs

def NAN_Filter_Column(matrix,nanList,maxAllowedNANs,row_header_list,column_header_list):
    """Column-axis twin of NAN_Filter_Row: delete columns with >= maxAllowedNANs NaN markers."""

    #Create Null Set of Filtered Row(Populated Later)
    minNumNANs = 0
    maxFoundNANs = 0
    deletes = []
    #Loop to Determine Which Rows have sub-Threshold Variance
    for i in range(0,len(matrix[0])):
        matches= []
        for j in range(len(matrix)):
            if str(matrix[j][i]) in nanList: matches.append(matrix[j][i])

        lenMatches = len(matches)
        if lenMatches > maxFoundNANs:
            maxFoundNANs = lenMatches

        if lenMatches >= maxAllowedNANs:
            deletes = np.append(deletes,[i],0)

    #Delete cols with too many NANs
    matrix = np.delete(matrix,deletes,1)
    filter_rows = row_header_list
    filter_cols = np.delete(column_header_list,deletes,0)
    #np.savetxt('testtest.txt',matrix,delimiter='\t')
    return matrix, filter_rows, filter_cols,len(deletes),maxFoundNANs


#MAD Median Absolute Deviation median (|Xi - Xmedian|) > X
def Row_Value_MAD(matrix,cutoff,row_header_list,column_header_list):
#MAD Median Absolute Deviation median (|Xi - Xmedian|) > X
# cutoff is MAX value used to meant to minimize the impact of one outlier
    """Delete every row whose median absolute deviation (MAD) is below cutoff.

    Returns (matrix, kept row labels, column labels, delete count, largest MAD seen).
    """

    deletes = []
    minVal = +9999999
    maxVal = -99999
    #Loop to Determine Which Rows have sub-Threshold Range
    for i in range(0,len(matrix)):
        medianRow = np.median(matrix[i])
        temp = np.median(abs(matrix[i]- medianRow))
# median (|Xi - Xmedian|) > X => meant to minimize the impact of one outlier
        if temp < cutoff:
            deletes = np.append(deletes,[i],0)

        if temp < minVal: minVal = temp
        if temp > maxVal: maxVal = temp

    #Delete Rows sub-Threshold Rows
    matrix = np.delete(matrix,deletes,0)
    filter_rows = np.delete(row_header_list,deletes,0)
    filter_cols = column_header_list
    print( "INFO Row MAD - Matrix min MAD value= "+str(minVal)+" and the max MAD value= "+str(maxVal) )

    return matrix, filter_rows, filter_cols,len(deletes),maxVal

#MAD Median Absolute Deviation median (|Xi - Xmedian|) > X
def Col_Value_MAD(matrix,cutoff,row_header_list,column_header_list):
#MAD Median Absolute Deviation median (|Xi - Xmedian|) > X
# cutoff is MAX value used to meant to minimize the impact of one outlier
    """Column-axis twin of Row_Value_MAD: delete columns whose MAD is below cutoff."""
    deletes = []
    minVal = +9999999
    maxVal = -99999
    #Loop to Determine Which Rows have sub-Threshold Range
    for i in range(0,len(matrix[0])):
        matrixCol= []
        for j in range(len(matrix)):
            matrixCol.append(matrix[j][i])

        medianCol = np.median(matrixCol)
        temp = np.median(abs(matrixCol- medianCol))
# median (|Xi - Xmedian|) > X meant to minimize the impact of one outlier
        if temp < cutoff:
            deletes = np.append(deletes,[i],0)

        if temp < minVal: minVal = temp
        if temp > maxVal: maxVal = temp

    #Delete Rows sub-Threshold Rows
    matrix = np.delete(matrix,deletes,1)
    filter_rows = row_header_list
    filter_cols = np.delete(column_header_list,deletes,0)
    print( "INFO Column MAD - Matrix min MAD value= "+str(minVal)+" and the max MAD value= "+str(maxVal) )

    return matrix, filter_rows, filter_cols,len(deletes),maxVal


# if covariance of the data in two columns exceeds a thresehold remove one row list the rows in a separate output
# def CoVariance_Percent_Filter_row_col(matrix,thresh,row_header_list,column_header_list):
# xv= array([8., 9.5, 7.8, 4.2, -7.7, -5.4, 3.2])
# yv= array([8.9, 2.0, 4.8, -4.2, 2.7, -3.4, -5.9])
#
+# def cov(x,y): +# if (len(x) != len(y) +# [Stop] +# x.bar = mean(x) +# y.bar = mean(y) +# N = len(x) +# Cov = (sum((x-x.bar)*(y-y.bar))) / (N-1.0) +# return(Cov) + +# #Create Null Set of Filtered Row(Populated Later) +# deletes = [] +# +# temp_mean = np.nanmean(matrix[i]) +# temp_stdev = np.nanstd(matrix[i]) +# +# get stddev of each row the calc xi -xj sq +# +# for i in range(0,len(matrix)): +# temp_range = np.max(matrix[i][0::]) - np.min(matrix[i][0::]) +# if temp_range <= float(thresh): +# deletes = np.append(deletes,[i],0) +# +# #Delete Rows sub-Threshold Rows +# matrix = np.delete(matrix,deletes,0) +# filter_rows = np.delete(row_header_list,deletes,0) +# filter_cols = column_header_list +# return(matrix,filter_rows,filter_cols) +# +# #np.savetxt('testtest.txt',matrix,delimiter='\t') +# return(matrix,filter_rows,filter_cols) +# + +#Define Function Which Labels Rows/Columns on Output +#below replace +# def labeler(matrix,filter_rows,filter_cols,output_file_txt): +# +# #Write Data to Specified Text File Output +# with open(output_file_txt,'w') as f: +# f.write("") +# for k in range(0,len(filter_cols)): +# f.write('\t' + filter_cols[k]) +# f.write('\n') +# for i in range(0,len(filter_rows)): +# f.write(filter_rows[i]) +# for j in range(0,len(matrix[0])): +# f.write('\t' + format(matrix[i][j])) +# f.write('\n') + + +#Define Main Function +def main(): + try: + args = get_args() + #sys.stdout.write(str(args)+"\n") +# +# +# +# + nanList= ["NAN", "NA", "N/A", "-","?","nan", "na", "n/a"] + + matrix, column_header_list,row_header_list = reader(args.input_file_txt) + #old_reader matrix, row_header_list, column_header_list = reader(args.input_file_txt) + threshold = float(args.thresh) + if threshold < 0.000001: + print('Invalid negative or near-zero threshold chosen = '+str(args.thresh)+" choose positive value") + sys.exit(-4) + +#VariancePercent + if args.choice == "VariancePercent" or args.choice == "VarianceCount": # > percent variance + + if args.axes == "Row": + if 
args.choice == "VarianceCount": threshold= (1-threshold/len(row_header_list))*100.0 + + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_row(matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for rows using variance percentile < '+str(args.thresh)+ ' by row. Matrix row minimum variance= %.2f' % minVal+' and maximum variance= %.2f' % maxVal) + sys.stderr.write('\nFiltering out rows using variance percentile < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + sys.exit(-1) + else: + print('\nFiltering out rows using variance percentile < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + elif args.axes == "Column": + if args.choice == "VarianceCount": threshold= (1-threshold/len(column_header_list))*100.0 + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_col(matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for columns using variance percentile < '+str(args.thresh)+ ' by columns. Matrix columns minimum variance= %.2f' % minVal+' and maximum variance= %.2f' % maxVal) + sys.stderr.write('\nNO Filtering out rows using variance percentile < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + sys.exit(-1) + else: + print('\nFiltering out columns using variance percentile < '+str(args.thresh)+ ' removed '+str(delCnt)+' columns') + else: + print('Invalid Axes ='+str(args.thresh)) + sys.exit(-1) +#LowerLimit + elif args.choice == "LowerLimit": #!! 
todo is NOT lower or upper limit but range of values + if args.axes == "Row": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = UpperLowerLimit_Filter_Row('lower',matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for rows using LowerLimit < '+str(args.thresh)+ ' by row. Matrix row minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.stderr.write('\nNO Filtering out rows using LowerLimit < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' rows with Lower Limit < '+str(args.thresh)) + elif args.axes == "Column": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = UpperLowerLimit_Filter_Col('lower', matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for columns using Lower Limit < '+str(args.thresh)+ ' by columns. Matrix columns minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.stderr.write('\nNO Filtering out rows using Lower Limit < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' columns with Lower Limit < '+str(args.thresh)) +#UpperLimit + elif args.choice == "UpperLimit": #!! todo is NOT lower or upper limit but range of values + if args.axes == "Row": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = UpperLowerLimit_Filter_Row('upper',matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for rows using Upper Limit < '+str(args.thresh)+ ' by row. Matrix row minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.stderr.write('\nNO Filtering out rows using Upper Limit < '+str(args.thresh)+ ' by row. 
Matrix row minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' rows with UpperLimit < '+str(args.thresh)) + elif args.axes == "Column": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = UpperLowerLimit_Filter_Col('upper', matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for columns using UpperLimit < '+str(args.thresh)+ ' by columns. Matrix columns minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.stderr.write('\nFiltering out rows using UpperLimit < '+str(args.thresh)+ ' by columns. Matrix columns minimum range= %.2f' % minVal+' and maximum range= %.2f' % maxVal) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' columns with UpperLimit < '+str(args.thresh)) +#MADlimit + elif args.choice == "MADcount" or args.choice == "MADpercent": #!! is lowerlimit of median absolute deviation medians + threshold= threshold + if args.axes == "Row": + if args.choice == "MADpercent": threshold= len(row_header_list)*threshold/100.0 + + matrix, filter_rows, filter_cols,delCnt,maxVal = Row_Value_MAD(matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for rows using MAD < '+str(threshold)+ ' by row. Matrix row MAD maximum value= %.2f' % maxVal) + sys.stderr.write('\nFiltering out rows using MAD < '+str(threshold)+ ' by row. 
Matrix row MAD maximum value= %.2f' % maxVal) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' rows using MAD maximum value > '+str(threshold)) + elif args.axes == "Column": + if args.choice == "MADpercent": threshold= len(column_header_list)*threshold/100.0 + + matrix, filter_rows, filter_cols,delCnt,maxVal = Col_Value_MAD(matrix,threshold,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for columns using MAD < '+str(threshold)+ ' by columns. Matrix columns MAD maximum value= %.2f' % maxVal) + sys.stderr.write('\nFiltering out columns using MAD < '+str(threshold)+ ' by columns. Matrix columns MAD maximum value= %.2f' % maxVal) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' columns using MAD maximum value > '+str(threshold)) +#NANlimit + elif args.choice == "NANlimit" or args.choice == "NANpercent": + maxNANs= int(args.thresh) + val= ' ' + if args.choice == "NANpercent": + n,m = np.shape(matrix) + maxNANs= int(int(args.thresh)*n/100) + val= '%' + if args.axes == "Row": + matrix, filter_rows, filter_cols,delCnt, maxFoundNANs = NAN_Filter_Row(matrix,nanList,maxNANs,row_header_list,column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for rows using NAN limit = or > '+str(args.thresh)+val+ ' by row. Matrix row max NAN count is =' + str(maxFoundNANs )) + sys.stderr.write('\nNO Filtering out rows using NAN limit = or > '+str(args.thresh)+val+ ' by row. 
Matrix row max NAN count is =' + str(maxFoundNANs )) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' rows using NAN limit = or > '+str(args.thresh)+val) + elif args.axes == "Column": + matrix, filter_rows, filter_cols,delCnt, maxFoundNANs = NAN_Filter_Column(matrix, nanList, maxNANs, row_header_list, column_header_list) + Labeler(matrix,filter_cols,filter_rows,args.output_file_txt) + if delCnt < 1: + print('\nNO Filtering occurred for columns using NAN limit = or > '+str(args.thresh)+val+ ' by columns. Matrix columns max NAN count is = '+ str(maxFoundNANs)) + sys.stderr.write('\nNO Filtering out columns using NAN limit = or > '+str(args.thresh)+val+ ' by columns. Matrix columns max NAN count is = '+ str(maxFoundNANs)) + sys.exit(-1) + else: + print('\nFiltered out '+str(delCnt)+' columns using NAN limit = or > '+str(args.thresh)+val ) + +# elif args.choice == "covariance": +# if args.axes == "Row": +# matrix, filter_rows, filter_cols = CoVariance_Percent_Filter_row(matrix,args.thresh,row_header_list,column_header_list) +# Labeler(matrix,filter_rows,filter_cols,args.output_file_txt) +# print('Covariance_Filter on row') +# elif args.axes == "Column": +# matrix, filter_rows, filter_cols = CoVariance_Percent_Filter_col(matrix,args.thresh,row_header_list,column_header_list) +# Labeler(matrix,filter_rows,filter_cols,args.output_file_txt) +# print('Covariance_Filter on column') + else: + print('Invalid Axes = '+str(args.axes)) + sys.exit(-1) + else: + print("Invalid Filter Choice = "+str(args.choice)) + sys.exit(-2) + + + except Exception as err: + traceback.print_exc() + sys.exit(-3) + +if __name__ == '__main__': + main() + print("\ndone") + sys.exit(0) diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Filters.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Filters.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,58 @@ + + + rows or columns based on specified threshold + Matrix_Filters.py '$p_input' '$extra.choice' '$extra.thresh' '$axes' 
'$output_file' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Manipulation.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Manipulation.sh Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,42 @@ +#echo "1: " $1 # tool directory +#echo "2: " $2 # manipulation option +#echo "3: " $3 # input file +#echo "4: " $4 # output file +#echo "5: " $5 # choice +#echo "6: " $6 # thresh +#echo "7: " $7 # axis +#echo "8: " $8 # transpose +#echo "9: " $9 # input2 +#echo "10: " ${10} # offsetvalue +#echo "11: " ${11} # scalevalue +#echo "12: " ${12} +#echo "13: " ${13} +#echo "14: " ${14} +#echo "15: " ${15} +#echo "16: " ${16} + +#echo "tool directory is: " $1 +if [ "$2" = "Matrix_Filters" ]; then + echo "filter chosen" + #python $__tool_directory__/Matrix_Filters.py '$p_input '${manipulation.extra.choice}' '${manipulation.extra.thresh}' '${manipulation.axis}' '$output_file' + python $1/Matrix_Filters.py $3 $5 $6 $7 $4 +elif [ "$2" = "Matrix_Multiply" ]; then + echo "multiply chosen" + #python '$__tool_directory__/Matrix_Multiply.py' '$p_input' '${manipulation.extra.transpose}' '${manipulation.extra.input2}' '${manipulation.extra.choice}' '$output_file' + python $1/Matrix_Multiply.py $3 $8 $9 $5 $4 +elif [ "$2" = "Matrix_Statistics" ]; then + echo "statistics chosen" + #python '$__tool_directory__/Matrix_Statistics.py' '$p_input' '$choice' '$cutoff' '$axis' '$out_file' + python $1/Matrix_Statistics.py $3 $5 $6 $7 $4 +elif [ "$2" = "Matrix_Transformations" ]; then + echo "transform chosen" + #python '$__tool_directory__/Matrix_Transformations.py' '$p_input' '$choice' '$axis' '$scalevalue' '$offsetvalue' '$output_file' + python $1/Matrix_Transformations.py $3 $5 $7 ${11} ${10} $4 +elif [ "$2" = "Matrix_Validations" ]; then + echo "validations chosen" + #python '$__tool_directory__/Matrix_Validations.py' '$p_input' '${manipulation.extra.choice}' 
'${manipulation.extra.axis}' '$output_file' + python $1/Matrix_Validations.py $3 $5 $7 $4 +else + echo "no valid choice made" +fi + diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Manipulation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Manipulation.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,406 @@ + + + Data manipulation before heatmap creation + + + $__tool_directory__/Matrix_Manipulation.sh '$__tool_directory__' '${manipulation.option}' '$p_input' '$output_file' + '${manipulation.extra.choice}' '${manipulation.extra.thresh}' '${manipulation.extra.axis}' + '${manipulation.extra.transpose}' '${manipulation.extra.input2}' '${manipulation.extra.offsetvalue}' '${manipulation.extra.scalevalue}' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Manipulation_Tool_Shed.zip Binary file Matrix_Manipulation_Tool_Shed.zip has changed diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Multiply.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Multiply.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,173 @@ +''' +Created on March 6, 2018 + +@author: Bob Brown based on John Weinstein's algorithm +''' + +import os +import re +import shutil +import traceback +import sys, traceback, argparse +import numpy as np +import warnings 
+#import scipy.stats as ss +from Matrix_Validate_import import reader, Labeler, MatchLabels +import math +warnings.filterwarnings('error') + +# John Weinsteins algorithm by bob brown https://discover.nci.nih.gov/CorrelateMatrices/help.do +#http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/ + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file1', help='text file input matrix(include .txt in name)') + parser.add_argument('transpose', type=str, help='transpose matrix 1?') + parser.add_argument('input_file2', help='text file input matrix(include .txt in name)') + parser.add_argument('choice', type=str, help='Choose Normalization Method: 1 = Z-score, 2 = Mean Centered, 3 = log2, 4= rank') +# parser.add_argument('scaleValue', help='optional scaling factor for matrix)') + parser.add_argument('out_fileName', help='text file output matrix(include .txt in name)') + args = parser.parse_args() + if args.transpose == "": args.transpose = 'n' + return args + + +def Matrix_Multiply(matrix1, matrix2): + + try: +#TODO handle NANs + + matrixOut= np.dot(matrix1, matrix2) + + + except Exception as err: + traceback.print_exc() + sys.exit(-5) + + return(matrixOut) + + +#CorrelateMatrices correlation acorss 2 martices https://discover.nci.nih.gov/CorrelateMatrices/home.do +def Correlate_Matrices(matrix1, matrix2): + + #try: + # Leave both matrices as size axn and bxn and treat a is column and b as row + #matrix1T = Transpose(matrix1) + +#TODO handle NANs + numRows1,numColumns1= np.shape(matrix1) + + numRows2,numColumns2= np.shape(matrix2) + matrixOut= [] + + if numColumns1 != numRows2: + print("ERROR number columns Matrix 1 ", str(numColumns1), " not equal number rows for Matrix 2 ",str(numRows2)) + sys.exit(-1) +#TODO need to look for NANs?? 
+ + for i in range(numRows1): + vectorM1 = matrix1[i][:] + meanVec1 = np.nanmean(vectorM1) + varStdDev1 = np.nanstd(vectorM1, ddof=1) + lowStdDev1 = False + #if equals zero + if abs(varStdDev1) < .000001: + print("ERROR Variance value almost zero", str(varStdDev1), " for Matrix 1 Row ",str(i+1)) + lowStdDev1= True + correlationRow= [] + + for j in range(numColumns2): + vectorM2 = [] + for t in range(numRows2): + vectorM2.append(matrix2[t][j]) + meanVec2 = np.nanmean(vectorM2) + varStdDev2 = np.nanstd(vectorM2, ddof=1) + lowStdDev2= False + #if equals zero + if abs(varStdDev2) < .000001: + print("ERROR Variance value almost zero", str(varStdDev2), " for Matrix 2 Column ",str(j+1)) + lowStdDev2= True + + covarStdDev12= 0 + + if not lowStdDev1 and not lowStdDev2: + #try: + for pos in range(len(vectorM1)): + covarStdDev12 += ((vectorM1[pos]-meanVec1)/varStdDev1)*((vectorM2[pos]-meanVec2)/varStdDev2) +# bottom= (numColumns1 -1)*(varStdDev1*varStdDev2) +# correlationRow.append( covarStdDev12/bottom) + correlationRow.append( covarStdDev12/(numColumns1 -1)) + #except: bad value because of NAN or other + else: + correlationRow.append("divide by 0") # cannot calculate correlation var too small + + matrixOut.append(correlationRow) + +# except Exception as err: +# traceback.print_exc() +# sys.exit(-6) + + return(matrixOut) + +#---------------------------------------------------------------------- +def Transpose(in_mat): + out_mat = [] + numRows,numColumns= np.shape(in_mat) + + for i in range(numColumns): + temp= [] + for j in range(numRows): + temp.append(in_mat[j][i]) + out_mat.append(temp) + #print( str(out_mat)) + return out_mat + + +#---------------------------------------------------------------------- +if __name__ == "__main__": + +# input_file1 = "/Users/bobbrown/Desktop/Gene-by-var.txt" +# input_file2 = "/Users/bobbrown/Desktop/var-by-sample.txt" +# out_fileName = "/Users/bobbrown/Desktop/MatixMult-1-2-Out.txt" +# selection = "MatrixMultiply" +#TODO address NANs ??? 
+ + try: + args = get_args() + selection= args.choice + + matrix1,column_labels1,row_labels1 = reader(args.input_file1) # to be transposed later + matrix2,column_labels2,row_labels2 = reader(args.input_file2) + + + if args.transpose == 'y' or args.input_file1 == args.input_file2: + matrix1 = Transpose(matrix1) + print("\n>>>NOTICE Transposed first matrix so matrix 1 columns = Matrix 2 number rows ") + temp = row_labels1 #swap labels for output matrix + row_labels1 = column_labels1 #swap labels for output matrix + column_labels1= temp #swap labels for output matrix + + MatchLabels(column_labels1,row_labels2) # verify labels and their order match + + if len(column_labels1) != len(row_labels2): + print("\n>>> ERROR attempting to multiple Matrices of incompatible dimensions ") + print("First Matrix is "+str(len(row_labels1))+" by "+str(len(column_labels1))+" where second Matrix is "+str(len(row_labels2))+" by "+str(len(column_labels2))+"\n") + print("Matrices must have dimensions AxB and BxC. A can equal C (square matrices)") + sys.exit(-1) + + if selection == "MatrixMultiply": + matrixOut= Matrix_Multiply(matrix1, matrix2 ) + + elif selection == "Corr2Matrices" or selection == "Corr1Matrix": + matrixOut = Correlate_Matrices(matrix1, matrix2) + + Labeler(matrixOut,column_labels2,row_labels1,args.out_fileName) + + print("Matrix Multiply "+str(len(row_labels1))+" by "+str(len(column_labels1))+" Matrix 1 by "+str(len(row_labels2))+" by "+str(len(column_labels2))+" matrix 2") + print("Output Matrix dimensions are "+str(len(row_labels1))+" by "+str(len(column_labels2))+"\n") + + except Exception as err: + traceback.print_exc() + sys.exit(-3) + + sys.exit(0) \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Multiply.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Multiply.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,52 @@ + + + one matrix using one or two matrices + Matrix_Multiply.py '$extra.input1' '$extra.transpose' '$extra.input2'
'$extra.choice' '$output_file' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Statistics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Statistics.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,145 @@ +''' +Created on Feb2018 + +@author: bob brown +''' + +import sys, traceback, argparse +import numpy as np +from Matrix_Validate_import import reader +#import matplotlib.pyplot as plt +from Matrix_Filters import Variance_Percent_Filter_row, Variance_Percent_Filter_col + +#Define argparse Function +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file_txt', help='tab delimited text file input matrix(include .txt in name)') + parser.add_argument('choice',type=str, help='Variance Filter Method (Variance or Range)') + parser.add_argument('thresh', help='Thershold for Variance Filtering') + parser.add_argument('axes', help='Axes to Filter on (Either Row or Column') + parser.add_argument('output_file_txt', help='tab delimited text file output name (include .txt in name)') + args = parser.parse_args() + return args + + +#Define Function Which Labels Rows/Columns on Output +def labeler(matrix,filter_rows,filter_cols,output_file_txt): + + #Write Data to Specified Text File Output + with open(output_file_txt,'w') as f: + f.write("") + for k in range(0,len(filter_cols)): + f.write('\t' + filter_cols[k]) + f.write('\n') + for i in range(0,len(filter_rows)): + f.write(filter_rows[i]) + for j in range(0,len(matrix[0])): + f.write('\t' + format(matrix[i][j])) + f.write('\n') + + +def Histo(matrix): + numBins= 20 + data = [] +# numRow,numCol= np.shape(matrix) + for i in range(len(matrix[0])): + data.append(np.nanmean([row[i] for row in matrix])) + +# print(str(np.nanmean([row[i] for row in matrix]))) + +#https://stackoverflow.com/questions/5328556/histogram-matplotlib + #bins = [0, 40, 60, 75, 90, 110, 125, 140, 160, 200] + minBin = 
int(min(data)-0.5) + maxBin = int(max(data)+0.5) + binWidth = float(maxBin-minBin)/numBins + bins= [] + """ + for j in range(numBins): + bins.append(minBin+ j*binWidth) + #bins= 20 + n, bins, patches = plt.hist(data,bins, normed=False) + #n, bins, patches = plt.hist(data,bins, normed=1, color='green') + #hist, bins = np.histogram(data, bins=bins) + width = np.diff(bins) + center = (minBin + bins[1:]) / 2 + + cm = plt.cm.get_cmap('RdYlBu_r') + #col = (n-n.min())/(n.max()-n.min()) + for c, p in zip(bins, patches): + plt.setp( p, 'facecolor', cm(c/numBins)) + fig, ax = plt.subplots(num=1, figsize=(8,3)) + ax.set_title("Distribution of Column Means") + #ax.bar(center,bins, align='center', width=width) + #ax.bar(center, hist, align='center', width=width) + #ax.set_xticks(bins) +# fig.savefig("/Users/bobbrown/Desktop/Matrix-tools-Test-output/Column_Mean_Histogram.png") + + plt.show() + """ + return() + +#========== test create variable number output files in Galaxy +def CreateFiles(output_file_info): + + for i in range(3): + fd= open( output_file_info, 'w') + fd.write('File number = '+ str(i)+"\n") + fd.close() + + return() + +#================== + + #Define Main Function +def main(): + try: + args = get_args() + #sys.stdout.write(str(args)+"\n") + nanList= ["NAN", "NA", "N/A", "-","?","nan", "na", "n/a"] + + matrix, og_cols,og_rows = reader(args.input_file_txt) + #old_reader matrix, og_rows, og_cols = reader(args.input_file_txt) +# if float(args.thresh) < 0.000001: +# print('Invalid negative threshold chosen = '+str(args.thresh)+" choose positive value") +# sys.exit(-4) + + if args.choice == "Histogram": + Histo(matrix) + elif args.choice == "CreateFiles": + CreateFiles(args.output_file_info) + + elif args.choice == "Variance": + if args.axes == "Row": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_row(matrix,1,og_rows,og_cols,True) + labeler(matrix,filter_rows,filter_cols,args.output_file_txt) +# if delCnt < 1: +# print('\nNO 
Filtering occurred for rows using variance < '+str(args.thresh)+ ' by row. Matrix row minimum variance= %.2f' % minVal+' and maximum variance= %.2f' % maxVal) +# sys.stderr.write('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') +# sys.exit(-1) +# else: +# print('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') + elif args.axes == "Column": + matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_col(matrix,1,og_rows,og_cols,True) + labeler(matrix,filter_rows,filter_cols,args.output_file_txt) +# if delCnt < 1: +# print('\nNO Filtering occurred for columns using variance < '+str(args.thresh)+ ' by columns. Matrix columns minimum variance= %.2f' % minVal+' and maximum variance= %.2f' % maxVal) +# sys.stderr.write('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows') +# sys.exit(-1) +# else: +# print('\nFiltering out columns using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' columns') + else: + print('Invalid Axes = '+str(args.axes)) + sys.exit(-1) + else: + print("Invalid Filter Choice = "+str(args.choice)) + sys.exit(-2) + + + except Exception as err: + traceback.print_exc() + sys.exit(-3) + +if __name__ == '__main__': + main() + print("\nFini") + sys.exit(0) \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Statistics.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Statistics.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,23 @@ + + + View metadata about Heat Map Matrix + Matrix_Statistics.py '$input' '$choice' '$cutoff' '$axes' '$out_file' + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Transformations.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Transformations.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,301 @@ +''' +Created on Jun 6, 2017 updated Feb 2018 + +@author: cjacoby and Bob Brown +''' 
+import os +import sys, traceback, argparse +import numpy as np +from numpy import size, array +import warnings +from Matrix_Validate_import import reader +#import scipy.stats as ss +warnings.filterwarnings('error') + +#Define argparse Function +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file_txt', help='text file input matrix(include .txt in name)') + parser.add_argument('choice', type=str, help='Choose normalization Method: 1 = Z-score, 2 = Mean Centered, 3 = log2, 4= rank') + parser.add_argument('axes', type=str, help='Choose Axis to normalize On (Row or Column)') + parser.add_argument('scalevalue', help='optional scaling factor for matrix)') + parser.add_argument('offsetvalue', help='optional offset for matrix') + parser.add_argument('output_file_txt', help='text file output matrix(include .txt in name)') + args = parser.parse_args() + return args + + +def Zscore_row(matrix): + + #Loop To Perform Z-Score normalization + for i in range(0,len(matrix)): + temp_mean = np.nanmean(matrix[i]) + temp_stdev = np.nanstd(matrix[i],ddof=1) + for j in range(0,len(matrix[0])): + matrix[i][j] = (matrix[i][j]-temp_mean)/temp_stdev + return(matrix) + +#Define Z-Score normalization Function +def Zscore_col(matrix): + + #Loop To Perform Z-Score normalization + for i in range(len(matrix[0])): +# matrix[:][i] = [scaleValue*x+offset for x in matrix[i]] + temp_mean = np.nanmean([row[i] for row in matrix]) + temp_stdev = np.nanstd([row[i] for row in matrix],ddof=1) + #Probably Should Have if statement checking if stdev equals zero, although this implies the data is already Z-score normalized + for j in range(len(matrix)): + matrix[j][i] = (matrix[j][i]-temp_mean)/temp_stdev + return(matrix) + + +#Define Mean Centered or Median centered normalization Function +def MeanMedianCenter_row(matrix,type): + + + #Loop To Perform mean or median center + for i in range(0,len(matrix)): + if type == "mean": + temp_type = np.nanmean(matrix[i][1::]) + else: + 
temp_type = np.nanmedian(matrix[i][1::]) + + for j in range(0,len(matrix[0])): + matrix[i][j] = (matrix[i][j]-temp_type) + return(matrix) + + +#Define mean or median +def MeanMedianCenter_col(matrix,type): + + #Loop To Perform mean or median center + for i in range(0,len(matrix[0])): + if type == "mean": + temp_type = np.nanmean([row[i] for row in matrix]) + else: + temp_type = np.nanmedian([row[i] for row in matrix]) + #Probably Should Have if statement checking if stdev equals zero, although this implies the data is already Z-score normalized + for j in range(0,len(matrix)): + matrix[j][i] = (matrix[j][i]-temp_type) + return(matrix) + +#Divide by sum of the Row Function +def Divide_By_Sum_row(matrix): + + #Loop To Perform mean or median center + numRow,numCol= np.shape(matrix) + + for i in range(numRow): + sumValue = sum(matrix[i][:]) + + #if equals zero + if abs(sumValue) > .0001: + for j in range(numCol): + matrix[i][j] = matrix[i][j]/sumValue + else: + print("ERROR Cannot divide by Sum almost zero", str(sumValue), " for Row ",str(i+1)) + return(matrix) + + +#Divide by sum of the Column Function +def Divide_By_Sum_col(matrix): + + #Loop To Perform mean or median center + numRow,numCol= np.shape(matrix) + + for i in range(numCol): + sumValue = sum([row[i] for row in matrix]) + + #if equals zero + if abs(sumValue) > .0001: + for j in range(numRow): + matrix[j][i] = (matrix[j][i]/sumValue) + else: + print("ERROR Cannot divide by Sum almost zero", str(sumValue), " for Column ",str(i+1)) + return(matrix) + +#scale or add offset to matrix by row +def ScaleOffset_row(matrix,scaleValue,offset): + + #Loop To Perform scale and offset do one or the other per request + if abs(scaleValue) > 0.0001: + for i in range(0,len(matrix)): + matrix[i][:] = [scaleValue*x+offset for x in matrix[i]] + else: + print (" Scale facter "+str(scaleValue)+" too small") + return(matrix) + +#scale or add offset to matrix by column +def ScaleOffset_col(matrix,scaleValue,offset): + + #Loop To Perform scale and offset do one or
the other per request + if abs(scaleValue) > 0.0001: + for i in range(0,len(matrix[0])): + for j in range(0,len(matrix)): + matrix[j][i] = scaleValue*matrix[j][i]+offset + else: + print (" Scale facter "+str(scaleValue)+" too small") + return(matrix) + +#Define Log2 normalization Method +def Convert2Logs(matrix,logValue, offset): + import warnings + warnings.filterwarnings('error') + + #Loop To Perform Z-Score normalization + for i in range(0,len(matrix)): + for j in range(0,len(matrix[0])): + try: + if logValue == "log2": + matrix[i][j] = np.log2(matrix[i][j]+offset) + else: + matrix[i][j] = np.log10(matrix[i][j]+offset) + + except RuntimeWarning: + print(logValue+" normalization Failed: Encountered elements <= 0, which are invalid inputs for a Log normalization") + break + else: + continue + break + return(matrix) + +#transpose matrix +def Transpose(in_mat): + out_mat = [] + numRow,numCol= np.shape(in_mat) + + for i in range(numCol): + temp= [] + for j in range(numRow): + temp.append(in_mat[j][i]) + out_mat.append(temp) + #print( str(out_mat)) + return out_mat + +# restores row and column labels in ouput +def labeler(matrix,og_cols,og_rows,output_file_txt): + #Define Null Sets For Col and Row Headers + with open(output_file_txt,'w') as f: + f.write("") + for k in range(0,len(og_cols)): + f.write('\t' + str(og_cols[k]) ) + f.write('\n') + for i in range(0,len(og_rows)): + f.write(str(og_rows[i]) ) + for j in range(0,len(matrix[0])): + f.write('\t' + format(matrix[i][j])) + f.write('\n') + +#Define Main Function +def main(): + + try: + args = get_args() + scaleValue = float(args.scalevalue) + offsetValue= float(args.offsetvalue) + #print(args) + #sys.stdout.write(str(args)+"\n") + + matrix,og_cols,og_rows = reader(args.input_file_txt) + if args.choice == "z_score_normalization": + if args.axes == "Row": + matrix = Zscore_row(matrix) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("zcore, row") + elif args.axes == "Column": + matrix = Zscore_col(matrix) +
labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("zscore, column") + else: + print("zscore, invalid axis") + elif args.choice == "mean_center_normalization": + if args.axes == "Row": + matrix = MeanMedianCenter_row(matrix,"mean") + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("mean-center by row") + elif args.axes == "Column": + matrix = MeanMedianCenter_col(matrix,"mean") + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("mean-center by column") + else: + print("meancenter, invalid axis") + elif args.choice == "median_center_normalization": + if args.axes == "Row": + matrix = MeanMedianCenter_row(matrix,"median") + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("median-center by row") + elif args.axes == "Column": + matrix = MeanMedianCenter_col(matrix,"median") + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("median-center by column") + else: + print("meancenter, invalid axis") + elif args.choice == "add_offset": + if args.axes == "Row": + #offset = -100 #!!!! TODO REMOVE AND ADD WHEN clause to xml to get value + matrix = ScaleOffset_row(matrix,1.0,offsetValue) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("offset of "+str(offsetValue)+" by row") + elif args.axes == "Column": + matrix = ScaleOffset_col(matrix,1.0,offsetValue) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("offset of "+str(offsetValue)+" by column") + else: + print("offset"+str(offsetValue)+" invalid axis -not row or column") + elif args.choice == "scale": + if args.axes == "Row": + #scaleValue = 1000 #!!!! 
TODO REMOVE AND ADD WHEN clause to xml to get value + matrix = ScaleOffset_row(matrix,scaleValue,0.0) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("scaling "+str(scaleValue)+" by row") + elif args.axes == "Column": + matrix = ScaleOffset_col(matrix,scaleValue,0.0) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("scaling "+str(scaleValue)+" by column") + else: + print("scaling "+str(scaleValue)+" invalid axis") + elif args.choice == "transpose": + matrix = Transpose(matrix) #issue using same matrix? + labeler(matrix,og_rows,og_cols,args.output_file_txt) #swapped row&col labels + print("transpose mxn matrix to nxm size") + elif args.choice == "ln_normalization": + matrix = Convert2Logs(matrix,"log2",offsetValue) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("log2 plus "+str(offsetValue)+" normalization for all values") + elif args.choice == "log_normalization": + matrix = Convert2Logs(matrix,"log10",offsetValue) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("log10 normalization for all values") + elif args.choice == "rank": + if args.axes == "Row": + matrix = Rankdata_ByRow(matrix) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("performed rank normalization by row") + elif args.axes == "Column": + matrix = Rankdata_ByColumn(matrix) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("performed rank normalization by column") + else: + print("rank, invalid axis") + elif args.choice == "divide_by_sum": + if args.axes == "Row": + matrix = Divide_By_Sum_row(matrix) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("performed divide row N values by row N's sum") + elif args.axes == "Column": + matrix = Divide_By_Sum_col(matrix) + labeler(matrix,og_cols,og_rows,args.output_file_txt) + print("performed divide column N values by column N's sum") + else: + print("divide_by_sum, invalid axis") + + else: + print("Invalid normalization Choice") + + except Exception as err: + 
traceback.print_exc() + sys.exit(1) + + +if __name__ == '__main__': + main() + print("Done") diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Transformations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Transformations.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,106 @@ + + + by Rows, Columns, All by method + Matrix_Transformations.py '$p_input' '$extra.choice' '$extra.axes' '$extra.scalevalue' '$extra.offsetvalue' '$output_file' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Validate_import.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Validate_import.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,179 @@ +''' +Created on Jun 7, 2017 modified Feb2018 + +@author: cjacoby and Bob Brown +''' + +import sys, traceback, argparse +import numpy as np +import os +#import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt; plt.rcdefaults() + +# Define the Reading Function Which Pulls the Data from a .txt file +def reader(input_file_txt, create_plot= False): + #Read Matrix, Preserving String Values for Headers first row and first column (both minus first cell) + #Read Matrix, Converting all values to Float for Data Processing + + f = open(input_file_txt, "rU") + + #print( 'Valid NAN identifiers are: empty cells, cells with blanks,"NA","N/A","-", and "?"') + + column_labels = [] + row_labels = [] + matrix = [] + firstLine= True + + line = f.readline() + +# "NA","N/A","-","?","NAN","NaN","Na","na","n/a","null",EMPTY/Null, SPACE (blank char) + + nanList = ["", " ","NAN", "NA", "N/A", "-","?"] + binCatDict = {"":0, " ":0, "Text":0, "NA":0, "-":0,"NAN":0, "N/A":0,"?":0} + row = 0 + nanCnt = 0 + nonNumCnt = 0 + + while line: + line = line.strip("\n") + line = line.split('\t') + + row += 1 + + if firstLine: + lengthRow = len(line) + column_labels 
= line[1:] + else: + if lengthRow != len(line): + # print("\nERROR matrix row lengths unequal for row 0 and row "+str(row)+"\n" ) + sys.exit(-1) + + temp = [] +# column= 0 + row_labels.append(str(line[0])) + + #for item in line[1:]: use enumerate + for column, item in enumerate(line[1:],1): +# column += 1 + try: + temp.append(float(item)) + except ValueError: + temp.append(np.nan) + itemUC= item.upper() + + if itemUC in nanList: + nanCnt += 1 + binCatDict[itemUC]= binCatDict[itemUC]+1 + # print( 'Legit nans= ',str(item)) + else: + if nonNumCnt == 0: sys.stderr.write("Start List of up to first 50 Invalid cell values \n") + nonNumCnt +=1 + if nonNumCnt < 50: sys.stderr.write("At row_column= "+str(row)+"_"+str(column)+' invalid data cell value '+ item+"\n") + + matrix.append(temp) + + line = f.readline() + firstLine= False + + #sys.stdout.write("\n\n") + f.close() + binCatDict["Text"]= nonNumCnt + +# plot results of NAN counts above + + binCat = ["null", "blank", 'hyphen', '?','NA','N/A' ,'NAN', 'text'] + orderDict= {0:"", 1:"", 2:'-', 3:'?',4:'NA', 5:'N/A' ,6:'NAN', 7:'Text'} +#TODO verify dict orde for data + #print("> key value =",key, str(value)) + + if create_plot: + numBins = len(binCat) + binWidth = 1 + bins = [] + binData = [] + + for key in sorted(orderDict): + value= binCatDict[orderDict[key]] # place items on chart in order and with data value for item + if value < 1: + binData.append(value+0.01) + else: + binData.append(value) + + #""" + for j in range(numBins): + bins.append(j*binWidth) + #ttps://pythonspot.com/matplotlib-bar-chart/ + y_pos = np.arange(numBins) + plt.yticks(y_pos, binCat) + plt.title("Distribution of NAN types (UPPER & lower & MiXeD case combined)") + plt.ylabel('NAN Types') + plt.xlabel('Occurrences') + #plt.legend() + plt.barh(y_pos, binData, align='center', alpha=0.5) + + fig, ax = plt.subplots(num=1, figsize=(8,3)) + ax.set_title("Data Cell Counts of Not A Number (NAN) Types") + #ax.bar(center,bins, align='center', width=width) + 
#ax.bar(center, hist, align='center', width=width) +# ax.set_xticks(bins) + # fig.savefig("/Users/bobbrown/Desktop/Matrix-tools-Test-output/NAN-plot.png") + + # fig, ax = plt.subplots(num=1, figsize=(8,3)) + # fig.savefig("/Users/bobbrown/Desktop/Matrix-tools-Test-output/hist-out.png") + + plt.show() + #""" + +#after plot error? + x,y=np.shape(matrix) + if nanCnt > 0: print("WARNING -- Found "+str(nanCnt)+" Valid Non-numbers. Their percent of total matrix data cell values = "+str((100*nanCnt)/(x*y))+"% ") + if nonNumCnt > 0: sys.exit(-1) + #print ("reader output:") + #print (matrix) + #print (column_labels) + #print(row_labels) + return matrix,column_labels,row_labels + +#---------------------------------------------------------------------- +# Verify Matrix A column_labels match Matrix B row_labels in name and order for A*B +def MatchLabels(column_labels,row_labels): + + if len(column_labels) != len(row_labels): + sys.stderr.write("ERROR 1st matrix column count "+str(len(column_labels))+" not equal 2nd Matrix number row count "+str(len(row_labels))+"\n" ) + sys.exit(-11) + else: + cnt= 0 + for k in range(0,len(column_labels)): + if column_labels[k] != row_labels[k] and cnt < 20: + cnt += 1 + #sys.stderr.write("ERROR At column & row position "+str(k)+" Matrix 1 column value "+str(column_labels)+" not equal 2nd Matrix row value "+str(row_labels)+"\n" ) + + if cnt > 0: + sys.exit(-11) +#---------------------------------------------------------------------- +# restores row and column labels in ouput +def Labeler(matrix,column_labels,row_labels,output_file_txt): + #print("matrix length: " + str(len(matrix))) + #print("row labels length: " + str(len(row_labels))) + #print("col labels length: " +str(len(column_labels))) + #Define Null Sets For Col and Row Headers + with open(output_file_txt,'w') as f: + f.write("") + for k in range(0,len(column_labels)): + f.write('\t' + str(column_labels[k]) ) + f.write('\n') + #for i in range(0,len(row_labels)): + for i in range(0,len(matrix)): +
f.write(str(row_labels[i]) ) + #print("matrix["+str(i)+"] length:" + str(len(matrix[i]))) + for j in range(0,len(matrix[0])): + f.write('\t' + format(matrix[i][j])) + f.write('\n') + + +#---------------------------------------------------------------------- +if __name__ == '__main__': + input_file_txt = str(sys.argv[1]) + + matrix,column_labels,row_labels = reader(input_file_txt) + print("Done") + diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Validations.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Validations.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,180 @@ +''' +Created on Jun 7, 2017 modified Feb2018 + +@author: Bob Brown and cjacoby +''' + +import sys, traceback, argparse +import numpy as np +import os +from Matrix_Validate_import import reader, Labeler + +#Define The Four Arguments Used in the Program +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file_txt', help='tab delimited text file input matrix(include .txt in name)') + parser.add_argument('replacement', type=str, help='Choose Replacement for Missing Value. Valid Choices are strings: "Mean" or "Zero"') + parser.add_argument('axes', type=str, help='Choose Axes to Normalize On (Either "Row" or "Column"') + parser.add_argument('output_file_txt' ,help='tab delimited text file output name (include .txt in name)') + args = parser.parse_args() + return args + + +#Define Function to Replace Null Values with Row Mean +def nan_replacer_mean_rows(matrix): + + nonNumCnt= 0 + nanCnt = 0 #valid NANs are "NA","N/A","-","?" 
+ + #Loop Replacing all Null Values with Row Mean + for i in range(0,len(matrix)): + temp_mean = np.nanmean(matrix[i]) + for j in range(0,len(matrix[0])): + #if matrix[i][j] == "NA": #np.isnan(matrix[i][j]) == True: + if np.isnan(matrix[i][j]) == True: + matrix[i][j] = temp_mean + nanCnt = nanCnt + 1 + return matrix, nonNumCnt, nanCnt + +#Define Function to Replace Null Values with Column Mean +def nan_replacer_mean_columns(matrix): + + nonNumCnt= 0 + nanCnt = 0 #valid NANs are "NA","N/A","-","?" + + #Loop Replacing all Null Values with Column Mean + for i in range(0,len(matrix[0])): + col = [row[i] for row in matrix] + temp_mean = np.nanmean(col) + for j in range(0,len(matrix)): + #if matrix[i][j] == "NA": #elif np.isnan(matrix[j][i]) == True: + if np.isnan(matrix[j][i]) == True: + matrix[j][i] = temp_mean + nanCnt = nanCnt + 1 + + return matrix, nonNumCnt, nanCnt + +#Define Function to Replace Null Values with Zero (axis orientation is irrelevant) +def nan_replacer_zero(matrix): + + nonNumCnt= 0 + nanCnt = 0 #valid NANs are "NA","N/A","-","?" + + #Loop Replacing all Null Values with Row Range + for i in range(0,len(matrix)): + for j in range(0,len(matrix[0])): + #if matrix[i][j] =="NA": + if np.isnan(matrix[i][j]) == True: + matrix[i][j] = 0 + + return matrix, nonNumCnt, nanCnt + +#Define Function to Re-Label Output Matrix +#!!!! 
not needed no output matrix from Validate tool +def OLD_labeler(matrix, og_cols, og_rows, output_file_txt): + #Write Data to Specified Text File Output + with open(output_file_txt,'w') as f: + f.write("Use original input file for further processing\n") + f.close() +# f.write("") +# for k in range(0,len(og_cols)): +# f.write('\t' + str(og_cols[k])) +# f.write('\n') +# for i in range(0,len(og_rows)): +# f.write(og_rows[i]) +# for j in range(0,len(matrix[0])): +# f.write('\t' + format(matrix[i][j])) +# f.write('\n') + +#Main Function +def main(): + args = get_args() + #print(args) + #sys.stdout.write(str(args)) + #sys.stdout.write( '\nValid NAN identifiers are "NA","N/A","-", and "?"') + + matrix,og_cols,og_rows = reader(args.input_file_txt) + +# if nonNumCnt > 0: +# print('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) +# #sys.stderr.write('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) +# if nanCnt > 0: +# print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') +# sys.exit(-1) +# else: +# if nanCnt > 0: +# print('\nWARNING Matrix has NO unknown non-numbers in matrix, but contains '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') +# else: +# print('Matrix is Good-to-Go -- all numbers in data area. ') + + #with open(args.output_file_txt,'w') as f: + # f.write("Use original input file for further processing\n") + #f.close() + #sys.exit(0) + +# TODO !!!!! Below if MDA decides to use it TURNED OFF FOR NOW +# TODO !!!!! 
Below if MDA decides to use it TURNED OFF FOR NOW + + if args.replacement == "Mean": + if args.axes == "Row": + matrix, nonNumCnt, nanCnt = nan_replacer_mean_rows(matrix) + Labeler(matrix,og_cols,og_rows,args.output_file_txt) + #OLD_labeler(matrix, og_cols, og_rows, args.output_file_txt) + #print('Mean,Row') + if nonNumCnt > 0: + print('ERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + sys.stderr.write('ERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + if nanCnt > 0: + print('WARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + sys.exit(-1) + else: + if nanCnt > 0: + print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + else: + print('\nMatrix is Good-to-Go -- all numbers in matrix. ') + sys.exit(0) + elif args.axes == "Column": + matrix, nonNumCnt, nanCnt = nan_replacer_mean_columns(matrix) + Labeler(matrix,og_cols,og_rows,args.output_file_txt) + #OLD_labeler(matrix, og_cols, og_rows, args.output_file_txt) + #print('Mean,Column') + if nonNumCnt > 0: + print('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + sys.stderr.write('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. 
Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + if nanCnt > 0: + print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + sys.exit(-1) + else: + if nanCnt > 0: + print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + else: + print('\nMatrix is Good-to-Go -- all numbers in matrix. ') + sys.exit(0) + else: + print('Mean, but given Invalid Axis= '+str(args.axes)) + sys.stderr.write('Mean, but given Invalid Axis= '+str(args.axes)) + elif args.replacement == "Zero": + matrix, nonNumCnt, nanCnt = nan_replacer_zero(matrix) + Labeler(matrix,og_cols,og_rows,args.output_file_txt) + #OLD_labeler(matrix, og_cols, og_rows, args.output_file_txt) + if nonNumCnt > 0: + print('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + sys.stderr.write('\nERROR Matrix has non-numbers that are non-NAN identifiers in matrix. Total and percent unknown strings found = '+str(nonNumCnt)+ ', %.2f' % (100.0*nonNumCnt/(1.0*len(og_cols)*len(og_rows)))+'%' ) + if nanCnt > 0: + print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + sys.exit(-1) + else: + if nanCnt > 0: + print('\nWARNING Matrix has '+str(nanCnt)+' that is %.2f' % (100.0*nanCnt/(1.0*len(og_cols)*len(og_rows)))+'% known NAN identifiers') + else: + print('\nMatrix is Good-to-Go -- all numbers in matrix. 
') + sys.exit(0) + else: + print('zero, but given Invalid Axis= '+str(args.axes)) + sys.stderr.write('zero, but given Invalid Axis= '+str(args.axes)) + sys.exit(-2) + + +if __name__ == '__main__': + main() + print("done") diff -r 7f12c81e2083 -r f1bcd79cd923 Matrix_Validations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Validations.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,40 @@ + + + Locate and identify non-numbers + Matrix_Validations.py '$p_input' 'Zero' 'Row' '$output_file' + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Split_ExcelTabs_IntoFiles.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Split_ExcelTabs_IntoFiles.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,290 @@ +import sys +import os +#import MySQLdb +#import config +import subprocess +import re +import shutil +import traceback +#import xlsxwriter +import xlrd + +#http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/ + +def File_From_Tab(infileName, outfileName, tabName,tabNumber): + """ + Open and read an Excel file + """ + book = xlrd.open_workbook(infileName) + # print number of sheets + #print book.nsheets + + # print sheet names + tabList= book.sheet_names() + #print tabList + #print book.sheet_names() + if tabName == "" and (tabNumber <1 or tabNumber > len(tabList)): + sys.stderr.write("\n>>>ERROR illegal tab number "+str(tabNumber)+" input when no tab name was specified\n") + sys.stderr.write("\n>>>Allowed tab numbers, or tab names, for this file with "+str(len(tabList))+" total tabs are:") + + for i in range(len(tabList)): + sys.stderr.write("\n>>> tab number "+str(i+1)+" is named "+str(tabList[i])) + sys.exit(-1) + + if tabName != "": # use name instead of tab number + found = False + i = 0 + while (i < len(tabList)) and not found: + i += 1 + if tabName == str(tabList[i-1]): + tabNumber = i + found = True + if not found: + sys.stderr.write("\n>>> ERROR -- Input Tab name "+tabName+" was
not found\n") + sys.exit(-1) + # get the first worksheet + #first_sheet = book.sheet_by_index(0) + worksheet = book.sheet_by_index(tabNumber-1) + + outFile = open(outfileName+str(tabList[tabNumber-1]+".tsv"), 'w') + + #https://stackoverflow.com/questions/14944623/python-xrld-read-rows-and-columns + #workbook = xlrd.open_workbook('my_workbook.xls') + #worksheet = workbook.sheet_by_name('Sheet1') + num_rows = worksheet.nrows - 1 + num_cells = worksheet.ncols - 1 + curr_row = -1 + while curr_row < num_rows: + curr_row += 1 + row = worksheet.row(curr_row) + + if curr_row == 0: + endOfLine= False + allRowNumCols= len(row) + i= len(row)-1 + # find length of matrix and covariates using first row + # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank + while i <= len(row)-1 and not endOfLine: + cell_type = worksheet.cell_type(curr_row, i) + #temp = str(worksheet.cell_value(curr_row, i)) + #print( " pos and cell type row one ",cell_type, i) + + if cell_type == 0 or cell_type == 6: + allRowNumCols -= 1 + i -= 1 + else: + endOfLine= True + + if allRowNumCols < 5: + sys.stderr.write("\nERROR First row number of columns= "+str(allRowNumCols)+" is too short, so all rows will be ignored\n") + sys.exit(-1) + elif curr_row == 0: + sys.stdout.write("\nALL Rows must all have the same number of columns as the First row's number columns = "+ str(allRowNumCols) +"\n") + + temp= '' + rowLen= 0 + endOfLine= False + + while rowLen < allRowNumCols and not endOfLine: + temp += str(worksheet.cell_value(curr_row, rowLen))+"\t" + #temp += str(row[rowLen])+"\t" + rowLen += 1 + + temp = temp[:-1]+"\n" + #print 'Row:', curr_row, len(row), rowLen + outFile.write(temp) #TODO check if rows are all same length + + sys.stdout.write("File created with "+str(curr_row)+" rows and "+str(allRowNumCols)+" columns\n") +# curr_cell = -1 +# while curr_cell < num_cells: +# curr_cell += 1 +# # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank +# cell_type = 
worksheet.cell_type(curr_row, curr_cell) +# cell_value = worksheet.cell_value(curr_row, curr_cell) +# print ' ', cell_type, ':', cell_value + # # read a row +# print first_sheet.row_values(0) +# +# # read a cell +# cell = first_sheet.cell(0,0) +# print cell +# print cell.value +# +# # read a row slice +# print first_sheet.row_slice(rowx=0, +# start_colx=0, +# end_colx=2) + + return tabList + + +#====================== +# from RPPA callInSilicoReportWriter.py +# def write_xlsx_for_report(directory_for_reports, report_name, report_id, dict_cf2_values): +# +# +# error_write_xlsx = "" +# error_occurred = 0 +# +# try: +# path_to_dir_when_writing = os.path.join(directory_for_reports, report_name) +# header_path = os.path.join(directory_for_reports, report_name, "header.csv") +# raw_log_2_path = os.path.join(directory_for_reports, report_name, "RawLog2.csv") +# norm_linear_path = os.path.join(directory_for_reports, report_name, "NormLinear.csv") +# norm_log_2_path = os.path.join(directory_for_reports, report_name, "NormLog2.csv") +# norm_log_2_median_centered_path = os.path.join(directory_for_reports, report_name, "NormLog2_MedianCentered.csv") +# +# # put the cf2 values in the NormLinear file +# error_put_cf2_in_normLinear = write_new_normLinear_csv_file_with_cf2_values(path_to_dir_when_writing, norm_linear_path, dict_cf2_values) +# +# +# excel_workBook = xlsxwriter.Workbook(os.path.join(directory_for_reports, report_name,report_name + ".xlsx"), {'strings_to_numbers': True}) +# +# rawLog2_worksheet = excel_workBook.add_worksheet("RawLog2") +# error_rawLog2 = construct_worksheet_for_xlsx(rawLog2_worksheet, header_path, "RawLog2", raw_log_2_path) +# +# norm_linear_worksheet = excel_workBook.add_worksheet("NormLinear") +# error_norm_linear = construct_worksheet_for_xlsx(norm_linear_worksheet, header_path, "NormLinear", norm_linear_path) +# +# norm_log_2_worksheet = excel_workBook.add_worksheet("NormLog2") +# error_norm_log_2 = 
construct_worksheet_for_xlsx(norm_log_2_worksheet, header_path, "NormLog2", norm_log_2_path) +# +# norm_log_2_median_centered_worksheet = excel_workBook.add_worksheet("NormLog2_MedianCentered") +# error_norm_log_2_median_centered = construct_worksheet_for_xlsx(norm_log_2_median_centered_worksheet, header_path, "Median-Centered", norm_log_2_median_centered_path) +# +# errors_array = [error_put_cf2_in_normLinear, error_rawLog2, error_norm_linear, error_norm_log_2, error_norm_log_2_median_centered] +# for error in errors_array: +# if error != "": +# error_write_xlsx = error_write_xlsx + error +# error_occurred = 1 +# if error_occurred == 1: +# error_write_xlsx + "\nThe excel workbook for the report "+report_name+" was not written successfully.\n\n" +# +# excel_workBook.close() +# except Exception, e: +# error_occurred = 1 +# error_write_xlsx += str(repr(e)) + "\n\n" +# error_write_xlsx + "\nThe excel workbook for the report "+report_name+" was not written successfully.\n\n" +# try: +# excel_workBook.close() +# except Exception, f: +# sys.stderr.write("An unforeseen problem has occurred in write_xlsx_for_report()\n") +# sys.stderr.write(str(repr(f)) + "\n\n") +# +# +# return error_occurred, error_write_xlsx +# +# +# def write_new_normLinear_csv_file_with_cf2_values(path_to_dir, norm_linear_path, dict_cf2_values): +# errors = "" +# try: +# titles = {} +# new_lines_normLinear_with_cf2 = [] +# # read old norm linear file +# rf_normLinear = open(norm_linear_path, 'rU') +# line_num = 0 +# for line in rf_normLinear: +# line = strip_new_line_from_right_side(line) +# toks = line.split(",") +# line_num += 1 +# if line_num == 1: +# line += "1,CF2" +# new_lines_normLinear_with_cf2.append(line) +# titles = toks +# continue +# pos_rf = int(toks[titles.index('Order')]) +# line += "," + str(dict_cf2_values[pos_rf]) +# new_lines_normLinear_with_cf2.append(line) +# rf_normLinear.close() +# # rename the old normLinear file +# os.rename(norm_linear_path, os.path.join(path_to_dir, 
'before_cf2_NormLinear.csv')) +# +# # write new normLinear with cf2 +# wf_new_normLinear = open(norm_linear_path, 'w') +# for line_writing in new_lines_normLinear_with_cf2: +# wf_new_normLinear.write(line_writing + "\n") +# wf_new_normLinear.close() +# except Exception, err_write_normLinear_with_cf2_values: +# errors = str(repr(err_write_normLinear_with_cf2_values)) +# +# return errors +# +# +# # This function constructs the worksheet for each tab in the excel file for a report +# # It puts these things in this order: +# # 1. Title of the tab +# # 2. Header for the tab +# # 3. Content of the tab +# def construct_worksheet_for_xlsx(worksheet, header_path, title_top_of_tab, tab_input_path): +# +# reload(sys) +# sys.setdefaultencoding('utf8') +# errors = "" +# +# try: +# # Write the title at the top of the tab +# worksheet.write(0,0,title_top_of_tab) +# +# # Variable to keep track of the rows +# row_num = 1 +# +# # Write the header stuff +# header_file = open(header_path, 'rU') +# for head_line in header_file: +# head_line = strip_new_line_from_right_side(head_line) +# head_toks = head_line.split(",") +# col_num = 0 +# for tok in head_toks: +# worksheet.write(row_num, col_num, tok) +# col_num += 1 +# row_num += 1 +# +# # Write the content stuff +# tab_input_file = open(tab_input_path, 'rU') +# for tab_line in tab_input_file: +# tab_line = strip_new_line_from_right_side(tab_line) +# tab_toks = tab_line.split(",") +# col_num = 0 +# for tok in tab_toks: +# tok = tok.decode('iso-8859-1').encode('utf-8') +# worksheet.write(row_num, col_num, tok) +# col_num += 1 +# row_num += 1 +# +# header_file.close() +# tab_input_file.close() +# except Exception, e: +# errors = errors + "\n\nAn error occurred while constructing the "+title_top_of_tab+" tab for the excel file.\n" +# errors = errors + "The error was :\n\t" + str(e) + "\n\n" +# try: +# header_file.close() +# tab_input_file.close() +# except NameError: +# x = 5 +# + return errors + 
+#---------------------------------------------------------------------- +if __name__ == "__main__": + + #try: + if len(sys.argv) > 4: + infileName = '"'+sys.argv[1]+'"' + tabName = '"'+sys.argv[2]+'"' + tabNumber = 0 + if tabName == '': tabNumber = int(sys.argv[3]) + outfileName = '"'+sys.argv[4]+'"' #TODO Later multiple outputs one per tab + + sys.stdout.write( "\nInput parameters ",str(sys.argv[1:4]),"\n" ) + + #infileName = "/Users/bobbrown/Desktop/01_Gordon_Mills__Zhiyong_Ding.xlsx" + #outfileName= "/Users/bobbrown/Desktop/01_Gordon_Mills__Zhiyong_Ding-Tab-Out-" + #tabName ="NormLog2" + #tabName ="" + #tabNumber= 10 + + status= File_From_Tab(infileName, outfileName, tabName, tabNumber ) + #except + #sys.exit(-1) + + sys.exit(0) \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Split_ExcelTabs_IntoFiles.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Split_ExcelTabs_IntoFiles.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,35 @@ + + + into separate tab delimited files + Split_ExcelTabs_IntoFiles.py '$input' '' '$extra.tabnumber' '$output_file' + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 TestOutFile.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TestOutFile.txt Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,1 @@ +output from input= TestInFile \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Test_input_into_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Test_input_into_file.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +#Created on Jule 23, 2018 + +# @author: Bob Brown + +import sys +import os + +def main(): + + # Grab the inputs from the Galaxy xml interface and write to a file that is passed to the program + # Not each of the parameters as separate command line variables. 
+# ab_gene_name_for_header={} +# ab_rrid_for_header={} + dir= "/Users/bobbrown/Desktop/junk/" + accepted_extensions = ["csv", "tsv"] + filenames = [fn for fn in os.listdir(dir) if fn.split(".")[-1] in accepted_extensions] + for f in filenames: + print("filename= "+f) + os.remove(dir+f) + + sys.exit(0) + + ab_gene_name_for_header={'abc':'geneName'} + ab_rrid_for_header={'abc':'rrid123'} + line= 'abc,123\n' + + pos= line.find(",") + ABname= line[0:pos] + ABnewName= ABname+ "|"+ab_gene_name_for_header[ABname]+"|"+ab_rrid_for_header[ABname] + line= ABnewName+line[pos:] + line= line.replace(',','\t') + sys.exit(0) +# try: + print(' \n starting Test program read params from file stored in tools dir. Arguments=') + print(str(sys.argv[1:])+'\n') + + if False: + infileName = sys.argv[1] + # directory = sys.argv[2] + directory = '/Users/bobbrown/Desktop/' + outfileName = sys.argv[3] #use later + # outfile = sys.argv[6] + + #sys.stdout.write + + # ifile= open(directory+"/"+infileName,'rU') + ifile= open(directory+infileName,'rU') + ofile= open(directory+outfileName,'w') + # ofile= open('/Users/bobbrown/Desktop/TestOutFileVarParams.txt','w') + + cnt= 0 + # for param in range(2,len(sys.argv)): + # cnt +=1 + # ofile.write("param "+str(cnt)+"= "+param+"\n") + + + for param in ifile: + cnt +=1 + ofile.write("param "+str(cnt)+"= "+param) + + ifile.close() + + ofile.close() + + print('Fini -- rows read = '+str(cnt)+'\n') + +# except : +# print('Error>>> ') + + return +## +## + +if __name__ == '__main__': main() + #sys.exit(0) \ No newline at end of file diff -r 7f12c81e2083 -r f1bcd79cd923 Test_input_into_file.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Test_input_into_file.xml Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,61 @@ + + + Read xml params into a file + +Test-input-into-file.py $inputfile "$__tool_dir__" $outputfile + #for $op in $test_param1 + ${op.discrete_fields1.Text_Fields1} + #end for + "$EndofVarParam1" + "$Covariate_Type" + "$EndofVarParam2" + #for $op in 
$test_param3 + ${op.discrete_fields3.Text_Fields3} + #end for + "$EndofVarParam3" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 7f12c81e2083 -r f1bcd79cd923 bar_chart_plot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bar_chart_plot.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,140 @@ +#!/usr/bin/env python +""" +histogram_gnuplot.py <ylabel> <yrange_min> <yrange_max> <grath_file> +a generic histogram builder based on gnuplot backend + + data_file - tab delimited file with data + xtic_column - column containing labels for x ticks [integer, 0 means no ticks] + column_list - comma separated list of columns to plot + title - title for the entire histrogram + ylabel - y axis label + yrange_max - minimal value at the y axis (integer) + yrange_max - maximal value at the y_axis (integer) + to set yrange to autoscaling assign 0 to yrange_min and yrange_max + graph_file - file to write histogram image to + img_size - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.) 
+ + + This tool required gnuplot and gnuplot.py + +anton nekrutenko | anton@bx.psu.edu +""" + +import string +import sys +import tempfile + +import Gnuplot +import Gnuplot.funcutils + +assert sys.version_info[:2] >= (2, 4) + + +def stop_err(msg): + sys.stderr.write(msg) + sys.exit() + + +def main(tmpFileName): + skipped_lines_count = 0 + skipped_lines_index = [] + gf = open(tmpFileName, 'w') + + try: + in_file = open(sys.argv[1], 'r') + xtic = int(sys.argv[2]) + col_list = string.split(sys.argv[3], ",") + title = 'set title "' + sys.argv[4] + '"' + ylabel = 'set ylabel "' + sys.argv[5] + '"' + ymin = sys.argv[6] + ymax = sys.argv[7] + img_file = sys.argv[8] + img_size = sys.argv[9] + except: + stop_err("Check arguments\n") + + try: + int(col_list[0]) + except: + stop_err('You forgot to set columns for plotting\n') + + for i, line in enumerate(in_file): + valid = True + line = line.rstrip('\r\n') + if line and not line.startswith('#'): + row = [] + try: + fields = line.split('\t') + for col in col_list: + row.append(str(float(fields[int(col) - 1]))) + except: + valid = False + skipped_lines_count += 1 + skipped_lines_index.append(i) + else: + valid = False + skipped_lines_count += 1 + skipped_lines_index.append(i) + + if valid and xtic > 0: + row.append(fields[xtic - 1]) + elif valid and xtic == 0: + row.append(str(i)) + + if valid: + gf.write('\t'.join(row)) + gf.write('\n') + + if skipped_lines_count < i: + # Prepare 'using' clause of plot statement + g_plot_command = ' ' + + # Set the first column + if xtic > 0: + g_plot_command = "'%s' using 1:xticlabels(%s) ti 'Column %s', " % (tmpFileName, str(len(row)), col_list[0]) + else: + g_plot_command = "'%s' using 1 ti 'Column %s', " % (tmpFileName, col_list[0]) + + # Set subsequent columns + for i in range(1, len(col_list)): + g_plot_command += "'%s' using %s t 'Column %s', " % (tmpFileName, str(i + 1), col_list[i]) + + g_plot_command = g_plot_command.rstrip(', ') + + yrange = 'set yrange [' + ymin + ":" + ymax + ']' 
+ + try: + g = Gnuplot.Gnuplot() + g('reset') + g('set boxwidth 0.9 absolute') + g('set style fill solid 1.00 border -1') + g('set style histogram clustered gap 5 title offset character 0, 0, 0') + g('set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0') + g('set key invert reverse Left outside') + if xtic == 0: + g('unset xtics') + g(title) + g(ylabel) + g_term = 'set terminal png tiny size ' + img_size + g(g_term) + g_out = 'set output "' + img_file + '"' + if ymin != ymax: + g(yrange) + g(g_out) + g('set style data histograms') + g.plot(g_plot_command) + except: + stop_err("Gnuplot error: Data cannot be plotted") + else: + sys.stderr.write('Column(s) %s of your dataset do not contain valid numeric data' % sys.argv[3]) + + if skipped_lines_count > 0: + sys.stdout.write('\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d. These lines were skipped while building the graph.\n' % (skipped_lines_count, skipped_lines_index[0] + 1)) + + +if __name__ == "__main__": + # The tempfile initialization is here because while inside the main() it seems to create a condition + # when the file is removed before gnuplot has a chance of accessing it + gp_data_file = tempfile.NamedTemporaryFile('w') + Gnuplot.gp.GnuplotOpts.default_term = 'png' + main(gp_data_file.name)