Mercurial > repos > insilico-bob > mean_center_matrix
view meancentermatrix.py @ 3:9c8c6c3ae359 draft default tip
Deleted selected files
author | insilico-bob |
---|---|
date | Thu, 26 Jan 2017 12:14:24 -0500 |
parents | 621af44576a2 |
children |
line wrap: on
line source
#!/usr/bin/env python
# created by Robert E. Brown all rights reserved.  Use by signed written agreement only. 2014Jan03.
#=========================
#SMean Center each columns in Matrix v0
#========================================
# by Bob Brown 09 Feb 2016
#
# NOTE: despite the title above, the mean that is subtracted is computed
# across the data cells of each ROW (every cell after the row label).

import math
import sys
import traceback
#from scipy.stats.mstats import rankdata
#from scipy.stats import norm
#from scipy import *
#from numpy
#import math  ##v33 for the log function

###================ Galaxyy mods below v34a v0 ============

# The matrix files handled by this tool are tab-delimited.
COL_DELIMITER = '\t'


def _read_rows(path, delimiter):
    """Return ``(line_number, cells)`` pairs for every non-blank line of *path*.

    ``line_number`` is the 1-based position of the line in the file, so that
    diagnostics still point at the right line when blank lines are dropped.
    """
    with open(path) as fin:
        # rstrip removes the newline plus any '\r' left by Windows endings.
        stripped = (line.rstrip('\r\n') for line in fin)
        return [(number, text.split(delimiter))
                for number, text in enumerate(stripped, 1) if text]


def _parse_cell(text):
    """Return *text* as a finite float, or None when it is not one."""
    try:
        value = float(text)
    except ValueError:
        return None
    # 'nan' / 'inf' parse successfully but would poison the row mean.
    if math.isnan(value) or math.isinf(value):
        return None
    return value


def _center_row(row, line_number, numcols):
    """Return *row* with its data cells mean-centered and formatted.

    The first cell (the row label) is kept unchanged.  Cells 1..numcols-1 are
    parsed as floats; the mean of the valid ones is subtracted from each cell
    and the result is rendered with three decimals.  A cell that is not a
    finite number is reported on stdout and treated as equal to the row mean,
    so it comes out as ``0.000``.  The caller guarantees ``len(row) >= numcols``;
    cells beyond *numcols* are dropped.
    """
    values = [_parse_cell(cell) for cell in row[1:numcols]]
    for offset, value in enumerate(values):
        if value is None:
            # offset 0 is the second column of the file, hence the +2.
            print('Illegal value at row, column -  %d %d'
                  % (line_number, offset + 2))
    valid = [value for value in values if value is not None]
    # A row with no usable number has no mean; 0.0 keeps the output at 0.000.
    mean = sum(valid) / len(valid) if valid else 0.0
    centered = ['%.3f' % ((mean if value is None else value) - mean)
                for value in values]
    return [row[0]] + centered


def main():
    """Mean-center every data row of a tab-delimited matrix file.

    Command line: ``sys.argv[1]`` is the input path, ``sys.argv[2]`` the
    output path.  The first line of the input is the header and defines the
    number of columns; the first cell of every line is a label.  Both are
    copied to the output unchanged.

    For each data row the mean of its numeric cells is subtracted from every
    cell and the result is written with three decimals.  Rows with fewer
    cells than the header are reported and left out of the output; blank
    lines are ignored.  When there is nothing to center (no data rows, or
    only a label column) a message is printed and the rows are copied as-is.

    Any failure (missing arguments, unreadable file, ...) prints a message and
    the traceback, then exits the process with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]

        rows = _read_rows(inFile, COL_DELIMITER)
        # use header row for correct number of columns
        header = rows[0][1] if rows else []
        numcols = len(header)

        out_rows = [header] if header else []
        if len(rows) > 1 and numcols > 1:
            for line_number, cells in rows[1:]:
                if len(cells) < numcols:
                    print('skip row %d not enough columns' % line_number)
                    continue
                out_rows.append(_center_row(cells, line_number, numcols))
        else:
            print('bad value or insufficient columns or rows  %d %d'
                  % (numcols, len(rows)))
            out_rows.extend(cells for _, cells in rows[1:])

        # Joining the cells writes every column exactly once, including the
        # last one.
        with open(outFile, 'w') as fout:
            for cells in out_rows:
                fout.write(COL_DELIMITER.join(cells) + '\n')

    except Exception as e:
        print('Usage: python MeanCenter failed %s' % (e,))
        print(traceback.format_exc())
        sys.exit(-1)


if __name__ == "__main__":
    main()