Mercurial > repos > insilico-bob > mean_center_matrix
diff meancentermatrix.py @ 2:621af44576a2 draft
Uploaded
author | insilico-bob |
---|---|
date | Thu, 26 Jan 2017 12:14:01 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

# created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03.
#=========================
#Mean Center each columns in Matrix v0
#========================================
# NOTE(review): despite the title above, the code subtracts each ROW's mean
# (the mean of the data cells in that row), not a per-column mean. The
# row-wise behaviour is kept as-is; confirm which one the tool should do.

# by Bob Brown 09 Feb 2016


import sys
import traceback
#from scipy.stats.mstats import rankdata
#from scipy.stats import norm
#from scipy import *
#from numpy
#import math ##v33 for the log function


def _report(*parts):
    """Write *parts* to stdout separated by spaces, followed by a newline.

    Stands in for ``print`` so the script behaves the same under Python 2
    and Python 3.
    """
    sys.stdout.write(' '.join(str(part) for part in parts) + '\n')


def _read_matrix(path, delimiter):
    """Return the text file at *path* as a list of rows of cell strings.

    Newlines are normalized by hand because the 'U' open mode is not
    available on every Python version. A file that ends with a newline
    yields a final blank row ``['']``.
    """
    with open(path, 'r') as handle:
        text = handle.read()
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _is_complete(row, numcols):
    """Return True when *row* is non-blank and has at least *numcols* cells."""
    return len(row) >= numcols and row != ['']


def _center_row(row, numcols, rownum):
    """Mean center the data cells ``row[1:numcols]`` in place.

    Cells that are not valid numbers are reported, left out of the mean and
    replaced by the mean itself, which is ``0.000`` after centering. Every
    data cell ends up as a string formatted with three decimals.

    *rownum* is the 1-based row number used in the report message.
    """
    values = []
    for col in range(1, numcols):
        try:
            values.append(float(row[col]))
        except ValueError:
            _report('Illegal value at row, column - ', rownum, col + 1)
            values.append(None)

    valid = [value for value in values if value is not None]
    mean = sum(valid) / len(valid) if valid else 0.0

    row[1:numcols] = [
        '%.3f' % (0.0 if value is None else value - mean) for value in values
    ]


def _write_matrix(path, matrix, numcols, delimiter):
    """Write every complete row of *matrix* to *path*, one line per row.

    Only the first *numcols* cells of a row are written; rows that are blank
    or shorter than the header are omitted.
    """
    with open(path, 'w') as handle:
        for row in matrix:
            if _is_complete(row, numcols):
                cells = [str(cell) for cell in row[:numcols]]
                handle.write(delimiter.join(cells) + '\n')


###================ Galaxyy mods below v34a v0 ============
def main():
    """Mean center a tab-delimited matrix file.

    Command line: ``sys.argv[1]`` is the input file and ``sys.argv[2]`` the
    output file. The first row and the first column are treated as labels
    and copied unchanged; the header row fixes the expected column count.
    For every other complete row the mean of its data cells is subtracted
    from each data cell.

    Any failure (including missing arguments) prints a message plus the
    traceback and exits with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    centered = 0
    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        colDelimiter = '\t'

        matrix = _read_matrix(inFile, colDelimiter)
        numrows = len(matrix)
        # use header row for correct number of columns
        numcols = len(matrix[0])

        if numrows > 1 and numcols > 1:
            for index in range(1, numrows):
                row = matrix[index]
                if _is_complete(row, numcols):
                    _center_row(row, numcols, index + 1)
                    centered += 1
                elif row != ['']:
                    # blank lines (e.g. after a trailing newline) are
                    # dropped silently; other short rows are reported once
                    _report('skip row', index + 1, 'not enough columns')
        else:
            _report('bad value or insufficient columns or rows ',
                    'false', numcols, numrows)

        _write_matrix(outFile, matrix, numcols, colDelimiter)
    except Exception as e:
        _report('Usage: python MeanCenter failed', e)
        _report(traceback.format_exc())
        sys.exit(-1)

    _report('Success ', centered, ' rows mean centered \n')
    return


if __name__ == "__main__": main()