Mercurial > repos > insilico-bob > mean_center_matrix
comparison meancentermatrix.py @ 2:621af44576a2 draft
Uploaded
| author | insilico-bob |
|---|---|
| date | Thu, 26 Jan 2017 12:14:01 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:e7e80f706eb6 | 2:621af44576a2 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 # created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03. | |
| 4 #========================= | |
| 5 # Mean-center the Matrix (each row is centered on its own mean) v0 | |
| 6 #======================================== | |
| 7 | |
| 8 # by Bob Brown 09 Feb 2016 | |
| 9 | |
| 10 | |
| 11 import sys | |
| 12 import traceback | |
| 13 #from scipy.stats.mstats import rankdata | |
| 14 #from scipy.stats import norm | |
| 15 #from scipy import * | |
| 16 #from numpy | |
| 17 #import math ##v33 for the log function | |
| 18 | |
| 19 | |
| 20 ###================ Galaxy mods below v34a v0 ============ | |
def _read_matrix(path, delimiter):
    """Read *path* and return one list of cell strings per line of text."""
    with open(path, 'r') as handle:
        text = handle.read()
    # Normalise newlines by hand instead of relying on the 'rU' open mode,
    # which Python 3.11 rejects; this also behaves the same on Python 2.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _center_row(cells):
    """Mean-center one row of cell strings.

    Returns ``(centered, bad)`` where ``centered`` holds one '%.3f' string per
    input cell and ``bad`` holds the positions of cells that are not numbers.
    Non-numeric cells are left out of the mean and come back as '0.000',
    i.e. they are treated as if they had been equal to the row mean.
    """
    values = []
    bad = []
    for position, cell in enumerate(cells):
        try:
            values.append(float(cell))
        except ValueError:
            values.append(None)
            bad.append(position)

    valid = [value for value in values if value is not None]
    mean = sum(valid) / len(valid) if valid else 0.0
    centered = [
        '%.3f' % ((value - mean) if value is not None else 0.0)
        for value in values
    ]
    return centered, bad


def main():
    """Mean-center every data row of a tab-delimited matrix file.

    Command line: ``sys.argv[1]`` is the input path, ``sys.argv[2]`` the
    output path.  The first line of the input is a header and the first cell
    of every later line is a row label; both are copied through unchanged.
    For each data row the mean of its numeric cells is subtracted from every
    cell and the result is written with three decimals.

    Rows with fewer cells than the header are dropped (blank lines silently,
    others with a message); cells beyond the header width are ignored.
    Non-numeric cells are reported, excluded from the mean and written as
    '0.000'.

    On any failure (missing arguments, unreadable input, unwritable output)
    the traceback is printed and the process exits with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    centered_count = 0
    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        colDelimiter = '\t'

        matrix = _read_matrix(inFile, colDelimiter)
        # The header row defines how many columns every data row must have.
        numrows = len(matrix)
        numcols = len(matrix[0])

        if numrows <= 1 or numcols <= 1:
            print('insufficient columns or rows: %d columns, %d rows'
                  % (numcols, numrows))

        out_lines = []
        if numcols > 1:
            out_lines.append(colDelimiter.join(matrix[0]))
            for row_idx in range(1, numrows):
                row = matrix[row_idx]
                if len(row) < numcols:
                    # A lone empty cell is a blank line (typically the one
                    # after the final newline); drop it without a message.
                    if row != ['']:
                        print('skip row %d not enough columns' % (row_idx + 1))
                    continue

                centered, bad = _center_row(row[1:numcols])
                for position in bad:
                    # +2: one for the label column, one for 1-based numbering.
                    print('Illegal value at row, column - %d %d'
                          % (row_idx + 1, position + 2))
                out_lines.append(colDelimiter.join([row[0]] + centered))
                centered_count += 1

        # Open the output only once all rows are ready, so a failure while
        # reading or centering never leaves a half-written file behind.
        with open(outFile, 'w') as fout:
            fout.write(''.join(line + '\n' for line in out_lines))
    except Exception as e:
        print('Usage: python MeanCenter failed %s' % e)
        print(traceback.format_exc())
        sys.exit(-1)

    print('Success %d rows mean centered' % centered_count)
    return
| 131 ## | |
| 132 ## | |
| 133 | |
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
