2
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03.
|
|
4 #=========================
|
|
5 # Mean-center each row in Matrix v0
|
|
6 #========================================
|
|
7
|
|
8 # by Bob Brown 09 Feb 2016
|
|
9
|
|
10
|
|
11 import sys
|
|
12 import traceback
|
|
13 #from scipy.stats.mstats import rankdata
|
|
14 #from scipy.stats import norm
|
|
15 #from scipy import *
|
|
16 #from numpy
|
|
17 #import math ##v33 for the log function
|
|
18
|
|
19
|
|
20 ###================ Galaxy mods below v34a v0 ============
|
|
def main():
    """Mean-center every data row of a tab-delimited matrix file.

    The input path is taken from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  Row 0 and column 0 are treated as labels and copied
    through unchanged.  For every other row, the mean of its data cells
    (columns 1 .. numcols-1, where numcols is the width of row 0) is
    subtracted from each cell and the result is written with three
    decimal places.

    Rows with fewer fields than row 0 are reported and left out of the
    output.  Rows with two or fewer fields are never written, so a matrix
    with a single data column produces an empty output file.

    On any failure (missing arguments, unreadable file, ...) the error and
    a traceback are printed and the process exits with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        colDelimiter = '\t'

        Matrix = _read_matrix(inFile, colDelimiter)

        # Kept only because it is echoed in the diagnostic below; nothing
        # ever sets it to 'true'.
        badvalue = 'false'

        # use header row for correct number of columns
        numrows = len(Matrix)
        numcols = len(Matrix[0])

        _coerce_cells(Matrix, numcols)

        if badvalue == 'false' and numrows > 1 and numcols > 1:
            _center_rows(Matrix, numcols)
        else:
            print('bad value or insufficient columns or rows ', badvalue, numcols, numrows)

        _write_matrix(outFile, Matrix, numcols)
    except Exception as e:
        # Top-level boundary: report everything, then signal failure to the caller.
        print('Usage: python MeanCenter failed', e)
        print(traceback.format_exc())
        sys.exit(-1)

    print('Success ', numrows, ' rows mean centered \n')
    return


# Stand-in stored for cells that cannot be parsed as a number.
# NOTE(review): this value takes part in the row mean like any other number,
# so a single illegal cell skews the whole row.  The original comments say it
# should later be replaced by the mean, but no such step exists - confirm the
# intended handling before changing it.
_BAD_VALUE_PLACEHOLDER = 99999


def _read_matrix(path, delimiter):
    """Return the file at *path* as a list of rows, each a list of string fields."""
    # Plain 'r' already gives universal-newline translation on Python 3;
    # the old 'rU' flag was removed in Python 3.11.
    with open(path, 'r') as fin:
        indata = fin.read()
    # split('\n') keeps a trailing empty row when the file ends with a newline;
    # that row is counted in numrows but never centered or written.
    return [line.split(delimiter) for line in indata.split('\n')]


def _coerce_cells(matrix, numcols):
    """Convert the data cells of every full-width data row to float, in place."""
    numrows = len(matrix)
    for r in range(1, numrows):
        row = matrix[r]
        if len(row) < numcols:
            # The last row is usually the empty remainder after the final
            # newline, so it is skipped silently.
            if r < numrows - 1:
                print('skip row', r + 1, 'not enough columns')
            continue
        for c in range(1, numcols):
            try:
                row[c] = float(row[c])  # non-numeric text raises ValueError
            except ValueError:
                print('Illegal value at row, column - ', r + 1, c + 1)
                row[c] = _BAD_VALUE_PLACEHOLDER


def _center_rows(matrix, numcols):
    """Replace each data cell with its '%.3f'-formatted offset from the row mean."""
    for row in matrix[1:]:
        if len(row) < numcols:
            continue  # short rows were never converted and are not written
        total = 0
        for c in range(1, numcols):
            total = total + float(row[c])
        meanN = total / (numcols - 1)
        for c in range(1, numcols):
            row[c] = '%.3f' % (float(row[c]) - meanN)


def _write_matrix(path, matrix, numcols):
    """Write every full-width row with more than two fields to *path*, tab-delimited."""
    with open(path, 'w') as fout:
        for row in matrix:
            if len(row) > 2 and len(row) >= numcols:
                # Join all numcols cells so the last column is emitted exactly once.
                fout.write('\t'.join(str(cell) for cell in row[:numcols]) + '\n')
|
|
131 ##
|
|
132 ##
|
|
133
|
|
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|