Mercurial > repos > insilico-bob > mean_center_matrix
comparison meancentermatrix.py @ 2:621af44576a2 draft
Uploaded
author | insilico-bob |
---|---|
date | Thu, 26 Jan 2017 12:14:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:e7e80f706eb6 | 2:621af44576a2 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03. | |
4 #========================= | |
5 #SMean Center each columns in Matrix v0 | |
6 #======================================== | |
7 | |
8 # by Bob Brown 09 Feb 2016 | |
9 | |
10 | |
11 import sys | |
12 import traceback | |
13 #from scipy.stats.mstats import rankdata | |
14 #from scipy.stats import norm | |
15 #from scipy import * | |
16 #from numpy | |
17 #import math ##v33 for the log function | |
18 | |
19 | |
20 ###================ Galaxy mods below v34a v0 ============ | |
def _read_matrix(path, delimiter):
    """Load a delimited text file as a list of rows of string cells.

    A file that ends with a newline yields a final row equal to [''],
    exactly as a plain split on the newline character would.
    """
    with open(path, 'r') as handle:
        text = handle.read()
    # Normalise line endings by hand: the old 'rU' open mode is gone in
    # Python 3.11+, and plain 'r' does not translate them on Python 2.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _center_row(cells, row_number):
    """Return the cells of one data row centred on the row mean.

    cells      -- the data cells of the row as strings (label column excluded)
    row_number -- 1-based row number, used only in diagnostics

    Cells that do not parse as a float are reported, left out of the mean,
    and treated as if they held the row mean, so they come out as '0.000'.
    Every returned cell is a string formatted with '%.3f'.
    """
    values = []
    for offset, cell in enumerate(cells):
        try:
            values.append(float(cell))
        except ValueError:
            # offset 0 is the second column of the file, hence the "+ 2"
            # to report a 1-based column number.
            print('Illegal value at row, column - %d %d'
                  % (row_number, offset + 2))
            values.append(None)

    valid = [v for v in values if v is not None]
    mean = sum(valid) / len(valid) if valid else 0.0
    return ['%.3f' % (0.0 if v is None else v - mean) for v in values]


def main():
    """Mean-centre the numeric cells of a tab-delimited matrix file.

    Command line: <script> IN_FILE OUT_FILE

    The first row and the first column of IN_FILE are treated as labels and
    copied through unchanged.  Every other cell of a row has that row's mean
    subtracted and is written with three decimals.
    NOTE(review): the comment at the top of the file speaks of centring
    columns, but the arithmetic here has always been row-wise; that
    behaviour is kept -- confirm which one is intended.

    Rows with fewer cells than the header are reported and left out of the
    output; cells beyond the header width are ignored.  Non-numeric cells
    are replaced by the row mean (so they are written as 0.000).

    On any failure (missing arguments, unreadable file, ...) a message and
    the traceback are printed and the process exits with status -1.
    """
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    centered = 0
    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        colDelimiter = '\t'

        matrix = _read_matrix(inFile, colDelimiter)
        header = matrix[0]
        numrows = len(matrix)
        numcols = len(header)  # the header row defines the matrix width

        out_rows = []
        if numcols > 1:
            out_rows.append(header)
            for index in range(1, numrows):
                row = matrix[index]
                if len(row) < numcols:
                    # The blank "row" produced by the file's trailing newline
                    # is expected; every other short row is worth reporting.
                    if index < numrows - 1 or row != ['']:
                        print('skip row %d not enough columns' % (index + 1))
                    continue
                data = _center_row(row[1:numcols], index + 1)
                out_rows.append([row[0]] + data)
                centered += 1

        if numrows <= 1 or numcols <= 1:
            print('bad value or insufficient columns or rows  %d %d'
                  % (numcols, numrows))

        with open(outFile, 'w') as fout:
            for row in out_rows:
                fout.write('\t'.join(row) + '\n')
    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print('Usage: python MeanCenter failed %s' % (e,))
        print(traceback.format_exc())
        sys.exit(-1)

    print('Success  %d  rows mean centered \n' % centered)
    return
131 ## | |
132 ## | |
133 | |
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()