comparison meancentermatrix.py @ 2:621af44576a2 draft

Uploaded
author insilico-bob
date Thu, 26 Jan 2017 12:14:01 -0500
parents
children
comparison
equal deleted inserted replaced
1:e7e80f706eb6 2:621af44576a2
1 #!/usr/bin/env python
2
3 # created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03.
4 #=========================
5 #Mean Center each column in Matrix v0
6 #========================================
7
8 # by Bob Brown 09 Feb 2016
9
10
11 import sys
12 import traceback
13 #from scipy.stats.mstats import rankdata
14 #from scipy.stats import norm
15 #from scipy import *
16 #from numpy
17 #import math ##v33 for the log function
18
19
20 ###================ Galaxy mods below v34a v0 ============
def _read_matrix(path, delimiter):
    """Return the rows of the text file *path*, each split on *delimiter*.

    Line endings are normalised by hand instead of relying on the 'rU'
    open mode, which was removed in Python 3.11.
    """
    with open(path, 'r') as handle:
        text = handle.read()
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _parse_cell(text):
    """Return *text* as a finite float, or None when it is not one."""
    try:
        value = float(text)
    except ValueError:
        return None
    # NaN and +/-inf parse successfully but would poison the row mean;
    # for both of them ``value - value`` evaluates to NaN, never 0.
    if value - value != 0.0:
        return None
    return value


def _center_row(cells):
    """Mean-center one row of text cells.

    Returns a pair ``(centered, invalid)``: ``centered`` holds every cell
    formatted with '%.3f' after subtracting the mean of the valid cells,
    and ``invalid`` lists the 0-based positions that were not finite
    numbers. Invalid cells take the row mean, i.e. '0.000' once centered.
    """
    values = [_parse_cell(cell) for cell in cells]
    valid = [value for value in values if value is not None]
    mean = sum(valid) / len(valid) if valid else 0.0
    centered = ['%.3f' % (0.0 if value is None else value - mean)
                for value in values]
    invalid = [pos for pos, value in enumerate(values) if value is None]
    return centered, invalid


def main():
    """Mean-center the numeric cells of a tab-delimited matrix file.

    Command line: ``sys.argv[1]`` is the input path and ``sys.argv[2]``
    the output path. The first row and the first column are treated as
    labels and copied through unchanged. For every other row the mean of
    its numeric cells is subtracted from each cell and the result is
    written with three decimals.

    NOTE(review): the file header comments speak of centering *columns*,
    but the code has always subtracted the per-row mean; that row-wise
    behaviour is preserved here - confirm which one is intended.

    Handling of irregular input:
      * cells that are not finite numbers are reported, left out of the
        mean, and written as the mean itself (0.000 after centering);
      * blank lines are dropped silently;
      * rows with fewer cells than the header row are reported once and
        left out of the output;
      * cells beyond the header width are ignored.

    On any error a message and traceback are printed and the process
    exits with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    try:
        in_path = sys.argv[1]
        out_path = sys.argv[2]
        delimiter = '\t'

        matrix = _read_matrix(in_path, delimiter)
        # The header row defines how many columns every row must have.
        numrows = len(matrix)
        numcols = len(matrix[0])

        centered_rows = 0
        out_lines = []
        for index, row in enumerate(matrix):
            if row == ['']:
                # Blank line, typically the one after the final newline.
                continue
            if len(row) < numcols:
                print('skip row %d not enough columns' % (index + 1))
                continue
            cells = row[:numcols]
            if index > 0 and numcols > 1:
                centered, invalid = _center_row(cells[1:])
                for pos in invalid:
                    # +2: one for the label column, one for 1-based output.
                    print('Illegal value at row, column - %d %d'
                          % (index + 1, pos + 2))
                cells = cells[:1] + centered
                centered_rows += 1
            out_lines.append(delimiter.join(cells) + '\n')

        if centered_rows == 0:
            print('insufficient columns or rows, nothing to center: '
                  'numcols=%d numrows=%d' % (numcols, numrows))

        with open(out_path, 'w') as fout:
            fout.writelines(out_lines)
    except Exception as e:
        print('Usage: python MeanCenter failed %s' % e)
        print(traceback.format_exc())
        sys.exit(-1)

    print('Success %d rows mean centered' % centered_rows)
    return
131 ##
132 ##
133
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()