annotate meancentermatrix.py @ 3:9c8c6c3ae359 draft default tip

Deleted selected files
author insilico-bob
date Thu, 26 Jan 2017 12:14:24 -0500
parents 621af44576a2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
1 #!/usr/bin/env python
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
2
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
3 # created by Robert E. Brown all rights reserved. Use by signed written agreement only. 2014Jan03.
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
4 #=========================
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
5 #Mean Center each column in Matrix v0
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
6 #========================================
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
7
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
8 # by Bob Brown 09 Feb 2016
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
9
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
10
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
11 import sys
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
12 import traceback
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
13 #from scipy.stats.mstats import rankdata
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
14 #from scipy.stats import norm
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
15 #from scipy import *
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
16 #from numpy
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
17 #import math ##v33 for the log function
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
18
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
19
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
20 ###================ Galaxy mods below v34a v0 ============
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
def _say(*parts):
    """Write ``parts`` to stdout on one line, separated by single spaces.

    Uses ``sys.stdout.write`` instead of ``print`` so the script behaves the
    same under Python 2 and Python 3.
    """
    sys.stdout.write(' '.join(str(part) for part in parts) + '\n')


def _read_matrix(path, delimiter):
    """Return the file at ``path`` as a list of rows of cell strings.

    Line endings are normalised by hand because the ``'U'`` open mode was
    removed in Python 3.11. A file that ends with a newline yields a final
    ``['']`` row, which the caller treats as a blank line.
    """
    with open(path, 'r') as fin:
        text = fin.read()
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _center_row(cells, numcols, row_number):
    """Return the first ``numcols`` cells of a row with its data mean centered.

    Column 0 is kept as the row label. Columns ``1 .. numcols-1`` are parsed
    as floats; the mean of the parseable values is subtracted from each and
    the result is formatted with three decimals. A cell that does not parse
    is reported and treated as if it held the row mean, so it is written as
    ``0.000`` and does not influence the mean.

    ``row_number`` is the 1-based line number used in the diagnostic.
    """
    values = []
    for col in range(1, numcols):
        try:
            values.append(float(cells[col]))
        except ValueError:
            _say('Illegal value at row, column - ', row_number, col + 1)
            values.append(None)

    valid = [value for value in values if value is not None]
    # With no usable number in the row there is nothing to center on.
    mean = sum(valid) / len(valid) if valid else 0.0
    centered = ['%.3f' % ((mean if value is None else value) - mean)
                for value in values]
    return [cells[0]] + centered


def main():
    """Mean center every data row of a tab-delimited matrix file.

    Command line: ``sys.argv[1]`` is the input path and ``sys.argv[2]`` the
    output path. The first line is the header and the first cell of each
    later line is the row label; both are copied unchanged. For each data
    row the mean of its numeric cells is subtracted from every cell.

    Blank lines are ignored. Rows with fewer cells than the header are
    reported and left out of the output. Rows with more cells than the
    header are truncated to the header width. When the header has fewer
    than two columns, or the file has no second line, nothing is centered
    and the rows are copied through.

    Any failure (missing arguments, unreadable input, unwritable output) is
    reported on stdout with a traceback and the process exits with status -1.
    """
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    try:
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        colDelimiter = '\t'

        matrix = _read_matrix(inFile, colDelimiter)
        header = matrix[0]
        numrows = len(matrix)
        numcols = len(header)   # the header row defines the expected width

        can_center = numrows > 1 and numcols > 1
        if not can_center:
            # The literal 'false' keeps this diagnostic line in its
            # historical layout (it used to print an always-false flag).
            _say('bad value or insufficient columns or rows ',
                 'false', numcols, numrows)

        out_rows = []
        if header != ['']:
            out_rows.append(header)

        centered = 0
        for index in range(1, numrows):
            cells = matrix[index]
            if cells == ['']:
                # Blank line, typically the artefact of a trailing newline.
                continue
            if len(cells) < numcols:
                _say('skip row', index + 1, 'not enough columns')
                continue
            if can_center:
                out_rows.append(_center_row(cells, numcols, index + 1))
                centered += 1
            else:
                out_rows.append(cells[:numcols])

        # The output is opened only after all processing succeeded, so a
        # failure above never leaves a half-written result behind.
        with open(outFile, 'w') as fout:
            for cells in out_rows:
                fout.write(colDelimiter.join(cells) + '\n')
    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        _say('Usage: python MeanCenter failed', e)
        sys.stdout.write(traceback.format_exc() + '\n')
        sys.exit(-1)

    _say('Success ', centered, ' rows mean centered \n')
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
131 ##
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
132 ##
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
133
621af44576a2 Uploaded
insilico-bob
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()