annotate Matrix_Transformations.py @ 1:f1bcd79cd923 draft default tip

Uploaded
author insilico-bob
date Tue, 27 Nov 2018 14:20:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
1 '''
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
2 Created on Jun 6, 2017 updated Feb 2018
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
3
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
4 @author: cjacoby and Bob Brown
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
5 '''
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
6 import os
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
7 import sys, traceback, argparse
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
8 import numpy as np
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
9 from numpy import size, array
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
10 import warnings
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
11 from Matrix_Validate_import import reader
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
12 #import scipy.stats as ss
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
13 warnings.filterwarnings('error')
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
14
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
15 #Define argparse Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def get_args(argv=None):
    """Parse the command-line arguments of the matrix transformation tool.

    All six arguments are positional and therefore required, including the
    scale and offset values (main() converts both to float unconditionally).

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from sys.argv[1:].

    Returns:
        argparse.Namespace with the string attributes input_file_txt,
        choice, axes, scalevalue, offsetvalue and output_file_txt.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file_txt', help='text file input matrix(include .txt in name)')
    # The help text lists the exact strings main() dispatches on.
    parser.add_argument('choice', type=str,
                        help='Transformation to apply: z_score_normalization, '
                             'mean_center_normalization, median_center_normalization, '
                             'add_offset, scale, transpose, ln_normalization (log2), '
                             'log_normalization (log10), rank or divide_by_sum')
    parser.add_argument('axes', type=str, help='Choose Axis to normalize On (Row or Column)')
    parser.add_argument('scalevalue', help='numeric scaling factor for matrix (always required; used by scale)')
    parser.add_argument('offsetvalue', help='numeric offset for matrix (always required; used by add_offset and the log choices)')
    parser.add_argument('output_file_txt', help='text file output matrix(include .txt in name)')
    return parser.parse_args(argv)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
26
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
27
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Zscore_row(matrix):
    """Z-score normalize every row of ``matrix`` in place.

    Each value becomes (value - row mean) / row sample standard deviation
    (ddof=1); NaN entries are ignored when computing both statistics.

    A row whose standard deviation is zero or undefined (all present values
    equal, or fewer than two present values) is only mean-centered, i.e. it
    is divided by 1 instead of 0. A row holding nothing but NaN is left
    untouched.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.

    Returns:
        The same ``matrix`` object, modified in place.
    """
    for row in matrix:
        observed = np.asarray(row, dtype=float)
        present = observed[~np.isnan(observed)]
        if present.size == 0:
            # Only missing values: there is nothing to normalize.
            continue

        row_mean = np.nanmean(observed)
        if present.size < 2 or present.min() == present.max():
            # Zero / undefined spread: avoid 0/0, which would be a fatal
            # RuntimeWarning under the module's warnings filter.
            row_stdev = 1.0
        else:
            row_stdev = np.nanstd(observed, ddof=1)

        for j, value in enumerate(row):
            row[j] = (value - row_mean) / row_stdev
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
37
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
38 #Define Z-Score normalization Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Zscore_col(matrix):
    """Z-score normalize every column of ``matrix`` in place.

    Each value becomes (value - column mean) / column sample standard
    deviation (ddof=1); NaN entries are ignored when computing both
    statistics.

    A column whose standard deviation is zero or undefined (all present
    values equal, or fewer than two present values) is only mean-centered,
    i.e. it is divided by 1 instead of 0. A column holding nothing but NaN
    is left untouched. An empty matrix is returned unchanged.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.

    Returns:
        The same ``matrix`` object, modified in place.
    """
    if len(matrix) == 0:
        return matrix

    for col in range(len(matrix[0])):
        observed = np.asarray([row[col] for row in matrix], dtype=float)
        present = observed[~np.isnan(observed)]
        if present.size == 0:
            # Only missing values: there is nothing to normalize.
            continue

        col_mean = np.nanmean(observed)
        if present.size < 2 or present.min() == present.max():
            # Zero / undefined spread: avoid 0/0, which would be a fatal
            # RuntimeWarning under the module's warnings filter.
            col_stdev = 1.0
        else:
            col_stdev = np.nanstd(observed, ddof=1)

        for row in matrix:
            row[col] = (row[col] - col_mean) / col_stdev
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
50
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
51
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
52 #Define Mean Centered or Median centered normalization Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def MeanMedianCenter_row(matrix,type):
    """Subtract each row's mean or median from that row, in place.

    The centre is computed over the whole row (NaN entries ignored), the
    same way the column variant uses the whole column. The previous version
    skipped the first value of the row when computing the centre but still
    subtracted it from every value.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.
        type: "mean" to centre on the row mean; any other value centres on
            the row median. (The name shadows the builtin but is kept for
            interface compatibility.)

    Returns:
        The same ``matrix`` object, modified in place.
    """
    center_of = np.nanmean if type == "mean" else np.nanmedian

    for row in matrix:
        center = center_of(row)
        for j, value in enumerate(row):
            row[j] = value - center
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
66
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
67
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
68 #Define mean or median
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def MeanMedianCenter_col(matrix,type):
    """Subtract each column's mean or median from that column, in place.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.
        type: "mean" to centre on the column mean; any other value centres
            on the column median. NaN entries are ignored by both.

    Returns:
        The same ``matrix`` object, modified in place.
    """
    for col in range(len(matrix[0])):
        # Snapshot the column before it is overwritten below.
        column = [line[col] for line in matrix]
        center = np.nanmean(column) if type == "mean" else np.nanmedian(column)
        for line in matrix:
            line[col] = line[col] - center
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
81
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
82 #Divide by sum of the Row Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Divide_By_Sum_row(matrix):
    """Divide every value by the sum of its row, in place.

    A row whose sum is not greater than 0.0001 in absolute value is left
    unchanged and an error line is printed for it.

    Args:
        matrix: 2-D numeric matrix (np.shape must yield rows, columns).

    Returns:
        The same ``matrix`` object, modified in place.
    """
    row_count, col_count = np.shape(matrix)

    for r in range(row_count):
        current = matrix[r]
        total = sum(current[:])

        # Written as "not >" so a NaN sum also takes the error path.
        if not abs(total) > .0001:
            print("ERROR Cannot divide by Sum almost zero", str(total), " for Row ",str(r+1))
            continue

        for c in range(col_count):
            current[c] = current[c]/total
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
98
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
99
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
100 #Divide by sum of the Column Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Divide_By_Sum_col(matrix):
    """Divide every value by the sum of its column, in place.

    The previous version initialised the column sum to 0 and never
    accumulated it, so no column was ever divided. The sum is now computed
    from the column's values.

    A column whose sum is not greater than 0.0001 in absolute value is left
    unchanged and an error line is printed for it.

    Args:
        matrix: 2-D numeric matrix (np.shape must yield rows, columns).

    Returns:
        The same ``matrix`` object, modified in place.
    """
    numRow,numCol= np.shape(matrix)

    for col in range(numCol):
        sumValue = sum(row[col] for row in matrix)

        # Same near-zero guard as the row variant.
        if abs(sumValue) > .0001:
            for row in matrix:
                row[col] = row[col]/sumValue
        else:
            print("ERROR Cannot divide by Sum almost zero", str(sumValue), " for Column ",str(col+1))
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
116
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
117 #scale or add offset to matrix by row
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def ScaleOffset_row(matrix,scaleValue,offset):
    """Replace every value x with scaleValue*x + offset, row by row, in place.

    Nothing is changed (and a message is printed) when the absolute scale
    factor is not greater than 0.0001.

    Args:
        matrix: 2-D sequence of numeric rows supporting slice assignment.
        scaleValue: multiplicative factor.
        offset: additive term applied after scaling.

    Returns:
        The same ``matrix`` object.
    """
    # Written as "not >" so a NaN scale factor also takes the message path.
    if not abs(scaleValue) > 0.0001:
        print (" Scale facter "+str(scaleValue)+" too small")
        return matrix

    for row in matrix:
        row[:] = [scaleValue*value+offset for value in row]
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
127
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
128 #scale or add offset to matrix by column
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def ScaleOffset_col(matrix,scaleValue,offset):
    """Replace every value x with scaleValue*x + offset, column by column, in place.

    The previous version assigned into ``matrix[:][i]``. For a list of lists
    that writes into a temporary copy and leaves the matrix unchanged, and it
    indexed rows with a column counter, failing when there are more columns
    than rows. Each cell is now updated directly.

    Nothing is changed (and a message is printed) when the absolute scale
    factor is not greater than 0.0001.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.
        scaleValue: multiplicative factor.
        offset: additive term applied after scaling.

    Returns:
        The same ``matrix`` object.
    """
    if abs(scaleValue) > 0.0001:
        # Guard the matrix[0] lookup so an empty matrix is simply returned.
        if len(matrix) > 0:
            for col in range(len(matrix[0])):
                for row in matrix:
                    row[col] = scaleValue*row[col]+offset
    else:
        print (" Scale facter "+str(scaleValue)+" too small")
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
138
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
139 #Define Log2 normalization Method
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Convert2Logs(matrix,logValue, offset):
    """Replace every value x with log2(x + offset) or log10(x + offset), in place.

    The conversion is all-or-nothing: if any value plus ``offset`` is <= 0,
    a failure message is printed and the matrix is returned unchanged. The
    previous version stopped at the first bad value and returned a
    half-converted matrix, and it detected bad values by switching the
    process-wide warnings filter to 'error' on every call.

    NaN values pass the check and stay NaN.

    Args:
        matrix: 2-D sequence of numeric rows supporting item assignment.
        logValue: "log2" selects base 2; any other value selects base 10.
        offset: number added to each value before taking the logarithm.

    Returns:
        The same ``matrix`` object.
    """
    log_fn = np.log2 if logValue == "log2" else np.log10

    # Validate first so a failure never leaves the matrix partly converted.
    for row in matrix:
        for value in row:
            if value + offset <= 0:
                print(logValue+" normalization Failed: Encountered elements <= 0, which are invalid inputs for a Log normalization")
                return matrix

    for row in matrix:
        for j, value in enumerate(row):
            row[j] = log_fn(value + offset)
    return matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
160
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
161 #transpose matrix
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def Transpose(in_mat):
    """Return the transpose of ``in_mat`` as a new list of lists.

    Args:
        in_mat: 2-D matrix (np.shape must yield rows, columns).

    Returns:
        A new list with one inner list per input column; the input is not
        modified.
    """
    row_count, col_count = np.shape(in_mat)
    return [[in_mat[r][c] for r in range(row_count)] for c in range(col_count)]
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
173
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
174 # restores row and column labels in ouput
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def labeler(matrix,og_cols,og_rows,output_file_txt):
    """Write ``matrix`` with its column and row labels as tab-separated text.

    The first line holds the column labels, each preceded by a tab (so the
    top-left cell is empty). Every following line holds one row label and
    then that row's values, each preceded by a tab.

    Args:
        matrix: 2-D matrix of values; the width is taken from matrix[0].
        og_cols: column labels for the header line.
        og_rows: row labels, one per output row.
        output_file_txt: path of the file to create or overwrite.
    """
    with open(output_file_txt,'w') as out:
        header_cells = ['\t' + str(label) for label in og_cols]
        out.write(''.join(header_cells))
        out.write('\n')

        for i, row_label in enumerate(og_rows):
            out.write(str(row_label))
            for j in range(len(matrix[0])):
                out.write('\t' + format(matrix[i][j]))
            out.write('\n')
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
187
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
188 #Define Main Function
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
def _rank_values(values):
    """Return the 1-based ranks of ``values`` as a list of floats.

    Tied values share the average of the ranks they span. NaN entries are
    not ranked and stay NaN in the result.
    """
    data = np.asarray(values, dtype=float)
    ranks = np.full(data.shape, np.nan)
    valid = ~np.isnan(data)
    present = data[valid]
    if present.size:
        order = np.argsort(present, kind="mergesort")
        # For each distinct value: where its run starts in sorted order and
        # how long the run is.
        _, first, counts = np.unique(present[order], return_index=True, return_counts=True)
        # A run occupies 1-based positions first+1 .. first+counts.
        run_rank = np.repeat(first + (counts + 1) / 2.0, counts)
        placed = np.empty(present.size)
        placed[order] = run_rank
        ranks[valid] = placed
    return ranks.tolist()


def _rank_rows(matrix):
    """Return a new matrix in which every row is replaced by its ranks."""
    return [_rank_values(row) for row in matrix]


def _rank_columns(matrix):
    """Return a new matrix in which every column is replaced by its ranks."""
    ranked_columns = [_rank_values(column) for column in zip(*matrix)]
    return [list(row) for row in zip(*ranked_columns)]


#Define Main Function
def main():
    """Read the input matrix, apply the requested transformation, write the result.

    The transformation is selected by ``args.choice`` and, where relevant,
    ``args.axes`` ("Row" or "Column"). On success the labelled matrix is
    written to ``args.output_file_txt`` and a one-line summary is printed.
    An unknown choice or axis only prints a message; no file is written.

    The "rank" choice previously called Rankdata_ByRow / Rankdata_ByColumn,
    which are not defined anywhere in this file; it now uses the private
    ranking helpers defined above.

    Any exception prints a traceback and exits with status 1.
    """
    try:
        args = get_args()
        scaleValue = float(args.scalevalue)
        offsetValue= float(args.offsetvalue)

        matrix,og_cols,og_rows = reader(args.input_file_txt)

        # Choices that ignore the axis: choice -> (transform, summary).
        axis_free = {
            "ln_normalization": (
                lambda m: Convert2Logs(m,"log2",offsetValue),
                "log2 plus "+str(offsetValue)+" normalization for all values"),
            "log_normalization": (
                lambda m: Convert2Logs(m,"log10",offsetValue),
                "log10 normalization for all values"),
        }

        # Axis-dependent choices:
        # choice -> ({axis: (transform, summary)}, invalid-axis message).
        per_axis = {
            "z_score_normalization": (
                {"Row": (Zscore_row, "zcore, row"),
                 "Column": (Zscore_col, "zscore, column")},
                "zscore, invalid axis"),
            "mean_center_normalization": (
                {"Row": (lambda m: MeanMedianCenter_row(m,"mean"), "mean-center by row"),
                 "Column": (lambda m: MeanMedianCenter_col(m,"mean"), "mean-center by column")},
                "meancenter, invalid axis"),
            "median_center_normalization": (
                {"Row": (lambda m: MeanMedianCenter_row(m,"median"), "median-center by row"),
                 "Column": (lambda m: MeanMedianCenter_col(m,"median"), "median-center by column")},
                "meancenter, invalid axis"),
            "add_offset": (
                {"Row": (lambda m: ScaleOffset_row(m,1.0,offsetValue),
                         "offset of "+str(offsetValue)+" by row"),
                 "Column": (lambda m: ScaleOffset_col(m,1.0,offsetValue),
                            "offset of "+str(offsetValue)+" by column")},
                "offset"+str(offsetValue)+" invalid axis -not row or column"),
            "scale": (
                {"Row": (lambda m: ScaleOffset_row(m,scaleValue,0.0),
                         "scaling "+str(scaleValue)+" by row"),
                 "Column": (lambda m: ScaleOffset_col(m,scaleValue,0.0),
                            "scaling "+str(scaleValue)+" by column")},
                "scaling "+str(scaleValue)+" invalid axis"),
            "rank": (
                {"Row": (_rank_rows, "performed rank normalization by row"),
                 "Column": (_rank_columns, "performed rank normalization by column")},
                "rank, invalid axis"),
            "divide_by_sum": (
                {"Row": (Divide_By_Sum_row, "performed divide row N values by row N's sum"),
                 "Column": (Divide_By_Sum_col, "performed divide column N values by column N's sum")},
                "divide_by_sum, invalid axis"),
        }

        if args.choice == "transpose":
            matrix = Transpose(matrix)
            # Row and column labels trade places along with the data.
            labeler(matrix,og_rows,og_cols,args.output_file_txt)
            print("transpose mxn matrix to nxm size")
        elif args.choice in axis_free:
            transform, summary = axis_free[args.choice]
            matrix = transform(matrix)
            labeler(matrix,og_cols,og_rows,args.output_file_txt)
            print(summary)
        elif args.choice in per_axis:
            handlers, invalid_axis_message = per_axis[args.choice]
            if args.axes in handlers:
                transform, summary = handlers[args.axes]
                matrix = transform(matrix)
                labeler(matrix,og_cols,og_rows,args.output_file_txt)
                print(summary)
            else:
                print(invalid_axis_message)
        else:
            print("Invalid normalization Choice")

    except Exception:
        traceback.print_exc()
        sys.exit(1)
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
297
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
298
f1bcd79cd923 Uploaded
insilico-bob
parents:
diff changeset
# Run the tool only when the file is executed as a script, not on import.
# NOTE(review): the listing this was recovered from lost its indentation;
# print("Done") is assumed to belong inside the guard - confirm.
if __name__ == '__main__':
    main()
    print("Done")