diff meancentermatrix.py @ 2:621af44576a2 draft

Uploaded
author insilico-bob
date Thu, 26 Jan 2017 12:14:01 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/meancentermatrix.py	Thu Jan 26 12:14:01 2017 -0500
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+
+# created by Robert E. Brown all rights reserved.	Use by signed written agreement only. 2014Jan03.
+#=========================
+#Mean Center each column in Matrix v0
+#========================================
+
+# by Bob Brown 09 Feb 2016
+
+
+import sys	 
+import traceback 
+#from scipy.stats.mstats import rankdata	 
+#from scipy.stats import norm  
+#from scipy import *	 
+#from numpy
+#import math	 ##v33 for the log function
+
+
+###================ Galaxy mods below	v34a v0 ============
+def main():
+	# Mean-center a tab-delimited matrix: read sys.argv[1], subtract each row's mean from its numeric cells, write sys.argv[2].
+	# Row 0 and column 0 are never converted (left as text); any exception prints a message plus traceback and exits with -1.
+	# echo the command-line arguments to stdout
+	sys.stdout.write(' starting set invalid values to mean value. Arguments=')
+	sys.stdout.write(str(sys.argv[1:])+'\n')
+	#outFile = "/Users/bobbrown/Desktop/bobMatrixOut.txt"
+
+	try:
+		inFile				= sys.argv[1]
+		outFile				= sys.argv[2]
+		#	colDelimiter		 = sys.argv[3]		
+	
+		#print  "sys arguments=", sys.argv[1:]
+		colDelimiter  = '\t'
+		
+		Matrix = []
+		#infile = "/Users/bobbrown/Desktop/bobINmatrix.txt"
+		fin = open( inFile, 'rU')
+		indata = fin.read()
+	   # split the file into lines
+		#print 'fin', indata[1:]
+		a = indata[:].split('\n')	# a trailing newline in the file yields a final empty row
+		cnt = -1  # not referenced again in this function
+		for i in a:	 # for each row from matrix
+			tmp = i.replace('\n','') ##v22 fix /n
+			b = tmp.split(colDelimiter)
+			tmp2 = []
+			for j in b:
+				tmp2.append(j)
+			Matrix.append(tmp2)
+	
+	#find text in numeric cells and replace it with the placeholder 99999
+		badvalue = 'false'  # never set to 'true' below (that assignment is commented out)
+			#use header row for the expected number of columns
+		numrows = len(Matrix)
+		numcols = len(Matrix[0])
+		for rows in range(1, numrows ):
+			for cols in range(1,numcols):
+				if numcols <= len(Matrix[rows] ):  # only rows at least as wide as the header are converted
+					try:
+						Matrix[rows][cols] = float(Matrix[rows][cols])   # if non real number will cause error
+					except:
+						#sys.stdout.write('Illegal Value '+str(Matrix[i][cols])+ ' in row and column '+rows +cols +'\n' )
+						print('Illegal value at row, column - ', rows+1,cols+1 )
+						Matrix[rows][cols] = 99999   # placeholder number; NOTE(review): it is not replaced by a mean later and is included in the row sum below
+						#badvalue = 'true'
+	 			
+ 				else:  
+ 					if rows < numrows -1: print('skip row', rows+1, 'not enough columns')  # tested once per column, so the message repeats; the last row is exempt
+ 					#if i == numrows-1: numrows = numrows -1
+
+ 	
+ 	
+	#subtract each ROW's mean (taken over columns 1..numcols-1) from its cells and format the result to 3 decimals
+		if badvalue == 'false' and numrows >1 and numcols > 1: 
+			for rows in range(1, numrows ):
+# 				skip = 0
+ 				top  = 0
+# 				print (str(Matrix[1:][i]))
+# 				print (str(Matrix[i][1:]))
+				if len(Matrix[rows]) > 1:
+					for cols in range(1,numcols):
+						try:
+							top =top + float(Matrix[rows][cols])
+						except:
+							junk = 0  # no-op: a cell that is missing or not numeric is left out of the sum
+	 					
+					meanN  = top / (numcols -1)  # divisor is always the header width minus one, even if cells were left out above
+					
+	 				for cols in range(1,numcols):
+						try:
+							Matrix[rows][cols] = str('%.3f' % (float(Matrix[rows][cols]) -meanN))
+							#temp = Matrix[k][i]
+						except:
+							junk = 0  # no-op: a cell that is missing or not numeric is left unchanged
+	 					
+					#print('For row', rows+1,' the mean =', meanN )
+
+		else:
+			print( 'bad value or insufficient columns or rows ' , badvalue, numcols, numrows)
+				
+			# write the matrix to outFile, tab-delimited
+		fout = open( outFile, 'w') 
+	
+		for rows in range(0, numrows ):
+			#print('out', str(Matrix[i]))
+			if len(Matrix[rows])> 2:  # rows with 2 or fewer cells are not written
+				try:
+					for cols in range(0, numcols-1):
+						fout.write( str(str(Matrix[rows][cols])) +'\t' )
+					fout.write( str(str(Matrix[rows][cols]))+'\n' )  # NOTE(review): cols is still numcols-2 after the loop, so that column is written twice and the last column is never written; likely a bug
+				except: junk = 0  # a row shorter than the header stops here, partially written and without a newline
+			#if (rows < numrows -1 and len(Matrix[cols]) > 1): fout.write( '\n')
+			#fout.write( '\n')
+									
+		
+		fout.close()
+		fin.close()  # both files are closed only on the success path
+	except Exception as e:
+		 print( 'Usage: python MeanCenter  failed', e )
+		 print traceback.format_exc()  # Python 2 print statement
+		 sys.exit(-1)
+
+
+
+##		  
+	print 'Success ', numrows, ' rows mean centered \n'  # Python 2 print statement; numrows is len(Matrix), so it counts every parsed line including the header
+##
+	return
+##
+##
+
+if __name__ == "__main__": main()  # call main() only when this file is run as a script