view meancentermatrix.py @ 2:621af44576a2 draft

Uploaded
author insilico-bob
date Thu, 26 Jan 2017 12:14:01 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env python

# created by Robert E. Brown all rights reserved.	Use by signed written agreement only. 2014Jan03.
#=========================
#Mean center each row in Matrix v0 (subtract the row mean from every value in the row)
#========================================

# by Bob Brown 09 Feb 2016


import sys	 
import traceback 
#from scipy.stats.mstats import rankdata	 
#from scipy.stats import norm  
#from scipy import *	 
#from numpy
#import math	 ##v33 for the log function


###================ Galaxy mods below	v34a v0 ============
def _read_matrix(path, delimiter):
    """Read the text file at *path* and return it as a list of rows.

    Each row is the list of cell strings obtained by splitting one line on
    *delimiter*.  A trailing newline in the file yields a final row equal to
    [''], which callers treat as a blank line.
    """
    with open(path, 'r') as handle:
        text = handle.read()
    # Normalise line endings by hand instead of relying on the 'rU' open
    # mode, which newer Python 3 releases reject.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    return [line.split(delimiter) for line in text.split('\n')]


def _parse_values(row, numcols, rownum):
    """Convert cells 1..numcols-1 of *row* to floats.

    Cells that are not valid numbers are reported on stdout and returned as
    None so that they can be excluded from the row mean.  *rownum* is the
    1-based row number used in the report.
    """
    values = []
    for col in range(1, numcols):
        try:
            values.append(float(row[col]))
        except ValueError:
            sys.stdout.write('Illegal value at row, column -  %d %d\n'
                             % (rownum, col + 1))
            values.append(None)
    return values


def _center(values):
    """Return *values* minus their mean, formatted as '%.3f' strings.

    None entries (illegal cells) are left out of the mean and are replaced by
    the mean itself, i.e. they come out as '0.000' after centering.  A row
    without any valid value is centered around 0.
    """
    valid = [v for v in values if v is not None]
    mean = sum(valid) / len(valid) if valid else 0.0
    return ['%.3f' % ((mean if v is None else v) - mean) for v in values]


def main():
    """Mean center every data row of a tab-delimited matrix file.

    Command line: ``<script> inFile outFile``.

    The first line of inFile is a header and the first cell of every other
    line is a row label; both are copied unchanged.  For each data row the
    mean of its numeric cells is subtracted from every cell and the result is
    written with three decimals.

    NOTE(review): the comments in this file talk about the *column* mean, but
    the computation has always been per row; that behaviour is kept.

    Handling of irregular input:
      * non-numeric cells are reported, excluded from the mean and set to the
        row mean (so they are written as 0.000);
      * blank lines are ignored;
      * rows with fewer cells than the header are reported and not written;
      * cells beyond the header width are dropped.

    Any failure (missing arguments, unreadable file, ...) prints a message
    and the traceback, then exits with status -1.
    """
    # check the command line
    sys.stdout.write(' starting set invalid values to mean value. Arguments=')
    sys.stdout.write(str(sys.argv[1:]) + '\n')

    try:
        in_path = sys.argv[1]
        out_path = sys.argv[2]
        delimiter = '\t'

        matrix = _read_matrix(in_path, delimiter)
        numrows = len(matrix)
        header = matrix[0]
        # the header row defines the expected number of columns
        numcols = len(header)

        out_lines = []
        if header != ['']:
            out_lines.append(delimiter.join(header))

        centered = 0
        if numrows > 1 and numcols > 1:
            for index in range(1, numrows):
                row = matrix[index]
                if row == ['']:
                    # blank line, typically the one after the final newline
                    continue
                if len(row) < numcols:
                    sys.stdout.write('skip row %d not enough columns\n'
                                     % (index + 1))
                    continue
                values = _parse_values(row, numcols, index + 1)
                out_lines.append(delimiter.join([row[0]] + _center(values)))
                centered += 1
        else:
            sys.stdout.write('insufficient columns or rows  %d %d\n'
                             % (numcols, numrows))

        # The output file is only created once the input has been processed.
        with open(out_path, 'w') as fout:
            for line in out_lines:
                fout.write(line + '\n')
    except Exception as e:
        sys.stdout.write('Usage: python MeanCenter  failed %s\n' % e)
        sys.stdout.write(traceback.format_exc() + '\n')
        sys.exit(-1)

    sys.stdout.write('Success  %d  rows mean centered \n\n' % centered)
    return
##
##

# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()