Mercurial > repos > cstrittmatter > mitokmer
diff kmer_read_m3.py @ 1:1434bc7b4786 draft
planemo upload commit 03463f4b0598df5619def5230de3fb758b4090ba-dirty
author | cstrittmatter |
---|---|
date | Mon, 22 Apr 2019 09:37:19 -0400 |
parents | 1472b4f4fbfe |
children | c9d98f5bc240 |
line wrap: on
line diff
--- a/kmer_read_m3.py Fri Apr 12 14:58:18 2019 -0400
+++ b/kmer_read_m3.py Mon Apr 22 09:37:19 2019 -0400
@@ -3,7 +3,7 @@
 
 import re
 import sys
-import numpy as np
+#import numpy as np -- removed numpy requirement
 from subprocess import Popen, PIPE
 
 if __name__ == '__main__':
@@ -65,7 +65,7 @@
             if use == "1":
                 name_list.append(name)
                 factor_list.append(tested / hit / gensize)
-    factor_arr = np.array(factor_list)
+    #factor_arr = np.array(factor_list)
     num_targs = len(name_list)
 
     process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])
@@ -73,40 +73,49 @@
 
     #read in output files
     noid_list = []
-    read_ct = []
-    num_cols = 1
-    m = np.zeros((num_targs,num_cols))
+    num_cols = 1 #only one sample, no matrix needed
+    #m = np.zeros((num_targs,num_cols))
+    m = [0.0 for _ in range(num_targs)]
     col = 0
     data_file = open(cname, 'r')
     data = ''.join(data_file.readlines())
     data_file.close()
     lines = data.split('\n')
-    read_ct.append(0.0)
+    read_ct = 0.0
     index = 0
     for line in lines:
         if len(line) > 1:
             t_s, count, uniq = line.split(',')
             target = int(t_s)
-            read_ct[col] += float(count)
+            read_ct += float(count)
             if target > 0:
                 if in_use[target]:
-                    m[index, col] = float(count)
+                    m[index] = float(count)
                     index += 1
             else:
                 noid_list.append(int(count))
 
-    b = m * factor_arr[:,None] #normalize each row by kmer coverage
-    sums = np.sum(b, axis=0)
-    b = b / sums[None,:]
-    b = b * 100.0
-    rowmax = b.max(axis=1)
+    #b = m * factor_arr[:,None] #normalize each row by kmer coverage
+    #sums = np.sum(b, axis=0)
+    #b = b / sums[None,:]
+    #b = b * 100.0
+    sum = 0.0
+    b = []
+    for i in range(num_targs):
+        b1 = m[i] * factor_list[i]
+        sum += b1
+        b.append(b1)
+    sum /= 100.0
+    for i in range(num_targs):
+        b[i] /=sum
+    #rowmax = b.max(axis=1)
 
     out_file = open(oname, 'w')
     output = "taxid,reads,abundance\n"
     out_file.write(output)
     output = "total,"
-    for i in range(num_cols):
-        output += str(read_ct[i]) + ",,"
+    #for i in range(num_cols):
+    output += str(read_ct) + ",,"
     output += "\n"
     out_file.write(output)
     output = "no_id,"
@@ -116,10 +125,10 @@
     out_file.write(output)
     for i in range(num_targs):
         #l = order_row[i]
-        if rowmax[i] > 0.000: #only output non-zero results
+        if m[i] > 0: #only output non-zero results
             output = name_list[i]
             for j in range(num_cols):
-                output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j])
+                output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i])
             output += "\n"
             out_file.write(output)
     out_file.close()