diff kmer_read_m3.py @ 3:c9d98f5bc240 draft

planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
author cstrittmatter
date Sun, 28 Apr 2019 00:42:24 -0400
parents 1434bc7b4786
children
line wrap: on
line diff
--- a/kmer_read_m3.py	Thu Apr 25 07:59:05 2019 -0400
+++ b/kmer_read_m3.py	Sun Apr 28 00:42:24 2019 -0400
@@ -3,7 +3,7 @@
 
 import re
 import sys
-#import numpy as np -- removed numpy requirement
+import numpy as np
 from subprocess import Popen, PIPE
 
 if __name__ == '__main__':
@@ -65,57 +65,49 @@
             if use == "1":
                 name_list.append(name)
                 factor_list.append(tested / hit / gensize)
-    #factor_arr = np.array(factor_list)
+    factor_arr = np.array(factor_list)
     num_targs = len(name_list)
+    print wdir
     process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])
 
     (stdout, stderr) = process.communicate()
 
     #read in output files
     noid_list = []
-    num_cols = 1 #only one sample, no matrix needed
-    #m = np.zeros((num_targs,num_cols))
-    m = [0.0 for _ in range(num_targs)]
+    read_ct = []
+    num_cols = 1
+    m = np.zeros((num_targs,num_cols))
     col = 0
     data_file = open(cname, 'r')
     data = ''.join(data_file.readlines())
     data_file.close()
     lines = data.split('\n')
-    read_ct = 0.0
+    read_ct.append(0.0)
     index = 0
     for line in lines:
         if len(line) > 1:
             t_s, count, uniq = line.split(',')
             target = int(t_s)
-            read_ct += float(count) 
+            read_ct[col] += float(count) 
             if target > 0:
                 if in_use[target]: 
-                    m[index] = float(count)
+                    m[index, col] = float(count)
                     index += 1
             else:
                 noid_list.append(int(count))
 
-    #b = m * factor_arr[:,None] #normalize each row by kmer coverage
-    #sums = np.sum(b, axis=0)
-    #b = b / sums[None,:]
-    #b = b * 100.0
-    sum = 0.0
-    b = []
-    for i in range(num_targs):
-        b1 = m[i] * factor_list[i]
-        sum += b1
-        b.append(b1)
-    sum /= 100.0
-    for i in range(num_targs):
-        b[i] /=sum
-    #rowmax = b.max(axis=1)
+    b = m * factor_arr[:,None] #normalize each row by kmer coverage
+    sums = np.sum(b, axis=0)
+    b = b / sums[None,:]
+    b = b * 100.0
+    rowmax = b.max(axis=1)
 
     out_file = open(oname, 'w')
     output = "taxid,reads,abundance\n"
     out_file.write(output)
     output = "total,"
-    #for i in range(num_cols):
-    output += str(read_ct) + ",,"
+    for i in range(num_cols):
+        output += str(read_ct[i]) + ",,"
     output += "\n"
     out_file.write(output)
     output = "no_id,"
@@ -125,10 +117,10 @@
     out_file.write(output)
     for i in range(num_targs):
         #l = order_row[i]
-        if m[i] > 0: #only output non-zero results
+        if rowmax[i] > 0.000: #only output non-zero results
             output = name_list[i]
             for j in range(num_cols):
-                output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i])
+                output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j])
             output += "\n"
             out_file.write(output)
     out_file.close()