comparison kmer_read_m3.py @ 3:c9d98f5bc240 draft

planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
author cstrittmatter
date Sun, 28 Apr 2019 00:42:24 -0400
parents 1434bc7b4786
children
comparison
equal deleted inserted replaced
2:a0852bb4b09b 3:c9d98f5bc240
1 #Reads fastq filenames, runs kmerread, and generates report 1 #Reads fastq filenames, runs kmerread, and generates report
2 #MKM 1/1/2019 2 #MKM 1/1/2019
3 3
4 import re 4 import re
5 import sys 5 import sys
6 #import numpy as np -- removed numpy requirement 6 import numpy as np
7 from subprocess import Popen, PIPE 7 from subprocess import Popen, PIPE
8 8
9 if __name__ == '__main__': 9 if __name__ == '__main__':
10 if len(sys.argv) > 2: 10 if len(sys.argv) > 2:
11 item = sys.argv[1].strip() 11 item = sys.argv[1].strip()
63 use = "0" 63 use = "0"
64 in_use.append(int(use)) 64 in_use.append(int(use))
65 if use == "1": 65 if use == "1":
66 name_list.append(name) 66 name_list.append(name)
67 factor_list.append(tested / hit / gensize) 67 factor_list.append(tested / hit / gensize)
68 #factor_arr = np.array(factor_list) 68 factor_arr = np.array(factor_list)
69 num_targs = len(name_list) 69 num_targs = len(name_list)
70 print wdir
70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) 71 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])
71 72
72 (stdout, stderr) = process.communicate() 73 (stdout, stderr) = process.communicate()
73 74
74 #read in output files 75 #read in output files
75 noid_list = [] 76 noid_list = []
76 num_cols = 1 #only one sample, no matrix needed 77 read_ct = []
77 #m = np.zeros((num_targs,num_cols)) 78 num_cols = 1
78 m = [0.0 for _ in range(num_targs)] 79 m = np.zeros((num_targs,num_cols))
79 col = 0 80 col = 0
80 data_file = open(cname, 'r') 81 data_file = open(cname, 'r')
81 data = ''.join(data_file.readlines()) 82 data = ''.join(data_file.readlines())
82 data_file.close() 83 data_file.close()
83 lines = data.split('\n') 84 lines = data.split('\n')
84 read_ct = 0.0 85 read_ct.append(0.0)
85 index = 0 86 index = 0
86 for line in lines: 87 for line in lines:
87 if len(line) > 1: 88 if len(line) > 1:
88 t_s, count, uniq = line.split(',') 89 t_s, count, uniq = line.split(',')
89 target = int(t_s) 90 target = int(t_s)
90 read_ct += float(count) 91 read_ct[col] += float(count)
91 if target > 0: 92 if target > 0:
92 if in_use[target]: 93 if in_use[target]:
93 m[index] = float(count) 94 m[index, col] = float(count)
94 index += 1 95 index += 1
95 else: 96 else:
96 noid_list.append(int(count)) 97 noid_list.append(int(count))
97 98
98 #b = m * factor_arr[:,None] #normalize each row by kmer coverage 99 b = m * factor_arr[:,None] #normalize each row by kmer coverage
99 #sums = np.sum(b, axis=0) 100 sums = np.sum(b, axis=0)
100 #b = b / sums[None,:] 101 b = b / sums[None,:]
101 #b = b * 100.0 102 b = b * 100.0
102 sum = 0.0 103 rowmax = b.max(axis=1)
103 b = []
104 for i in range(num_targs):
105 b1 = m[i] * factor_list[i]
106 sum += b1
107 b.append(b1)
108 sum /= 100.0
109 for i in range(num_targs):
110 b[i] /=sum
111 #rowmax = b.max(axis=1)
112 104
113 out_file = open(oname, 'w') 105 out_file = open(oname, 'w')
114 output = "taxid,reads,abundance\n" 106 output = "taxid,reads,abundance\n"
115 out_file.write(output) 107 out_file.write(output)
116 output = "total," 108 output = "total,"
117 #for i in range(num_cols): 109 for i in range(num_cols):
118 output += str(read_ct) + ",," 110 output += str(read_ct[i]) + ",,"
119 output += "\n" 111 output += "\n"
120 out_file.write(output) 112 out_file.write(output)
121 output = "no_id," 113 output = "no_id,"
122 for i in range(num_cols): 114 for i in range(num_cols):
123 output += str(noid_list[i]) + ",," 115 output += str(noid_list[i]) + ",,"
124 output += "\n" 116 output += "\n"
125 out_file.write(output) 117 out_file.write(output)
126 for i in range(num_targs): 118 for i in range(num_targs):
127 #l = order_row[i] 119 #l = order_row[i]
128 if m[i] > 0: #only output non-zero results 120 if rowmax[i] > 0.000: #only output non-zero results
129 output = name_list[i] 121 output = name_list[i]
130 for j in range(num_cols): 122 for j in range(num_cols):
131 output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) 123 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j])
132 output += "\n" 124 output += "\n"
133 out_file.write(output) 125 out_file.write(output)
134 out_file.close() 126 out_file.close()
135 127
136 128