Mercurial > repos > cstrittmatter > mitokmer
comparison kmer_read_m3.py @ 1:1434bc7b4786 draft
planemo upload commit 03463f4b0598df5619def5230de3fb758b4090ba-dirty
author | cstrittmatter |
---|---|
date | Mon, 22 Apr 2019 09:37:19 -0400 |
parents | 1472b4f4fbfe |
children | c9d98f5bc240 |
comparison
equal
deleted
inserted
replaced
0:1472b4f4fbfe | 1:1434bc7b4786 |
---|---|
1 #Reads fastq filenames, runs kmerread, and generates report | 1 #Reads fastq filenames, runs kmerread, and generates report |
2 #MKM 1/1/2019 | 2 #MKM 1/1/2019 |
3 | 3 |
4 import re | 4 import re |
5 import sys | 5 import sys |
6 import numpy as np | 6 #import numpy as np -- removed numpy requirement |
7 from subprocess import Popen, PIPE | 7 from subprocess import Popen, PIPE |
8 | 8 |
9 if __name__ == '__main__': | 9 if __name__ == '__main__': |
10 if len(sys.argv) > 2: | 10 if len(sys.argv) > 2: |
11 item = sys.argv[1].strip() | 11 item = sys.argv[1].strip() |
63 use = "0" | 63 use = "0" |
64 in_use.append(int(use)) | 64 in_use.append(int(use)) |
65 if use == "1": | 65 if use == "1": |
66 name_list.append(name) | 66 name_list.append(name) |
67 factor_list.append(tested / hit / gensize) | 67 factor_list.append(tested / hit / gensize) |
68 factor_arr = np.array(factor_list) | 68 #factor_arr = np.array(factor_list) |
69 num_targs = len(name_list) | 69 num_targs = len(name_list) |
70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) | 70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) |
71 | 71 |
72 (stdout, stderr) = process.communicate() | 72 (stdout, stderr) = process.communicate() |
73 | 73 |
74 #read in output files | 74 #read in output files |
75 noid_list = [] | 75 noid_list = [] |
76 read_ct = [] | 76 num_cols = 1 #only one sample, no matrix needed |
77 num_cols = 1 | 77 #m = np.zeros((num_targs,num_cols)) |
78 m = np.zeros((num_targs,num_cols)) | 78 m = [0.0 for _ in range(num_targs)] |
79 col = 0 | 79 col = 0 |
80 data_file = open(cname, 'r') | 80 data_file = open(cname, 'r') |
81 data = ''.join(data_file.readlines()) | 81 data = ''.join(data_file.readlines()) |
82 data_file.close() | 82 data_file.close() |
83 lines = data.split('\n') | 83 lines = data.split('\n') |
84 read_ct.append(0.0) | 84 read_ct = 0.0 |
85 index = 0 | 85 index = 0 |
86 for line in lines: | 86 for line in lines: |
87 if len(line) > 1: | 87 if len(line) > 1: |
88 t_s, count, uniq = line.split(',') | 88 t_s, count, uniq = line.split(',') |
89 target = int(t_s) | 89 target = int(t_s) |
90 read_ct[col] += float(count) | 90 read_ct += float(count) |
91 if target > 0: | 91 if target > 0: |
92 if in_use[target]: | 92 if in_use[target]: |
93 m[index, col] = float(count) | 93 m[index] = float(count) |
94 index += 1 | 94 index += 1 |
95 else: | 95 else: |
96 noid_list.append(int(count)) | 96 noid_list.append(int(count)) |
97 | 97 |
98 b = m * factor_arr[:,None] #normalize each row by kmer coverage | 98 #b = m * factor_arr[:,None] #normalize each row by kmer coverage |
99 sums = np.sum(b, axis=0) | 99 #sums = np.sum(b, axis=0) |
100 b = b / sums[None,:] | 100 #b = b / sums[None,:] |
101 b = b * 100.0 | 101 #b = b * 100.0 |
102 rowmax = b.max(axis=1) | 102 sum = 0.0 |
103 b = [] | |
104 for i in range(num_targs): | |
105 b1 = m[i] * factor_list[i] | |
106 sum += b1 | |
107 b.append(b1) | |
108 sum /= 100.0 | |
109 for i in range(num_targs): | |
110 b[i] /=sum | |
111 #rowmax = b.max(axis=1) | |
103 | 112 |
104 out_file = open(oname, 'w') | 113 out_file = open(oname, 'w') |
105 output = "taxid,reads,abundance\n" | 114 output = "taxid,reads,abundance\n" |
106 out_file.write(output) | 115 out_file.write(output) |
107 output = "total," | 116 output = "total," |
108 for i in range(num_cols): | 117 #for i in range(num_cols): |
109 output += str(read_ct[i]) + ",," | 118 output += str(read_ct) + ",," |
110 output += "\n" | 119 output += "\n" |
111 out_file.write(output) | 120 out_file.write(output) |
112 output = "no_id," | 121 output = "no_id," |
113 for i in range(num_cols): | 122 for i in range(num_cols): |
114 output += str(noid_list[i]) + ",," | 123 output += str(noid_list[i]) + ",," |
115 output += "\n" | 124 output += "\n" |
116 out_file.write(output) | 125 out_file.write(output) |
117 for i in range(num_targs): | 126 for i in range(num_targs): |
118 #l = order_row[i] | 127 #l = order_row[i] |
119 if rowmax[i] > 0.000: #only output non-zero results | 128 if m[i] > 0: #only output non-zero results |
120 output = name_list[i] | 129 output = name_list[i] |
121 for j in range(num_cols): | 130 for j in range(num_cols): |
122 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) | 131 output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) |
123 output += "\n" | 132 output += "\n" |
124 out_file.write(output) | 133 out_file.write(output) |
125 out_file.close() | 134 out_file.close() |
126 | 135 |
127 | 136 |