Mercurial > repos > cstrittmatter > mitokmer
comparison kmer_read_m3.py @ 3:c9d98f5bc240 draft
planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
author | cstrittmatter |
---|---|
date | Sun, 28 Apr 2019 00:42:24 -0400 |
parents | 1434bc7b4786 |
children |
comparison
equal
deleted
inserted
replaced
2:a0852bb4b09b | 3:c9d98f5bc240 |
---|---|
1 #Reads fastq filenames, runs kmerread, and generates report | 1 #Reads fastq filenames, runs kmerread, and generates report |
2 #MKM 1/1/2019 | 2 #MKM 1/1/2019 |
3 | 3 |
4 import re | 4 import re |
5 import sys | 5 import sys |
6 #import numpy as np -- removed numpy requirement | 6 import numpy as np |
7 from subprocess import Popen, PIPE | 7 from subprocess import Popen, PIPE |
8 | 8 |
9 if __name__ == '__main__': | 9 if __name__ == '__main__': |
10 if len(sys.argv) > 2: | 10 if len(sys.argv) > 2: |
11 item = sys.argv[1].strip() | 11 item = sys.argv[1].strip() |
63 use = "0" | 63 use = "0" |
64 in_use.append(int(use)) | 64 in_use.append(int(use)) |
65 if use == "1": | 65 if use == "1": |
66 name_list.append(name) | 66 name_list.append(name) |
67 factor_list.append(tested / hit / gensize) | 67 factor_list.append(tested / hit / gensize) |
68 #factor_arr = np.array(factor_list) | 68 factor_arr = np.array(factor_list) |
69 num_targs = len(name_list) | 69 num_targs = len(name_list) |
70 print wdir | |
70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) | 71 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) |
71 | 72 |
72 (stdout, stderr) = process.communicate() | 73 (stdout, stderr) = process.communicate() |
73 | 74 |
74 #read in output files | 75 #read in output files |
75 noid_list = [] | 76 noid_list = [] |
76 num_cols = 1 #only one sample, no matrix needed | 77 read_ct = [] |
77 #m = np.zeros((num_targs,num_cols)) | 78 num_cols = 1 |
78 m = [0.0 for _ in range(num_targs)] | 79 m = np.zeros((num_targs,num_cols)) |
79 col = 0 | 80 col = 0 |
80 data_file = open(cname, 'r') | 81 data_file = open(cname, 'r') |
81 data = ''.join(data_file.readlines()) | 82 data = ''.join(data_file.readlines()) |
82 data_file.close() | 83 data_file.close() |
83 lines = data.split('\n') | 84 lines = data.split('\n') |
84 read_ct = 0.0 | 85 read_ct.append(0.0) |
85 index = 0 | 86 index = 0 |
86 for line in lines: | 87 for line in lines: |
87 if len(line) > 1: | 88 if len(line) > 1: |
88 t_s, count, uniq = line.split(',') | 89 t_s, count, uniq = line.split(',') |
89 target = int(t_s) | 90 target = int(t_s) |
90 read_ct += float(count) | 91 read_ct[col] += float(count) |
91 if target > 0: | 92 if target > 0: |
92 if in_use[target]: | 93 if in_use[target]: |
93 m[index] = float(count) | 94 m[index, col] = float(count) |
94 index += 1 | 95 index += 1 |
95 else: | 96 else: |
96 noid_list.append(int(count)) | 97 noid_list.append(int(count)) |
97 | 98 |
98 #b = m * factor_arr[:,None] #normalize each row by kmer coverage | 99 b = m * factor_arr[:,None] #normalize each row by kmer coverage |
99 #sums = np.sum(b, axis=0) | 100 sums = np.sum(b, axis=0) |
100 #b = b / sums[None,:] | 101 b = b / sums[None,:] |
101 #b = b * 100.0 | 102 b = b * 100.0 |
102 sum = 0.0 | 103 rowmax = b.max(axis=1) |
103 b = [] | |
104 for i in range(num_targs): | |
105 b1 = m[i] * factor_list[i] | |
106 sum += b1 | |
107 b.append(b1) | |
108 sum /= 100.0 | |
109 for i in range(num_targs): | |
110 b[i] /=sum | |
111 #rowmax = b.max(axis=1) | |
112 | 104 |
113 out_file = open(oname, 'w') | 105 out_file = open(oname, 'w') |
114 output = "taxid,reads,abundance\n" | 106 output = "taxid,reads,abundance\n" |
115 out_file.write(output) | 107 out_file.write(output) |
116 output = "total," | 108 output = "total," |
117 #for i in range(num_cols): | 109 for i in range(num_cols): |
118 output += str(read_ct) + ",," | 110 output += str(read_ct[i]) + ",," |
119 output += "\n" | 111 output += "\n" |
120 out_file.write(output) | 112 out_file.write(output) |
121 output = "no_id," | 113 output = "no_id," |
122 for i in range(num_cols): | 114 for i in range(num_cols): |
123 output += str(noid_list[i]) + ",," | 115 output += str(noid_list[i]) + ",," |
124 output += "\n" | 116 output += "\n" |
125 out_file.write(output) | 117 out_file.write(output) |
126 for i in range(num_targs): | 118 for i in range(num_targs): |
127 #l = order_row[i] | 119 #l = order_row[i] |
128 if m[i] > 0: #only output non-zero results | 120 if rowmax[i] > 0.000: #only output non-zero results |
129 output = name_list[i] | 121 output = name_list[i] |
130 for j in range(num_cols): | 122 for j in range(num_cols): |
131 output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) | 123 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) |
132 output += "\n" | 124 output += "\n" |
133 out_file.write(output) | 125 out_file.write(output) |
134 out_file.close() | 126 out_file.close() |
135 | 127 |
136 | 128 |