Mercurial > repos > cstrittmatter > mitokmer
comparison kmer_read_m3.py @ 3:c9d98f5bc240 draft
planemo upload commit 003cdb83fd17248ef57959332d58a3c96311332a-dirty
| author | cstrittmatter |
|---|---|
| date | Sun, 28 Apr 2019 00:42:24 -0400 |
| parents | 1434bc7b4786 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:a0852bb4b09b | 3:c9d98f5bc240 |
|---|---|
| 1 #Reads fastq filenames, runs kmerread, and generates report | 1 #Reads fastq filenames, runs kmerread, and generates report |
| 2 #MKM 1/1/2019 | 2 #MKM 1/1/2019 |
| 3 | 3 |
| 4 import re | 4 import re |
| 5 import sys | 5 import sys |
| 6 #import numpy as np -- removed numpy requirement | 6 import numpy as np |
| 7 from subprocess import Popen, PIPE | 7 from subprocess import Popen, PIPE |
| 8 | 8 |
| 9 if __name__ == '__main__': | 9 if __name__ == '__main__': |
| 10 if len(sys.argv) > 2: | 10 if len(sys.argv) > 2: |
| 11 item = sys.argv[1].strip() | 11 item = sys.argv[1].strip() |
| 63 use = "0" | 63 use = "0" |
| 64 in_use.append(int(use)) | 64 in_use.append(int(use)) |
| 65 if use == "1": | 65 if use == "1": |
| 66 name_list.append(name) | 66 name_list.append(name) |
| 67 factor_list.append(tested / hit / gensize) | 67 factor_list.append(tested / hit / gensize) |
| 68 #factor_arr = np.array(factor_list) | 68 factor_arr = np.array(factor_list) |
| 69 num_targs = len(name_list) | 69 num_targs = len(name_list) |
| 70 print wdir | |
| 70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) | 71 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) |
| 71 | 72 |
| 72 (stdout, stderr) = process.communicate() | 73 (stdout, stderr) = process.communicate() |
| 73 | 74 |
| 74 #read in output files | 75 #read in output files |
| 75 noid_list = [] | 76 noid_list = [] |
| 76 num_cols = 1 #only one sample, no matrix needed | 77 read_ct = [] |
| 77 #m = np.zeros((num_targs,num_cols)) | 78 num_cols = 1 |
| 78 m = [0.0 for _ in range(num_targs)] | 79 m = np.zeros((num_targs,num_cols)) |
| 79 col = 0 | 80 col = 0 |
| 80 data_file = open(cname, 'r') | 81 data_file = open(cname, 'r') |
| 81 data = ''.join(data_file.readlines()) | 82 data = ''.join(data_file.readlines()) |
| 82 data_file.close() | 83 data_file.close() |
| 83 lines = data.split('\n') | 84 lines = data.split('\n') |
| 84 read_ct = 0.0 | 85 read_ct.append(0.0) |
| 85 index = 0 | 86 index = 0 |
| 86 for line in lines: | 87 for line in lines: |
| 87 if len(line) > 1: | 88 if len(line) > 1: |
| 88 t_s, count, uniq = line.split(',') | 89 t_s, count, uniq = line.split(',') |
| 89 target = int(t_s) | 90 target = int(t_s) |
| 90 read_ct += float(count) | 91 read_ct[col] += float(count) |
| 91 if target > 0: | 92 if target > 0: |
| 92 if in_use[target]: | 93 if in_use[target]: |
| 93 m[index] = float(count) | 94 m[index, col] = float(count) |
| 94 index += 1 | 95 index += 1 |
| 95 else: | 96 else: |
| 96 noid_list.append(int(count)) | 97 noid_list.append(int(count)) |
| 97 | 98 |
| 98 #b = m * factor_arr[:,None] #normalize each row by kmer coverage | 99 b = m * factor_arr[:,None] #normalize each row by kmer coverage |
| 99 #sums = np.sum(b, axis=0) | 100 sums = np.sum(b, axis=0) |
| 100 #b = b / sums[None,:] | 101 b = b / sums[None,:] |
| 101 #b = b * 100.0 | 102 b = b * 100.0 |
| 102 sum = 0.0 | 103 rowmax = b.max(axis=1) |
| 103 b = [] | |
| 104 for i in range(num_targs): | |
| 105 b1 = m[i] * factor_list[i] | |
| 106 sum += b1 | |
| 107 b.append(b1) | |
| 108 sum /= 100.0 | |
| 109 for i in range(num_targs): | |
| 110 b[i] /=sum | |
| 111 #rowmax = b.max(axis=1) | |
| 112 | 104 |
| 113 out_file = open(oname, 'w') | 105 out_file = open(oname, 'w') |
| 114 output = "taxid,reads,abundance\n" | 106 output = "taxid,reads,abundance\n" |
| 115 out_file.write(output) | 107 out_file.write(output) |
| 116 output = "total," | 108 output = "total," |
| 117 #for i in range(num_cols): | 109 for i in range(num_cols): |
| 118 output += str(read_ct) + ",," | 110 output += str(read_ct[i]) + ",," |
| 119 output += "\n" | 111 output += "\n" |
| 120 out_file.write(output) | 112 out_file.write(output) |
| 121 output = "no_id," | 113 output = "no_id," |
| 122 for i in range(num_cols): | 114 for i in range(num_cols): |
| 123 output += str(noid_list[i]) + ",," | 115 output += str(noid_list[i]) + ",," |
| 124 output += "\n" | 116 output += "\n" |
| 125 out_file.write(output) | 117 out_file.write(output) |
| 126 for i in range(num_targs): | 118 for i in range(num_targs): |
| 127 #l = order_row[i] | 119 #l = order_row[i] |
| 128 if m[i] > 0: #only output non-zero results | 120 if rowmax[i] > 0.000: #only output non-zero results |
| 129 output = name_list[i] | 121 output = name_list[i] |
| 130 for j in range(num_cols): | 122 for j in range(num_cols): |
| 131 output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i]) | 123 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) |
| 132 output += "\n" | 124 output += "\n" |
| 133 out_file.write(output) | 125 out_file.write(output) |
| 134 out_file.close() | 126 out_file.close() |
| 135 | 127 |
| 136 | 128 |
