Mercurial > repos > iuc > variant_analyzer
comparison read2mut.py @ 1:3556001ff2db draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 60dc8db809909edf44d662683b1f392b9d5964bf"
author | iuc |
---|---|
date | Wed, 04 Dec 2019 16:21:17 -0500 |
parents | 8d29173d49a9 |
children | 3f1dbd2c59bf |
comparison
equal
deleted
inserted
replaced
0:8d29173d49a9 | 1:3556001ff2db |
---|---|
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
25 import argparse | 25 import argparse |
26 import itertools | |
27 import json | 26 import json |
28 import operator | 27 import operator |
29 import os | 28 import os |
30 import re | 29 import re |
31 import sys | 30 import sys |
87 if trim < 0: | 86 if trim < 0: |
88 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | 87 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
89 | 88 |
90 # 1. read mut file | 89 # 1. read mut file |
91 with open(file1, 'r') as mut: | 90 with open(file1, 'r') as mut: |
92 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') | 91 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) |
93 | 92 |
94 # 2. load dicts | 93 # 2. load dicts |
95 with open(json_file, "r") as f: | 94 with open(json_file, "r") as f: |
96 (tag_dict, cvrg_dict) = json.load(f) | 95 (tag_dict, cvrg_dict) = json.load(f) |
97 | 96 |
120 alt = mut_array[m, 10] | 119 alt = mut_array[m, 10] |
121 mut_dict[chrom_stop_pos] = {} | 120 mut_dict[chrom_stop_pos] = {} |
122 mut_read_pos_dict[chrom_stop_pos] = {} | 121 mut_read_pos_dict[chrom_stop_pos] = {} |
123 reads_dict[chrom_stop_pos] = {} | 122 reads_dict[chrom_stop_pos] = {} |
124 | 123 |
125 for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): | 124 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): |
126 if pileupcolumn.reference_pos == stop_pos - 1: | 125 if pileupcolumn.reference_pos == stop_pos - 1: |
127 count_alt = 0 | 126 count_alt = 0 |
128 count_ref = 0 | 127 count_ref = 0 |
129 count_indel = 0 | 128 count_indel = 0 |
130 count_n = 0 | 129 count_n = 0 |
217 else: | 216 else: |
218 pure_tags_dict_short = pure_tags_dict | 217 pure_tags_dict_short = pure_tags_dict |
219 | 218 |
220 whole_array = [] | 219 whole_array = [] |
221 for k in pure_tags_dict.values(): | 220 for k in pure_tags_dict.values(): |
222 if len(k) != 0: | 221 whole_array.extend(k.keys()) |
223 keys = k.keys() | |
224 if len(keys) > 1: | |
225 for k1 in keys: | |
226 whole_array.append(k1) | |
227 else: | |
228 whole_array.append(keys[0]) | |
229 | 222 |
230 # 7. output summary with threshold | 223 # 7. output summary with threshold |
231 workbook = xlsxwriter.Workbook(outfile) | 224 workbook = xlsxwriter.Workbook(outfile) |
232 ws1 = workbook.add_worksheet("Results") | 225 ws1 = workbook.add_worksheet("Results") |
233 ws2 = workbook.add_worksheet("Allele frequencies") | 226 ws2 = workbook.add_worksheet("Allele frequencies") |
621 half1_mate1 = array1_half2 | 614 half1_mate1 = array1_half2 |
622 half2_mate1 = array1_half | 615 half2_mate1 = array1_half |
623 half1_mate2 = array2_half2 | 616 half1_mate2 = array2_half2 |
624 half2_mate2 = array2_half | 617 half2_mate2 = array2_half |
625 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 618 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
626 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 619 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
627 min_index = np.where(dist == dist.min()) # get index of min HD | 620 min_index = np.where(dist == dist.min()) # get index of min HD |
628 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 621 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
629 min_tag_half2 = half2_mate2[min_index] | 622 min_tag_half2 = half2_mate2[min_index] |
630 min_tag_array2 = array2[min_index] # get whole tag with min HD | 623 min_tag_array2 = array2[min_index] # get whole tag with min HD |
631 min_value = dist.min() | 624 min_value = dist.min() |
632 # calculate HD of "b" to all "b's" or "a" to all "a's" | 625 # calculate HD of "b" to all "b's" or "a" to all "a's" |
633 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | 626 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e)) |
634 for e in min_tag_half2]) | 627 for e in min_tag_half2]) |
635 | 628 |
636 dist2 = dist_second_half.max() | 629 dist2 = dist_second_half.max() |
637 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 630 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
638 max_tag = min_tag_array2[max_index] | 631 max_tag = min_tag_array2[max_index] |