comparison read2mut.py @ 1:3556001ff2db draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 60dc8db809909edf44d662683b1f392b9d5964bf"
author iuc
date Wed, 04 Dec 2019 16:21:17 -0500
parents 8d29173d49a9
children 3f1dbd2c59bf
comparison
equal deleted inserted replaced
0:8d29173d49a9 1:3556001ff2db
21 """ 21 """
22 22
23 from __future__ import division 23 from __future__ import division
24 24
25 import argparse 25 import argparse
26 import itertools
27 import json 26 import json
28 import operator 27 import operator
29 import os 28 import os
30 import re 29 import re
31 import sys 30 import sys
87 if trim < 0: 86 if trim < 0:
88 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) 87 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh))
89 88
90 # 1. read mut file 89 # 1. read mut file
91 with open(file1, 'r') as mut: 90 with open(file1, 'r') as mut:
92 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') 91 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
93 92
94 # 2. load dicts 93 # 2. load dicts
95 with open(json_file, "r") as f: 94 with open(json_file, "r") as f:
96 (tag_dict, cvrg_dict) = json.load(f) 95 (tag_dict, cvrg_dict) = json.load(f)
97 96
120 alt = mut_array[m, 10] 119 alt = mut_array[m, 10]
121 mut_dict[chrom_stop_pos] = {} 120 mut_dict[chrom_stop_pos] = {}
122 mut_read_pos_dict[chrom_stop_pos] = {} 121 mut_read_pos_dict[chrom_stop_pos] = {}
123 reads_dict[chrom_stop_pos] = {} 122 reads_dict[chrom_stop_pos] = {}
124 123
125 for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): 124 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000):
126 if pileupcolumn.reference_pos == stop_pos - 1: 125 if pileupcolumn.reference_pos == stop_pos - 1:
127 count_alt = 0 126 count_alt = 0
128 count_ref = 0 127 count_ref = 0
129 count_indel = 0 128 count_indel = 0
130 count_n = 0 129 count_n = 0
217 else: 216 else:
218 pure_tags_dict_short = pure_tags_dict 217 pure_tags_dict_short = pure_tags_dict
219 218
220 whole_array = [] 219 whole_array = []
221 for k in pure_tags_dict.values(): 220 for k in pure_tags_dict.values():
222 if len(k) != 0: 221 whole_array.extend(k.keys())
223 keys = k.keys()
224 if len(keys) > 1:
225 for k1 in keys:
226 whole_array.append(k1)
227 else:
228 whole_array.append(keys[0])
229 222
230 # 7. output summary with threshold 223 # 7. output summary with threshold
231 workbook = xlsxwriter.Workbook(outfile) 224 workbook = xlsxwriter.Workbook(outfile)
232 ws1 = workbook.add_worksheet("Results") 225 ws1 = workbook.add_worksheet("Results")
233 ws2 = workbook.add_worksheet("Allele frequencies") 226 ws2 = workbook.add_worksheet("Allele frequencies")
621 half1_mate1 = array1_half2 614 half1_mate1 = array1_half2
622 half2_mate1 = array1_half 615 half2_mate1 = array1_half
623 half1_mate2 = array2_half2 616 half1_mate2 = array2_half2
624 half2_mate2 = array2_half 617 half2_mate2 = array2_half
625 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" 618 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
626 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) 619 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2])
627 min_index = np.where(dist == dist.min()) # get index of min HD 620 min_index = np.where(dist == dist.min()) # get index of min HD
628 # get all "b's" of the tag or all "a's" of the tag with minimum HD 621 # get all "b's" of the tag or all "a's" of the tag with minimum HD
629 min_tag_half2 = half2_mate2[min_index] 622 min_tag_half2 = half2_mate2[min_index]
630 min_tag_array2 = array2[min_index] # get whole tag with min HD 623 min_tag_array2 = array2[min_index] # get whole tag with min HD
631 min_value = dist.min() 624 min_value = dist.min()
632 # calculate HD of "b" to all "b's" or "a" to all "a's" 625 # calculate HD of "b" to all "b's" or "a" to all "a's"
633 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) 626 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e))
634 for e in min_tag_half2]) 627 for e in min_tag_half2])
635 628
636 dist2 = dist_second_half.max() 629 dist2 = dist_second_half.max()
637 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD 630 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD
638 max_tag = min_tag_array2[max_index] 631 max_tag = min_tag_array2[max_index]