comparison read2mut.py @ 1:3556001ff2db draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 60dc8db809909edf44d662683b1f392b9d5964bf"
| author | iuc |
|---|---|
| date | Wed, 04 Dec 2019 16:21:17 -0500 |
| parents | 8d29173d49a9 |
| children | 3f1dbd2c59bf |
comparison
| 0:8d29173d49a9 | 1:3556001ff2db |
|---|---|
| 21 """ | 21 """ |
| 22 | 22 |
| 23 from __future__ import division | 23 from __future__ import division |
| 24 | 24 |
| 25 import argparse | 25 import argparse |
| 26 import itertools | |
| 27 import json | 26 import json |
| 28 import operator | 27 import operator |
| 29 import os | 28 import os |
| 30 import re | 29 import re |
| 31 import sys | 30 import sys |
| 87 if trim < 0: | 86 if trim < 0: |
| 88 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | 87 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
| 89 | 88 |
| 90 # 1. read mut file | 89 # 1. read mut file |
| 91 with open(file1, 'r') as mut: | 90 with open(file1, 'r') as mut: |
| 92 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') | 91 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) |
| 93 | 92 |
| 94 # 2. load dicts | 93 # 2. load dicts |
| 95 with open(json_file, "r") as f: | 94 with open(json_file, "r") as f: |
| 96 (tag_dict, cvrg_dict) = json.load(f) | 95 (tag_dict, cvrg_dict) = json.load(f) |
| 97 | 96 |
| 120 alt = mut_array[m, 10] | 119 alt = mut_array[m, 10] |
| 121 mut_dict[chrom_stop_pos] = {} | 120 mut_dict[chrom_stop_pos] = {} |
| 122 mut_read_pos_dict[chrom_stop_pos] = {} | 121 mut_read_pos_dict[chrom_stop_pos] = {} |
| 123 reads_dict[chrom_stop_pos] = {} | 122 reads_dict[chrom_stop_pos] = {} |
| 124 | 123 |
| 125 for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): | 124 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): |
| 126 if pileupcolumn.reference_pos == stop_pos - 1: | 125 if pileupcolumn.reference_pos == stop_pos - 1: |
| 127 count_alt = 0 | 126 count_alt = 0 |
| 128 count_ref = 0 | 127 count_ref = 0 |
| 129 count_indel = 0 | 128 count_indel = 0 |
| 130 count_n = 0 | 129 count_n = 0 |
| 217 else: | 216 else: |
| 218 pure_tags_dict_short = pure_tags_dict | 217 pure_tags_dict_short = pure_tags_dict |
| 219 | 218 |
| 220 whole_array = [] | 219 whole_array = [] |
| 221 for k in pure_tags_dict.values(): | 220 for k in pure_tags_dict.values(): |
| 222 if len(k) != 0: | 221 whole_array.extend(k.keys()) |
| 223 keys = k.keys() | |
| 224 if len(keys) > 1: | |
| 225 for k1 in keys: | |
| 226 whole_array.append(k1) | |
| 227 else: | |
| 228 whole_array.append(keys[0]) | |
| 229 | 222 |
| 230 # 7. output summary with threshold | 223 # 7. output summary with threshold |
| 231 workbook = xlsxwriter.Workbook(outfile) | 224 workbook = xlsxwriter.Workbook(outfile) |
| 232 ws1 = workbook.add_worksheet("Results") | 225 ws1 = workbook.add_worksheet("Results") |
| 233 ws2 = workbook.add_worksheet("Allele frequencies") | 226 ws2 = workbook.add_worksheet("Allele frequencies") |
| 621 half1_mate1 = array1_half2 | 614 half1_mate1 = array1_half2 |
| 622 half2_mate1 = array1_half | 615 half2_mate1 = array1_half |
| 623 half1_mate2 = array2_half2 | 616 half1_mate2 = array2_half2 |
| 624 half2_mate2 = array2_half | 617 half2_mate2 = array2_half |
| 625 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 618 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
| 626 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 619 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
| 627 min_index = np.where(dist == dist.min()) # get index of min HD | 620 min_index = np.where(dist == dist.min()) # get index of min HD |
| 628 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 621 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
| 629 min_tag_half2 = half2_mate2[min_index] | 622 min_tag_half2 = half2_mate2[min_index] |
| 630 min_tag_array2 = array2[min_index] # get whole tag with min HD | 623 min_tag_array2 = array2[min_index] # get whole tag with min HD |
| 631 min_value = dist.min() | 624 min_value = dist.min() |
| 632 # calculate HD of "b" to all "b's" or "a" to all "a's" | 625 # calculate HD of "b" to all "b's" or "a" to all "a's" |
| 633 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | 626 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e)) |
| 634 for e in min_tag_half2]) | 627 for e in min_tag_half2]) |
| 635 | 628 |
| 636 dist2 = dist_second_half.max() | 629 dist2 = dist_second_half.max() |
| 637 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 630 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
| 638 max_tag = min_tag_array2[max_index] | 631 max_tag = min_tag_array2[max_index] |
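
The last hunk swaps Python 2's `itertools.imap` for the built-in `map`, which is equally lazy in Python 3, so `sum(map(operator.ne, a, b))` still counts the positions at which two equal-length tag halves differ, i.e. their Hamming distance. Below is a minimal sketch of that calculation; the values of `half1_mate1` and `half1_mate2` are made up for illustration, not taken from the tool's data.

```python
import operator

import numpy as np

# Illustrative tag halves only -- not data from read2mut.py.
half1_mate1 = "ACGTACGTACGT"
half1_mate2 = np.array(["ACGTACGTACGT", "ACGTTCGTACGA", "ACGAACGTACGT"])

# In Python 3, map() is lazy like itertools.imap() was in Python 2, so
# summing operator.ne over two equal-length strings counts the positions
# where they differ, i.e. their Hamming distance.
dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2])

min_index = np.where(dist == dist.min())  # index/indices of the minimum HD
print(dist)       # [0 2 1]
print(min_index)  # (array([0]),)
```

The same Python 3 reasoning appears to drive the `whole_array` hunk: `dict.keys()` returns a view that cannot be indexed with `[0]`, and `whole_array.extend(k.keys())` also handles the empty and single-key cases, so the old branching becomes unnecessary.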
