comparison read2mut.py @ 75:6ccff403db8a draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Tue, 23 Mar 2021 15:18:17 +0000
parents eca1365eb42c
children 56f271641828
comparison
equal deleted inserted replaced
74:5023186c2061 75:6ccff403db8a
129 real_start_end = {} 129 real_start_end = {}
130 i = 0 130 i = 0
131 mut_array = [] 131 mut_array = []
132 132
133 for count, variant in enumerate(VCF(file1)): 133 for count, variant in enumerate(VCF(file1)):
134 #if count == 2000:
135 # break
136 chrom = variant.CHROM 134 chrom = variant.CHROM
137 stop_pos = variant.start 135 stop_pos = variant.start
138 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
139 ref = variant.REF 136 ref = variant.REF
140 if len(variant.ALT) == 0: 137 if len(variant.ALT) == 0:
141 continue 138 continue
142 else: 139 else:
143 alt = variant.ALT[0] 140 alt = variant.ALT[0]
159 count_indel = 0 156 count_indel = 0
160 count_n = 0 157 count_n = 0
161 count_other = 0 158 count_other = 0
162 count_lowq = 0 159 count_lowq = 0
163 n = 0 160 n = 0
164 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
165 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
166 for pileupread in pileupcolumn.pileups: 161 for pileupread in pileupcolumn.pileups:
167 n += 1 162 n += 1
168 if not pileupread.is_del and not pileupread.is_refskip: 163 if not pileupread.is_del and not pileupread.is_refskip:
169 tag = pileupread.alignment.query_name 164 tag = pileupread.alignment.query_name
170 nuc = pileupread.alignment.query_sequence[pileupread.query_position] 165 nuc = pileupread.alignment.query_sequence[pileupread.query_position]
171 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 166 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33
172
173 # if read is softclipped, store real position in reference 167 # if read is softclipped, store real position in reference
174 if "S" in pileupread.alignment.cigarstring: 168 if "S" in pileupread.alignment.cigarstring:
175 # spftclipped at start 169 # spftclipped at start
176 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring): 170 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring):
177 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0]) 171 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0])
178 end = pileupread.alignment.reference_end 172 end = pileupread.alignment.reference_end
179 # softclipped at end 173 # softclipped at end
196 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] 190 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1]
197 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] 191 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)]
198 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] 192 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring]
199 real_start_end[chrom_stop_pos][tag] = [(start, end)] 193 real_start_end[chrom_stop_pos][tag] = [(start, end)]
200 else: 194 else:
201 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) 195 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1)
202 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) 196 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence))
203 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) 197 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring)
204 real_start_end[chrom_stop_pos][tag].append((start, end)) 198 real_start_end[chrom_stop_pos][tag].append((start, end))
205 if nuc == alt: 199 if nuc == alt:
206 count_alt += 1 200 count_alt += 1
207 if tag not in mut_read_dict: 201 if tag not in mut_read_dict:
208 mut_read_dict[tag] = {} 202 mut_read_dict[tag] = {}
218 else: 212 else:
219 count_other += 1 213 count_other += 1
220 else: 214 else:
221 count_indel += 1 215 count_indel += 1
222 216
223 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq))
224 #else:
225 # print("indels are currently not evaluated")
226 mut_array = np.array(mut_array) 217 mut_array = np.array(mut_array)
227 for read in bam.fetch(until_eof=True): 218 for read in bam.fetch(until_eof=True):
228 if read.is_unmapped: 219 if read.is_unmapped:
229 pure_tag = read.query_name[:-5] 220 pure_tag = read.query_name[:-5]
230 nuc = "na" 221 nuc = "na"
240 bam.close() 231 bam.close()
241 232
242 # create pure_tags_dict 233 # create pure_tags_dict
243 pure_tags_dict = {} 234 pure_tags_dict = {}
244 for key1, value1 in sorted(mut_dict.items()): 235 for key1, value1 in sorted(mut_dict.items()):
245 #if len(np.where(np.array(['#'.join(str(i) for i in z)
246 # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0:
247 # continue
248
249 i = np.where(np.array(['#'.join(str(i) for i in z) 236 i = np.where(np.array(['#'.join(str(i) for i in z)
250 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] 237 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
251 ref = mut_array[i, 2] 238 ref = mut_array[i, 2]
252 alt = mut_array[i, 3] 239 alt = mut_array[i, 3]
253 pure_tags_dict[key1] = {} 240 pure_tags_dict[key1] = {}
334 alt = mut_array[i, 3] 321 alt = mut_array[i, 3]
335 dcs_median = cvrg_dict[key1][2] 322 dcs_median = cvrg_dict[key1][2]
336 whole_array = pure_tags_dict_short[key1].keys() 323 whole_array = pure_tags_dict_short[key1].keys()
337 324
338 tier_dict[key1] = {} 325 tier_dict[key1] = {}
339 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0),("tier 2.5", 0), 326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0),
340 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), 327 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0),
341 ("tier 6", 0), ("tier 7", 0)] 328 ("tier 6", 0), ("tier 7", 0)]
342 for k, v in values_tier_dict: 329 for k, v in values_tier_dict:
343 tier_dict[key1][k] = v 330 tier_dict[key1][k] = v
344 331
617 604
618 # mate 1 - SSCS ab 605 # mate 1 - SSCS ab
619 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] 606 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1]
620 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads 607 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads
621 if any(ij is True for ij in softclipped_idx1): 608 if any(ij is True for ij in softclipped_idx1):
622 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] 609 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1]
623 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] 610 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1]
624 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] 611 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1]
625 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] 612 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)]
626 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] 613 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)]
627 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping 614 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
628 if any(ij is True for ij in softclipped_both_ends_idx1): 615 if any(ij is True for ij in softclipped_both_ends_idx1):
629 for nr, indx in enumerate(softclipped_both_ends_idx1): 616 for nr, indx in enumerate(softclipped_both_ends_idx1):
630 if indx: 617 if indx:
631 if dist_start_read1[nr] <= dist_end_read1[nr]: 618 if dist_start_read1[nr] <= dist_end_read1[nr]:
632 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number 619 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number
633 else: 620 else:
634 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number 621 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number
635 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads 622 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads
636 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads 623 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads
637 624
638 # mate 1 - SSCS ba 625 # mate 1 - SSCS ba
639 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] 626 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4]
640 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads 627 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads
641 if any(ij is True for ij in softclipped_idx4): 628 if any(ij is True for ij in softclipped_idx4):
642 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] 629 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4]
643 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] 630 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4]
644 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] 631 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4]
645 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] 632 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)]
646 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] 633 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)]
647 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping 634 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
648 if any(ij is True for ij in softclipped_both_ends_idx4): 635 if any(ij is True for ij in softclipped_both_ends_idx4):
649 for nr, indx in enumerate(softclipped_both_ends_idx4): 636 for nr, indx in enumerate(softclipped_both_ends_idx4):
650 if indx: 637 if indx:
651 if dist_start_read4[nr] <= dist_end_read4[nr]: 638 if dist_start_read4[nr] <= dist_end_read4[nr]:
652 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number 639 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number
653 else: 640 else:
654 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number 641 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number
655 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads 642 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads
656 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads 643 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads
657 644
658 # mate 2 - SSCS ab 645 # mate 2 - SSCS ab
659 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] 646 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2]
660 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads 647 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads
661 if any(ij is True for ij in softclipped_idx2): 648 if any(ij is True for ij in softclipped_idx2):
662 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] 649 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2]
663 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] 650 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2]
664 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] 651 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2]
665 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] 652 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)]
666 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] 653 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)]
667 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping 654 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
668 if any(ij is True for ij in softclipped_both_ends_idx2): 655 if any(ij is True for ij in softclipped_both_ends_idx2):
669 for nr, indx in enumerate(softclipped_both_ends_idx2): 656 for nr, indx in enumerate(softclipped_both_ends_idx2):
670 if indx: 657 if indx:
671 if dist_start_read2[nr] <= dist_end_read2[nr]: 658 if dist_start_read2[nr] <= dist_end_read2[nr]:
672 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number 659 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number
673 else: 660 else:
674 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number 661 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number
675 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads 662 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads
676 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads 663 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads
677 664
678 # mate 2 - SSCS ba 665 # mate 2 - SSCS ba
679 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] 666 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3]
682 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] 669 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3]
683 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] 670 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3]
684 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] 671 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3]
685 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] 672 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)]
686 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] 673 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)]
687 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping 674 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping
688 if any(ij is True for ij in softclipped_both_ends_idx3): 675 if any(ij is True for ij in softclipped_both_ends_idx3):
689 for nr, indx in enumerate(softclipped_both_ends_idx3): 676 for nr, indx in enumerate(softclipped_both_ends_idx3):
690 if indx: 677 if indx:
691 if dist_start_read3[nr] <= dist_end_read3[nr]: 678 if dist_start_read3[nr] <= dist_end_read3[nr]:
692 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh 679 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh
693 else: 680 else:
694 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number 681 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number
695 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads 682 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads
696 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads 683 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads
697 684
698 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant 685 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant
699 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | 686 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) |
705 alt3ff = 0 692 alt3ff = 0
706 trimmed = False 693 trimmed = False
707 contradictory = True 694 contradictory = True
708 # softclipping tiers 695 # softclipping tiers
709 # information of both mates available --> all reads for both mates and SSCS are softclipped 696 # information of both mates available --> all reads for both mates and SSCS are softclipped
710 elif (ratio1 & ratio4 & ratio2 & ratio3 & 697 elif (ratio1 & ratio4 & ratio2 & ratio3 &
711 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & 698 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
712 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 699 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
713 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 700 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
714 softclipped_mutation_allMates = True 701 softclipped_mutation_allMates = True
715 softclipped_mutation_oneOfTwoMates = False 702 softclipped_mutation_oneOfTwoMates = False
716 softclipped_mutation_oneOfTwoSSCS = False 703 softclipped_mutation_oneOfTwoSSCS = False
717 softclipped_mutation_oneOfTwoSSCS_diffMates = False 704 softclipped_mutation_oneOfTwoSSCS_diffMates = False
718 softclipped_mutation_oneMate = False 705 softclipped_mutation_oneMate = False
724 trimmed = False 711 trimmed = False
725 contradictory = False 712 contradictory = False
726 # information of both mates available --> only one mate softclipped 713 # information of both mates available --> only one mate softclipped
727 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | 714 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) |
728 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & 715 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) &
729 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 716 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
730 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 717 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
731 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red 718 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red
732 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue 719 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue
733 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red 720 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red
734 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue 721 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue
735 if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue 722 if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue
736 softclipped_mutation_oneOfTwoMates = False 723 softclipped_mutation_oneOfTwoMates = False
737 # blue mate at beginning softclipped 724 # blue mate at beginning softclipped
738 if min_start1 > min_start2: 725 if min_start1 > min_start2:
739 n_spacer_barcode = min_start1 - min_start2 726 n_spacer_barcode = min_start1 - min_start2
740 read_pos2 = read_pos2 - n_spacer_barcode 727 read_pos2 = read_pos2 - n_spacer_barcode
741 read_pos3 = read_pos3 - n_spacer_barcode 728 read_pos3 = read_pos3 - n_spacer_barcode
742 read_len_median2 = read_len_median2 - n_spacer_barcode 729 read_len_median2 = read_len_median2 - n_spacer_barcode
743 read_len_median3 = read_len_median3 - n_spacer_barcode 730 read_len_median3 = read_len_median3 - n_spacer_barcode
744 # red mate at end softclipped 731 # red mate at end softclipped
745 if max_end1 > max_end2: 732 if max_end1 > max_end2:
746 n_spacer_barcode = max_end1 - max_end2 733 n_spacer_barcode = max_end1 - max_end2
747 read_len_median1 = read_len_median1 - n_spacer_barcode 734 read_len_median1 = read_len_median1 - n_spacer_barcode
748 read_len_median4 = read_len_median4 - n_spacer_barcode 735 read_len_median4 = read_len_median4 - n_spacer_barcode
749 elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red 736 elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red
750 softclipped_mutation_oneOfTwoMates = False 737 softclipped_mutation_oneOfTwoMates = False
751 if min_start1 < min_start2: 738 if min_start1 < min_start2:
752 n_spacer_barcode = min_start2 - min_start1 739 n_spacer_barcode = min_start2 - min_start1
753 read_pos1 = read_pos1 - n_spacer_barcode 740 read_pos1 = read_pos1 - n_spacer_barcode
754 read_pos4 = read_pos4 - n_spacer_barcode 741 read_pos4 = read_pos4 - n_spacer_barcode
755 read_len_median1 = read_len_median1 - n_spacer_barcode 742 read_len_median1 = read_len_median1 - n_spacer_barcode
756 read_len_median4 = read_len_median4 - n_spacer_barcode 743 read_len_median4 = read_len_median4 - n_spacer_barcode
757 if max_end1 < max_end2: # if mate1 ends after mate 2 starts 744 if max_end1 < max_end2: # if mate1 ends after mate 2 starts
758 n_spacer_barcode = max_end2 - max_end1 745 n_spacer_barcode = max_end2 - max_end1
759 read_len_median2 = read_len_median2 - n_spacer_barcode 746 read_len_median2 = read_len_median2 - n_spacer_barcode
760 read_len_median3 = read_len_median3 - n_spacer_barcode 747 read_len_median3 = read_len_median3 - n_spacer_barcode
761 else: 748 else:
762 softclipped_mutation_oneOfTwoMates = True 749 softclipped_mutation_oneOfTwoMates = True
768 contradictory = False 755 contradictory = False
769 softclipped_mutation_allMates = False 756 softclipped_mutation_allMates = False
770 softclipped_mutation_oneOfTwoSSCS = False 757 softclipped_mutation_oneOfTwoSSCS = False
771 softclipped_mutation_oneMate = False 758 softclipped_mutation_oneMate = False
772 softclipped_mutation_oneMateOneSSCS = False 759 softclipped_mutation_oneMateOneSSCS = False
773 760
774 if softclipped_mutation_oneOfTwoMates is False: # check trimming tier 761 if softclipped_mutation_oneOfTwoMates is False: # check trimming tier
775 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): 762 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))):
776 beg1 = total1new 763 beg1 = total1new
777 total1new = 0 764 total1new = 0
778 alt1ff = 0 765 alt1ff = 0
779 alt1f = 0 766 alt1f = 0
803 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 790 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
804 791
805 # information of both mates available --> only one mate softclipped 792 # information of both mates available --> only one mate softclipped
806 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & 793 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
807 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & 794 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
808 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available 795 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available
809 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 796 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
810 softclipped_mutation_allMates = False 797 softclipped_mutation_allMates = False
811 softclipped_mutation_oneOfTwoMates = False 798 softclipped_mutation_oneOfTwoMates = False
812 softclipped_mutation_oneOfTwoSSCS = True 799 softclipped_mutation_oneOfTwoSSCS = True
813 softclipped_mutation_oneOfTwoSSCS_diffMates = False 800 softclipped_mutation_oneOfTwoSSCS_diffMates = False
814 softclipped_mutation_oneMate = False 801 softclipped_mutation_oneMate = False
821 contradictory = False 808 contradictory = False
822 809
823 # information of one mate available --> all reads of one mate are softclipped 810 # information of one mate available --> all reads of one mate are softclipped
824 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & 811 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) &
825 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | 812 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) |
826 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & 813 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) &
827 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available 814 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available
828 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 815 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
829 softclipped_mutation_allMates = False 816 softclipped_mutation_allMates = False
830 softclipped_mutation_oneOfTwoMates = False 817 softclipped_mutation_oneOfTwoMates = False
831 softclipped_mutation_oneOfTwoSSCS = False 818 softclipped_mutation_oneOfTwoSSCS = False
832 softclipped_mutation_oneOfTwoSSCS_diffMates = False 819 softclipped_mutation_oneOfTwoSSCS_diffMates = False
833 softclipped_mutation_oneMate = True 820 softclipped_mutation_oneMate = True
839 trimmed = False 826 trimmed = False
840 contradictory = False 827 contradictory = False
841 # information of one mate available --> only one SSCS is softclipped 828 # information of one mate available --> only one SSCS is softclipped
842 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & 829 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) &
843 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | 830 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) |
844 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & 831 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) &
845 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available 832 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available
846 # if distance between softclipping and mutation is at start or end of the read smaller than threshold 833 # if distance between softclipping and mutation is at start or end of the read smaller than threshold
847 softclipped_mutation_allMates = False 834 softclipped_mutation_allMates = False
848 softclipped_mutation_oneOfTwoMates = False 835 softclipped_mutation_oneOfTwoMates = False
849 softclipped_mutation_oneOfTwoSSCS = False 836 softclipped_mutation_oneOfTwoSSCS = False
850 softclipped_mutation_oneOfTwoSSCS_diffMates = False 837 softclipped_mutation_oneOfTwoSSCS_diffMates = False
851 softclipped_mutation_oneMate = False 838 softclipped_mutation_oneMate = False
959 elif (trimmed): 946 elif (trimmed):
960 tier = "4" 947 tier = "4"
961 counter_tier4 += 1 948 counter_tier4 += 1
962 tier_dict[key1]["tier 4"] += 1 949 tier_dict[key1]["tier 4"] += 1
963 950
964 # assign tiers 951 # assign tiers
965 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & 952 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) &
966 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | 953 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) |
967 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & 954 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) &
968 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): 955 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))):
969 trimmed_actual_high_tier = True 956 trimmed_actual_high_tier = True
1110 if (read_pos2 == -1): 1097 if (read_pos2 == -1):
1111 read_pos2 = read_len_median2 = None 1098 read_pos2 = read_len_median2 = None
1112 if (read_pos3 == -1): 1099 if (read_pos3 == -1):
1113 read_pos3 = read_len_median3 = None 1100 read_pos3 = read_len_median3 = None
1114 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 1101 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
1115 #ws1.write_row(row, 0, line) 1102 # ws1.write_row(row, 0, line)
1116 #csv_writer.writerow(line) 1103 # csv_writer.writerow(line)
1117 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 1104 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
1118 #ws1.write_row(row + 1, 0, line2) 1105 # ws1.write_row(row + 1, 0, line2)
1119 #csv_writer.writerow(line2) 1106 # csv_writer.writerow(line2)
1120 1107
1121 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1108 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1122 # {'type': 'formula', 1109 # {'type': 'formula',
1123 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1110 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1124 # 'format': format1, 1111 # 'format': format1,
1125 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1112 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1126 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1113 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1127 # {'type': 'formula', 1114 # {'type': 'formula',
1128 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 1115 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
1129 # 'format': format3, 1116 # 'format': format3,
1130 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1117 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1131 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1118 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1132 # {'type': 'formula', 1119 # {'type': 'formula',
1133 # 'criteria': '=$B${}>="3"'.format(row + 1), 1120 # 'criteria': '=$B${}>="3"'.format(row + 1),
1134 # 'format': format2, 1121 # 'format': format2,
1135 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1122 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1136 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier)) 1123 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier))
1175 csv_writer.writerow(line1) 1162 csv_writer.writerow(line1)
1176 ws1.write_row(row_number + 1, 0, line2) 1163 ws1.write_row(row_number + 1, 0, line2)
1177 csv_writer.writerow(line2) 1164 csv_writer.writerow(line2)
1178 1165
1179 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), 1166 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2),
1180 {'type': 'formula', 1167 {'type': 'formula',
1181 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 1168 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1),
1182 'format': format1, 1169 'format': format1,
1183 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1170 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1184 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), 1171 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2),
1185 {'type': 'formula', 1172 {'type': 'formula',
1186 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 1173 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1),
1187 'format': format3, 1174 'format': format3,
1188 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1175 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1189 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), 1176 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2),
1190 {'type': 'formula', 1177 {'type': 'formula',
1191 'criteria': '=$B${}>="3"'.format(row_number + 1), 1178 'criteria': '=$B${}>="3"'.format(row_number + 1),
1192 'format': format2, 1179 'format': format2,
1193 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1180 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)})
1194
1195 # sheet 2 1181 # sheet 2
1196 if chimera_correction: 1182 if chimera_correction:
1197 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1183 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1198 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1184 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
1199 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1185 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
1200 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') 1186 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
1201 else: 1187 else:
1202 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1188 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1203 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1189 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
1204 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1190 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
1205 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') 1191 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
1226 # calculate cummulative AF 1212 # calculate cummulative AF
1227 used_tiers.append(value2) 1213 used_tiers.append(value2)
1228 if len(used_tiers) > 1: 1214 if len(used_tiers) > 1:
1229 cum = safe_div(sum(used_tiers), cvrg) 1215 cum = safe_div(sum(used_tiers), cvrg)
1230 cum_af.append(cum) 1216 cum_af.append(cum)
1231 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place 1217 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place
1232 continue 1218 continue
1233 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) 1219 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)])
1234 lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))]) 1220 lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))])
1235 if chimera_correction: 1221 if chimera_correction:
1236 chimeras_all = chimera_dict[key1][1] 1222 chimeras_all = chimera_dict[key1][1]
1237 new_alt = sum(used_tiers[0:7]) - chimeras_all 1223 new_alt = sum(used_tiers[0:7]) - chimeras_all
1255 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)}) 1241 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)})
1256 row += 1 1242 row += 1
1257 1243
1258 # sheet 3 1244 # sheet 3
1259 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), 1245 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21),
1260 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), ("tier 2.5", counter_tier25), 1246 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), ("tier 2.5", counter_tier25),
1261 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4", counter_tier4), 1247 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4", counter_tier4),
1262 ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), 1248 ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52),
1263 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6), ("tier 7", counter_tier7)] 1249 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6), ("tier 7", counter_tier7)]
1264 1250
1265 header = ("tier", "count") 1251 header = ("tier", "count")
1401 csv_data.close() 1387 csv_data.close()
1402 1388
1403 1389
1404 if __name__ == '__main__': 1390 if __name__ == '__main__':
1405 sys.exit(read2mut(sys.argv)) 1391 sys.exit(read2mut(sys.argv))
1406