comparison read2mut.py @ 79:d7aea14291e8 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Mon, 25 Jul 2022 13:24:04 +0000
parents fdfe9a919ff7
children 8336a4f2b647
comparison
equal deleted inserted replaced
78:fdfe9a919ff7 79:d7aea14291e8
311 count_lowq += 1 311 count_lowq += 1
312 else: 312 else:
313 count_other += 1 313 count_other += 1
314 else: 314 else:
315 count_indel += 1 315 count_indel += 1
316 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, low quality = %s\n" %
317 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_lowq))
316 318
317 mut_array = np.array(mut_array) 319 mut_array = np.array(mut_array)
318 for read in bam.fetch(until_eof=True): 320 for read in bam.fetch(until_eof=True):
319 if read.is_unmapped: 321 if read.is_unmapped:
320 pure_tag = read.query_name[:-5] 322 pure_tag = read.query_name[:-5]
423 counts_mut = 0 425 counts_mut = 0
424 chimeric_tag_list = [] 426 chimeric_tag_list = []
425 chimeric_tag = {} 427 chimeric_tag = {}
426 if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt 428 if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt
427 429
430 if key1 not in np.array(['#'.join(str(i) for i in z)
431 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]):
432 continue
433
428 change_tier_after_print = [] 434 change_tier_after_print = []
429 i = np.where(np.array(['#'.join(str(i) for i in z) 435 i = np.where(np.array(['#'.join(str(i) for i in z)
430 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] 436 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
431 ref = mut_array[i, 2] 437 ref = mut_array[i, 2]
432 alt = mut_array[i, 3] 438 alt = mut_array[i, 3]
441 for k, v in values_tier_dict: 447 for k, v in values_tier_dict:
442 tier_dict[key1][k] = v 448 tier_dict[key1][k] = v
443 tier_dict_ref[key1][k] = v 449 tier_dict_ref[key1][k] = v
444 450
445 used_keys = [] 451 used_keys = []
452 # used_keys_ref = []
446 if 'ab' in mut_pos_dict[key1].keys(): 453 if 'ab' in mut_pos_dict[key1].keys():
447 sscs_mut_ab = mut_pos_dict[key1]['ab'] 454 sscs_mut_ab = mut_pos_dict[key1]['ab']
448 else: 455 else:
449 sscs_mut_ab = 0 456 sscs_mut_ab = 0
450 if 'ba' in mut_pos_dict[key1].keys(): 457 if 'ba' in mut_pos_dict[key1].keys():
463 add_mut14 = "" 470 add_mut14 = ""
464 add_mut23 = "" 471 add_mut23 = ""
465 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys(): # skip reads that have not alt or ref 472 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys(): # skip reads that have not alt or ref
466 continue 473 continue
467 474
468 if ((key2[:-5] in tag_dict.keys() and key2[:-5] in pure_tags_dict_short[key1].keys() and key1 in tag_dict[key2[:-5]].keys()) or (key2[:-5] in tag_dict_ref.keys() and key2[:-5] in pure_tags_dict_ref[key1].keys() and key1 in tag_dict_ref[key2[:-5]].keys())) and (key2[:-5] not in used_keys): 475 if (((key2[:-5] in tag_dict.keys()) and (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key1 in tag_dict[key2[:-5]].keys()) and (key2[:-5] not in used_keys)) or ((key2[:-5] in tag_dict_ref.keys()) and (key2[:-5] in pure_tags_dict_ref[key1].keys()) and (key1 in tag_dict_ref[key2[:-5]].keys()) and (key2[:-5] not in used_keys))):
469 476 if key2[:-5] in tag_dict.keys():
470 if key2[:-5] in pure_tags_dict_short[key1].keys():
471 variant_type = "alt" 477 variant_type = "alt"
472 elif key2[:-5] in pure_tags_dict_ref[key1].keys(): 478 elif key2[:-5] in tag_dict_ref.keys():
473 variant_type = "ref" 479 variant_type = "ref"
474 480
475 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): 481 if key2[:-5] + '.ab.1' in mut_dict[key1].keys():
476 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) 482 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values())
477 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): 483 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys():
668 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] 674 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']
669 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] 675 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2']
670 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] 676 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2']
671 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2'] 677 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2']
672 678
679 # if variant_type == "alt":
673 used_keys.append(key2[:-5]) 680 used_keys.append(key2[:-5])
681 # elif variant_type == "ref":
682 # used_keys_ref.append(key2[:-5])
674 counts_mut += 1 683 counts_mut += 1
675 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): 684 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)):
676 if variant_type == "alt": 685 if variant_type == "alt":
677 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f 686 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f
678 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f 687 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f
1238 elif variant_type == "ref": 1247 elif variant_type == "ref":
1239 tier_dict_ref[key1]["tier 7"] += 1 1248 tier_dict_ref[key1]["tier 7"] += 1
1240 1249
1241 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) 1250 chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
1242 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) 1251 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
1243 sample_tag = key2[:-5] 1252
1244 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time 1253 if variant_type == "alt":
1245 # exclude identical tag from array2, to prevent comparison to itself 1254 sample_tag = key2[:-5]
1246 same_tag = np.where(array2 == sample_tag) 1255 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time
1247 index_array2 = np.arange(0, len(array2), 1) 1256 # exclude identical tag from array2, to prevent comparison to itself
1248 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data 1257 same_tag = np.where(array2 == sample_tag)
1249 array2 = array2[index_withoutSame] 1258 index_array2 = np.arange(0, len(array2), 1)
1250 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant 1259 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data
1251 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1 1260 array2 = array2[index_withoutSame]
1252 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2 1261 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant
1253 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1 1262 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1
1254 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2 1263 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2
1255 1264 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1
1256 min_tags_list_zeros = [] 1265 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2
1257 chimera_tags = [] 1266 min_tags_list_zeros = []
1258 for mate_b in [False, True]: 1267 chimera_tags = []
1259 i = 0 # counter, only used to see how many HDs of tags were already calculated 1268 for mate_b in [False, True]:
1260 if mate_b is False: # HD calculation for all a's 1269 i = 0 # counter, only used to see how many HDs of tags were already calculated
1261 half1_mate1 = array1_half 1270 if mate_b is False: # HD calculation for all a's
1262 half2_mate1 = array1_half2 1271 half1_mate1 = array1_half
1263 half1_mate2 = array2_half 1272 half2_mate1 = array1_half2
1264 half2_mate2 = array2_half2 1273 half1_mate2 = array2_half
1265 elif mate_b is True: # HD calculation for all b's 1274 half2_mate2 = array2_half2
1266 half1_mate1 = array1_half2 1275 elif mate_b is True: # HD calculation for all b's
1267 half2_mate1 = array1_half 1276 half1_mate1 = array1_half2
1268 half1_mate2 = array2_half2 1277 half2_mate1 = array1_half
1269 half2_mate2 = array2_half 1278 half1_mate2 = array2_half2
1270 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" 1279 half2_mate2 = array2_half
1271 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) 1280 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
1272 min_index = np.where(dist == dist.min()) # get index of min HD 1281 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2])
1273 # get all "b's" of the tag or all "a's" of the tag with minimum HD 1282 min_index = np.where(dist == dist.min()) # get index of min HD
1274 min_tag_half2 = half2_mate2[min_index] 1283 # get all "b's" of the tag or all "a's" of the tag with minimum HD
1275 min_tag_array2 = array2[min_index] # get whole tag with min HD 1284 min_tag_half2 = half2_mate2[min_index]
1276 min_value = dist.min() 1285 min_tag_array2 = array2[min_index] # get whole tag with min HD
1277 # calculate HD of "b" to all "b's" or "a" to all "a's" 1286 min_value = dist.min()
1278 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) 1287 # calculate HD of "b" to all "b's" or "a" to all "a's"
1279 for e in min_tag_half2]) 1288 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e))
1280 dist2 = dist_second_half.max() 1289 for e in min_tag_half2])
1281 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD 1290 dist2 = dist_second_half.max()
1282 max_tag = min_tag_array2[max_index] 1291 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD
1283 1292 max_tag = min_tag_array2[max_index]
1284 # tags which have identical parts: 1293 # tags which have identical parts:
1285 if min_value == 0 or dist2 == 0: 1294 if min_value == 0 or dist2 == 0:
1286 min_tags_list_zeros.append(tag) 1295 min_tags_list_zeros.append(tag)
1287 chimera_tags.append(max_tag) 1296 chimera_tags.append(max_tag)
1288 i += 1 1297 i += 1
1289 chimera_tags = [x for x in chimera_tags if x != []] 1298 chimera_tags = [x for x in chimera_tags if x != []]
1290 chimera_tags_new = [] 1299 chimera_tags_new = []
1291 for i in chimera_tags: 1300 for i in chimera_tags:
1292 if len(i) > 1: 1301 if len(i) > 1:
1293 for t in i: 1302 for t in i:
1294 chimera_tags_new.append(t) 1303 chimera_tags_new.append(t)
1295 else: 1304 else:
1296 chimera_tags_new.extend(i) 1305 chimera_tags_new.extend(i)
1297 chimera = ", ".join(chimera_tags_new) 1306 chimera = ", ".join(chimera_tags_new)
1307 else:
1308 chimera_tags_new = []
1309 chimera = ""
1298 else: 1310 else:
1299 chimera_tags_new = [] 1311 chimera_tags_new = []
1300 chimera = "" 1312 chimera = ""
1301 1313
1302 if len(chimera_tags_new) > 0: 1314 if len(chimera_tags_new) > 0:
1314 if (read_pos2 == -1): 1326 if (read_pos2 == -1):
1315 read_pos2 = read_len_median2 = None 1327 read_pos2 = read_len_median2 = None
1316 if (read_pos3 == -1): 1328 if (read_pos3 == -1):
1317 read_pos3 = read_len_median3 = None 1329 read_pos3 = read_len_median3 = None
1318 line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 1330 line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
1319 # ws1.write_row(row, 0, line)
1320 # csv_writer.writerow(line)
1321 line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 1331 line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
1322 # ws1.write_row(row + 1, 0, line2) 1332 if tier != "4":
1323 # csv_writer.writerow(line2) 1333 ws1.write_row(row, 0, line)
1324 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1334 csv_writer.writerow(line)
1325 # {'type': 'formula', 1335 ws1.write_row(row + 1, 0, line2)
1326 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1336 csv_writer.writerow(line2)
1327 # 'format': format1, 1337 if variant_type == "alt":
1328 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1338 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1329 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1339 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1330 # {'type': 'formula', 1340 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1331 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 1341 elif variant_type == "ref":
1332 # 'format': format3, 1342 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1333 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1343 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1334 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1344 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1335 # {'type': 'formula', 1345 else:
1336 # 'criteria': '=$B${}>="3"'.format(row + 1), 1346 change_tier_after_print.append((line, line2, trimmed_actual_high_tier))
1337 # 'format': format2, 1347
1338 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1339 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier))
1340 row += 3 1348 row += 3
1341 1349
1342 if chimera_correction: 1350 if chimera_correction:
1343 chimeric_dcs_high_tiers = 0 1351 chimeric_dcs_high_tiers = 0
1344 chimeric_dcs = 0 1352 chimeric_dcs = 0
1365 1373
1366 if tier_dict_ref[key1]["tier 4"] > 0 and sum_highTiers_ref > 0: 1374 if tier_dict_ref[key1]["tier 4"] > 0 and sum_highTiers_ref > 0:
1367 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"] 1375 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"]
1368 tier_dict_ref[key1]["tier 4"] = 0 1376 tier_dict_ref[key1]["tier 4"] = 0
1369 correct_tier_ref = True 1377 correct_tier_ref = True
1370 1378 # print(key1, "change tiers from tier 4 to tier 2.5 for {} DCS ...".format(len(change_tier_after_print)))
1371 for sample in change_tier_after_print: 1379 if len(change_tier_after_print) > 0:
1372 row_number = sample[0] 1380 for sample in change_tier_after_print:
1373 line1 = sample[1] 1381 # row_number = sample[0]
1374 line2 = sample[2] 1382 line1 = sample[0]
1375 actual_high_tier = sample[3] 1383 line2 = sample[1]
1376 current_tier = list(line1)[1] 1384 actual_high_tier = sample[2]
1377 1385 current_tier = list(line1)[1]
1378 if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier: 1386 if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier:
1379 line1 = list(line1) 1387 line1 = list(line1)
1380 line1[1] = "2.5" 1388 line1[1] = "2.5"
1381 line1 = tuple(line1) 1389 line1 = tuple(line1)
1382 counter_tier25 += 1 1390 counter_tier25 += 1
1383 counter_tier4 -= 1 1391 counter_tier4 -= 1
1384 if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier: 1392 if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier:
1385 line1 = list(line1) 1393 line1 = list(line1)
1386 line1[1] = "2.5" 1394 line1[1] = "2.5"
1387 line1 = tuple(line1) 1395 line1 = tuple(line1)
1388 counter_tier25 += 1 1396 counter_tier25 += 1
1389 counter_tier4 -= 1 1397 counter_tier4 -= 1
1390 1398 ws1.write_row(row, 0, line1)
1391 ws1.write_row(row_number, 0, line1) 1399 csv_writer.writerow(line1)
1392 csv_writer.writerow(line1) 1400 ws1.write_row(row + 1, 0, line2)
1393 ws1.write_row(row_number + 1, 0, line2) 1401 csv_writer.writerow(line2)
1394 csv_writer.writerow(line2) 1402 if line1[2] == "alt":
1395 if line1[2] == "alt": 1403 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1396 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1404 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1397 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1405 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1398 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1406 elif line1[2] == "ref":
1399 elif line1[2] == "ref": 1407 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1400 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1408 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1401 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1409 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1402 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) 1410 row += 3
1403 1411
1404 # sheet 2 1412 # sheet 2
1405 if chimera_correction: 1413 if chimera_correction:
1406 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1414 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1407 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', 1415 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)',
1427 alt = mut_array[i, 3] 1435 alt = mut_array[i, 3]
1428 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) 1436 chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
1429 ref_count = cvrg_dict[key1][0] 1437 ref_count = cvrg_dict[key1][0]
1430 alt_count = cvrg_dict[key1][1] 1438 alt_count = cvrg_dict[key1][1]
1431 cvrg = ref_count + alt_count 1439 cvrg = ref_count + alt_count
1432
1433 ref_tiers = tier_dict_ref[key1] 1440 ref_tiers = tier_dict_ref[key1]
1434
1435 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) 1441 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
1436 lst = [var_id, cvrg] 1442 lst = [var_id, cvrg]
1437 used_tiers = [] 1443 used_tiers = []
1438 used_tiers_ref = [t for k, t in sorted(ref_tiers.items())] 1444 used_tiers_ref = [t for k, t in sorted(ref_tiers.items())]
1439 cum_af = [] 1445 cum_af = []
1451 chimeras_all = chimera_dict[key1][1] 1457 chimeras_all = chimera_dict[key1][1]
1452 new_alt = sum(used_tiers[0:7]) - chimeras_all 1458 new_alt = sum(used_tiers[0:7]) - chimeras_all
1453 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7]))) 1459 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7])))
1454 if fraction_chimeras is None: 1460 if fraction_chimeras is None:
1455 fraction_chimeras = 0. 1461 fraction_chimeras = 0.
1456 new_cvrg = (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])) * (1. - fraction_chimeras) 1462 new_cvrg = (cvrg - sum(used_tiers[-10:])) * (1. - fraction_chimeras)
1457 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)]) 1463 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)])
1458 lst.extend([alt_count, safe_div(alt_count, cvrg)]) 1464 lst.extend([alt_count, safe_div(alt_count, cvrg)])
1459 lst.extend(used_tiers) 1465 lst.extend(used_tiers)
1460 lst.extend(used_tiers_ref) 1466 lst.extend(used_tiers_ref)
1461 # lst.extend(cum_af) 1467 # lst.extend(cum_af)