Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 79:d7aea14291e8 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author | mheinzl |
---|---|
date | Mon, 25 Jul 2022 13:24:04 +0000 |
parents | fdfe9a919ff7 |
children | 8336a4f2b647 |
comparison
equal
deleted
inserted
replaced
78:fdfe9a919ff7 | 79:d7aea14291e8 |
---|---|
311 count_lowq += 1 | 311 count_lowq += 1 |
312 else: | 312 else: |
313 count_other += 1 | 313 count_other += 1 |
314 else: | 314 else: |
315 count_indel += 1 | 315 count_indel += 1 |
316 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, low quality = %s\n" % | |
317 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_lowq)) | |
316 | 318 |
317 mut_array = np.array(mut_array) | 319 mut_array = np.array(mut_array) |
318 for read in bam.fetch(until_eof=True): | 320 for read in bam.fetch(until_eof=True): |
319 if read.is_unmapped: | 321 if read.is_unmapped: |
320 pure_tag = read.query_name[:-5] | 322 pure_tag = read.query_name[:-5] |
423 counts_mut = 0 | 425 counts_mut = 0 |
424 chimeric_tag_list = [] | 426 chimeric_tag_list = [] |
425 chimeric_tag = {} | 427 chimeric_tag = {} |
426 if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt | 428 if (key1 in pure_tags_dict_short.keys()) or (key1 in pure_tags_dict_ref.keys()): # ref or alt |
427 | 429 |
430 if key1 not in np.array(['#'.join(str(i) for i in z) | |
431 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]): | |
432 continue | |
433 | |
428 change_tier_after_print = [] | 434 change_tier_after_print = [] |
429 i = np.where(np.array(['#'.join(str(i) for i in z) | 435 i = np.where(np.array(['#'.join(str(i) for i in z) |
430 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 436 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
431 ref = mut_array[i, 2] | 437 ref = mut_array[i, 2] |
432 alt = mut_array[i, 3] | 438 alt = mut_array[i, 3] |
441 for k, v in values_tier_dict: | 447 for k, v in values_tier_dict: |
442 tier_dict[key1][k] = v | 448 tier_dict[key1][k] = v |
443 tier_dict_ref[key1][k] = v | 449 tier_dict_ref[key1][k] = v |
444 | 450 |
445 used_keys = [] | 451 used_keys = [] |
452 # used_keys_ref = [] | |
446 if 'ab' in mut_pos_dict[key1].keys(): | 453 if 'ab' in mut_pos_dict[key1].keys(): |
447 sscs_mut_ab = mut_pos_dict[key1]['ab'] | 454 sscs_mut_ab = mut_pos_dict[key1]['ab'] |
448 else: | 455 else: |
449 sscs_mut_ab = 0 | 456 sscs_mut_ab = 0 |
450 if 'ba' in mut_pos_dict[key1].keys(): | 457 if 'ba' in mut_pos_dict[key1].keys(): |
463 add_mut14 = "" | 470 add_mut14 = "" |
464 add_mut23 = "" | 471 add_mut23 = "" |
465 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys(): # skip reads that have not alt or ref | 472 if key2[:-5] not in tag_dict.keys() and key2[:-5] not in tag_dict_ref.keys(): # skip reads that have not alt or ref |
466 continue | 473 continue |
467 | 474 |
468 if ((key2[:-5] in tag_dict.keys() and key2[:-5] in pure_tags_dict_short[key1].keys() and key1 in tag_dict[key2[:-5]].keys()) or (key2[:-5] in tag_dict_ref.keys() and key2[:-5] in pure_tags_dict_ref[key1].keys() and key1 in tag_dict_ref[key2[:-5]].keys())) and (key2[:-5] not in used_keys): | 475 if (((key2[:-5] in tag_dict.keys()) and (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key1 in tag_dict[key2[:-5]].keys()) and (key2[:-5] not in used_keys)) or ((key2[:-5] in tag_dict_ref.keys()) and (key2[:-5] in pure_tags_dict_ref[key1].keys()) and (key1 in tag_dict_ref[key2[:-5]].keys()) and (key2[:-5] not in used_keys))): |
469 | 476 if key2[:-5] in tag_dict.keys(): |
470 if key2[:-5] in pure_tags_dict_short[key1].keys(): | |
471 variant_type = "alt" | 477 variant_type = "alt" |
472 elif key2[:-5] in pure_tags_dict_ref[key1].keys(): | 478 elif key2[:-5] in tag_dict_ref.keys(): |
473 variant_type = "ref" | 479 variant_type = "ref" |
474 | 480 |
475 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): | 481 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): |
476 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) | 482 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) |
477 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 483 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
668 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] | 674 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] |
669 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] | 675 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] |
670 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] | 676 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] |
671 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2'] | 677 ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2'] |
672 | 678 |
679 # if variant_type == "alt": | |
673 used_keys.append(key2[:-5]) | 680 used_keys.append(key2[:-5]) |
681 # elif variant_type == "ref": | |
682 # used_keys_ref.append(key2[:-5]) | |
674 counts_mut += 1 | 683 counts_mut += 1 |
675 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): | 684 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): |
676 if variant_type == "alt": | 685 if variant_type == "alt": |
677 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f | 686 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f |
678 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f | 687 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f |
1238 elif variant_type == "ref": | 1247 elif variant_type == "ref": |
1239 tier_dict_ref[key1]["tier 7"] += 1 | 1248 tier_dict_ref[key1]["tier 7"] += 1 |
1240 | 1249 |
1241 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 1250 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
1242 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 1251 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
1243 sample_tag = key2[:-5] | 1252 |
1244 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 1253 if variant_type == "alt": |
1245 # exclude identical tag from array2, to prevent comparison to itself | 1254 sample_tag = key2[:-5] |
1246 same_tag = np.where(array2 == sample_tag) | 1255 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
1247 index_array2 = np.arange(0, len(array2), 1) | 1256 # exclude identical tag from array2, to prevent comparison to itself |
1248 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data | 1257 same_tag = np.where(array2 == sample_tag) |
1249 array2 = array2[index_withoutSame] | 1258 index_array2 = np.arange(0, len(array2), 1) |
1250 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant | 1259 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data |
1251 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1 | 1260 array2 = array2[index_withoutSame] |
1252 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2 | 1261 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant |
1253 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1 | 1262 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1 |
1254 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2 | 1263 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2 |
1255 | 1264 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1 |
1256 min_tags_list_zeros = [] | 1265 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2 |
1257 chimera_tags = [] | 1266 min_tags_list_zeros = [] |
1258 for mate_b in [False, True]: | 1267 chimera_tags = [] |
1259 i = 0 # counter, only used to see how many HDs of tags were already calculated | 1268 for mate_b in [False, True]: |
1260 if mate_b is False: # HD calculation for all a's | 1269 i = 0 # counter, only used to see how many HDs of tags were already calculated |
1261 half1_mate1 = array1_half | 1270 if mate_b is False: # HD calculation for all a's |
1262 half2_mate1 = array1_half2 | 1271 half1_mate1 = array1_half |
1263 half1_mate2 = array2_half | 1272 half2_mate1 = array1_half2 |
1264 half2_mate2 = array2_half2 | 1273 half1_mate2 = array2_half |
1265 elif mate_b is True: # HD calculation for all b's | 1274 half2_mate2 = array2_half2 |
1266 half1_mate1 = array1_half2 | 1275 elif mate_b is True: # HD calculation for all b's |
1267 half2_mate1 = array1_half | 1276 half1_mate1 = array1_half2 |
1268 half1_mate2 = array2_half2 | 1277 half2_mate1 = array1_half |
1269 half2_mate2 = array2_half | 1278 half1_mate2 = array2_half2 |
1270 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 1279 half2_mate2 = array2_half |
1271 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 1280 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
1272 min_index = np.where(dist == dist.min()) # get index of min HD | 1281 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
1273 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 1282 min_index = np.where(dist == dist.min()) # get index of min HD |
1274 min_tag_half2 = half2_mate2[min_index] | 1283 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
1275 min_tag_array2 = array2[min_index] # get whole tag with min HD | 1284 min_tag_half2 = half2_mate2[min_index] |
1276 min_value = dist.min() | 1285 min_tag_array2 = array2[min_index] # get whole tag with min HD |
1277 # calculate HD of "b" to all "b's" or "a" to all "a's" | 1286 min_value = dist.min() |
1278 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | 1287 # calculate HD of "b" to all "b's" or "a" to all "a's" |
1279 for e in min_tag_half2]) | 1288 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) |
1280 dist2 = dist_second_half.max() | 1289 for e in min_tag_half2]) |
1281 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 1290 dist2 = dist_second_half.max() |
1282 max_tag = min_tag_array2[max_index] | 1291 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
1283 | 1292 max_tag = min_tag_array2[max_index] |
1284 # tags which have identical parts: | 1293 # tags which have identical parts: |
1285 if min_value == 0 or dist2 == 0: | 1294 if min_value == 0 or dist2 == 0: |
1286 min_tags_list_zeros.append(tag) | 1295 min_tags_list_zeros.append(tag) |
1287 chimera_tags.append(max_tag) | 1296 chimera_tags.append(max_tag) |
1288 i += 1 | 1297 i += 1 |
1289 chimera_tags = [x for x in chimera_tags if x != []] | 1298 chimera_tags = [x for x in chimera_tags if x != []] |
1290 chimera_tags_new = [] | 1299 chimera_tags_new = [] |
1291 for i in chimera_tags: | 1300 for i in chimera_tags: |
1292 if len(i) > 1: | 1301 if len(i) > 1: |
1293 for t in i: | 1302 for t in i: |
1294 chimera_tags_new.append(t) | 1303 chimera_tags_new.append(t) |
1295 else: | 1304 else: |
1296 chimera_tags_new.extend(i) | 1305 chimera_tags_new.extend(i) |
1297 chimera = ", ".join(chimera_tags_new) | 1306 chimera = ", ".join(chimera_tags_new) |
1307 else: | |
1308 chimera_tags_new = [] | |
1309 chimera = "" | |
1298 else: | 1310 else: |
1299 chimera_tags_new = [] | 1311 chimera_tags_new = [] |
1300 chimera = "" | 1312 chimera = "" |
1301 | 1313 |
1302 if len(chimera_tags_new) > 0: | 1314 if len(chimera_tags_new) > 0: |
1314 if (read_pos2 == -1): | 1326 if (read_pos2 == -1): |
1315 read_pos2 = read_len_median2 = None | 1327 read_pos2 = read_len_median2 = None |
1316 if (read_pos3 == -1): | 1328 if (read_pos3 == -1): |
1317 read_pos3 = read_len_median3 = None | 1329 read_pos3 = read_len_median3 = None |
1318 line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1330 line = (var_id, tier, variant_type, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
1319 # ws1.write_row(row, 0, line) | |
1320 # csv_writer.writerow(line) | |
1321 line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1331 line2 = ("", "", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
1322 # ws1.write_row(row + 1, 0, line2) | 1332 if tier != "4": |
1323 # csv_writer.writerow(line2) | 1333 ws1.write_row(row, 0, line) |
1324 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1334 csv_writer.writerow(line) |
1325 # {'type': 'formula', | 1335 ws1.write_row(row + 1, 0, line2) |
1326 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1336 csv_writer.writerow(line2) |
1327 # 'format': format1, | 1337 if variant_type == "alt": |
1328 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1338 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1329 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1339 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1330 # {'type': 'formula', | 1340 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1331 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1341 elif variant_type == "ref": |
1332 # 'format': format3, | 1342 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1333 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1343 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1334 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1344 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1335 # {'type': 'formula', | 1345 else: |
1336 # 'criteria': '=$B${}>="3"'.format(row + 1), | 1346 change_tier_after_print.append((line, line2, trimmed_actual_high_tier)) |
1337 # 'format': format2, | 1347 |
1338 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
1339 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier)) | |
1340 row += 3 | 1348 row += 3 |
1341 | 1349 |
1342 if chimera_correction: | 1350 if chimera_correction: |
1343 chimeric_dcs_high_tiers = 0 | 1351 chimeric_dcs_high_tiers = 0 |
1344 chimeric_dcs = 0 | 1352 chimeric_dcs = 0 |
1365 | 1373 |
1366 if tier_dict_ref[key1]["tier 4"] > 0 and sum_highTiers_ref > 0: | 1374 if tier_dict_ref[key1]["tier 4"] > 0 and sum_highTiers_ref > 0: |
1367 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"] | 1375 tier_dict_ref[key1]["tier 2.5"] = tier_dict_ref[key1]["tier 4"] |
1368 tier_dict_ref[key1]["tier 4"] = 0 | 1376 tier_dict_ref[key1]["tier 4"] = 0 |
1369 correct_tier_ref = True | 1377 correct_tier_ref = True |
1370 | 1378 # print(key1, "change tiers from tier 4 to tier 2.5 for {} DCS ...".format(len(change_tier_after_print))) |
1371 for sample in change_tier_after_print: | 1379 if len(change_tier_after_print) > 0: |
1372 row_number = sample[0] | 1380 for sample in change_tier_after_print: |
1373 line1 = sample[1] | 1381 # row_number = sample[0] |
1374 line2 = sample[2] | 1382 line1 = sample[0] |
1375 actual_high_tier = sample[3] | 1383 line2 = sample[1] |
1376 current_tier = list(line1)[1] | 1384 actual_high_tier = sample[2] |
1377 | 1385 current_tier = list(line1)[1] |
1378 if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier: | 1386 if line1[2] == "alt" and correct_tier and (current_tier == "4") and actual_high_tier: |
1379 line1 = list(line1) | 1387 line1 = list(line1) |
1380 line1[1] = "2.5" | 1388 line1[1] = "2.5" |
1381 line1 = tuple(line1) | 1389 line1 = tuple(line1) |
1382 counter_tier25 += 1 | 1390 counter_tier25 += 1 |
1383 counter_tier4 -= 1 | 1391 counter_tier4 -= 1 |
1384 if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier: | 1392 if line1[2] == "ref" and correct_tier_ref and (current_tier == "4") and actual_high_tier: |
1385 line1 = list(line1) | 1393 line1 = list(line1) |
1386 line1[1] = "2.5" | 1394 line1[1] = "2.5" |
1387 line1 = tuple(line1) | 1395 line1 = tuple(line1) |
1388 counter_tier25 += 1 | 1396 counter_tier25 += 1 |
1389 counter_tier4 -= 1 | 1397 counter_tier4 -= 1 |
1390 | 1398 ws1.write_row(row, 0, line1) |
1391 ws1.write_row(row_number, 0, line1) | 1399 csv_writer.writerow(line1) |
1392 csv_writer.writerow(line1) | 1400 ws1.write_row(row + 1, 0, line2) |
1393 ws1.write_row(row_number + 1, 0, line2) | 1401 csv_writer.writerow(line2) |
1394 csv_writer.writerow(line2) | 1402 if line1[2] == "alt": |
1395 if line1[2] == "alt": | 1403 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1396 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1404 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1397 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1405 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1398 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} U{}:V{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1406 elif line1[2] == "ref": |
1399 elif line1[2] == "ref": | 1407 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1400 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), 'format': format1, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1408 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1401 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), 'format': format3, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1409 ws1.conditional_format('M{}:N{}'.format(row + 1, row + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1402 ws1.conditional_format('M{}:N{}'.format(row_number + 1, row_number + 2), {'type': 'formula', 'criteria': '=$B${}>="3"'.format(row_number + 1), 'format': format2, 'multi_range': 'M{}:N{} S{}:T{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1410 row += 3 |
1403 | 1411 |
1404 # sheet 2 | 1412 # sheet 2 |
1405 if chimera_correction: | 1413 if chimera_correction: |
1406 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1414 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC ref (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1407 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', | 1415 'tier 1.1 (alt)', 'tier 1.2 (alt)', 'tier 2.1 (alt)', 'tier 2.2 (alt)', 'tier 2.3 (alt)', 'tier 2.4 (alt)', 'tier 2.5 (alt)', |
1427 alt = mut_array[i, 3] | 1435 alt = mut_array[i, 3] |
1428 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 1436 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
1429 ref_count = cvrg_dict[key1][0] | 1437 ref_count = cvrg_dict[key1][0] |
1430 alt_count = cvrg_dict[key1][1] | 1438 alt_count = cvrg_dict[key1][1] |
1431 cvrg = ref_count + alt_count | 1439 cvrg = ref_count + alt_count |
1432 | |
1433 ref_tiers = tier_dict_ref[key1] | 1440 ref_tiers = tier_dict_ref[key1] |
1434 | |
1435 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 1441 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
1436 lst = [var_id, cvrg] | 1442 lst = [var_id, cvrg] |
1437 used_tiers = [] | 1443 used_tiers = [] |
1438 used_tiers_ref = [t for k, t in sorted(ref_tiers.items())] | 1444 used_tiers_ref = [t for k, t in sorted(ref_tiers.items())] |
1439 cum_af = [] | 1445 cum_af = [] |
1451 chimeras_all = chimera_dict[key1][1] | 1457 chimeras_all = chimera_dict[key1][1] |
1452 new_alt = sum(used_tiers[0:7]) - chimeras_all | 1458 new_alt = sum(used_tiers[0:7]) - chimeras_all |
1453 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7]))) | 1459 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:7]))) |
1454 if fraction_chimeras is None: | 1460 if fraction_chimeras is None: |
1455 fraction_chimeras = 0. | 1461 fraction_chimeras = 0. |
1456 new_cvrg = (sum(used_tiers_ref[0:7]) + sum(used_tiers[0:7])) * (1. - fraction_chimeras) | 1462 new_cvrg = (cvrg - sum(used_tiers[-10:])) * (1. - fraction_chimeras) |
1457 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)]) | 1463 lst.extend([new_cvrg, chimeras_all, safe_div(new_alt, new_cvrg)]) |
1458 lst.extend([alt_count, safe_div(alt_count, cvrg)]) | 1464 lst.extend([alt_count, safe_div(alt_count, cvrg)]) |
1459 lst.extend(used_tiers) | 1465 lst.extend(used_tiers) |
1460 lst.extend(used_tiers_ref) | 1466 lst.extend(used_tiers_ref) |
1461 # lst.extend(cum_af) | 1467 # lst.extend(cum_af) |