comparison read2mut.py @ 89:1a5974404d4f draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Tue, 25 Apr 2023 17:06:38 +0000
parents 63e4e5d9a98f
children 24f166c1dba7
comparison
legend: equal | deleted | inserted | replaced
left column (old): 88:63e4e5d9a98f | right column (new): 89:1a5974404d4f
381 ws1 = workbook.add_worksheet("Results" + str(count_sheet)) 381 ws1 = workbook.add_worksheet("Results" + str(count_sheet))
382 ws2 = workbook2.add_worksheet("Allele frequencies") 382 ws2 = workbook2.add_worksheet("Allele frequencies")
383 ws3 = workbook3.add_worksheet("Tiers") 383 ws3 = workbook3.add_worksheet("Tiers")
384 current_result_sheet = ws1 384 current_result_sheet = ws1
385 385
386
387 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green 386 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green
388 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red 387 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red
389 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow 388 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow
390 389
391 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green 390 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green
401 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', 400 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba',
402 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 401 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
403 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', 402 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
404 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 403 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
405 'in phase', 'chimeric tag') 404 'in phase', 'chimeric tag')
406 ws1.write_row(0, 0, header_line) 405 current_result_sheet.write_row(0, 0, header_line)
407 csv_writer.writerow(header_line) 406 csv_writer.writerow(header_line)
408 407
409 counter_tier11 = 0 408 counter_tier11 = 0
410 counter_tier12 = 0 409 counter_tier12 = 0
411 counter_tier21 = 0 410 counter_tier21 = 0
695 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): 694 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)):
696 if row > 1000000: 695 if row > 1000000:
697 count_sheet += 1 696 count_sheet += 1
698 ws_new = workbook.add_worksheet("Results" + str(count_sheet)) 697 ws_new = workbook.add_worksheet("Results" + str(count_sheet))
699 current_result_sheet = ws_new 698 current_result_sheet = ws_new
699 current_result_sheet.write_row(0, 0, header_line)
700 row = 1 700 row = 1
701 701
702 if variant_type == "alt": 702 if variant_type == "alt":
703 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f 703 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f
704 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f 704 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f
1293 half1_mate1 = array1_half2 1293 half1_mate1 = array1_half2
1294 half2_mate1 = array1_half 1294 half2_mate1 = array1_half
1295 half1_mate2 = array2_half2 1295 half1_mate2 = array2_half2
1296 half2_mate2 = array2_half 1296 half2_mate2 = array2_half
1297 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" 1297 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
1298 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) 1298 dist = np.array([sum(itertools.map(operator.ne, half1_mate1, c)) for c in half1_mate2])
1299 min_index = np.where(dist == dist.min()) # get index of min HD 1299 min_index = np.where(dist == dist.min()) # get index of min HD
1300 # get all "b's" of the tag or all "a's" of the tag with minimum HD 1300 # get all "b's" of the tag or all "a's" of the tag with minimum HD
1301 min_tag_half2 = half2_mate2[min_index] 1301 min_tag_half2 = half2_mate2[min_index]
1302 min_tag_array2 = array2[min_index] # get whole tag with min HD 1302 min_tag_array2 = array2[min_index] # get whole tag with min HD
1303 min_value = dist.min() 1303 min_value = dist.min()
1304 # calculate HD of "b" to all "b's" or "a" to all "a's" 1304 # calculate HD of "b" to all "b's" or "a" to all "a's"
1305 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) 1305 dist_second_half = np.array([sum(itertools.map(operator.ne, half2_mate1, e))
1306 for e in min_tag_half2]) 1306 for e in min_tag_half2])
1307 dist2 = dist_second_half.max() 1307 dist2 = dist_second_half.max()
1308 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD 1308 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD
1309 max_tag = min_tag_array2[max_index] 1309 max_tag = min_tag_array2[max_index]
1310 # tags which have identical parts: 1310 # tags which have identical parts: