comparison hd.py @ 8:e2596a4e1c56 draft

planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
author mheinzl
date Tue, 15 May 2018 11:15:22 -0400
parents 6f124cc95838
children c81bc96bea1c
comparison
equal deleted inserted replaced
7:6f124cc95838 8:e2596a4e1c56
437 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 437 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2
438 438
439 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 439 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1
440 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 440 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2
441 441
442 diff11 = 999 * numpy.ones(len(array2)) 442 #diff11 = 999 * numpy.ones(len(array2))
443 relativeDiffList = 999 * numpy.ones(len(array2)) 443 #relativeDiffList = 999 * numpy.ones(len(array2))
444 ham1 = 999 * numpy.ones(len(array2)) 444 #ham1 = 999 * numpy.ones(len(array2))
445 ham2 = 999 * numpy.ones(len(array2)) 445 #ham2 = 999 * numpy.ones(len(array2))
446 min_valueList = 999 * numpy.ones(len(array2)) 446 #min_valueList = 999 * numpy.ones(len(array2))
447 min_tagsList = 999 * numpy.ones(len(array2)) 447 #min_tagsList = 999 * numpy.ones(len(array2))
448 diff11_zeros = 999 * numpy.ones(len(array2)) 448 #diff11_zeros = 999 * numpy.ones(len(array2))
449 min_tagsList_zeros = 999 * numpy.ones(len(array2)) 449 #min_tagsList_zeros = 999 * numpy.ones(len(array2))
450 450
451 451
452 #diff11 = [] 452 diff11 = []
453 #relativeDiffList = [] 453 relativeDiffList = []
454 #ham1 = [] 454 ham1 = []
455 #ham2 = [] 455 ham2 = []
456 #min_valueList = [] 456 min_valueList = []
457 #min_tagsList = [] 457 min_tagsList = []
458 #diff11_zeros = [] 458 diff11_zeros = []
459 #min_tagsList_zeros = [] 459 min_tagsList_zeros = []
460 counter = 0 # counter, only used to see how many HDs of tags were already calculated 460 i = 0 # counter, only used to see how many HDs of tags were already calculated
461 if mate_b is False: # HD calculation for all a's 461 if mate_b is False: # HD calculation for all a's
462 half1_mate1 = array1_half 462 half1_mate1 = array1_half
463 half2_mate1 = array1_half2 463 half2_mate1 = array1_half2
464 half1_mate2 = array2_half 464 half1_mate2 = array2_half
465 half2_mate2 = array2_half2 465 half2_mate2 = array2_half2
494 d = d_2 494 d = d_2
495 d2 = d_1 495 d2 = d_1
496 else: # half1, corrects the variable of the HD from both halfs if it is a or b 496 else: # half1, corrects the variable of the HD from both halfs if it is a or b
497 d = d_1 497 d = d_1
498 d2 = d_2 498 d2 = d_2
499 min_valueList[counter] = d + d2 499 min_valueList.append(d + d2)
500 min_tagsList[counter] = tag 500 min_tagsList.append(tag)
501 ham1[counter] = d 501 ham1.append.append(d)
502 ham2[counter] = d2 502 ham2.append.append(d2)
503 difference1 = abs(d - d2) 503 difference1 = abs(d - d2)
504 diff11[counter] = difference1 504 diff11.append(difference1)
505 rel_difference = round(float(difference1) / (d + d2), 1) 505 rel_difference = round(float(difference1) / (d + d2), 1)
506 relativeDiffList[counter] = rel_difference 506 relativeDiffList.append(rel_difference)
507 507
508 #### tags which have identical parts: 508 #### tags which have identical parts:
509 if d == 0 or d2 == 0: 509 if d == 0 or d2 == 0:
510 min_tagsList_zeros[counter] = tag 510 min_tagsList_zeros.append(tag)
511 difference1_zeros = abs(d - d2) 511 difference1_zeros = abs(d - d2)
512 diff11_zeros[counter] = difference1_zeros 512 diff11_zeros.append(difference1_zeros)
513 513 i += 1
514 counter += 1
515 514
516 #print(i) 515 #print(i)
517 diff11 = [st for st in diff11 if st != 999] 516 diff11 = [st for st in diff11 if st != 999]
518 ham1 = [st for st in ham1 if st != 999] 517 ham1 = [st for st in ham1 if st != 999]
519 ham2 = [st for st in ham2 if st != 999] 518 ham2 = [st for st in ham2 if st != 999]
710 files = [file1] 709 files = [file1]
711 name1 = name1.split(".tabular")[0] 710 name1 = name1.split(".tabular")[0]
712 names = [name1] 711 names = [name1]
713 pdf_files = [title_savedFile_pdf] 712 pdf_files = [title_savedFile_pdf]
714 csv_files = [title_savedFile_csv] 713 csv_files = [title_savedFile_csv]
714
715 print(type(onlyDuplicates))
716 print(onlyDuplicates)
715 717
716 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files): 718 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files):
717 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf: 719 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf:
718 print("dataset: ", name_file) 720 print("dataset: ", name_file)
719 integers, data_array = readFileReferenceFree(f) 721 integers, data_array = readFileReferenceFree(f)