comparison hd.py @ 21:9919024d7778 draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
author mheinzl
date Fri, 14 Dec 2018 05:03:24 -0500
parents b084b6a8e3ac
children 7e570ba56b83
comparison
equal deleted inserted replaced
20:b084b6a8e3ac 21:9919024d7778
74 plt.xlim((0, maximumXFS + 1)) 74 plt.xlim((0, maximumXFS + 1))
75 if len(numpy.concatenate(familySizeList1)) != 0: 75 if len(numpy.concatenate(familySizeList1)) != 0:
76 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1)) 76 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1))
77 77
78 plt.ylim((0, maximumY * 1.2)) 78 plt.ylim((0, maximumY * 1.2))
79 legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: " 79 legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: "
80 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) 80 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)
81 81
82 count = numpy.bincount(originalCounts) # original counts 82 count = numpy.bincount(originalCounts) # original counts
83 legend1 = "{}\n{}\n{:.5f}".format(max(originalCounts), count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) 83 if max(originalCounts) >= 20:
84 max_count = ">= 20"
85 else:
86 max_count = max(originalCounts)
87 legend1 = "{}\n{}\n{:.5f}".format(max_count, count[len(count) - 1], float(count[len(count) - 1]) / sum(count))
84 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) 88 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)
85 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), float(counts[0][len(counts[0]) - 1][1]) / sum(counts[0][len(counts[0]) - 1])) 89 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), float(counts[0][len(counts[0]) - 1][1]) / sum(counts[0][len(counts[0]) - 1]))
86 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) 90 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)
87 plt.grid(b=True, which='major', color='#424242', linestyle=':') 91 plt.grid(b=True, which='major', color='#424242', linestyle=':')
88 92
957 # FSD 961 # FSD
958 createFileFSD2(summary5, sumCol5, overallSum5, output_file, 962 createFileFSD2(summary5, sumCol5, overallSum5, output_file,
959 "Family size distribution separated by Hamming distance", sep, 963 "Family size distribution separated by Hamming distance", sep,
960 diff=False) 964 diff=False)
961 965
962 count = numpy.bincount(quant)
963 # output_file.write("{}{}\n".format(sep, name1)) 966 # output_file.write("{}{}\n".format(sep, name1))
964 output_file.write("\n") 967 output_file.write("\n")
965 output_file.write("max. family size:{}{}\n".format(sep, max(quant))) 968 max_fs = numpy.bincount(integers[result])
966 output_file.write("absolute frequency:{}{}\n".format(sep, count[len(count) - 1])) 969 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result])))
970 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1]))
967 output_file.write( 971 output_file.write(
968 "relative frequency:{}{}\n\n".format(sep, float(count[len(count) - 1]) / sum(count))) 972 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs)))
969 973
970 # HD within tags 974 # HD within tags
971 output_file.write( 975 output_file.write(
972 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\n" 976 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\n"
973 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\n") 977 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\n")