# HG changeset patch # User mheinzl # Date 1565785767 14400 # Node ID 321a4871564bc060048d2565e6e2f5ecd257c18a # Parent 54f0dac1c8340bfba6d909294848432caac6c8b4 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty diff -r 54f0dac1c834 -r 321a4871564b fsd.py --- a/fsd.py Wed Aug 14 04:30:58 2019 -0400 +++ b/fsd.py Wed Aug 14 08:29:27 2019 -0400 @@ -274,8 +274,7 @@ ax = fig.add_subplot(2, 1, l+1) ticks = numpy.arange(1, 22, 1) ticks1 = map(str, ticks) - if maximumX > 20: - ticks1[len(ticks1) - 1] = ">20" + ticks1[len(ticks1) - 1] = ">20" if to_plot[l] == "Relative frequencies": w = [numpy.zeros_like(data) + 1. / len(data) for data in list_to_plot2] @@ -284,7 +283,7 @@ linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8) ax.set_ylim(0, 1.07) else: - counts = ax.hist(list_to_plot2, bins=numpy.arange(minimumX, maximumX + 2), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors) + counts = ax.hist(list_to_plot2, bins=numpy.arange(1, 23), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors) ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) ax.set_xticks(numpy.array(ticks)) @@ -303,55 +302,44 @@ for l in range(len(to_plot)): ax = fig2.add_subplot(2, 1, l + 1) ticks = numpy.arange(minimumX, maximumX + 1) + ticks = numpy.arange(1, 22) ticks1 = map(str, ticks) - if maximumX > 20: - ticks1[len(ticks1) - 1] = ">20" + ticks1[len(ticks1) - 1] = ">20" reads = [] reads_rel = [] - barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot) + 1) + barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot)+1) for i in range(len(list_to_plot2)): + x = list(numpy.arange(1, 22).astype(float)) unique, c = numpy.unique(list_to_plot2[i], return_counts=True) - new_c = [] - new_unique = [] - for t in ticks: - if t not in unique: - new_c.append(0) # add zero count of not occuring - new_unique.append(t) - else: - c_idx = numpy.where(t == unique)[0] - new_c.append(c[c_idx]) - new_unique.append(unique[c_idx]) - y = numpy.array(new_unique) * numpy.array(new_c) + y = unique * c if sum(list_to_plot_original[i] > 20) > 0: y[len(y) - 1] = sum(list_to_plot_original[i][list_to_plot_original[i] > 20]) - # y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))] + y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))] reads.append(y) reads_rel.append(list(numpy.float_(y)) / sum(y)) + #x = [xi + barWidth for xi in x] - x = list(numpy.arange(numpy.amin(unique), numpy.amax(unique) + 1).astype(float)) if len(list_to_plot2) == 1: x = [xi * 0.5 for xi in x] w = 0.4 else: x = [xi + barWidth for xi in x] w = 1./(len(list_to_plot) + 1) - if to_plot[l] == "Relative frequencies": counts2_rel = ax.bar(x, list(numpy.float_(y)) / numpy.sum(y), align="edge", width=w, edgecolor="black", label=label[i],linewidth=1, alpha=0.7, color=colors[i]) ax.set_ylim(0, 1.07) else: - y = list(y.reshape((len(y)))) - + #y = list(y.reshape((len(y)))) counts2 = ax.bar(x, y, align="edge", width=w, edgecolor="black", label=label[i], linewidth=1, alpha=0.7, color=colors[i]) - if i == len(list_to_plot2): + if i == len(list_to_plot2)-1: barWidth += 1. / (len(list_to_plot) + 1) + 1. / (len(list_to_plot) + 1) else: barWidth += 1. / (len(list_to_plot) + 1) - + if to_plot[l] == "Absolute frequencies": ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) else: @@ -407,6 +395,7 @@ # output_file.write("{}sum".format(sep)) output_file.write("\n") j = 0 + for fs in bins: if fs == 21: fs = ">20" @@ -421,7 +410,7 @@ output_file.write("\n") j += 1 output_file.write("sum{}".format(sep)) - if len(label) == 1: + if len(label) == 1: output_file.write("{}{}".format(int(sum(numpy.concatenate(reads))), sep)) else: for i in reads: @@ -494,14 +483,13 @@ fig = plt.figure() plt.subplots_adjust(left=0.12, right=0.97, bottom=0.3, top=0.94, hspace=0) - counts = plt.hist(list1, bins=numpy.arange(minimumX, maximumX + 2), stacked=True, label=["duplex", "ab", "ba"], + counts = plt.hist(list1, bins=numpy.arange(1, 23), stacked=True, label=["duplex", "ab", "ba"], edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"], rwidth=0.8) # tick labels of x axis ticks = numpy.arange(1, 22, 1) ticks1 = map(str, ticks) - if maximumX > 20: - ticks1[len(ticks1) - 1] = ">20" + ticks1[len(ticks1) - 1] = ">20" plt.xticks(numpy.array(ticks), ticks1) singl = counts[0][2][0] # singletons last = counts[0][2][len(counts[0][0]) - 1] # large families @@ -518,10 +506,10 @@ legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \ntotal nr. of tags=" plt.text(0.1, 0.09, legend, size=10, transform=plt.gcf().transFigure) - legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags))) + legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags)), (len(ab) + len(ba))) plt.text(0.23, 0.09, legend, size=10, transform=plt.gcf().transFigure) - legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o))) + legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), (sum(ab_o) + sum(ba_o))) plt.text(0.38, 0.09, legend5, size=10, transform=plt.gcf().transFigure) legend = "rel. freq. of tags\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags))) diff -r 54f0dac1c834 -r 321a4871564b fsd.xml --- a/fsd.xml Wed Aug 14 04:30:58 2019 -0400 +++ b/fsd.xml Wed Aug 14 08:29:27 2019 -0400 @@ -1,6 +1,6 @@ - + : Family Size Distribution of duplex sequencing tags python diff -r 54f0dac1c834 -r 321a4871564b test-data/fsd_output1.pdf Binary file test-data/fsd_output1.pdf has changed diff -r 54f0dac1c834 -r 321a4871564b test-data/fsd_output2.pdf Binary file test-data/fsd_output2.pdf has changed