diff fsd.py @ 42:321a4871564b draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
author mheinzl
date Wed, 14 Aug 2019 08:29:27 -0400
parents 54f0dac1c834
children f72593bcc8ee
line wrap: on
line diff
--- a/fsd.py	Wed Aug 14 04:30:58 2019 -0400
+++ b/fsd.py	Wed Aug 14 08:29:27 2019 -0400
@@ -274,8 +274,7 @@
             ax = fig.add_subplot(2, 1, l+1)
             ticks = numpy.arange(1, 22, 1)
             ticks1 = map(str, ticks)
-            if maximumX > 20:
-                ticks1[len(ticks1) - 1] = ">20"
+            ticks1[len(ticks1) - 1] = ">20"
 
             if to_plot[l] == "Relative frequencies":
                 w = [numpy.zeros_like(data) + 1. / len(data) for data in list_to_plot2]
@@ -284,7 +283,7 @@
                                      linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8)
                 ax.set_ylim(0, 1.07)
             else:
-                counts = ax.hist(list_to_plot2, bins=numpy.arange(minimumX, maximumX + 2), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors)
+                counts = ax.hist(list_to_plot2, bins=numpy.arange(1, 23), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors)
                 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1))
 
             ax.set_xticks(numpy.array(ticks))
@@ -303,55 +302,44 @@
         for l in range(len(to_plot)):
             ax = fig2.add_subplot(2, 1, l + 1)
             ticks = numpy.arange(minimumX, maximumX + 1)
+            ticks = numpy.arange(1, 22)
             ticks1 = map(str, ticks)
-            if maximumX > 20:
-                ticks1[len(ticks1) - 1] = ">20"
+            ticks1[len(ticks1) - 1] = ">20"
             reads = []
             reads_rel = []
 
-            barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot) + 1)
+            barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot)+1)
 
             for i in range(len(list_to_plot2)):
+                x = list(numpy.arange(1, 22).astype(float))
                 unique, c = numpy.unique(list_to_plot2[i], return_counts=True)
-                new_c = []
-                new_unique = []
-                for t in ticks:
-                    if t not in unique:
-                        new_c.append(0)  # add zero count of not occuring
-                        new_unique.append(t)
-                    else:
-                        c_idx = numpy.where(t == unique)[0]
-                        new_c.append(c[c_idx])
-                        new_unique.append(unique[c_idx])
-                y = numpy.array(new_unique) * numpy.array(new_c)
+                y = unique * c
                 if sum(list_to_plot_original[i] > 20) > 0:
                     y[len(y) - 1] = sum(list_to_plot_original[i][list_to_plot_original[i] > 20])
-                # y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))]
+                y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))]
                 reads.append(y)
                 reads_rel.append(list(numpy.float_(y)) / sum(y))
+                #x = [xi + barWidth for xi in x]
 
-                x = list(numpy.arange(numpy.amin(unique), numpy.amax(unique) + 1).astype(float))
                 if len(list_to_plot2) == 1:
                     x = [xi * 0.5 for xi in x]
                     w = 0.4
                 else:
                     x = [xi + barWidth for xi in x]
                     w = 1./(len(list_to_plot) + 1)
-
                 if to_plot[l] == "Relative frequencies":
                     counts2_rel = ax.bar(x, list(numpy.float_(y)) / numpy.sum(y), align="edge", width=w,
                                          edgecolor="black", label=label[i],linewidth=1, alpha=0.7, color=colors[i])
                     ax.set_ylim(0, 1.07)
                 else:
-                    y = list(y.reshape((len(y))))
-                    
+                    #y = list(y.reshape((len(y))))
                     counts2 = ax.bar(x, y, align="edge", width=w, edgecolor="black", label=label[i], linewidth=1,
                                      alpha=0.7, color=colors[i])
-                if i == len(list_to_plot2):
+                if i == len(list_to_plot2)-1:
                     barWidth += 1. / (len(list_to_plot) + 1) + 1. / (len(list_to_plot) + 1)
                 else:
                     barWidth += 1. / (len(list_to_plot) + 1)
-
+                    
             if to_plot[l] == "Absolute frequencies":
                 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1))
             else:
@@ -407,6 +395,7 @@
         # output_file.write("{}sum".format(sep))
         output_file.write("\n")
         j = 0
+        
         for fs in bins:
             if fs == 21:
                 fs = ">20"
@@ -421,7 +410,7 @@
             output_file.write("\n")
             j += 1
         output_file.write("sum{}".format(sep))
-        if len(label) == 1:
+        if len(label) == 1:            
             output_file.write("{}{}".format(int(sum(numpy.concatenate(reads))), sep))
         else:
             for i in reads:
@@ -494,14 +483,13 @@
 
             fig = plt.figure()
             plt.subplots_adjust(left=0.12, right=0.97, bottom=0.3, top=0.94, hspace=0)
-            counts = plt.hist(list1, bins=numpy.arange(minimumX, maximumX + 2), stacked=True, label=["duplex", "ab", "ba"],
+            counts = plt.hist(list1, bins=numpy.arange(1, 23), stacked=True, label=["duplex", "ab", "ba"],
                               edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"],
                               rwidth=0.8)
             # tick labels of x axis
             ticks = numpy.arange(1, 22, 1)
             ticks1 = map(str, ticks)
-            if maximumX > 20:
-                ticks1[len(ticks1) - 1] = ">20"
+            ticks1[len(ticks1) - 1] = ">20"
             plt.xticks(numpy.array(ticks), ticks1)
             singl = counts[0][2][0]  # singletons
             last = counts[0][2][len(counts[0][0]) - 1]  # large families
@@ -518,10 +506,10 @@
             legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \ntotal nr. of tags="
             plt.text(0.1, 0.09, legend, size=10, transform=plt.gcf().transFigure)
 
-            legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags)))
+            legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags)), (len(ab) + len(ba)))
             plt.text(0.23, 0.09, legend, size=10, transform=plt.gcf().transFigure)
 
-            legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)))
+            legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), (sum(ab_o) + sum(ba_o)))
             plt.text(0.38, 0.09, legend5, size=10, transform=plt.gcf().transFigure)
 
             legend = "rel. freq. of tags\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags)))