Mercurial > repos > mheinzl > fsd
comparison fsd.py @ 42:321a4871564b draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
author | mheinzl |
---|---|
date | Wed, 14 Aug 2019 08:29:27 -0400 |
parents | 54f0dac1c834 |
children | f72593bcc8ee |
comparison
equal
deleted
inserted
replaced
41:54f0dac1c834 | 42:321a4871564b |
---|---|
272 | 272 |
273 for l in range(len(to_plot)): | 273 for l in range(len(to_plot)): |
274 ax = fig.add_subplot(2, 1, l+1) | 274 ax = fig.add_subplot(2, 1, l+1) |
275 ticks = numpy.arange(1, 22, 1) | 275 ticks = numpy.arange(1, 22, 1) |
276 ticks1 = map(str, ticks) | 276 ticks1 = map(str, ticks) |
277 if maximumX > 20: | 277 ticks1[len(ticks1) - 1] = ">20" |
278 ticks1[len(ticks1) - 1] = ">20" | |
279 | 278 |
280 if to_plot[l] == "Relative frequencies": | 279 if to_plot[l] == "Relative frequencies": |
281 w = [numpy.zeros_like(data) + 1. / len(data) for data in list_to_plot2] | 280 w = [numpy.zeros_like(data) + 1. / len(data) for data in list_to_plot2] |
282 counts_rel = ax.hist(list_to_plot2, weights=w, | 281 counts_rel = ax.hist(list_to_plot2, weights=w, |
283 bins=numpy.arange(1, 23), stacked=False, edgecolor="black", | 282 bins=numpy.arange(1, 23), stacked=False, edgecolor="black", |
284 linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8) | 283 linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8) |
285 ax.set_ylim(0, 1.07) | 284 ax.set_ylim(0, 1.07) |
286 else: | 285 else: |
287 counts = ax.hist(list_to_plot2, bins=numpy.arange(minimumX, maximumX + 2), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors) | 286 counts = ax.hist(list_to_plot2, bins=numpy.arange(1, 23), stacked=False, edgecolor="black", linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8, color=colors) |
288 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) | 287 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) |
289 | 288 |
290 ax.set_xticks(numpy.array(ticks)) | 289 ax.set_xticks(numpy.array(ticks)) |
291 ax.set_xticklabels(ticks1) | 290 ax.set_xticklabels(ticks1) |
292 | 291 |
301 | 300 |
302 fig2.suptitle('Family Size Distribution (PE reads)', fontsize=14) | 301 fig2.suptitle('Family Size Distribution (PE reads)', fontsize=14) |
303 for l in range(len(to_plot)): | 302 for l in range(len(to_plot)): |
304 ax = fig2.add_subplot(2, 1, l + 1) | 303 ax = fig2.add_subplot(2, 1, l + 1) |
305 ticks = numpy.arange(minimumX, maximumX + 1) | 304 ticks = numpy.arange(minimumX, maximumX + 1) |
305 ticks = numpy.arange(1, 22) | |
306 ticks1 = map(str, ticks) | 306 ticks1 = map(str, ticks) |
307 if maximumX > 20: | 307 ticks1[len(ticks1) - 1] = ">20" |
308 ticks1[len(ticks1) - 1] = ">20" | |
309 reads = [] | 308 reads = [] |
310 reads_rel = [] | 309 reads_rel = [] |
311 | 310 |
312 barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot) + 1) | 311 barWidth = 0 - (len(list_to_plot)+1)/2 * 1./(len(list_to_plot)+1) |
313 | 312 |
314 for i in range(len(list_to_plot2)): | 313 for i in range(len(list_to_plot2)): |
314 x = list(numpy.arange(1, 22).astype(float)) | |
315 unique, c = numpy.unique(list_to_plot2[i], return_counts=True) | 315 unique, c = numpy.unique(list_to_plot2[i], return_counts=True) |
316 new_c = [] | 316 y = unique * c |
317 new_unique = [] | |
318 for t in ticks: | |
319 if t not in unique: | |
320 new_c.append(0) # add zero count of not occuring | |
321 new_unique.append(t) | |
322 else: | |
323 c_idx = numpy.where(t == unique)[0] | |
324 new_c.append(c[c_idx]) | |
325 new_unique.append(unique[c_idx]) | |
326 y = numpy.array(new_unique) * numpy.array(new_c) | |
327 if sum(list_to_plot_original[i] > 20) > 0: | 317 if sum(list_to_plot_original[i] > 20) > 0: |
328 y[len(y) - 1] = sum(list_to_plot_original[i][list_to_plot_original[i] > 20]) | 318 y[len(y) - 1] = sum(list_to_plot_original[i][list_to_plot_original[i] > 20]) |
329 # y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))] | 319 y = [y[x[idx] == unique][0] if x[idx] in unique else 0 for idx in range(len(x))] |
330 reads.append(y) | 320 reads.append(y) |
331 reads_rel.append(list(numpy.float_(y)) / sum(y)) | 321 reads_rel.append(list(numpy.float_(y)) / sum(y)) |
332 | 322 #x = [xi + barWidth for xi in x] |
333 x = list(numpy.arange(numpy.amin(unique), numpy.amax(unique) + 1).astype(float)) | 323 |
334 if len(list_to_plot2) == 1: | 324 if len(list_to_plot2) == 1: |
335 x = [xi * 0.5 for xi in x] | 325 x = [xi * 0.5 for xi in x] |
336 w = 0.4 | 326 w = 0.4 |
337 else: | 327 else: |
338 x = [xi + barWidth for xi in x] | 328 x = [xi + barWidth for xi in x] |
339 w = 1./(len(list_to_plot) + 1) | 329 w = 1./(len(list_to_plot) + 1) |
340 | |
341 if to_plot[l] == "Relative frequencies": | 330 if to_plot[l] == "Relative frequencies": |
342 counts2_rel = ax.bar(x, list(numpy.float_(y)) / numpy.sum(y), align="edge", width=w, | 331 counts2_rel = ax.bar(x, list(numpy.float_(y)) / numpy.sum(y), align="edge", width=w, |
343 edgecolor="black", label=label[i],linewidth=1, alpha=0.7, color=colors[i]) | 332 edgecolor="black", label=label[i],linewidth=1, alpha=0.7, color=colors[i]) |
344 ax.set_ylim(0, 1.07) | 333 ax.set_ylim(0, 1.07) |
345 else: | 334 else: |
346 y = list(y.reshape((len(y)))) | 335 #y = list(y.reshape((len(y)))) |
347 | |
348 counts2 = ax.bar(x, y, align="edge", width=w, edgecolor="black", label=label[i], linewidth=1, | 336 counts2 = ax.bar(x, y, align="edge", width=w, edgecolor="black", label=label[i], linewidth=1, |
349 alpha=0.7, color=colors[i]) | 337 alpha=0.7, color=colors[i]) |
350 if i == len(list_to_plot2): | 338 if i == len(list_to_plot2)-1: |
351 barWidth += 1. / (len(list_to_plot) + 1) + 1. / (len(list_to_plot) + 1) | 339 barWidth += 1. / (len(list_to_plot) + 1) + 1. / (len(list_to_plot) + 1) |
352 else: | 340 else: |
353 barWidth += 1. / (len(list_to_plot) + 1) | 341 barWidth += 1. / (len(list_to_plot) + 1) |
354 | 342 |
355 if to_plot[l] == "Absolute frequencies": | 343 if to_plot[l] == "Absolute frequencies": |
356 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) | 344 ax.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(0.9, 1)) |
357 else: | 345 else: |
358 ax.set_xlabel("Family size", fontsize=14) | 346 ax.set_xlabel("Family size", fontsize=14) |
359 | 347 |
405 for i in label: | 393 for i in label: |
406 output_file.write("{}{}".format(sep, i)) | 394 output_file.write("{}{}".format(sep, i)) |
407 # output_file.write("{}sum".format(sep)) | 395 # output_file.write("{}sum".format(sep)) |
408 output_file.write("\n") | 396 output_file.write("\n") |
409 j = 0 | 397 j = 0 |
398 | |
410 for fs in bins: | 399 for fs in bins: |
411 if fs == 21: | 400 if fs == 21: |
412 fs = ">20" | 401 fs = ">20" |
413 else: | 402 else: |
414 fs = "={}".format(fs) | 403 fs = "={}".format(fs) |
419 for n in range(len(label)): | 408 for n in range(len(label)): |
420 output_file.write("{}{}".format(int(reads[n][j]), sep)) | 409 output_file.write("{}{}".format(int(reads[n][j]), sep)) |
421 output_file.write("\n") | 410 output_file.write("\n") |
422 j += 1 | 411 j += 1 |
423 output_file.write("sum{}".format(sep)) | 412 output_file.write("sum{}".format(sep)) |
424 if len(label) == 1: | 413 if len(label) == 1: |
425 output_file.write("{}{}".format(int(sum(numpy.concatenate(reads))), sep)) | 414 output_file.write("{}{}".format(int(sum(numpy.concatenate(reads))), sep)) |
426 else: | 415 else: |
427 for i in reads: | 416 for i in reads: |
428 output_file.write("{}{}".format(int(sum(i)), sep)) | 417 output_file.write("{}{}".format(int(sum(i)), sep)) |
429 output_file.write("\n") | 418 output_file.write("\n") |
492 duplTags_FS3_BA_o = duplTagsBA_o[(duplTags_o >= 3) & (duplTagsBA_o >= 3)] # ba+ab with FS>=3 | 481 duplTags_FS3_BA_o = duplTagsBA_o[(duplTags_o >= 3) & (duplTagsBA_o >= 3)] # ba+ab with FS>=3 |
493 duplTags_double_FS3_o = sum(duplTags_FS3_o) + sum(duplTags_FS3_BA_o) # both ab and ba strands with FS>=3 | 482 duplTags_double_FS3_o = sum(duplTags_FS3_o) + sum(duplTags_FS3_BA_o) # both ab and ba strands with FS>=3 |
494 | 483 |
495 fig = plt.figure() | 484 fig = plt.figure() |
496 plt.subplots_adjust(left=0.12, right=0.97, bottom=0.3, top=0.94, hspace=0) | 485 plt.subplots_adjust(left=0.12, right=0.97, bottom=0.3, top=0.94, hspace=0) |
497 counts = plt.hist(list1, bins=numpy.arange(minimumX, maximumX + 2), stacked=True, label=["duplex", "ab", "ba"], | 486 counts = plt.hist(list1, bins=numpy.arange(1, 23), stacked=True, label=["duplex", "ab", "ba"], |
498 edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"], | 487 edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"], |
499 rwidth=0.8) | 488 rwidth=0.8) |
500 # tick labels of x axis | 489 # tick labels of x axis |
501 ticks = numpy.arange(1, 22, 1) | 490 ticks = numpy.arange(1, 22, 1) |
502 ticks1 = map(str, ticks) | 491 ticks1 = map(str, ticks) |
503 if maximumX > 20: | 492 ticks1[len(ticks1) - 1] = ">20" |
504 ticks1[len(ticks1) - 1] = ">20" | |
505 plt.xticks(numpy.array(ticks), ticks1) | 493 plt.xticks(numpy.array(ticks), ticks1) |
506 singl = counts[0][2][0] # singletons | 494 singl = counts[0][2][0] # singletons |
507 last = counts[0][2][len(counts[0][0]) - 1] # large families | 495 last = counts[0][2][len(counts[0][0]) - 1] # large families |
508 if log_axis: | 496 if log_axis: |
509 plt.yscale('log') | 497 plt.yscale('log') |
516 | 504 |
517 # extra information beneath the plot | 505 # extra information beneath the plot |
518 legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \ntotal nr. of tags=" | 506 legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \ntotal nr. of tags=" |
519 plt.text(0.1, 0.09, legend, size=10, transform=plt.gcf().transFigure) | 507 plt.text(0.1, 0.09, legend, size=10, transform=plt.gcf().transFigure) |
520 | 508 |
521 legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags))) | 509 legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags)), (len(ab) + len(ba))) |
522 plt.text(0.23, 0.09, legend, size=10, transform=plt.gcf().transFigure) | 510 plt.text(0.23, 0.09, legend, size=10, transform=plt.gcf().transFigure) |
523 | 511 |
524 legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o))) | 512 legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,} ({:,})".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), (sum(ab_o) + sum(ba_o))) |
525 plt.text(0.38, 0.09, legend5, size=10, transform=plt.gcf().transFigure) | 513 plt.text(0.38, 0.09, legend5, size=10, transform=plt.gcf().transFigure) |
526 | 514 |
527 legend = "rel. freq. of tags\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags))) | 515 legend = "rel. freq. of tags\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags))) |
528 plt.text(0.54, 0.09, legend, size=10, transform=plt.gcf().transFigure) | 516 plt.text(0.54, 0.09, legend, size=10, transform=plt.gcf().transFigure) |
529 | 517 |