Previous changeset 0:239c4448a163 (2018-05-10) Next changeset 2:316fbf91dd12 (2018-05-15) |
Commit message:
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce |
modified:
hd.py hd.xml |
b |
diff -r 239c4448a163 -r 7414792e1cb8 hd.py --- a/hd.py Thu May 10 07:30:27 2018 -0400 +++ b/hd.py Sat May 12 04:52:34 2018 -0400 |
[ |
b'@@ -24,16 +24,404 @@\n import cPickle as pickle\n from multiprocessing.pool import Pool\n from functools import partial\n-from HDAnalysis_plots.plot_HDwithFSD import plotHDwithFSD\n-from HDAnalysis_plots.plot_FSDwithHD2 import plotFSDwithHD2\n-from HDAnalysis_plots.plot_HDwithinSeq_Sum2 import plotHDwithinSeq_Sum2\n-from HDAnalysis_plots.table_HD import createTableHD, createFileHD, createTableHDwithTags, createFileHDwithinTag\n-from HDAnalysis_plots.table_FSD import createTableFSD2, createFileFSD2\n+#from HDAnalysis_plots.plot_HDwithFSD import plotHDwithFSD\n+#from HDAnalysis_plots.plot_FSDwithHD2 import plotFSDwithHD2\n+#from HDAnalysis_plots.plot_HDwithinSeq_Sum2 import plotHDwithinSeq_Sum2\n+#from HDAnalysis_plots.table_HD import createTableHD, createFileHD, createTableHDwithTags, createFileHDwithinTag\n+#from HDAnalysis_plots.table_FSD import createTableFSD2, createFileFSD2\n import argparse\n import sys\n import os\n from matplotlib.backends.backend_pdf import PdfPages\n+from collections import Counter\n \n+def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, quant,\n+ title_file1, subtitle, pdf, relative=False, diff = True):\n+ if diff is False:\n+ colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]\n+ labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8","HD>8"]\n+ else:\n+ colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"]\n+ if relative is True:\n+ labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"]\n+ else:\n+ labels = ["d=0","d=1", "d=2", "d=3", "d=4", "d=5-8","d>8"]\n+\n+ fig = plt.figure(figsize=(6, 7))\n+ ax = fig.add_subplot(111)\n+ plt.subplots_adjust(bottom=0.1)\n+ p1 = numpy.bincount(numpy.concatenate((familySizeList1)))\n+ maximumY = numpy.amax(p1)\n+\n+ if len(range(minimumXFS, maximumXFS)) == 0:\n+ range1 = range(minimumXFS - 1, minimumXFS + 2)\n+ else:\n+ range1 = range(0, maximumXFS + 2)\n+ counts = plt.hist(familySizeList1, label=labels,\n+ color=colors, stacked=True,\n+ rwidth=0.8,alpha=1, align="left",\n+ edgecolor="None",bins=range1)\n+ plt.legend(loc=\'upper right\', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))\n+\n+ plt.title(title_file1, fontsize=12)\n+ plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)\n+ plt.xlabel("No. of Family Members", fontsize=12)\n+ plt.ylabel("Absolute Frequency", fontsize=12)\n+\n+ ticks = numpy.arange(0, maximumXFS + 1, 1)\n+ ticks1 = map(str, ticks)\n+ if maximumXFS >= 20:\n+ ticks1[len(ticks1) - 1] = ">=20"\n+ plt.xticks(numpy.array(ticks), ticks1)\n+ [l.set_visible(False) for (i, l) in enumerate(ax.get_xticklabels()) if i % 5 != 0]\n+\n+ plt.xlim((0, maximumXFS + 1))\n+ if len(numpy.concatenate(familySizeList1)) != 0:\n+ plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1))\n+\n+ plt.ylim((0, maximumY * 1.2))\n+ legend = "\\nmax. family size: \\nabsolute frequency: \\nrelative frequency: "\n+ plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)\n+\n+ count = numpy.bincount(quant) # original counts\n+ legend1 = "{}\\n{}\\n{:.5f}" \\\n+ .format(max(quant), count[len(count) - 1], float(count[len(count) - 1]) / sum(count))\n+ plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)\n+ legend3 = "singletons\\n{:,}\\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]),\n+ float(counts[0][len(counts[0]) - 1][1]) / sum(\n+ counts[0][len(counts[0]) - 1]))\n+ plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)\n+ plt.grid(b=True, which=\'major\', color=\'#424242\', linestyle=\':\')\n+\n+ pdf.savefig(fig, bbox_inches="tight")\n+ plt.close("all")\n+ \n+def plotHDwithFSD(list1,maximumX,minimumX, subtitle, lenTags, title_file1,pdf,\n+ '..b'e:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 2] = j[1]\n+\n+ if state == 4:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 3] = j[1]\n+\n+ if state == 5:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 4] = j[1]\n+\n+ if state == 6:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 5] = j[1]\n+ state = state + 1\n+\n+ sumRow = count.sum(axis=1)\n+ sumCol = count.sum(axis=0)\n+ first = ["{}{}".format(row_label,i) for i in uniqueHD]\n+ final = numpy.column_stack((first, count, sumRow))\n+\n+ return (final, sumCol)\n+\n+def createTableHDwithTags(list1):\n+ selfAB = numpy.concatenate(list1)\n+ uniqueHD = numpy.unique(selfAB)\n+ nr = numpy.arange(0, len(uniqueHD), 1)\n+ count = numpy.zeros((len(uniqueHD), 3))\n+\n+ state = 1\n+ for i in list1:\n+ counts = list(Counter(i).items())\n+ hd = [item[0] for item in counts]\n+ c = [item[1] for item in counts]\n+ table = numpy.column_stack((hd, c))\n+ if len(table) == 0:\n+ state = state + 1\n+ continue\n+ else:\n+ if state == 1:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 0] = j[1]\n+ if state == 2:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 1] = j[1]\n+\n+ if state == 3:\n+ for i, l in zip(uniqueHD, nr):\n+ for j in table:\n+ if j[0] == uniqueHD[l]:\n+ count[l, 2] = j[1]\n+ state = state + 1\n+\n+ sumRow = count.sum(axis=1)\n+ sumCol = count.sum(axis=0)\n+ first = ["HD={}".format(i) for i in uniqueHD]\n+ final = numpy.column_stack((first, count, sumRow))\n+\n+ return (final, sumCol)\n+\n+def createFileHD(summary, sumCol, overallSum, output_file, name,sep):\n+ output_file.write(name)\n+ output_file.write("\\n")\n+ output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\\n".format(sep,sep,sep,sep,sep,sep,sep,sep))\n+ for item in summary:\n+ for nr in item:\n+ if "HD" not in nr and "diff" not in nr:\n+ nr = nr.astype(float)\n+ nr = nr.astype(int)\n+ output_file.write("{}{}".format(nr,sep))\n+ output_file.write("\\n")\n+ output_file.write("sum{}".format(sep))\n+ sumCol = map(int, sumCol)\n+ for el in sumCol:\n+ output_file.write("{}{}".format(el,sep))\n+ output_file.write("{}{}".format(overallSum.astype(int),sep))\n+ output_file.write("\\n\\n")\n+\n+def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name,sep):\n+ output_file.write(name)\n+ output_file.write("\\n")\n+ output_file.write("{}HD of whole tag;tag1-half1 vs. tag2-half1{}tag1-half2 vs. tag2-half2{}sum{}\\n".format(sep,sep,sep,sep))\n+ for item in summary:\n+ for nr in item:\n+ if "HD" not in nr:\n+ nr = nr.astype(float)\n+ nr = nr.astype(int)\n+ output_file.write("{}{}".format(nr,sep))\n+ output_file.write("\\n")\n+ output_file.write("sum{}".format(sep))\n+ sumCol = map(int, sumCol)\n+ for el in sumCol:\n+ output_file.write("{}{}".format(el,sep))\n+ output_file.write("{}{}".format(overallSum.astype(int),sep))\n+ output_file.write("\\n\\n")\n+\n+\n+ \n def hamming(array1, array2):\n res = 99 * numpy.ones(len(array1))\n i = 0\n' |
b |
diff -r 239c4448a163 -r 7414792e1cb8 hd.xml --- a/hd.xml Thu May 10 07:30:27 2018 -0400 +++ b/hd.xml Sat May 12 04:52:34 2018 -0400 |
b |
@@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.1"> +<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.2"> <requirements> <requirement type="package" version="2.7">python</requirement> <requirement type="package" version="1.4">matplotlib</requirement> |