Repository 'hd'
hg clone https://toolshed.g2.bx.psu.edu/repos/mheinzl/hd

Changeset 2:316fbf91dd12 (2018-05-15)
Previous changeset 1:7414792e1cb8 (2018-05-12) Next changeset 3:82eaf30dd089 (2018-05-15)
Commit message:
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
modified:
hd.py
hd.xml
b
diff -r 7414792e1cb8 -r 316fbf91dd12 hd.py
--- a/hd.py Sat May 12 04:52:34 2018 -0400
+++ b/hd.py Tue May 15 10:36:34 2018 -0400
[
b'@@ -63,10 +63,10 @@\n                       edgecolor="None",bins=range1)\n     plt.legend(loc=\'upper right\', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))\n \n-    plt.title(title_file1, fontsize=12)\n+    #plt.title(title_file1, fontsize=12)\n     plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)\n-    plt.xlabel("No. of Family Members", fontsize=12)\n-    plt.ylabel("Absolute Frequency", fontsize=12)\n+    plt.xlabel("Family size", fontsize=14)\n+    plt.ylabel("Absolute Frequency", fontsize=14)\n \n     ticks = numpy.arange(0, maximumXFS + 1, 1)\n     ticks1 = map(str, ticks)\n@@ -125,9 +125,9 @@\n     bins = counts[1]  # width of bins\n     counts = numpy.array(map(int, counts[0][5]))\n     plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)\n-    plt.title(title_file1, fontsize=12)\n-    plt.xlabel(xlabel, fontsize=12)\n-    plt.ylabel("Absolute Frequency", fontsize=12)\n+   # plt.title(title_file1, fontsize=12)\n+    plt.xlabel(xlabel, fontsize=14)\n+    plt.ylabel("Absolute Frequency", fontsize=14)\n \n     plt.grid(b=True, which=\'major\', color=\'#424242\', linestyle=\':\')\n     plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1))\n@@ -155,7 +155,7 @@\n     fig = plt.figure(figsize=(6, 8))\n     plt.subplots_adjust(bottom=0.1)\n \n-    ham = [numpy.array(min_value), sum1, sum2]  # new hd within tags\n+    ham = [sum1, sum2,numpy.array(min_value)]  # new hd within tags\n \n     maximumX = numpy.amax(numpy.concatenate(ham))\n     minimumX = numpy.amin(numpy.concatenate(ham))\n@@ -167,18 +167,18 @@\n         range1 = range(minimumX, maximumX + 2)\n \n     counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False,\n-                      label=["HD of whole tag", "tag1 - a\\nvs. tag2 - a", "tag1 - b\\nvs. tag2 - b"],\n+                      label=[ "HD a", "HD b","HD a+b"],\n                       bins=range1, color=["#585858", "#58ACFA", "#FA5858"], edgecolor=\'black\', linewidth=1)\n     plt.legend(loc=\'upper right\', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))\n     plt.suptitle(\'Hamming distances within tags\', fontsize=14)\n-    plt.title(title_file1, fontsize=12)\n-    plt.xlabel("Hamming Distance", fontsize=12)\n-    plt.ylabel("Absolute Frequency", fontsize=12)\n+    #plt.title(title_file1, fontsize=12)\n+    plt.xlabel("Hamming Distance", fontsize=14)\n+    plt.ylabel("Absolute Frequency", fontsize=14)\n     plt.grid(b=True, which=\'major\', color=\'#424242\', linestyle=\':\')\n \n \n     plt.axis((minimumX - 1, maximumX + 1, 0, maximumY * 1.1))\n-    plt.xticks(numpy.arange(minimumX - 1, maximumX + 1, 1.0))\n+    plt.xticks(numpy.arange(0, maximumX + 1, 1.0))\n     plt.ylim((0, maximumY * 1.1))\n \n     legend = "sample size= {:,} against {:,}".format(len(ham[0]), lenTags, lenTags)\n@@ -405,7 +405,7 @@\n def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name,sep):\n     output_file.write(name)\n     output_file.write("\\n")\n-    output_file.write("{}HD of whole tag;tag1-half1 vs. tag2-half1{}tag1-half2 vs. tag2-half2{}sum{}\\n".format(sep,sep,sep,sep))\n+    output_file.write("{}HD a+b;HD a{}HD b{}sum{}\\n".format(sep,sep,sep,sep))\n     for item in summary:\n         for nr in item:\n             if "HD" not in nr:\n@@ -419,8 +419,6 @@\n         output_file.write("{}{}".format(el,sep))\n     output_file.write("{}{}".format(overallSum.astype(int),sep))\n     output_file.write("\\n\\n")\n-\n-\n     \n def hamming(array1, array2):\n     res = 99 * numpy.ones(len(array1))\n@@ -441,14 +439,24 @@\n     array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1\n     array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2])  # mate2 part2\n \n-    diff11 = []\n-    relativeDiffList = []\n-    ham1 = []\n-    ham2 = []\n-    min_valueList = []\n-    min_tagsList = []\n-    diff11_zeros = []\n-    min_tagsList_zeros = []\n+    diff11 = 999 * numpy.ones(len(array2))\n+    relativeDiffList = 999 * numpy.ones(len(array2))\n+    ham1 = 999 * numpy.ones(len(array2))\n+    ham2 = 999 * numpy.ones(len(array2))\n+    min_valueList = 999 * numpy.'..b'###\n             createFileHD(summary, sumCol, overallSum, output_file,\n-                         "Hamming distance with separation after family size: file1", sep)\n+                         "Hamming distance separated by family size", sep)\n             ### FSD ###\n             createFileFSD2(summary5, sumCol5, overallSum5, output_file,\n-                           "Family size distribution with separation after hamming distances: file1", sep,\n+                           "Family size distribution separated by Hamming distance", sep,\n                            diff=False)\n \n             count = numpy.bincount(quant)\n@@ -978,31 +993,31 @@\n                 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\\n"\n                 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\\n")\n             output_file.write(\n-                "file 1: actual number of tags with min HD = {:,} (sample size by user = {:,})\\n".format(\n+                "actual number of tags with min HD = {:,} (sample size by user = {:,})\\n".format(\n                     len(numpy.concatenate(listDifference1)), len(numpy.concatenate(list1))))\n             output_file.write("length of one part of the tag = {}\\n\\n".format(len(data_array[0, 1]) / 2))\n \n             createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file,\n-                                  "Hamming distance of each half in the tag: file1", sep)\n+                                  "Hamming distance of each half in the tag", sep)\n             createFileHD(summary11, sumCol11, overallSum11, output_file,\n-                         "Absolute delta Hamming distances within the tag: file1", sep)\n+                         "Absolute delta Hamming distances within the tag", sep)\n             createFileHD(summary13, sumCol13, overallSum13, output_file,\n-                         "Relative delta Hamming distances within the tag: file1", sep)\n+                         "Relative delta Hamming distances within the tag", sep)\n \n             createFileFSD2(summary19, sumCol19, overallSum19, output_file,\n-                           "Family size distribution with separation after absolute delta Hamming distances: file1",\n+                           "Family size distribution separated by absolute delta Hamming distance",\n                            sep)\n             createFileFSD2(summary21, sumCol21, overallSum21, output_file,\n-                           "Family size distribution with separation after relative delta Hamming distances: file1",\n+                           "Family size distribution separated by relative delta Hamming distance",\n                            sep, rel=True)\n \n             if len(minHD_tags_zeros) != 0:\n                 output_file.write(\n-                    "All tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n+                    "Identifiaction of chimeric reads:\\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n                 createFileHD(summary15, sumCol15, overallSum15, output_file,\n-                             "Hamming distances of non-zero half: file1", sep)\n+                             "Hamming distances of non-zero half", sep)\n                 createFileFSD2(summary23, sumCol23, overallSum23, output_file,\n-                               "Family size distribution with separation after Hamming distances of non-zero half: file1",\n+                               "Family size distribution separated by Hamming distance of non-zero half",\n                                sep, diff=False)\n             output_file.write("\\n")\n \n'
b
diff -r 7414792e1cb8 -r 316fbf91dd12 hd.xml
--- a/hd.xml Sat May 12 04:52:34 2018 -0400
+++ b/hd.xml Tue May 15 10:36:34 2018 -0400
b
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.2">
+<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.3">
     <requirements>
         <requirement type="package" version="2.7">python</requirement>
         <requirement type="package" version="1.4">matplotlib</requirement>