Repository 'hd'
hg clone https://toolshed.g2.bx.psu.edu/repos/mheinzl/hd

Changeset 14:883e6381ba29 (2018-05-23)
Previous changeset 13:5b0a95f205ad (2018-05-15) Next changeset 15:cf7874bb4934 (2018-05-23)
Commit message:
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
modified:
hd.py
hd.xml
b
diff -r 5b0a95f205ad -r 883e6381ba29 hd.py
--- a/hd.py Tue May 15 14:23:10 2018 -0400
+++ b/hd.py Wed May 23 14:14:10 2018 -0400
[
b'@@ -14,7 +14,7 @@\n # The tool can run on a certain number of processors, which can be defined by the user.\n \n # USAGE: python HDnew6_1Plot_FINAL.py --inputFile filename --inputName1 filename --inputFile2 filename2 --inputName2 filename2 --sample_size int/0 --sep "characterWhichSeparatesCSVFile" /\n-#        --only_DCS True --FamilySize3 True --subset_tag True --nproc int --output_csv outptufile_name_csv --output_pdf outptufile_name_pdf\n+#        --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int --nr_above_bars True/False--output_csv outptufile_name_csv --output_pdf outptufile_name_pdf\n \n import numpy\n import itertools\n@@ -92,7 +92,7 @@\n     plt.close("all")\n     \n def plotHDwithFSD(list1,maximumX,minimumX, subtitle, lenTags, title_file1,pdf,\n-                   xlabel,relative=False):\n+                   xlabel,relative=False, nr_above_bars = True):\n     if relative is True:\n         step = 0.1\n     else:\n@@ -130,15 +130,16 @@\n \n     plt.ylim((0, maximumY * 1.2))\n \n-    bin_centers = -0.4 * numpy.diff(bins) + bins[:-1]\n-    for x_label, label in zip(counts, bin_centers):  # labels for values\n-        if x_label == 0:\n-            continue\n-        else:\n-            plt.annotate("{:,}\\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1),\n-                         xy=(label, x_label + len(con_list1) * 0.01),\n-                         xycoords="data", color="#000066",fontsize=10)\n-\n+    if nr_above_bars is True:\n+        bin_centers = -0.4 * numpy.diff(bins) + bins[:-1]\n+        for x_label, label in zip(counts, bin_centers):  # labels for values\n+            if x_label == 0:\n+                continue\n+            else:\n+                plt.annotate("{:,}\\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1),\n+                             xy=(label, x_label + len(con_list1) * 0.01),\n+                             xycoords="data", color="#000066",fontsize=10)\n+        \n     legend = "sample size= {:,} against {:,}".format(sum(counts), lenTags)\n     plt.text(0.14, -0.01, legend, size=12, transform=plt.gcf().transFigure)\n \n@@ -146,11 +147,13 @@\n     plt.close("all")\n     plt.clf()\n \n-def plotHDwithinSeq_Sum2(sum1, sum2,min_value, lenTags, title_file1, pdf):\n+def plotHDwithinSeq_Sum2(sum1, sum2,sum1min, sum2min, min_value, lenTags, title_file1, pdf):\n     fig = plt.figure(figsize=(6, 8))\n     plt.subplots_adjust(bottom=0.1)\n \n-    ham = [sum1, sum2,numpy.array(min_value)]  # new hd within tags\n+    #ham = [sum1, sum2,numpy.array(min_value)]  # new hd within tags\n+    ham = [sum1, sum2, sum1min, sum2min, numpy.array(min_value)]  # new hd within tags\n+    \n \n     maximumX = numpy.amax(numpy.concatenate(ham))\n     minimumX = numpy.amin(numpy.concatenate(ham))\n@@ -162,12 +165,15 @@\n         range1 = range(minimumX, maximumX + 2)\n \n     counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False,\n-                      label=[ "HD a", "HD b","HD a+b"],\n-                      bins=range1, color=[ "#58ACFA", "#FA5858","#585858"], edgecolor=\'black\', linewidth=1)\n+                     # label=[ "HD a", "HD b","HD a+b"],\n+                     label=[ "HD a","HD b\'", "HD b", "HD a\'", "HD a+b"],\n+                      #bins=range1, color=[ "#58ACFA", "#FA5858","#585858"],\n+                      color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],\n+                       edgecolor=\'black\', linewidth=1)\n     plt.legend(loc=\'upper right\', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))\n     plt.suptitle(\'Hamming distances within tags\', fontsize=14)\n     #plt.title(title_file1, fontsize=12)\n-    plt.xlabel("Hamming Distance", fontsize=14)\n+    plt.xlabel("HD", fontsize=14)\n     plt.ylabel("Absolute Frequency", fontsize=14)\n     plt.grid(b=True, which=\'major\', color=\'#424242\', linestyle=\':\')\n \n@@ -448,6 +454,8 @@\n     relativeDiffList = []\n     ham1 = []\n     ham2 = []\n+    ham1min = []\n+    ham2min = []\n     min_valueList = []\n     min_tagsList = []\n     diff11_zeros = []\n@@ -488,13 +'..b'+              #                 relative=False, diff=False, title_file1=name_file)\n \n             ### print all data to a CSV file\n             #### HD ####\n@@ -946,11 +968,11 @@\n \n             ## FSD\n             # absolute difference\n-            summary19, sumCol19 = createTableFSD2(familySizeList1_diff)\n-            overallSum19 = sum(sumCol19)\n+        #    summary19, sumCol19 = createTableFSD2(familySizeList1_diff)\n+        #    overallSum19 = sum(sumCol19)\n             # relative difference\n-            summary21, sumCol21 = createTableFSD2(familySizeList1_reldiff)\n-            overallSum21 = sum(sumCol21)\n+         #   summary21, sumCol21 = createTableFSD2(familySizeList1_reldiff)\n+          #  overallSum21 = sum(sumCol21)\n \n             # chimeric reads\n             if len(minHD_tags_zeros) != 0:\n@@ -958,8 +980,8 @@\n                 summary15, sumCol15 = createTableHD(listDifference1_zeros, "diff=")\n                 overallSum15 = sum(sumCol15)\n                 # absolute difference and tags where at least one half has HD=0\n-                summary23, sumCol23 = createTableFSD2(familySizeList1_diff_zeros, diff=False)\n-                overallSum23 = sum(sumCol23)\n+           #     summary23, sumCol23 = createTableFSD2(familySizeList1_diff_zeros, diff=False)\n+            #    overallSum23 = sum(sumCol23)\n \n             output_file.write("{}\\n".format(name_file))\n             output_file.write("number of tags per file{}{:,} (from {:,}) against {:,}\\n\\n".format(sep, len(\n@@ -994,23 +1016,23 @@\n             createFileHD(summary11, sumCol11, overallSum11, output_file,\n                          "Absolute delta Hamming distances within the tag", sep)\n             createFileHD(summary13, sumCol13, overallSum13, output_file,\n-                         "Relative delta Hamming distances within the tag", sep)\n+                         "Chimera analysis: relative delta Hamming distances", sep)\n \n-            createFileFSD2(summary19, sumCol19, overallSum19, output_file,\n-                           "Family size distribution separated by absolute delta Hamming distance",\n-                           sep)\n-            createFileFSD2(summary21, sumCol21, overallSum21, output_file,\n-                           "Family size distribution separated by relative delta Hamming distance",\n-                           sep, rel=True)\n+        #    createFileFSD2(summary19, sumCol19, overallSum19, output_file,\n+         #                  "Family size distribution separated by absolute delta Hamming distance",\n+          #                 sep)\n+          #  createFileFSD2(summary21, sumCol21, overallSum21, output_file,\n+           #                "Family size distribution separated by relative delta Hamming distance",\n+            #               sep, rel=True)\n \n             if len(minHD_tags_zeros) != 0:\n                 output_file.write(\n-                    "Identifiaction of chimeric reads:\\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n+                    "Chimeras:\\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n                 createFileHD(summary15, sumCol15, overallSum15, output_file,\n                              "Hamming distances of non-zero half", sep)\n-                createFileFSD2(summary23, sumCol23, overallSum23, output_file,\n-                               "Family size distribution separated by Hamming distance of non-zero half",\n-                               sep, diff=False)\n+         #       createFileFSD2(summary23, sumCol23, overallSum23, output_file,\n+          #                     "Family size distribution separated by Hamming distance of non-zero half",\n+           #                    sep, diff=False)\n             output_file.write("\\n")\n \n \n'
b
diff -r 5b0a95f205ad -r 883e6381ba29 hd.xml
--- a/hd.xml Tue May 15 14:23:10 2018 -0400
+++ b/hd.xml Wed May 23 14:14:10 2018 -0400
b
@@ -1,12 +1,13 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.14">
+<tool id="hd" name="Duplex Sequencing Analysis: hd" version="0.0.15">
     <requirements>
         <requirement type="package" version="2.7">python</requirement>
         <requirement type="package" version="1.4">matplotlib</requirement>
     </requirements>
     <description>Hamming distance (HD) analysis of tags</description>
     <command>
-        python2 $__tool_directory__/hd.py --inputFile "$inputFile" --inputName1 "$inputFile.name" --inputFile2 "$inputFile2" --inputName2 "$inputFile2.name" --sample_size $sampleSize --sep $separator --subset_tag $subsetTag --nproc $nproc $onlyDCS --minFS $minFS --maxFS $maxFS --output_pdf $output_pdf --output_csv $output_csv 
+        python2 $__tool_directory__/hd.py --inputFile "$inputFile" --inputName1 "$inputFile.name" --inputFile2 "$inputFile2" --inputName2 "$inputFile2.name" --sample_size $sampleSize --sep $separator --subset_tag $subsetTag --nproc $nproc $onlyDCS --minFS $minFS --maxFS $maxFS
+ $nr_above_bars --output_pdf $output_pdf --output_csv $output_csv 
         #if $inputFile2:
         --output_pdf2 $output_pdf2 --output_csv2 $output_csv2
         #end if
@@ -21,6 +22,8 @@
         <param name="onlyDCS" type="boolean" label="only DCS in the analysis?" truevalue="" falsevalue="--only_DCS" checked="False" help="Only tags, which have a partner tag in the dataset, are included in the analysis."/>
         <param name="subsetTag" type="integer" label="shorten tag in the analysis?" value="0" help="An analysis with shorter tag length, which is specified by this parameter, is simulated. If this parameter is 0 (by default), the tag with its original length is used in the analysis."/>
         <param name="nproc" type="integer" label="number of processors" value="8" help="Number of processor used for computing."/>
+        <param name="nr_above_bars" type="boolean" label="include numbers above bars?" truevalue="" falsevalue="--nr_above_bars" checked="False" help="The absolute and relative values of the bar can be included or removed in the plot. "/>

     </inputs>
     <outputs>
         <data name="output_csv" format="csv"/>