Previous changeset 13:5b0a95f205ad (2018-05-15) Next changeset 15:cf7874bb4934 (2018-05-23) |
Commit message:
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e |
modified:
hd.py hd.xml |
b |
diff -r 5b0a95f205ad -r 883e6381ba29 hd.py --- a/hd.py Tue May 15 14:23:10 2018 -0400 +++ b/hd.py Wed May 23 14:14:10 2018 -0400 |
[ |
b'@@ -14,7 +14,7 @@\n # The tool can run on a certain number of processors, which can be defined by the user.\n \n # USAGE: python HDnew6_1Plot_FINAL.py --inputFile filename --inputName1 filename --inputFile2 filename2 --inputName2 filename2 --sample_size int/0 --sep "characterWhichSeparatesCSVFile" /\n-# --only_DCS True --FamilySize3 True --subset_tag True --nproc int --output_csv outptufile_name_csv --output_pdf outptufile_name_pdf\n+# --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int --nr_above_bars True/False--output_csv outptufile_name_csv --output_pdf outptufile_name_pdf\n \n import numpy\n import itertools\n@@ -92,7 +92,7 @@\n plt.close("all")\n \n def plotHDwithFSD(list1,maximumX,minimumX, subtitle, lenTags, title_file1,pdf,\n- xlabel,relative=False):\n+ xlabel,relative=False, nr_above_bars = True):\n if relative is True:\n step = 0.1\n else:\n@@ -130,15 +130,16 @@\n \n plt.ylim((0, maximumY * 1.2))\n \n- bin_centers = -0.4 * numpy.diff(bins) + bins[:-1]\n- for x_label, label in zip(counts, bin_centers): # labels for values\n- if x_label == 0:\n- continue\n- else:\n- plt.annotate("{:,}\\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1),\n- xy=(label, x_label + len(con_list1) * 0.01),\n- xycoords="data", color="#000066",fontsize=10)\n-\n+ if nr_above_bars is True:\n+ bin_centers = -0.4 * numpy.diff(bins) + bins[:-1]\n+ for x_label, label in zip(counts, bin_centers): # labels for values\n+ if x_label == 0:\n+ continue\n+ else:\n+ plt.annotate("{:,}\\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1),\n+ xy=(label, x_label + len(con_list1) * 0.01),\n+ xycoords="data", color="#000066",fontsize=10)\n+ \n legend = "sample size= {:,} against {:,}".format(sum(counts), lenTags)\n plt.text(0.14, -0.01, legend, size=12, transform=plt.gcf().transFigure)\n \n@@ -146,11 +147,13 @@\n plt.close("all")\n plt.clf()\n \n-def plotHDwithinSeq_Sum2(sum1, sum2,min_value, lenTags, title_file1, pdf):\n+def plotHDwithinSeq_Sum2(sum1, sum2,sum1min, sum2min, min_value, lenTags, title_file1, pdf):\n fig = plt.figure(figsize=(6, 8))\n plt.subplots_adjust(bottom=0.1)\n \n- ham = [sum1, sum2,numpy.array(min_value)] # new hd within tags\n+ #ham = [sum1, sum2,numpy.array(min_value)] # new hd within tags\n+ ham = [sum1, sum2, sum1min, sum2min, numpy.array(min_value)] # new hd within tags\n+ \n \n maximumX = numpy.amax(numpy.concatenate(ham))\n minimumX = numpy.amin(numpy.concatenate(ham))\n@@ -162,12 +165,15 @@\n range1 = range(minimumX, maximumX + 2)\n \n counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False,\n- label=[ "HD a", "HD b","HD a+b"],\n- bins=range1, color=[ "#58ACFA", "#FA5858","#585858"], edgecolor=\'black\', linewidth=1)\n+ # label=[ "HD a", "HD b","HD a+b"],\n+ label=[ "HD a","HD b\'", "HD b", "HD a\'", "HD a+b"],\n+ #bins=range1, color=[ "#58ACFA", "#FA5858","#585858"],\n+ color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],\n+ edgecolor=\'black\', linewidth=1)\n plt.legend(loc=\'upper right\', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))\n plt.suptitle(\'Hamming distances within tags\', fontsize=14)\n #plt.title(title_file1, fontsize=12)\n- plt.xlabel("Hamming Distance", fontsize=14)\n+ plt.xlabel("HD", fontsize=14)\n plt.ylabel("Absolute Frequency", fontsize=14)\n plt.grid(b=True, which=\'major\', color=\'#424242\', linestyle=\':\')\n \n@@ -448,6 +454,8 @@\n relativeDiffList = []\n ham1 = []\n ham2 = []\n+ ham1min = []\n+ ham2min = []\n min_valueList = []\n min_tagsList = []\n diff11_zeros = []\n@@ -488,13 +'..b'+ # relative=False, diff=False, title_file1=name_file)\n \n ### print all data to a CSV file\n #### HD ####\n@@ -946,11 +968,11 @@\n \n ## FSD\n # absolute difference\n- summary19, sumCol19 = createTableFSD2(familySizeList1_diff)\n- overallSum19 = sum(sumCol19)\n+ # summary19, sumCol19 = createTableFSD2(familySizeList1_diff)\n+ # overallSum19 = sum(sumCol19)\n # relative difference\n- summary21, sumCol21 = createTableFSD2(familySizeList1_reldiff)\n- overallSum21 = sum(sumCol21)\n+ # summary21, sumCol21 = createTableFSD2(familySizeList1_reldiff)\n+ # overallSum21 = sum(sumCol21)\n \n # chimeric reads\n if len(minHD_tags_zeros) != 0:\n@@ -958,8 +980,8 @@\n summary15, sumCol15 = createTableHD(listDifference1_zeros, "diff=")\n overallSum15 = sum(sumCol15)\n # absolute difference and tags where at least one half has HD=0\n- summary23, sumCol23 = createTableFSD2(familySizeList1_diff_zeros, diff=False)\n- overallSum23 = sum(sumCol23)\n+ # summary23, sumCol23 = createTableFSD2(familySizeList1_diff_zeros, diff=False)\n+ # overallSum23 = sum(sumCol23)\n \n output_file.write("{}\\n".format(name_file))\n output_file.write("number of tags per file{}{:,} (from {:,}) against {:,}\\n\\n".format(sep, len(\n@@ -994,23 +1016,23 @@\n createFileHD(summary11, sumCol11, overallSum11, output_file,\n "Absolute delta Hamming distances within the tag", sep)\n createFileHD(summary13, sumCol13, overallSum13, output_file,\n- "Relative delta Hamming distances within the tag", sep)\n+ "Chimera analysis: relative delta Hamming distances", sep)\n \n- createFileFSD2(summary19, sumCol19, overallSum19, output_file,\n- "Family size distribution separated by absolute delta Hamming distance",\n- sep)\n- createFileFSD2(summary21, sumCol21, overallSum21, output_file,\n- "Family size distribution separated by relative delta Hamming distance",\n- sep, rel=True)\n+ # createFileFSD2(summary19, sumCol19, overallSum19, output_file,\n+ # "Family size distribution separated by absolute delta Hamming distance",\n+ # sep)\n+ # createFileFSD2(summary21, sumCol21, overallSum21, output_file,\n+ # "Family size distribution separated by relative delta Hamming distance",\n+ # sep, rel=True)\n \n if len(minHD_tags_zeros) != 0:\n output_file.write(\n- "Identifiaction of chimeric reads:\\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n+ "Chimeras:\\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\\nSo the hamming distance of the non-identical half is compared.\\n")\n createFileHD(summary15, sumCol15, overallSum15, output_file,\n "Hamming distances of non-zero half", sep)\n- createFileFSD2(summary23, sumCol23, overallSum23, output_file,\n- "Family size distribution separated by Hamming distance of non-zero half",\n- sep, diff=False)\n+ # createFileFSD2(summary23, sumCol23, overallSum23, output_file,\n+ # "Family size distribution separated by Hamming distance of non-zero half",\n+ # sep, diff=False)\n output_file.write("\\n")\n \n \n' |
b |
diff -r 5b0a95f205ad -r 883e6381ba29 hd.xml --- a/hd.xml Tue May 15 14:23:10 2018 -0400 +++ b/hd.xml Wed May 23 14:14:10 2018 -0400 |
b |
@@ -1,12 +1,13 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="hd" name="Duplex Sequencing Analysis:" version="0.0.14"> +<tool id="hd" name="Duplex Sequencing Analysis: hd" version="0.0.15"> <requirements> <requirement type="package" version="2.7">python</requirement> <requirement type="package" version="1.4">matplotlib</requirement> </requirements> <description>Hamming distance (HD) analysis of tags</description> <command> - python2 $__tool_directory__/hd.py --inputFile "$inputFile" --inputName1 "$inputFile.name" --inputFile2 "$inputFile2" --inputName2 "$inputFile2.name" --sample_size $sampleSize --sep $separator --subset_tag $subsetTag --nproc $nproc $onlyDCS --minFS $minFS --maxFS $maxFS --output_pdf $output_pdf --output_csv $output_csv + python2 $__tool_directory__/hd.py --inputFile "$inputFile" --inputName1 "$inputFile.name" --inputFile2 "$inputFile2" --inputName2 "$inputFile2.name" --sample_size $sampleSize --sep $separator --subset_tag $subsetTag --nproc $nproc $onlyDCS --minFS $minFS --maxFS $maxFS + $nr_above_bars --output_pdf $output_pdf --output_csv $output_csv #if $inputFile2: --output_pdf2 $output_pdf2 --output_csv2 $output_csv2 #end if @@ -21,6 +22,8 @@ <param name="onlyDCS" type="boolean" label="only DCS in the analysis?" truevalue="" falsevalue="--only_DCS" checked="False" help="Only tags, which have a partner tag in the dataset, are included in the analysis."/> <param name="subsetTag" type="integer" label="shorten tag in the analysis?" value="0" help="An analysis with shorter tag length, which is specified by this parameter, is simulated. If this parameter is 0 (by default), the tag with its original length is used in the analysis."/> <param name="nproc" type="integer" label="number of processors" value="8" help="Number of processor used for computing."/> + <param name="nr_above_bars" type="boolean" label="include numbers above bars?" truevalue="" falsevalue="--nr_above_bars" checked="False" help="The absolute and relative values of the bar can be included or removed in the plot. "/> + </inputs> <outputs> <data name="output_csv" format="csv"/> |