| Previous changeset 44:a76af7fd9fca (2019-08-14) Next changeset 46:901827154779 (2019-08-27) |
|
Commit message:
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty |
|
modified:
fsd.py fsd.xml test-data/fsd_output1.pdf test-data/fsd_output1.tab test-data/fsd_output2.pdf test-data/fsd_output2.tab |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 fsd.py --- a/fsd.py Wed Aug 14 13:03:14 2019 -0400 +++ b/fsd.py Tue Aug 27 07:36:53 2019 -0400 |
| [ |
| b'@@ -41,6 +41,7 @@\n parser.add_argument(\'--inputFile4\', default=None, help=\'Tabular File with three columns: ab or ba, tag and family size.\')\n parser.add_argument(\'--inputName4\')\n parser.add_argument(\'--log_axis\', action="store_false", help=\'Transform y axis in log scale.\')\n+ parser.add_argument(\'--rel_freq\', action="store_false", help=\'If False, the relative frequencies are displayed.\')\n parser.add_argument(\'--output_pdf\', default="data.pdf", type=str, help=\'Name of the pdf file.\')\n parser.add_argument(\'--output_tabular\', default="data.tabular", type=str, help=\'Name of the tabular file.\')\n return parser\n@@ -61,6 +62,7 @@\n fourthFile = args.inputFile4\n name4 = args.inputName4\n log_axis = args.log_axis\n+ rel_freq = args.rel_freq\n \n title_file = args.output_tabular\n title_file2 = args.output_pdf\n@@ -78,7 +80,7 @@\n data_array_list = []\n list_to_plot_original = []\n colors = []\n- bins = numpy.arange(1, 22) \n+ bins = numpy.arange(1, 22)\n with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf:\n fig = plt.figure()\n fig.subplots_adjust(left=0.12, right=0.97, bottom=0.23, top=0.94, hspace=0)\n@@ -98,6 +100,8 @@\n # data1[bigFamilies] = 22\n data1 = numpy.clip(integers, bins[0], bins[-1])\n name1 = name1.split(".tabular")[0]\n+ if len(name1) > 40:\n+ name1 = name1[:40]\n list_to_plot.append(data1)\n label.append(name1)\n data_array_list.append(file1)\n@@ -106,21 +110,24 @@\n fig.text(0.05, 0.11, legend, size=10, transform=plt.gcf().transFigure)\n fig2.text(0.05, 0.11, legend, size=10, transform=plt.gcf().transFigure)\n \n- legend1 = "singletons:\\nnr. of tags\\n{:,} ({:.3f})".format(numpy.bincount(data1)[1], float(numpy.bincount(data1)[1]) / len(data1))\n+ legend1 = "singletons:\\nnr. of tags\\n{:,} ({:.3f})".format(numpy.bincount(data1)[1],\n+ float(numpy.bincount(data1)[1]) / len(data1))\n fig.text(0.32, 0.11, legend1, size=10, transform=plt.gcf().transFigure)\n fig2.text(0.32, 0.11, legend1, size=10, transform=plt.gcf().transFigure)\n \n- legend3b = "PE reads\\n{:,} ({:.3f})".format(numpy.bincount(data1)[1], float(numpy.bincount(data1)[1]) / sum(integers))\n+ legend3b = "PE reads\\n{:,} ({:.3f})".format(numpy.bincount(data1)[1],\n+ float(numpy.bincount(data1)[1]) / sum(integers))\n fig.text(0.45, 0.11, legend3b, size=10, transform=plt.gcf().transFigure)\n fig2.text(0.45, 0.11, legend3b, size=10, transform=plt.gcf().transFigure)\n \n legend4 = "family size > 20:\\nnr. of tags\\n{:,} ({:.3f})".format(len(integers[integers > 20]),\n- float(sum(integers[integers > 20]))\n- / sum(integers)) \n+ float(len(integers[integers > 20]))\n+ / len(integers))\n fig.text(0.58, 0.11, legend4, size=10, transform=plt.gcf().transFigure)\n fig2.text(0.58, 0.11, legend4, size=10, transform=plt.gcf().transFigure)\n \n- legend5 = "PE reads\\n{:,} ({:.3f})".format(sum(integers[integers > 20]), float(sum(integers[integers > 20])) / sum(integers))\n+ legend5 = "PE reads\\n{:,} ({:.3f})".format(sum(integers[integers > 20]),\n+ float(sum(integers[integers > 20])) / sum(integers))\n fig.text(0.70, 0.11, legend5, size=10, transform=plt.gcf().transFigure)\n fig2.text(0.70, 0.11, legend5, size=10, transform=plt.gcf().transFigure)\n \n@'..b' float(len(duplTags_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep,\n+ float(len(duplTags_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3),\n+ float(duplTags_double_FS3) / (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3),\n+ sep, float(sum(duplTags_FS3_o)) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)),\n+ sep,\n+ float(sum(duplTags_FS3_o)) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o),\n+ float(duplTags_double_FS3_o) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o)))\n output_file.write("total nr. of tags{}{}{}{}{}{}{}{}{}{}{}{}\\n".format(\n- sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)),\n- sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3),\n- sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o)))\n+ sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep,\n+ (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)),\n+ sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep,\n+ (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3),\n+ sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), sep,\n+ (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o)))\n \n- output_file.write("\\nValues from family size distribution\\n")\n+ counts = [numpy.bincount(d, minlength=22)[1:] for d in list1] # original counts of family sizes\n+ output_file.write("\\nValues from family size distribution based on families\\n")\n output_file.write("{}duplex{}ab{}ba{}sum\\n".format(sep, sep, sep, sep))\n- for dx, ab, ba, fs in zip(counts[0][0], counts[0][1], counts[0][2], counts[1]):\n+\n+ j = 0\n+ for fs in bins:\n if fs == 21:\n fs = ">20"\n else:\n fs = "={}".format(fs)\n- ab1 = ab - dx\n- ba1 = ba - ab\n- output_file.write("FS{}{}{}{}{}{}{}{}{}\\n".format(fs, sep, int(dx), sep, int(ab1), sep, int(ba1), sep, int(ba)))\n+ output_file.write("FS{}{}".format(fs, sep))\n+ for n in range(3):\n+ output_file.write("{}{}".format(int(counts[n][j]), sep))\n+ output_file.write("{}\\n".format(counts[0][j] + counts[1][j] + counts[2][j]))\n+ j += 1\n+ output_file.write("sum{}".format(sep))\n+ for i in counts:\n+ output_file.write("{}{}".format(int(sum(i)), sep))\n+ output_file.write("{}\\n".format(sum(counts[0] + counts[1] + counts[2])))\n+\n+ output_file.write("\\nValues from family size distribution based on PE reads\\n")\n+ output_file.write("{}duplex{}ab{}ba{}sum\\n".format(sep, sep, sep, sep))\n+ j = 0\n+ for fs in bins:\n+ if fs == 21:\n+ fs = ">20"\n+ else:\n+ fs = "={}".format(fs)\n+ output_file.write("FS{}{}".format(fs, sep))\n+ for n in range(3):\n+ output_file.write("{}{}".format(int(reads[n][j]), sep))\n+ output_file.write("{}\\n".format(reads[0][j] + reads[1][j] + reads[2][j]))\n+ j += 1\n+ output_file.write("sum{}".format(sep))\n+ for i in reads:\n+ output_file.write("{}{}".format(int(sum(i)), sep))\n+ output_file.write("{}\\n".format(sum(reads[0] + reads[1] + reads[2])))\n \n print("Files successfully created!")\n \n' |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 fsd.xml --- a/fsd.xml Wed Aug 14 13:03:14 2019 -0400 +++ b/fsd.xml Tue Aug 27 07:36:53 2019 -0400 |
| b |
| @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="UTF-8"?> <!-- galaxy version 16.04 --> -<tool id="fsd" name="FSD" version="1.0.5"> +<tool id="fsd" name="FSD" version="1.0.6"> <description>: Family Size Distribution of duplex sequencing tags</description> <requirements> <requirement type="package" version="2.7">python</requirement> @@ -10,7 +10,7 @@ <command> python2 '$__tool_directory__/fsd.py' --inputFile1 '${file1}' --inputName1 '${file1.name}' --inputFile2 '${file2}' --inputName2 '${file2.name}' --inputFile3 '${file3}' --inputName3 '${file3.name}' ---inputFile4 '${file4}' --inputName4 '${file4.name}' $log_axis --output_pdf $output_pdf --output_tabular $output_tabular +--inputFile4 '${file4}' --inputName4 '${file4.name}' $log_axis $rel_freq --output_pdf $output_pdf --output_tabular $output_tabular </command> <inputs> <param name="file1" type="data" format="tabular" label="Dataset 1: input tags" optional="false"/> @@ -18,7 +18,7 @@ <param name="file3" type="data" format="tabular" label="Dataset 3: input tags" optional="true" /> <param name="file4" type="data" format="tabular" label="Dataset 4: input tags" optional="true" help="Input in tabular format with the family size, tags and the direction of the strand ('ab' or 'ba') for each family. Name of the files can have max. 34 charcters!"/> <param name="log_axis" type="boolean" label="log scale for y axis?" truevalue="" falsevalue="--log_axis" checked="False" help="Transform y axis in log scale."/> - + <param name="rel_freq" type="boolean" label="relative frequency?" truevalue="" falsevalue="--rel_freq" checked="False" help="If True, the relative frequencies instead of the absolute values are displayed in the plots."/> </inputs> <outputs> <data name="output_pdf" format="pdf" /> |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 test-data/fsd_output1.pdf |
| b |
| Binary file test-data/fsd_output1.pdf has changed |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 test-data/fsd_output1.tab --- a/test-data/fsd_output1.tab Wed Aug 14 13:03:14 2019 -0400 +++ b/test-data/fsd_output1.tab Tue Aug 27 07:36:53 2019 -0400 |
| b |
| @@ -1,4 +1,4 @@ -Values from family size distribution with all datasets (tags) +Values from family size distribution with all datasets based on families Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab fsd_data4.tab FS=1 63 63 63 63 @@ -24,7 +24,7 @@ FS>20 1 1 1 1 sum 112 112 112 112 -Values from family size distribution with all datasets (PE reads) +Values from family size distribution with all datasets based on PE reads Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab fsd_data4.tab FS=1 63 63 63 63 @@ -79,7 +79,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -102,6 +102,32 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 Dataset: fsd_data2.tab max. family size: 21 @@ -132,7 +158,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -155,6 +181,32 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 Dataset: fsd_data3.tab max. family size: 21 @@ -185,7 +237,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -208,6 +260,32 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 Dataset: fsd_data4.tab max. family size: 21 @@ -238,7 +316,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -261,3 +339,29 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 test-data/fsd_output2.pdf |
| b |
| Binary file test-data/fsd_output2.pdf has changed |
| b |
| diff -r a76af7fd9fca -r 6651e76baca1 test-data/fsd_output2.tab --- a/test-data/fsd_output2.tab Wed Aug 14 13:03:14 2019 -0400 +++ b/test-data/fsd_output2.tab Tue Aug 27 07:36:53 2019 -0400 |
| b |
| @@ -1,4 +1,4 @@ -Values from family size distribution with all datasets (tags) +Values from family size distribution with all datasets based on families Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab FS=1 63 63 63 @@ -24,7 +24,7 @@ FS>20 1 1 1 sum 112 112 112 -Values from family size distribution with all datasets (PE reads) +Values from family size distribution with all datasets based on PE reads Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab FS=1 63 63 63 @@ -79,7 +79,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -102,6 +102,32 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 Dataset: fsd_data2.tab max. family size: 21 @@ -132,7 +158,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -155,6 +181,32 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 Dataset: fsd_data3.tab max. family size: 21 @@ -185,7 +237,7 @@ DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) total nr. of tags 41 278 41 42 278 288 -Values from family size distribution +Values from family size distribution based on families duplex ab ba sum FS=1 2 30 31 63 FS=2 0 3 2 5 @@ -208,3 +260,29 @@ FS=19 0 0 0 0 FS=20 0 0 0 0 FS>20 0 0 1 1 +sum 6 47 59 112 + +Values from family size distribution based on PE reads + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 6 4 10 +FS=3 0 9 15 24 +FS=4 8 12 16 36 +FS=5 0 10 5 15 +FS=6 0 6 24 30 +FS=7 0 7 14 21 +FS=8 0 8 16 24 +FS=9 0 0 18 18 +FS=10 10 10 10 30 +FS=11 0 0 11 11 +FS=12 0 12 24 36 +FS=13 13 13 13 39 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 21 21 +sum 33 123 222 378 |