# HG changeset patch # User mheinzl # Date 1543224326 18000 # Node ID eabfdc012d7bdfa482e6f6a69b18289ab9fd0c0a # Parent 6c2608e8d0942caf88fe56265eb3f74a36c23e94 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 31f11c1cb3303d741ee11a25903c3cc42a23f30d diff -r 6c2608e8d094 -r eabfdc012d7b fsd_regions.py --- a/fsd_regions.py Tue Nov 20 09:51:47 2018 -0500 +++ b/fsd_regions.py Mon Nov 26 04:25:26 2018 -0500 @@ -71,7 +71,7 @@ seqDic_ab = dict(zip(all_ab, quant_ab)) seqDic_ba = dict(zip(all_ba, quant_ba)) - if re.search(r'(\d)+_(\d)+$', str(mut_array[0,0])) is None: + if re.search('_(\d)+_(\d)+$', str(mut_array[0,0])) is None: seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True) group = mut_array[seqMut_index,0] mut_array = mut_array[seqMut_index,:] @@ -156,7 +156,7 @@ for i, count in zip(groupUnique, quantAfterRegion): index_of_current_region = numpy.where(group == i)[0] plt.text(0.55, 0.14 - s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure) - if re.search(r'(\d)+_(\d)+$', str(mut_array[0, 0])) is None: + if re.search('_(\d)+_(\d)+$', str(mut_array[0, 0])) is None: nr_tags_ab = len(numpy.unique(mut_array[index_of_current_region, 1])) else: nr_tags_ab = len(mut_array[index_of_current_region, 1]) diff -r 6c2608e8d094 -r eabfdc012d7b fsd_regions.xml --- a/fsd_regions.xml Tue Nov 20 09:51:47 2018 -0500 +++ b/fsd_regions.xml Mon Nov 26 04:25:26 2018 -0500 @@ -1,16 +1,17 @@ - + Family size distribution (FSD) of user-specified regions in the reference genome python matplotlib - python2 '$__tool_directory__/fsd_regions.py' --inputFile '$file1' --inputName1 '$file1.name' --ref_genome '$file2' --output_pdf $output_pdf --output_tabular $output_tabular + python2 '$__tool_directory__/fsd_regions.py' --inputFile '$file1' --inputName1 '$file1.name' --bamFile '$file2' --rangesFile '$file3' --output_pdf $output_pdf --output_tabular $output_tabular - + + @@ -18,45 +19,39 @@ - - - - + + + + + 20 0 2 +sum 20 4 + + +In the plot, both family sizes of the ab and ba strands were used. +Whereas the total numbers indicate only the single count of the tags per region. +Region total nr. of tags per region +ACH_TDII_5regions_90_633 10 +ACH_TDII_5regions_659_1140 2 diff -r 6c2608e8d094 -r eabfdc012d7b test-data/fsd_reg_ranges.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_reg_ranges.bed Mon Nov 26 04:25:26 2018 -0500 @@ -0,0 +1,2 @@ +ACH_TDII_5regions 90 633 +ACH_TDII_5regions 659 1140 diff -r 6c2608e8d094 -r eabfdc012d7b test-data/output_file.pdf Binary file test-data/output_file.pdf has changed diff -r 6c2608e8d094 -r eabfdc012d7b test-data/output_file.tabular --- a/test-data/output_file.tabular Tue Nov 20 09:51:47 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -Dataset: Test_data - AB BA -max. family size: 85 332 -absolute frequency: 9 1 -relative frequency: 0.209 0.062 - -total nr. of reads 1312 -total nr. of tags 32 (16) - - -Values from family size distribution - ACH_87_636 ACH_656_1143 ACH_1141_1564 ACH_1892_2398 -FS=3 0 0 0 1 -FS=4 2 0 0 0 -FS=5 2 0 0 0 -FS=6 0 1 0 0 -FS=7 3 0 0 2 -FS=8 0 0 0 1 -FS=9 1 0 0 2 -FS=10 2 0 0 0 -FS=11 0 1 1 0 -FS=12 0 0 0 0 -FS=13 0 0 0 0 -FS=14 0 0 0 0 -FS=15 0 0 1 0 -FS=16 0 0 0 0 -FS=17 0 0 0 0 -FS=18 0 0 0 0 -FS=19 0 0 0 0 -FS=20 0 0 0 0 -FS>20 0 8 4 0 -sum 10 10 6 6 - - -In the plot, both family sizes of the ab and ba strands were used. -Whereas the total numbers indicate only the count of the tags per region. - - -Region total nr. of tags per region -ACH_87_636 5 -ACH_656_1143 5 -ACH_1141_1564 3 -ACH_1892_2398 3