changeset 17:b28dcd4051e8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
author artbio
date Thu, 15 Nov 2018 12:29:57 -0500
parents 600e2498bd21
children 2c95c899d0a4
files small_rna_maps.py small_rna_maps.xml test-data/clustering_unstranded.pdf test-data/clustering_unstranded.tab
diffstat 4 files changed, 173 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/small_rna_maps.py	Tue Nov 13 17:03:46 2018 -0500
+++ b/small_rna_maps.py	Thu Nov 15 12:29:57 2018 -0500
@@ -24,26 +24,33 @@
     the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
                             help='list of 2 plot methods (only two) among:\
                             Counts, Max, Mean, Median, Coverage and Size')
+    the_parser.add_argument('--nostrand', action='store_true',
+                            help='Consider reads regardless their polarity')
+
     args = the_parser.parse_args()
     return args
 
 
 class Map:
 
-    def __init__(self, bam_file, sample, minsize, maxsize, cluster):
+    def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand):
         self.sample_name = sample
         self.minsize = minsize
         self.maxsize = maxsize
         self.cluster = cluster
+        if not nostrand:
+            self.nostrand = False
+        else:
+            self.nostrand = True
         self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
         self.chromosomes = dict(zip(self.bam_object.references,
                                 self.bam_object.lengths))
         self.map_dict = self.create_map(self.bam_object, self.minsize,
-                                        self.maxsize)
+                                        self.maxsize, self.nostrand)
         if self.cluster:
             self.map_dict = self.tile_map(self.map_dict, self.cluster)
 
-    def create_map(self, bam_object, minsize, maxsize):
+    def create_map(self, bam_object, minsize, maxsize, nostrand=False):
         '''
         Returns a map_dictionary {(chromosome,read_position,polarity):
                                                     [read_length, ...]}
@@ -53,14 +60,24 @@
             # get empty value for start and end of each chromosome
             map_dictionary[(chrom, 1, 'F')] = []
             map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
-            for read in bam_object.fetch(chrom):
-                positions = read.positions  # a list of covered positions
-                if read.is_reverse:
-                    map_dictionary[(chrom, positions[-1]+1, 'R')].append(
-                                    read.query_alignment_length)
-                else:
-                    map_dictionary[(chrom, positions[0]+1, 'F')].append(
-                                    read.query_alignment_length)
+            if not nostrand:
+                for read in bam_object.fetch(chrom):
+                    positions = read.positions  # a list of covered positions
+                    if read.is_reverse:
+                        map_dictionary[(chrom, positions[-1]+1, 'R')].append(
+                                        read.query_alignment_length)
+                    else:
+                        map_dictionary[(chrom, positions[0]+1, 'F')].append(
+                                        read.query_alignment_length)
+            else:
+                for read in bam_object.fetch(chrom):
+                    positions = read.positions  # a list of covered positions
+                    if read.is_reverse:
+                        map_dictionary[(chrom, positions[-1]+1, 'F')].append(
+                                        read.query_alignment_length)
+                    else:
+                        map_dictionary[(chrom, positions[0]+1, 'F')].append(
+                                        read.query_alignment_length)
         return map_dictionary
 
     def grouper(self, iterable, clust_distance):
@@ -271,7 +288,8 @@
             out.write('\t'.join(line) + '\n')
 
 
-def main(inputs, samples, methods, outputs, minsize, maxsize, cluster):
+def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
+         nostrand):
     for method, output in zip(methods, outputs):
         out = open(output, 'w')
         if method == 'Size':
@@ -285,7 +303,7 @@
                       "Polarity", method]
         out.write('\t'.join(header) + '\n')
         for input, sample in zip(inputs, samples):
-            mapobj = Map(input, sample, minsize, maxsize, cluster)
+            mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand)
             token = {"Counts": mapobj.compute_readcount,
                      "Max": mapobj.compute_max,
                      "Mean": mapobj.compute_mean,
@@ -308,4 +326,4 @@
         args.sample_names = [name + '_' + str(i) for
                              i, name in enumerate(args.sample_names)]
     main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
-         args.minsize, args.maxsize, args.cluster)
+         args.minsize, args.maxsize, args.cluster, args.nostrand)
--- a/small_rna_maps.xml	Tue Nov 13 17:03:46 2018 -0500
+++ b/small_rna_maps.xml	Thu Nov 15 12:29:57 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="small_rna_maps" name="small_rna_maps" version="2.7.0">
+<tool id="small_rna_maps" name="small_rna_maps" version="2.8.0">
   <description></description>
   <requirements>
         <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
@@ -37,7 +37,8 @@
               #elif str($plots_options.plots_options_selector ) == "cluster":
                   --plot_methods 'Counts'
                   --outputs '$output_tab'
-                  --cluster ${plots_options.cluster} &&
+                  --cluster ${plots_options.cluster}
+                  ${plots_options.strandness} &&
               #else:
                   --plot_methods '${plots_options.first_plot}'
                   --outputs '$output_tab' &&
@@ -125,6 +126,8 @@
             <param name="first_plot" type="hidden" value="Counts"/>
             <param name="cluster" type="integer" label="Aggregation distance in nucleotides" value="1"
                    help="Sets the distance (in nt) below which reads are clustered to a single median position" />
+            <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false"
+                   label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/>
         </when>
     </conditional>
     <conditional name="ylimits_cond">
@@ -148,7 +151,7 @@
 </outputs>
 
     <tests>
-        <test> <!-- 1 -->
+        <test> <!-- 0 -->
             <repeat name="series">
                 <param name="inputs" value="input1.bam" ftype="bam" />
                 <param name="normalization" value="1.0" />
@@ -164,7 +167,7 @@
             <output file="input1_input2new_norm_1_2_counts.tab" name="output_tab" />
             <output file="input1_input2new_norm_1_2_single_plot_counts.pdf" name="output_pdf" />
         </test>
-        <test> <!-- 2 -->
+        <test> <!-- 1 -->
             <repeat name="series">
                 <param name="inputs" value="input1.bam" ftype="bam" />
                 <param name="normalization" value="1.0" />
@@ -179,6 +182,20 @@
             <output file="input1_counts_yminneg5_5.tab" name="output_tab" />
             <output file="input1_yminneg5_5_single_plot_counts.pdf" name="output_pdf" />
         </test>
+        <test> <!-- 2 -->
+            <repeat name="series">
+                <param name="inputs" value="input1.bam" ftype="bam" />
+                <param name="normalization" value="1.0" />
+            </repeat>
+            <param name="minsize" value="0" />
+            <param name="maxsize" value="10000" />
+            <param name="plots_options_selector" value="cluster" />
+            <param name="first_plot" value="Counts" />
+            <param name="cluster" value="5" />
+            <param name="strandness" value="false" />
+            <output file="clustering.tab" name="output_tab" />
+            <output file="clustering.pdf" name="output_pdf" />
+        </test>
         <test> <!-- 3 -->
             <repeat name="series">
                 <param name="inputs" value="input1.bam" ftype="bam" />
@@ -189,8 +206,9 @@
             <param name="plots_options_selector" value="cluster" />
             <param name="first_plot" value="Counts" />
             <param name="cluster" value="5" />
-            <output file="clustering.tab" name="output_tab" />
-            <output file="clustering.pdf" name="output_pdf" />
+            <param name="strandness" value="true" />
+            <output file="clustering_unstranded.tab" name="output_tab" />
+            <output file="clustering_unstranded.pdf" name="output_pdf" />
         </test>
         <test> <!-- 4 -->
             <repeat name="series">
Binary file test-data/clustering_unstranded.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clustering_unstranded.tab	Thu Nov 15 12:29:57 2018 -0500
@@ -0,0 +1,117 @@
+Dataset	Chromosome	Chrom_length	Coordinate	Polarity	Counts	Start-End	Cluster Size	density
+input1.bam	FBtr0070001	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0070001	72	12	F	14	7-18	12	1.16666666667
+input1.bam	FBtr0070001	72	30	F	42	27-34	8	5.25
+input1.bam	FBtr0070001	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0070533	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0070533	72	25	F	23	12-38	27	0.851851851852
+input1.bam	FBtr0070533	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0070603	72	21	F	68	1-42	42	1.61904761905
+input1.bam	FBtr0070603	72	53	F	2	51-56	6	0.333333333333
+input1.bam	FBtr0070603	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0070604	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0070604	72	20	F	2	18-22	5	0.4
+input1.bam	FBtr0070604	72	31	F	36	30-32	3	12.0
+input1.bam	FBtr0070604	72	57	F	1	57-57	1	1.0
+input1.bam	FBtr0070604	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0070911	73	1	F	0	1-1	1	0.0
+input1.bam	FBtr0070911	73	15	F	1	15-15	1	1.0
+input1.bam	FBtr0070911	73	38	F	1	38-38	1	1.0
+input1.bam	FBtr0070911	73	73	F	0	73-73	1	0.0
+input1.bam	FBtr0078490	72	1	F	0	1-1	1	0.0
+input1.bam	FBtr0078490	72	15	F	4	13-18	6	0.666666666667
+input1.bam	FBtr0078490	72	26	F	3	25-28	4	0.75
+input1.bam	FBtr0078490	72	38	F	1	38-38	1	1.0
+input1.bam	FBtr0078490	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0078580	72	16	F	1102	1-31	31	35.5483870968
+input1.bam	FBtr0078580	72	52	F	2	52-52	1	2.0
+input1.bam	FBtr0078580	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0078790	73	1	F	1	1-1	1	1.0
+input1.bam	FBtr0078790	73	17	F	2	17-17	1	2.0
+input1.bam	FBtr0078790	73	33	F	1	33-33	1	1.0
+input1.bam	FBtr0078790	73	47	F	9	42-52	11	0.818181818182
+input1.bam	FBtr0078790	73	71	F	1	69-73	5	0.2
+input1.bam	FBtr0079064	72	2	F	2	1-3	3	0.666666666667
+input1.bam	FBtr0079064	72	33	F	1	33-33	1	1.0
+input1.bam	FBtr0079064	72	52	F	1	52-52	1	1.0
+input1.bam	FBtr0079064	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079090	72	1	F	2	1-1	1	2.0
+input1.bam	FBtr0079090	72	26	F	1	26-26	1	1.0
+input1.bam	FBtr0079090	72	33	F	1	33-33	1	1.0
+input1.bam	FBtr0079090	72	54	F	2	53-56	4	0.5
+input1.bam	FBtr0079090	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079338	73	1	F	0	1-1	1	0.0
+input1.bam	FBtr0079338	73	14	F	5	12-17	6	0.833333333333
+input1.bam	FBtr0079338	73	25	F	1	25-25	1	1.0
+input1.bam	FBtr0079338	73	44	F	10	42-46	5	2.0
+input1.bam	FBtr0079338	73	73	F	0	73-73	1	0.0
+input1.bam	FBtr0079528	71	9	F	97	1-18	18	5.38888888889
+input1.bam	FBtr0079528	71	28	F	1	28-28	1	1.0
+input1.bam	FBtr0079528	71	36	F	3	35-37	3	1.0
+input1.bam	FBtr0079528	71	51	F	5	51-51	1	5.0
+input1.bam	FBtr0079528	71	71	F	0	71-71	1	0.0
+input1.bam	FBtr0079596	73	10	F	148	1-19	19	7.78947368421
+input1.bam	FBtr0079596	73	53	F	4	53-54	2	2.0
+input1.bam	FBtr0079596	73	73	F	0	73-73	1	0.0
+input1.bam	FBtr0079677	72	3	F	2	1-5	5	0.4
+input1.bam	FBtr0079677	72	52	F	2	52-53	2	1.0
+input1.bam	FBtr0079677	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079690	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0079690	72	24	F	2	22-27	6	0.333333333333
+input1.bam	FBtr0079690	72	33	F	2	33-33	1	2.0
+input1.bam	FBtr0079690	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079692	73	1	F	3	1-1	1	3.0
+input1.bam	FBtr0079692	73	18	F	1	18-18	1	1.0
+input1.bam	FBtr0079692	73	25	F	1	25-25	1	1.0
+input1.bam	FBtr0079692	73	32	F	1	32-32	1	1.0
+input1.bam	FBtr0079692	73	73	F	0	73-73	1	0.0
+input1.bam	FBtr0079693	72	1	F	5	1-1	1	5.0
+input1.bam	FBtr0079693	72	25	F	1	25-25	1	1.0
+input1.bam	FBtr0079693	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079694	72	1	F	5	1-1	1	5.0
+input1.bam	FBtr0079694	72	18	F	1	18-18	1	1.0
+input1.bam	FBtr0079694	72	52	F	1	52-52	1	1.0
+input1.bam	FBtr0079694	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079702	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0079702	72	19	F	2	19-19	1	2.0
+input1.bam	FBtr0079702	72	56	F	1	56-56	1	1.0
+input1.bam	FBtr0079702	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079728	72	1	F	2	1-1	1	2.0
+input1.bam	FBtr0079728	72	8	F	1	8-8	1	1.0
+input1.bam	FBtr0079728	72	19	F	1	19-19	1	1.0
+input1.bam	FBtr0079728	72	33	F	3	33-33	1	3.0
+input1.bam	FBtr0079728	72	56	F	1	56-56	1	1.0
+input1.bam	FBtr0079728	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079729	72	1	F	1	1-1	1	1.0
+input1.bam	FBtr0079729	72	13	F	1	13-13	1	1.0
+input1.bam	FBtr0079729	72	54	F	2	52-57	6	0.333333333333
+input1.bam	FBtr0079729	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079752	72	1	F	2	1-1	1	2.0
+input1.bam	FBtr0079752	72	9	F	2	7-12	6	0.333333333333
+input1.bam	FBtr0079752	72	33	F	2	33-33	1	2.0
+input1.bam	FBtr0079752	72	52	F	2	52-52	1	2.0
+input1.bam	FBtr0079752	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0079820	74	1	F	0	1-1	1	0.0
+input1.bam	FBtr0079820	74	50	F	13	45-56	12	1.08333333333
+input1.bam	FBtr0079820	74	74	F	0	74-74	1	0.0
+input1.bam	FBtr0080609	72	10	F	60	1-20	20	3.0
+input1.bam	FBtr0080609	72	42	F	1	42-42	1	1.0
+input1.bam	FBtr0080609	72	51	F	2	51-52	2	1.0
+input1.bam	FBtr0080609	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0080644	72	6	F	6	1-12	12	0.5
+input1.bam	FBtr0080644	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0080646	72	4	F	3	1-7	7	0.428571428571
+input1.bam	FBtr0080646	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0080647	72	6	F	10	1-12	12	0.833333333333
+input1.bam	FBtr0080647	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0080660	72	4	F	7	1-7	7	1.0
+input1.bam	FBtr0080660	72	72	F	0	72-72	1	0.0
+input1.bam	FBtr0080663	71	9	F	102	1-17	17	6.0
+input1.bam	FBtr0080663	71	26	F	1	26-26	1	1.0
+input1.bam	FBtr0080663	71	32	F	2	32-32	1	2.0
+input1.bam	FBtr0080663	71	50	F	10	50-51	2	5.0
+input1.bam	FBtr0080663	71	71	F	0	71-71	1	0.0
+input1.bam	FBtr0080664	71	11	F	108	1-21	21	5.14285714286
+input1.bam	FBtr0080664	71	33	F	9	28-38	11	0.818181818182
+input1.bam	FBtr0080664	71	53	F	11	50-56	7	1.57142857143
+input1.bam	FBtr0080664	71	71	F	0	71-71	1	0.0