diff small_rna_maps.py @ 17:b28dcd4051e8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
author artbio
date Thu, 15 Nov 2018 12:29:57 -0500
parents 600e2498bd21
children 2c95c899d0a4
line wrap: on
line diff
--- a/small_rna_maps.py	Tue Nov 13 17:03:46 2018 -0500
+++ b/small_rna_maps.py	Thu Nov 15 12:29:57 2018 -0500
@@ -24,26 +24,33 @@
     the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
                             help='list of 2 plot methods (only two) among:\
                             Counts, Max, Mean, Median, Coverage and Size')
+    the_parser.add_argument('--nostrand', action='store_true',
+                            help='Consider reads regardless their polarity')
+
     args = the_parser.parse_args()
     return args
 
 
 class Map:
 
-    def __init__(self, bam_file, sample, minsize, maxsize, cluster):
+    def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand):
         self.sample_name = sample
         self.minsize = minsize
         self.maxsize = maxsize
         self.cluster = cluster
+        if not nostrand:
+            self.nostrand = False
+        else:
+            self.nostrand = True
         self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
         self.chromosomes = dict(zip(self.bam_object.references,
                                 self.bam_object.lengths))
         self.map_dict = self.create_map(self.bam_object, self.minsize,
-                                        self.maxsize)
+                                        self.maxsize, self.nostrand)
         if self.cluster:
             self.map_dict = self.tile_map(self.map_dict, self.cluster)
 
-    def create_map(self, bam_object, minsize, maxsize):
+    def create_map(self, bam_object, minsize, maxsize, nostrand=False):
         '''
         Returns a map_dictionary {(chromosome,read_position,polarity):
                                                     [read_length, ...]}
@@ -53,14 +60,24 @@
             # get empty value for start and end of each chromosome
             map_dictionary[(chrom, 1, 'F')] = []
             map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
-            for read in bam_object.fetch(chrom):
-                positions = read.positions  # a list of covered positions
-                if read.is_reverse:
-                    map_dictionary[(chrom, positions[-1]+1, 'R')].append(
-                                    read.query_alignment_length)
-                else:
-                    map_dictionary[(chrom, positions[0]+1, 'F')].append(
-                                    read.query_alignment_length)
+            if not nostrand:
+                for read in bam_object.fetch(chrom):
+                    positions = read.positions  # a list of covered positions
+                    if read.is_reverse:
+                        map_dictionary[(chrom, positions[-1]+1, 'R')].append(
+                                        read.query_alignment_length)
+                    else:
+                        map_dictionary[(chrom, positions[0]+1, 'F')].append(
+                                        read.query_alignment_length)
+            else:
+                for read in bam_object.fetch(chrom):
+                    positions = read.positions  # a list of covered positions
+                    if read.is_reverse:
+                        map_dictionary[(chrom, positions[-1]+1, 'F')].append(
+                                        read.query_alignment_length)
+                    else:
+                        map_dictionary[(chrom, positions[0]+1, 'F')].append(
+                                        read.query_alignment_length)
         return map_dictionary
 
     def grouper(self, iterable, clust_distance):
@@ -271,7 +288,8 @@
             out.write('\t'.join(line) + '\n')
 
 
-def main(inputs, samples, methods, outputs, minsize, maxsize, cluster):
+def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
+         nostrand):
     for method, output in zip(methods, outputs):
         out = open(output, 'w')
         if method == 'Size':
@@ -285,7 +303,7 @@
                       "Polarity", method]
         out.write('\t'.join(header) + '\n')
         for input, sample in zip(inputs, samples):
-            mapobj = Map(input, sample, minsize, maxsize, cluster)
+            mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand)
             token = {"Counts": mapobj.compute_readcount,
                      "Max": mapobj.compute_max,
                      "Mean": mapobj.compute_mean,
@@ -308,4 +326,4 @@
         args.sample_names = [name + '_' + str(i) for
                              i, name in enumerate(args.sample_names)]
     main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
-         args.minsize, args.maxsize, args.cluster)
+         args.minsize, args.maxsize, args.cluster, args.nostrand)