comparison small_rna_maps.py @ 26:376fae7c9f32 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 32eba59fa52705ae74fd9fe65f6f55be783bfc74
author artbio
date Sun, 14 Apr 2019 14:43:44 -0400
parents 07aa8f928d4b
children fe1a9cfaf5c3
comparison
equal deleted inserted replaced
25:07aa8f928d4b 26:376fae7c9f32
9 def Parser(): 9 def Parser():
10 the_parser = argparse.ArgumentParser() 10 the_parser = argparse.ArgumentParser()
11 the_parser.add_argument('--inputs', dest='inputs', required=True, 11 the_parser.add_argument('--inputs', dest='inputs', required=True,
12 nargs='+', help='list of input BAM files') 12 nargs='+', help='list of input BAM files')
13 the_parser.add_argument('--minsize', dest='minsize', type=int, 13 the_parser.add_argument('--minsize', dest='minsize', type=int,
14 default=0, help='minimal size of reads') 14 default=19, help='minimal size of reads')
15 the_parser.add_argument('--maxsize', dest='maxsize', type=int, 15 the_parser.add_argument('--maxsize', dest='maxsize', type=int,
16 default=10000, help='maximal size of reads') 16 default=29, help='maximal size of reads')
17 the_parser.add_argument('--cluster', dest='cluster', type=int, 17 the_parser.add_argument('--cluster', dest='cluster', type=int,
18 default=0, help='clustering distance') 18 default=0, help='clustering distance')
19 the_parser.add_argument('--sample_names', dest='sample_names', 19 the_parser.add_argument('--sample_names', dest='sample_names',
20 required=True, nargs='+', 20 required=True, nargs='+',
21 help='list of sample names') 21 help='list of sample names')
61 else: 61 else:
62 self.nostrand = True 62 self.nostrand = True
63 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') 63 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
64 self.chromosomes = dict(zip(self.bam_object.references, 64 self.chromosomes = dict(zip(self.bam_object.references,
65 self.bam_object.lengths)) 65 self.bam_object.lengths))
66 self.map_dict = self.create_map(self.bam_object, self.minsize, 66 self.map_dict = self.create_map(self.bam_object, self.nostrand)
67 self.maxsize, self.nostrand)
68 if self.cluster: 67 if self.cluster:
69 self.map_dict = self.tile_map(self.map_dict, self.cluster) 68 self.map_dict = self.tile_map(self.map_dict, self.cluster)
70 69
71 def create_map(self, bam_object, minsize, maxsize, nostrand=False): 70 def create_map(self, bam_object, nostrand=False):
72 ''' 71 '''
73 Returns a map_dictionary {(chromosome,read_position,polarity): 72 Returns a map_dictionary {(chromosome,read_position,polarity):
74 [read_length, ...]} 73 [read_length, ...]}
75 ''' 74 '''
76 map_dictionary = defaultdict(list) 75 map_dictionary = defaultdict(list)
292 Dataset, Chromosome, Chrom_length, <category (size)>, <some value> 291 Dataset, Chromosome, Chrom_length, <category (size)>, <some value>
293 from a dictionary of sizes: {chrom: {polarity: {size: nbre of reads}}} 292 from a dictionary of sizes: {chrom: {polarity: {size: nbre of reads}}}
294 out is an *open* file handler 293 out is an *open* file handler
295 ''' 294 '''
296 for chrom in sorted(sizedic): 295 for chrom in sorted(sizedic):
297 sizes = sizedic[chrom]['F'].keys() 296 sizes = range(self.minsize, self.maxsize+1)
298 sizes.extend(sizedic[chrom]['R'].keys())
299 strandness = defaultdict(int) 297 strandness = defaultdict(int)
300 sizeness = defaultdict(int) 298 sizeness = defaultdict(int)
301 for polarity in sizedic[chrom]: 299 for polarity in sizedic[chrom]:
302 for size in range(min(sizes), max(sizes)+1): 300 for size in sizes:
303 try: 301 strandness[polarity] += sizedic[chrom][polarity][size]
304 strandness[polarity] += sizedic[chrom][polarity][size]
305 except KeyError:
306 pass
307 sizeness[size] += sizedic[chrom][polarity][size] 302 sizeness[size] += sizedic[chrom][polarity][size]
308 Strandbias = strandness['F'] + strandness['R'] 303 Strandbias = strandness['F'] + strandness['R']
309 if Strandbias: 304 if Strandbias:
310 Strandbias = strandness['F'] / float(Strandbias) 305 Strandbias = strandness['F'] / float(Strandbias)
311 else: 306 else:
316 if StDev: 311 if StDev:
317 sizeness[size] = (sizeness[size] - Mean) / StDev 312 sizeness[size] = (sizeness[size] - Mean) / StDev
318 else: 313 else:
319 sizeness[size] = 0 314 sizeness[size] = 0
320 for polarity in sorted(sizedic[chrom]): 315 for polarity in sorted(sizedic[chrom]):
321 for size in range(min(sizes), max(sizes)+1): 316 for size in sizes:
322 try: 317 try:
323 line = [self.sample_name, chrom, polarity, size, 318 line = [self.sample_name, chrom, polarity, size,
324 sizedic[chrom][polarity][size], 319 sizedic[chrom][polarity][size],
325 Strandbias, sizeness[size]] 320 Strandbias, sizeness[size]]
326 except KeyError: 321 except KeyError: