comparison multicov.py @ 2:35d2db3753d9 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
author artbio
date Sun, 24 Sep 2017 13:34:16 -0400
parents
children 7c4feda2d9c7
comparison
equal deleted inserted replaced
1:ebe5ec2e244d 2:35d2db3753d9
1 import argparse
2
3 import numpy
4
5 import pysam
6
7
8 def Parser():
9 the_parser = argparse.ArgumentParser()
10 the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
11 nargs='+', help='list of input BAM files')
12 the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
13 help='Coordinates of probes in a bed file')
14 args = the_parser.parse_args()
15 return args
16
17
18 def compute_coverage(bam, bed, quality=10):
19 bam_object = pysam.AlignmentFile(bam, 'rb')
20 bed_object = open(bed, 'r')
21 coverage_column = []
22 for line in bed_object:
23 if line[0] == '#':
24 continue
25 fields = line[:-1].split('\t')
26 chr = fields[0]
27 start = fields[1]
28 end = fields[2]
29 coverage = bam_object.count_coverage(reference=chr,
30 start=int(start)-1,
31 end=int(end),
32 quality_threshold=quality)
33 """ Add the 4 coverage values """
34 coverage = [sum(x) for x in zip(*coverage)]
35 coverage_column.append(numpy.mean(coverage))
36 bed_object.close()
37 return coverage_column
38
39
40 def main(bams, bed):
41 column_dict = {}
42 for i, bam in enumerate(bams):
43 column_dict[i] = compute_coverage(bam, bed)
44 F = open(bed, 'r')
45 line_counter = 0
46 for line in F:
47 if line[0] == '#':
48 continue
49 prefix = line[:-1]
50 crossline = []
51 for col in sorted(column_dict):
52 crossline.append(str(column_dict[col][line_counter]))
53 line_counter += 1
54 suffix = '\t'.join(crossline)
55 print('%s\t%s' % (prefix, suffix))
56
57
58 if __name__ == "__main__":
59 args = Parser()
60 main(args.bams, args.bed)