annotate multicov.py @ 6:7c4feda2d9c7 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
author artbio
date Tue, 07 Jan 2020 06:08:56 -0500
parents 35d2db3753d9
children 0adb846ca056
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
1 import argparse
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
2
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
3 import numpy
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
4
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
5 import pysam
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
6
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
7
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
8 def Parser():
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
9 the_parser = argparse.ArgumentParser()
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
10 the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
11 nargs='+', help='list of input BAM files')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
12 the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
13 help='Coordinates of probes in a bed file')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
14 args = the_parser.parse_args()
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
15 return args
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
16
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
17
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
18 def compute_coverage(bam, bed, quality=10):
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
19 bam_object = pysam.AlignmentFile(bam, 'rb')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
20 bed_object = open(bed, 'r')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
21 coverage_column = []
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
22 for line in bed_object:
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
23 if line[0] == '#':
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
24 continue
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
25 fields = line[:-1].split('\t')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
26 chr = fields[0]
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
27 start = fields[1]
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
28 end = fields[2]
6
7c4feda2d9c7 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents: 2
diff changeset
29 coverage = bam_object.count_coverage(contig=chr,
2
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
30 start=int(start)-1,
6
7c4feda2d9c7 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents: 2
diff changeset
31 stop=int(end),
2
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
32 quality_threshold=quality)
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
33 """ Add the 4 coverage values """
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
34 coverage = [sum(x) for x in zip(*coverage)]
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
35 coverage_column.append(numpy.mean(coverage))
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
36 bed_object.close()
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
37 return coverage_column
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
38
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
39
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
40 def main(bams, bed):
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
41 column_dict = {}
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
42 for i, bam in enumerate(bams):
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
43 column_dict[i] = compute_coverage(bam, bed)
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
44 F = open(bed, 'r')
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
45 line_counter = 0
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
46 for line in F:
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
47 if line[0] == '#':
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
48 continue
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
49 prefix = line[:-1]
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
50 crossline = []
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
51 for col in sorted(column_dict):
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
52 crossline.append(str(column_dict[col][line_counter]))
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
53 line_counter += 1
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
54 suffix = '\t'.join(crossline)
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
55 print('%s\t%s' % (prefix, suffix))
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
56
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
57
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
58 if __name__ == "__main__":
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
59 args = Parser()
35d2db3753d9 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff changeset
60 main(args.bams, args.bed)