Mercurial > repos > artbio > probecoverage
annotate multicov.py @ 6:7c4feda2d9c7 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
author | artbio |
---|---|
date | Tue, 07 Jan 2020 06:08:56 -0500 |
parents | 35d2db3753d9 |
children | 0adb846ca056 |
rev | line source |
---|---|
2
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
1 import argparse |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
2 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
3 import numpy |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
4 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
5 import pysam |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
6 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
7 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
8 def Parser(): |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
9 the_parser = argparse.ArgumentParser() |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
10 the_parser.add_argument('-bams', '--bams', dest='bams', required=True, |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
11 nargs='+', help='list of input BAM files') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
12 the_parser.add_argument('-bed', '--bed', dest='bed', required=True, |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
13 help='Coordinates of probes in a bed file') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
14 args = the_parser.parse_args() |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
15 return args |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
16 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
17 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
18 def compute_coverage(bam, bed, quality=10): |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
19 bam_object = pysam.AlignmentFile(bam, 'rb') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
20 bed_object = open(bed, 'r') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
21 coverage_column = [] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
22 for line in bed_object: |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
23 if line[0] == '#': |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
24 continue |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
25 fields = line[:-1].split('\t') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
26 chr = fields[0] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
27 start = fields[1] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
28 end = fields[2] |
6
7c4feda2d9c7
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
2
diff
changeset
|
29 coverage = bam_object.count_coverage(contig=chr, |
2
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
30 start=int(start)-1, |
6
7c4feda2d9c7
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 62a8b073b9ac98b0231641e5266768e7f8b80b89"
artbio
parents:
2
diff
changeset
|
31 stop=int(end), |
2
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
32 quality_threshold=quality) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
33 """ Add the 4 coverage values """ |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
34 coverage = [sum(x) for x in zip(*coverage)] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
35 coverage_column.append(numpy.mean(coverage)) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
36 bed_object.close() |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
37 return coverage_column |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
38 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
39 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
40 def main(bams, bed): |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
41 column_dict = {} |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
42 for i, bam in enumerate(bams): |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
43 column_dict[i] = compute_coverage(bam, bed) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
44 F = open(bed, 'r') |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
45 line_counter = 0 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
46 for line in F: |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
47 if line[0] == '#': |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
48 continue |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
49 prefix = line[:-1] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
50 crossline = [] |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
51 for col in sorted(column_dict): |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
52 crossline.append(str(column_dict[col][line_counter])) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
53 line_counter += 1 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
54 suffix = '\t'.join(crossline) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
55 print('%s\t%s' % (prefix, suffix)) |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
56 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
57 |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
58 if __name__ == "__main__": |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
59 args = Parser() |
35d2db3753d9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
artbio
parents:
diff
changeset
|
60 main(args.bams, args.bed) |