diff multicov.py @ 2:35d2db3753d9 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
author artbio
date Sun, 24 Sep 2017 13:34:16 -0400
parents
children 7c4feda2d9c7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multicov.py	Sun Sep 24 13:34:16 2017 -0400
@@ -0,0 +1,60 @@
+import argparse
+
+import numpy
+
+import pysam
+
+
+def Parser():
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
+                            nargs='+', help='list of input BAM files')
+    the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
+                            help='Coordinates of probes in a bed file')
+    args = the_parser.parse_args()
+    return args
+
+
+def compute_coverage(bam, bed, quality=10):
+    bam_object = pysam.AlignmentFile(bam, 'rb')
+    bed_object = open(bed, 'r')
+    coverage_column = []
+    for line in bed_object:
+        if line[0] == '#':
+            continue
+        fields = line[:-1].split('\t')
+        chr = fields[0]
+        start = fields[1]
+        end = fields[2]
+        coverage = bam_object.count_coverage(reference=chr,
+                                             start=int(start)-1,
+                                             end=int(end),
+                                             quality_threshold=quality)
+        """ Add the 4 coverage values """
+        coverage = [sum(x) for x in zip(*coverage)]
+        coverage_column.append(numpy.mean(coverage))
+    bed_object.close()
+    return coverage_column
+
+
+def main(bams, bed):
+    column_dict = {}
+    for i, bam in enumerate(bams):
+        column_dict[i] = compute_coverage(bam, bed)
+    F = open(bed, 'r')
+    line_counter = 0
+    for line in F:
+        if line[0] == '#':
+            continue
+        prefix = line[:-1]
+        crossline = []
+        for col in sorted(column_dict):
+            crossline.append(str(column_dict[col][line_counter]))
+        line_counter += 1
+        suffix = '\t'.join(crossline)
+        print('%s\t%s' % (prefix, suffix))
+
+
+if __name__ == "__main__":
+    args = Parser()
+    main(args.bams, args.bed)