Mercurial > repos > artbio > probecoverage
diff multicov.py @ 2:35d2db3753d9 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
author | artbio |
---|---|
date | Sun, 24 Sep 2017 13:34:16 -0400 |
parents | |
children | 7c4feda2d9c7 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multicov.py Sun Sep 24 13:34:16 2017 -0400 @@ -0,0 +1,60 @@ +import argparse + +import numpy + +import pysam + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument('-bams', '--bams', dest='bams', required=True, + nargs='+', help='list of input BAM files') + the_parser.add_argument('-bed', '--bed', dest='bed', required=True, + help='Coordinates of probes in a bed file') + args = the_parser.parse_args() + return args + + +def compute_coverage(bam, bed, quality=10): + bam_object = pysam.AlignmentFile(bam, 'rb') + bed_object = open(bed, 'r') + coverage_column = [] + for line in bed_object: + if line[0] == '#': + continue + fields = line[:-1].split('\t') + chr = fields[0] + start = fields[1] + end = fields[2] + coverage = bam_object.count_coverage(reference=chr, + start=int(start)-1, + end=int(end), + quality_threshold=quality) + """ Add the 4 coverage values """ + coverage = [sum(x) for x in zip(*coverage)] + coverage_column.append(numpy.mean(coverage)) + bed_object.close() + return coverage_column + + +def main(bams, bed): + column_dict = {} + for i, bam in enumerate(bams): + column_dict[i] = compute_coverage(bam, bed) + F = open(bed, 'r') + line_counter = 0 + for line in F: + if line[0] == '#': + continue + prefix = line[:-1] + crossline = [] + for col in sorted(column_dict): + crossline.append(str(column_dict[col][line_counter])) + line_counter += 1 + suffix = '\t'.join(crossline) + print('%s\t%s' % (prefix, suffix)) + + +if __name__ == "__main__": + args = Parser() + main(args.bams, args.bed)