# HG changeset patch # User artbio # Date 1655289787 0 # Node ID f00479673d47fd1fce538abd8a5ca910c59668f1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mapping_quality_stats commit e4b37874b820a2ac48732667128a08e5755b7c4b diff -r 000000000000 -r f00479673d47 mapping_quality_stats.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.py Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,36 @@ +import argparse +from collections import defaultdict + +import pysam + + +def Parser(): + the_parser = argparse.ArgumentParser() + the_parser.add_argument('-bam', '--bam', dest='bam', required=True, + help='input BAM file') + the_parser.add_argument('-o', '--output', dest='distribution', + required=True, + help='tabular output for mapq distribution') + args = the_parser.parse_args() + return args + + +def collect_mapq(bam, out): + samfile = pysam.AlignmentFile(bam, "rb") + mapq_dict = defaultdict(int) + for read in samfile: + mapq_dict[read.mapping_quality] += 1 + with open(out, 'w') as out: + out.write('mapq\tnumber_of_alignments\n') + for quality in sorted(mapq_dict): + out.write(f"{quality}\t{mapq_dict[quality]}\n") + return mapq_dict + + +def main(bam, out): + collect_mapq(bam, out) + + +if __name__ == "__main__": + args = Parser() + main(args.bam, args.distribution) diff -r 000000000000 -r f00479673d47 mapping_quality_stats.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.r Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,31 @@ +## Setup R error handling to go to stderr +options(show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) + + +warnings() +library(optparse) +library(ggplot2) + +option_list <- list( + make_option(c("-i", "--input"), type = "character", help = "Path to tabular file"), + make_option(c("-o", "--output"), type = "character", help = "path to the pdf plot") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) +args <- parse_args(parser) + +# data frame implementation +table <- read.delim(args$input, header = TRUE) +colnames(table) <- c("MAPQ", "Counts") + + +# Barplot +pdf(file = args$output) +ggplot(table, aes(x = MAPQ, y = Counts)) + + geom_bar(stat = "identity") +devname <- dev.off() diff -r 000000000000 -r f00479673d47 mapping_quality_stats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_quality_stats.xml Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,62 @@ + + + + r-optparse + r-ggplot2 + pysam + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Collects the values of mapping quality (MAPQ) in a BAM files +Shows the data as a table and a barplot + +**Inputs** + +A bam alignment files which must be sorted + +**Output** + +A data frame of MAPQ counts + +A pdf barplot generated by R and ggplot2 + + + + + 10.1093/bioinformatics/btp352 + + + diff -r 000000000000 -r f00479673d47 test-data/distribution.pdf Binary file test-data/distribution.pdf has changed diff -r 000000000000 -r f00479673d47 test-data/distribution.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/distribution.tab Wed Jun 15 10:43:07 2022 +0000 @@ -0,0 +1,32 @@ +mapq number_of_alignments +0 54 +6 1 +8 2 +12 1 +14 2 +16 1 +17 1 +21 3 +23 1 +24 1 +25 1 +30 2 +33 6 +40 9 +44 2 +45 9 +46 6 +47 180 +48 196 +49 60 +50 17 +51 7 +52 11 +53 4 +54 6 +55 1 +56 8 +57 9 +58 10 +59 10 +60 10496 diff -r 000000000000 -r f00479673d47 test-data/sample.bam Binary file test-data/sample.bam has changed