diff summarize_gff_by_attribute.R @ 0:cf3cea0a3039 draft

Uploaded
author petr-novak
date Thu, 07 Oct 2021 06:07:34 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/summarize_gff_by_attribute.R	Thu Oct 07 06:07:34 2021 +0000
@@ -0,0 +1,13 @@
+#!/usr/bin/env Rscript
+# Summarize the features of an annotation file by the values of one attribute.
+#
+# Usage: summarize_gff_by_attribute.R <annotation_file> <attribute_name>
+#
+# The file is read with rtracklayer::import(). For every distinct value of the
+# requested metadata column the script reports the number of features and the
+# sum of their widths, sorted by decreasing total width. The result is written
+# to standard output as a tab-separated table with the columns
+# <attribute_name>, counts and length. Features whose attribute value is NA
+# are not included in the table.
+suppressPackageStartupMessages(library(rtracklayer))
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 2) {
+  stop(
+    "Usage: summarize_gff_by_attribute.R <annotation_file> <attribute_name>",
+    call. = FALSE
+  )
+}
+attribute_name <- args[2]
+g <- import(args[1])
+
+m <- mcols(g)
+if (!attribute_name %in% colnames(m)) {
+  stop(
+    "Attribute '", attribute_name, "' not found. Available attributes: ",
+    paste(colnames(m), collapse = ", "),
+    call. = FALSE
+  )
+}
+groups <- m[[attribute_name]]
+if (is.list(groups) || methods::is(groups, "List")) {
+  # A list-like column holds several values per feature, so a feature cannot
+  # be assigned to a single group.
+  stop(
+    "Attribute '", attribute_name,
+    "' holds multiple values per feature and cannot be used for grouping",
+    call. = FALSE
+  )
+}
+if (is.factor(groups)) {
+  # Unused factor levels would otherwise be reported as empty groups.
+  groups <- droplevels(groups)
+}
+
+# Widths are converted to double because an integer sum() returns NA (with a
+# warning) once a group's total exceeds .Machine$integer.max.
+w <- as.numeric(width(g))
+by_group <- split(w, groups)
+# as.character() keeps this a zero-length character vector for empty input.
+group_names <- as.character(names(by_group))
+total_counts <- lengths(by_group, use.names = FALSE)
+total_lengths <- vapply(by_group, sum, numeric(1), USE.NAMES = FALSE)
+
+# Sort on the numeric totals before the table is built, so the ordering does
+# not depend on column names (the attribute may itself be named "length").
+ord <- order(total_lengths, decreasing = TRUE)
+d <- data.frame(
+  attribute = group_names[ord],
+  counts = total_counts[ord],
+  # Fixed notation: write.table() may otherwise print doubles such as 1e+05.
+  length = sprintf("%.0f", total_lengths[ord]),
+  stringsAsFactors = FALSE
+)
+colnames(d)[1] <- attribute_name
+write.table(d, sep = "\t", row.names = FALSE, quote = FALSE)