Mercurial > repos > petr-novak > re_utils
comparison summarize_cluster_table.R @ 18:d7f3eff34c27 draft
Uploaded
author | petr-novak |
---|---|
date | Fri, 14 May 2021 11:08:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:d14b68e9fd1d | 18:d7f3eff34c27 |
---|---|
1 #!/usr/bin/env Rscript | |
2 library(optparse) | |
3 option_list <- list( | |
4 make_option(c("-c", "--cluster_table"), default=NA, type = "character", | |
5 help="file from RepeatExplorer2 clustering - CLUSTER_TABLE.csv"), | |
6 | |
7 make_option(c("-m", "--comparative_counts"),default = NA,type = "character", | |
8 help="file from RepeatExplorer2 output - COMPARATIVE_ANALYSIS_COUNTS.csv"), | |
9 make_option(c("-o", "--output"), type="character", | |
10 help="output file name") | |
11 ) | |
12 | |
13 | |
14 opt = parse_args(OptionParser(option_list = option_list)) | |
15 | |
16 ## for testing | |
17 cluster_annotation = opt$cluster_table | |
18 header_line = grep(".*Cluster.*Supercluster.*Size", readLines(cluster_annotation)) | |
19 annot = read.table(cluster_annotation, sep="\t",header=TRUE,as.is=TRUE, skip = header_line - 1) | |
20 | |
21 | |
22 input_read_counts = as.numeric(strsplit( | |
23 grep("Number_of_analyzed_reads", | |
24 readLines(con=cluster_annotation, n=header_line), | |
25 value=TRUE) | |
26 ,split="\t")[[1]][2] | |
27 ) | |
28 | |
29 ## complete classification table: | |
30 unique_groups = sort(unique(annot$Final_annotatio)) | |
31 | |
32 groups_to_remove = grep("contamination|organelle", unique_groups, value=TRUE) | |
33 groups_to_keep = unique_groups[!(unique_groups %in% groups_to_remove)] | |
34 | |
35 if (length(groups_to_remove)>0){ | |
36 input_count_reads_corrected = input_read_counts - sum(annot$Size_adjusted[annot$Final_annotation %in% groups_to_remove]) | |
37 | |
38 }else{ | |
39 input_count_reads_corrected = input_read_counts | |
40 } | |
41 | |
42 proportion = numeric() | |
43 sum_of_reads = numeric() | |
44 for (g in groups_to_keep){ | |
45 sum_of_reads[g] = sum(annot$Size_adjusted[annot$Final_annotation %in% g]) | |
46 proportion[g] = sum_of_reads[g] / input_count_reads_corrected | |
47 } | |
48 | |
49 | |
50 | |
51 summary_table = data.frame(Annotation = groups_to_keep, | |
52 Number_of_reads = sum_of_reads, | |
53 "Proportion[%]" = proportion * 100 , check.names = FALSE) | |
54 | |
55 print(opt$output) | |
56 write.table(summary_table,file = opt$output, | |
57 row.names = FALSE, col.names = TRUE, sep="\t") |