comparison R/micropan_rarefaction.R @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
comparison
equal deleted inserted replaced
2:97e4e3e818b6 3:e42d30da7a74
1 #!/usr/bin/R
2
3
4 library(micropan)
5 library(dendextend)
6 library("optparse")
7
8
9 option_list = list(
10 make_option(c("-f", "--file"), type="character", default=NULL,
11 help="dataset file name", metavar="character"),
12 make_option(c("-o", "--out"), type="character", default="out.txt",
13 help="output file name [default= %default]", metavar="character"),
14 make_option(c("-a", "--alpha"), type="character", default="heaps.tsv",
15 help="Heaps output file name [default= %default]", metavar="character"),
16 make_option(c("-p", "--pdf"), type="character", default="out.pdf",
17 help="PDF output file name [default= %default]", metavar="character")
18 );
19 opt_parser = OptionParser(option_list=option_list);
20 opt = parse_args(opt_parser);
21
22 if (is.null(opt$file)){
23 print_help(opt_parser)
24 stop("At least one argument must be supplied (input file).\n", call.=FALSE)
25 }
26
27 if (is.null(opt$out)){
28 print_help(opt_parser)
29 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
30 }
31 if (is.null(opt$alpha)){
32 print_help(opt_parser)
33 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
34 }
35 if (is.null(opt$pdf)){
36 print_help(opt_parser)
37 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
38 }
39
40
41 # Loading a pan-matrix in this package
42 #data(xmpl.panmat)
43
44 mydata <- read.table(opt$file, header=TRUE,sep="\t", row.names="Gene")
45 tmydata<-t(mydata)
46 #xmpl.panmat
47
48 rarefaction = rarefaction(tmydata, n.perm = 100)
49 write.table(
50 rarefaction,
51 opt$out,
52 row.names = FALSE,
53 quote = FALSE,
54 sep = '\t')
55
56
57 # Estimating population openness
58 h.est <- heaps(tmydata, n.perm = 100)
59 write.table(
60 h.est,
61 opt$alpha,
62 row.names = TRUE,
63 quote = FALSE,
64 sep = '\t')
65
66 # If alpha < 1 it indicates an open pan-genome
67
68 pdf(opt$pdf)
69
70 library(ggplot2)
71 library(tidyr)
72 rarefaction %>%
73 gather(key = "Permutation", value = "Clusters", -Genome) %>%
74 ggplot(aes(x = Genome, y = Clusters, group = Permutation)) +
75 geom_point() +
76 stat_summary(aes(y = Clusters,group=1), fun=mean, colour="red", geom="line",group=1)