annotate R/micropan_rarefaction.R @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/R
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
2
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
4 library(micropan)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
5 library(dendextend)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
6 library("optparse")
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
7
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
8
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
9 option_list = list(
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
10 make_option(c("-f", "--file"), type="character", default=NULL,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
11 help="dataset file name", metavar="character"),
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
12 make_option(c("-o", "--out"), type="character", default="out.txt",
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
13 help="output file name [default= %default]", metavar="character"),
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
14 make_option(c("-a", "--alpha"), type="character", default="heaps.tsv",
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
15 help="Heaps output file name [default= %default]", metavar="character"),
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
16 make_option(c("-p", "--pdf"), type="character", default="out.pdf",
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
17 help="PDF output file name [default= %default]", metavar="character")
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
18 );
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
19 opt_parser = OptionParser(option_list=option_list);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
20 opt = parse_args(opt_parser);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
21
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
22 if (is.null(opt$file)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
23 print_help(opt_parser)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
24 stop("At least one argument must be supplied (input file).\n", call.=FALSE)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
25 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
26
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
27 if (is.null(opt$out)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
28 print_help(opt_parser)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
29 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
30 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
31 if (is.null(opt$alpha)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
32 print_help(opt_parser)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
33 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
34 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
35 if (is.null(opt$pdf)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
36 print_help(opt_parser)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
37 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
38 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
39
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
40
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
41 # Loading a pan-matrix in this package
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
42 #data(xmpl.panmat)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
43
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
44 mydata <- read.table(opt$file, header=TRUE,sep="\t", row.names="Gene")
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
45 tmydata<-t(mydata)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
46 #xmpl.panmat
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
47
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
48 rarefaction = rarefaction(tmydata, n.perm = 100)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
49 write.table(
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
50 rarefaction,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
51 opt$out,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
52 row.names = FALSE,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
53 quote = FALSE,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
54 sep = '\t')
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
55
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
56
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
57 # Estimating population openness
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
58 h.est <- heaps(tmydata, n.perm = 100)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
59 write.table(
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
60 h.est,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
61 opt$alpha,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
62 row.names = TRUE,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
63 quote = FALSE,
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
64 sep = '\t')
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
65
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
66 # If alpha < 1 it indicates an open pan-genome
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
67
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
68 pdf(opt$pdf)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
69
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
70 library(ggplot2)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
71 library(tidyr)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
72 rarefaction %>%
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
73 gather(key = "Permutation", value = "Clusters", -Genome) %>%
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
74 ggplot(aes(x = Genome, y = Clusters, group = Permutation)) +
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
75 geom_point() +
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
76 stat_summary(aes(y = Clusters,group=1), fun=mean, colour="red", geom="line",group=1)