annotate PanExplorer_workflow/R/micropan_rarefaction.R @ 1:032f6b3806a3 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:16:08 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/R
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
2
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
3
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
4 library(micropan)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
5 library(dendextend)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
6 library("optparse")
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
7
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
8
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
9 option_list = list(
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
10 make_option(c("-f", "--file"), type="character", default=NULL,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
11 help="dataset file name", metavar="character"),
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
12 make_option(c("-o", "--out"), type="character", default="out.txt",
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
13 help="output file name [default= %default]", metavar="character"),
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
14 make_option(c("-a", "--alpha"), type="character", default="heaps.tsv",
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
15 help="Heaps output file name [default= %default]", metavar="character"),
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
16 make_option(c("-p", "--pdf"), type="character", default="out.pdf",
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
17 help="PDF output file name [default= %default]", metavar="character")
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
18 );
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
19 opt_parser = OptionParser(option_list=option_list);
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
20 opt = parse_args(opt_parser);
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
21
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
22 if (is.null(opt$file)){
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
23 print_help(opt_parser)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
24 stop("At least one argument must be supplied (input file).\n", call.=FALSE)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
25 }
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
26
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
27 if (is.null(opt$out)){
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
28 print_help(opt_parser)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
29 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
30 }
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
31 if (is.null(opt$alpha)){
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
32 print_help(opt_parser)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
33 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
34 }
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
35 if (is.null(opt$pdf)){
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
36 print_help(opt_parser)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
37 stop("At least one argument must be supplied (out file).\n", call.=FALSE)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
38 }
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
39
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
40
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
41 # Loading a pan-matrix in this package
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
42 #data(xmpl.panmat)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
43
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
44 mydata <- read.table(opt$file, header=TRUE,sep="\t", row.names="Gene")
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
45 tmydata<-t(mydata)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
46 #xmpl.panmat
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
47
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
48 rarefaction = rarefaction(tmydata, n.perm = 100)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
49 write.table(
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
50 rarefaction,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
51 opt$out,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
52 row.names = FALSE,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
53 quote = FALSE,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
54 sep = '\t')
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
55
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
56
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
57 # Estimating population openness
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
58 h.est <- heaps(tmydata, n.perm = 100)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
59 write.table(
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
60 h.est,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
61 opt$alpha,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
62 row.names = TRUE,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
63 quote = FALSE,
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
64 sep = '\t')
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
65
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
66 # If alpha < 1 it indicates an open pan-genome
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
67
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
68 pdf(opt$pdf)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
69
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
70 library(ggplot2)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
71 library(tidyr)
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
72 rarefaction %>%
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
73 gather(key = "Permutation", value = "Clusters", -Genome) %>%
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
74 ggplot(aes(x = Genome, y = Clusters, group = Permutation)) +
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
75 geom_point() +
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
76 stat_summary(aes(y = Clusters,group=1), fun=mean, colour="red", geom="line",group=1)