Mercurial > repos > dereeper > pangenome_explorer
diff PanExplorer_workflow/R/micropan_rarefaction.R @ 1:032f6b3806a3 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:16:08 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PanExplorer_workflow/R/micropan_rarefaction.R Thu May 30 11:16:08 2024 +0000 @@ -0,0 +1,76 @@ +#!/usr/bin/R + + +library(micropan) +library(dendextend) +library("optparse") + + +option_list = list( + make_option(c("-f", "--file"), type="character", default=NULL, + help="dataset file name", metavar="character"), + make_option(c("-o", "--out"), type="character", default="out.txt", + help="output file name [default= %default]", metavar="character"), + make_option(c("-a", "--alpha"), type="character", default="heaps.tsv", + help="Heaps output file name [default= %default]", metavar="character"), + make_option(c("-p", "--pdf"), type="character", default="out.pdf", + help="PDF output file name [default= %default]", metavar="character") +); +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +if (is.null(opt$file)){ + print_help(opt_parser) + stop("At least one argument must be supplied (input file).\n", call.=FALSE) +} + +if (is.null(opt$out)){ + print_help(opt_parser) + stop("At least one argument must be supplied (out file).\n", call.=FALSE) +} +if (is.null(opt$alpha)){ + print_help(opt_parser) + stop("At least one argument must be supplied (out file).\n", call.=FALSE) +} +if (is.null(opt$pdf)){ + print_help(opt_parser) + stop("At least one argument must be supplied (out file).\n", call.=FALSE) +} + + +# Loading a pan-matrix in this package +#data(xmpl.panmat) + +mydata <- read.table(opt$file, header=TRUE,sep="\t", row.names="Gene") +tmydata<-t(mydata) +#xmpl.panmat + +rarefaction = rarefaction(tmydata, n.perm = 100) +write.table( + rarefaction, + opt$out, + row.names = FALSE, + quote = FALSE, + sep = '\t') + + +# Estimating population openness +h.est <- heaps(tmydata, n.perm = 100) +write.table( + h.est, + opt$alpha, + row.names = TRUE, + quote = FALSE, + sep = '\t') + +# If alpha < 1 it indicates an open pan-genome + +pdf(opt$pdf) + +library(ggplot2) +library(tidyr) +rarefaction %>% +gather(key = "Permutation", value = "Clusters", -Genome) %>% +ggplot(aes(x = Genome, y = Clusters, group = Permutation)) + +geom_point() + +stat_summary(aes(y = Clusters,group=1), fun=mean, colour="red", geom="line",group=1)