0
|
1 #!/usr/bin/env Rscript
|
|
2 # ABSTRACT: Create R plots
|
|
3 # PODNAME: create_plots.R
|
|
4 # Take the output files from the pan genome pipeline and create nice plots.
|
|
5 library(ggplot2)
|
|
6
|
|
7
|
|
8 mydata = read.table("number_of_new_genes.Rtab")
|
|
9 boxplot(mydata, data=mydata, main="Number of new genes",
|
|
10 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
|
|
11
|
|
12 mydata = read.table("number_of_conserved_genes.Rtab")
|
|
13 boxplot(mydata, data=mydata, main="Number of conserved genes",
|
|
14 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
|
|
15
|
|
16 mydata = read.table("number_of_genes_in_pan_genome.Rtab")
|
|
17 boxplot(mydata, data=mydata, main="No. of genes in the pan-genome",
|
|
18 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
|
|
19
|
|
20 mydata = read.table("number_of_unique_genes.Rtab")
|
|
21 boxplot(mydata, data=mydata, main="Number of unique genes",
|
|
22 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
|
|
23
|
|
24 mydata = read.table("blast_identity_frequency.Rtab")
|
|
25 plot(mydata,main="Number of blastp hits with different percentage identity", xlab="Blast percentage identity", ylab="No. blast results")
|
|
26
|
|
27
|
|
28 library(ggplot2)
|
|
29 conserved = colMeans(read.table("number_of_conserved_genes.Rtab"))
|
|
30 total = colMeans(read.table("number_of_genes_in_pan_genome.Rtab"))
|
|
31
|
|
32 genes = data.frame( genes_to_genomes = c(conserved,total),
|
|
33 genomes = c(c(1:length(conserved)),c(1:length(conserved))),
|
|
34 Key = c(rep("Conserved genes",length(conserved)), rep("Total genes",length(total))) )
|
|
35
|
|
36 ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
|
|
37 theme_classic() +
|
|
38 ylim(c(1,max(total)))+
|
|
39 xlim(c(1,length(total)))+
|
|
40 xlab("No. of genomes") +
|
|
41 ylab("No. of genes")+ theme_bw(base_size = 16) + theme(legend.justification=c(0,1),legend.position=c(0,1))+
|
|
42 ggsave(filename="conserved_vs_total_genes.png", scale=1)
|
|
43
|
|
44 ######################
|
|
45
|
|
46 unique_genes = colMeans(read.table("number_of_unique_genes.Rtab"))
|
|
47 new_genes = colMeans(read.table("number_of_new_genes.Rtab"))
|
|
48
|
|
49 genes = data.frame( genes_to_genomes = c(unique_genes,new_genes),
|
|
50 genomes = c(c(1:length(unique_genes)),c(1:length(unique_genes))),
|
|
51 Key = c(rep("Unique genes",length(unique_genes)), rep("New genes",length(new_genes))) )
|
|
52
|
|
53 ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
|
|
54 theme_classic() +
|
|
55 ylim(c(1,max(unique_genes)))+
|
|
56 xlim(c(1,length(unique_genes)))+
|
|
57 xlab("No. of genomes") +
|
|
58 ylab("No. of genes")+ theme_bw(base_size = 16) + theme(legend.justification=c(1,1),legend.position=c(1,1))+
|
|
59 ggsave(filename="unique_vs_new_genes.png", scale=1)
|