annotate Roary/bin/create_pan_genome_plots.R @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/env Rscript
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2 # ABSTRACT: Create R plots
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # PODNAME: create_plots.R
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4 # Take the output files from the pan genome pipeline and create nice plots.
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 library(ggplot2)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 mydata = read.table("number_of_new_genes.Rtab")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9 boxplot(mydata, data=mydata, main="Number of new genes",
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 mydata = read.table("number_of_conserved_genes.Rtab")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 boxplot(mydata, data=mydata, main="Number of conserved genes",
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 mydata = read.table("number_of_genes_in_pan_genome.Rtab")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 boxplot(mydata, data=mydata, main="No. of genes in the pan-genome",
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 mydata = read.table("number_of_unique_genes.Rtab")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21 boxplot(mydata, data=mydata, main="Number of unique genes",
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22 xlab="No. of genomes", ylab="No. of genes",varwidth=TRUE, ylim=c(0,max(mydata)), outline=FALSE)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
23
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
24 mydata = read.table("blast_identity_frequency.Rtab")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
25 plot(mydata,main="Number of blastp hits with different percentage identity", xlab="Blast percentage identity", ylab="No. blast results")
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
26
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
27
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
28 library(ggplot2)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
29 conserved = colMeans(read.table("number_of_conserved_genes.Rtab"))
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
30 total = colMeans(read.table("number_of_genes_in_pan_genome.Rtab"))
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
31
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
32 genes = data.frame( genes_to_genomes = c(conserved,total),
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
33 genomes = c(c(1:length(conserved)),c(1:length(conserved))),
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
34 Key = c(rep("Conserved genes",length(conserved)), rep("Total genes",length(total))) )
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
35
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
36 ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
37 theme_classic() +
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
38 ylim(c(1,max(total)))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
39 xlim(c(1,length(total)))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
40 xlab("No. of genomes") +
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
41 ylab("No. of genes")+ theme_bw(base_size = 16) + theme(legend.justification=c(0,1),legend.position=c(0,1))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
42 ggsave(filename="conserved_vs_total_genes.png", scale=1)
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
43
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
44 ######################
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
45
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
46 unique_genes = colMeans(read.table("number_of_unique_genes.Rtab"))
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
47 new_genes = colMeans(read.table("number_of_new_genes.Rtab"))
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
48
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
49 genes = data.frame( genes_to_genomes = c(unique_genes,new_genes),
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
50 genomes = c(c(1:length(unique_genes)),c(1:length(unique_genes))),
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
51 Key = c(rep("Unique genes",length(unique_genes)), rep("New genes",length(new_genes))) )
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
52
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
53 ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
54 theme_classic() +
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
55 ylim(c(1,max(unique_genes)))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
56 xlim(c(1,length(unique_genes)))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
57 xlab("No. of genomes") +
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
58 ylab("No. of genes")+ theme_bw(base_size = 16) + theme(legend.justification=c(1,1),legend.position=c(1,1))+
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
59 ggsave(filename="unique_vs_new_genes.png", scale=1)