comparison goseq.r @ 4:ae39895af5fe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 0798278a90c08228a386516881680b328fc33f0c
author iuc
date Sun, 30 Sep 2018 09:27:05 -0400
parents 783e8b70b047
children bbcf5f7f2af2
comparison
equal deleted inserted replaced
3:783e8b70b047 4:ae39895af5fe
38 dge_file = args$dge_file 38 dge_file = args$dge_file
39 category_file = args$category_file 39 category_file = args$category_file
40 length_file = args$length_file 40 length_file = args$length_file
41 genome = args$genome 41 genome = args$genome
42 gene_id = args$gene_id 42 gene_id = args$gene_id
43 wallenius_tab = args$wallenius_tab
43 sampling_tab = args$sampling_tab 44 sampling_tab = args$sampling_tab
45 nobias_tab = args$nobias_tab
44 length_bias_plot = args$length_bias_plot 46 length_bias_plot = args$length_bias_plot
45 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot 47 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot
46 repcnt = args$repcnt 48 repcnt = args$repcnt
47 p_adj_method = args$p_adj_method 49 p_adj_method = args$p_adj_method
48 use_genes_without_cat = args$use_genes_without_cat 50 use_genes_without_cat = args$use_genes_without_cat
105 } 107 }
106 108
107 results <- list() 109 results <- list()
108 110
109 # wallenius approximation of p-values 111 # wallenius approximation of p-values
110 if (!is.null(args$wallenius_tab)) { 112 if (wallenius_tab != FALSE) {
111 GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) 113 GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
112 GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method) 114 GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method)
113 GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method) 115 GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method)
114 write.table(GO.wall, args$wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE) 116 write.table(GO.wall, args$wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE)
115 results[['Wallenius']] <- GO.wall 117 results[['Wallenius']] <- GO.wall
116 } 118 }
117 119
118 # hypergeometric (no length bias correction) 120 # hypergeometric (no length bias correction)
119 if (!is.null(args$nobias_tab)) { 121 if (nobias_tab != FALSE) {
120 GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) 122 GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map)
121 GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method) 123 GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method)
122 GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method) 124 GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method)
123 write.table(GO.nobias, args$nobias_tab, sep="\t", row.names = FALSE, quote = FALSE) 125 write.table(GO.nobias, args$nobias_tab, sep="\t", row.names = FALSE, quote = FALSE)
124 results[['Hypergeometric']] <- GO.nobias 126 results[['Hypergeometric']] <- GO.nobias
147 } 149 }
148 results[['Sampling']] <- GO.samp 150 results[['Sampling']] <- GO.samp
149 } 151 }
150 152
151 if (!is.null(args$top_plot)) { 153 if (!is.null(args$top_plot)) {
154 cats_title <- gsub("GO:","", args$fetch_cats)
152 # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html 155 # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html
153 pdf("top10.pdf") 156 pdf("top10.pdf")
154 for (m in names(results)) { 157 for (m in names(results)) {
155 p <- results[[m]] %>% 158 p <- results[[m]] %>%
156 top_n(10, wt=-p.adjust.over_represented) %>% 159 top_n(10, wt=-p.adjust.over_represented) %>%
157 mutate(hitsPerc=numDEInCat*100/numInCat) %>% 160 mutate(hitsPerc=numDEInCat*100/numInCat) %>%
158 ggplot(aes(x=hitsPerc, 161 ggplot(aes(x=hitsPerc,
159 y=term, 162 y=substr(term, 1, 40), # only use 1st 40 chars of terms otherwise squashes plot
160 colour=p.adjust.over_represented, 163 colour=p.adjust.over_represented,
161 size=numDEInCat)) + 164 size=numDEInCat)) +
162 geom_point() + 165 geom_point() +
163 expand_limits(x=0) + 166 expand_limits(x=0) +
164 labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", fetch_cats), subtitle=paste(m, " method")) + 167 labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", cats_title), subtitle=paste(m, " method")) +
165 theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) 168 theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5))
166 print(p) 169 print(p)
167 } 170 }
168 dev.off() 171 dev.off()
169 } 172 }