Mercurial > repos > iuc > goseq
comparison goseq.r @ 4:ae39895af5fe draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/goseq commit 0798278a90c08228a386516881680b328fc33f0c
author | iuc |
---|---|
date | Sun, 30 Sep 2018 09:27:05 -0400 |
parents | 783e8b70b047 |
children | bbcf5f7f2af2 |
comparison
equal
deleted
inserted
replaced
3:783e8b70b047 | 4:ae39895af5fe |
---|---|
38 dge_file = args$dge_file | 38 dge_file = args$dge_file |
39 category_file = args$category_file | 39 category_file = args$category_file |
40 length_file = args$length_file | 40 length_file = args$length_file |
41 genome = args$genome | 41 genome = args$genome |
42 gene_id = args$gene_id | 42 gene_id = args$gene_id |
43 wallenius_tab = args$wallenius_tab | |
43 sampling_tab = args$sampling_tab | 44 sampling_tab = args$sampling_tab |
45 nobias_tab = args$nobias_tab | |
44 length_bias_plot = args$length_bias_plot | 46 length_bias_plot = args$length_bias_plot |
45 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot | 47 sample_vs_wallenius_plot = args$sample_vs_wallenius_plot |
46 repcnt = args$repcnt | 48 repcnt = args$repcnt |
47 p_adj_method = args$p_adj_method | 49 p_adj_method = args$p_adj_method |
48 use_genes_without_cat = args$use_genes_without_cat | 50 use_genes_without_cat = args$use_genes_without_cat |
105 } | 107 } |
106 | 108 |
107 results <- list() | 109 results <- list() |
108 | 110 |
109 # wallenius approximation of p-values | 111 # wallenius approximation of p-values |
110 if (!is.null(args$wallenius_tab)) { | 112 if (wallenius_tab != FALSE) { |
111 GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) | 113 GO.wall=goseq(pwf, genome = genome, id = gene_id, use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) |
112 GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method) | 114 GO.wall$p.adjust.over_represented = p.adjust(GO.wall$over_represented_pvalue, method=p_adj_method) |
113 GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method) | 115 GO.wall$p.adjust.under_represented = p.adjust(GO.wall$under_represented_pvalue, method=p_adj_method) |
114 write.table(GO.wall, args$wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE) | 116 write.table(GO.wall, args$wallenius_tab, sep="\t", row.names = FALSE, quote = FALSE) |
115 results[['Wallenius']] <- GO.wall | 117 results[['Wallenius']] <- GO.wall |
116 } | 118 } |
117 | 119 |
118 # hypergeometric (no length bias correction) | 120 # hypergeometric (no length bias correction) |
119 if (!is.null(args$nobias_tab)) { | 121 if (nobias_tab != FALSE) { |
120 GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) | 122 GO.nobias=goseq(pwf, genome = genome, id = gene_id, method="Hypergeometric", use_genes_without_cat = use_genes_without_cat, gene2cat=go_map) |
121 GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method) | 123 GO.nobias$p.adjust.over_represented = p.adjust(GO.nobias$over_represented_pvalue, method=p_adj_method) |
122 GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method) | 124 GO.nobias$p.adjust.under_represented = p.adjust(GO.nobias$under_represented_pvalue, method=p_adj_method) |
123 write.table(GO.nobias, args$nobias_tab, sep="\t", row.names = FALSE, quote = FALSE) | 125 write.table(GO.nobias, args$nobias_tab, sep="\t", row.names = FALSE, quote = FALSE) |
124 results[['Hypergeometric']] <- GO.nobias | 126 results[['Hypergeometric']] <- GO.nobias |
147 } | 149 } |
148 results[['Sampling']] <- GO.samp | 150 results[['Sampling']] <- GO.samp |
149 } | 151 } |
150 | 152 |
151 if (!is.null(args$top_plot)) { | 153 if (!is.null(args$top_plot)) { |
154 cats_title <- gsub("GO:","", args$fetch_cats) | |
152 # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html | 155 # modified from https://bioinformatics-core-shared-training.github.io/cruk-summer-school-2018/RNASeq2018/html/06_Gene_set_testing.nb.html |
153 pdf("top10.pdf") | 156 pdf("top10.pdf") |
154 for (m in names(results)) { | 157 for (m in names(results)) { |
155 p <- results[[m]] %>% | 158 p <- results[[m]] %>% |
156 top_n(10, wt=-p.adjust.over_represented) %>% | 159 top_n(10, wt=-p.adjust.over_represented) %>% |
157 mutate(hitsPerc=numDEInCat*100/numInCat) %>% | 160 mutate(hitsPerc=numDEInCat*100/numInCat) %>% |
158 ggplot(aes(x=hitsPerc, | 161 ggplot(aes(x=hitsPerc, |
159 y=term, | 162 y=substr(term, 1, 40), # only use 1st 40 chars of terms otherwise squashes plot |
160 colour=p.adjust.over_represented, | 163 colour=p.adjust.over_represented, |
161 size=numDEInCat)) + | 164 size=numDEInCat)) + |
162 geom_point() + | 165 geom_point() + |
163 expand_limits(x=0) + | 166 expand_limits(x=0) + |
164 labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", fetch_cats), subtitle=paste(m, " method")) + | 167 labs(x="% DE in category", y="Category", colour="adj. P value", size="Count", title=paste("Top over-represented categories in", cats_title), subtitle=paste(m, " method")) + |
165 theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) | 168 theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)) |
166 print(p) | 169 print(p) |
167 } | 170 } |
168 dev.off() | 171 dev.off() |
169 } | 172 } |