annotate GO_MWU.R @ 0:91261b42c07e draft

"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
author cristian
date Thu, 14 Apr 2022 13:28:05 +0000
parents
children f7287f82602f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
2 # GO_MWU uses continuous measure of significance (such as fold-change or
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
3 # -log(p-value) ) to identify GO categories that are significantly enriches
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
4 # with either up- or down-regulated genes. The advantage - no need to impose
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
5 # arbitrary significance cutoff.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
6
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
7 # If the measure is binary (0 or 1) the script will perform a typical "GO
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
8 # enrichment" analysis based Fisher's exact test: it will show GO categories
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
9 # over-represented among the genes that have 1 as their measure.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
10
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
11 # On the plot, different fonts are used to indicate significance and color
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
12 # indicates enrichment with either up (red) or down (blue) regulated genes.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
13 # No colors are shown for binary measure analysis.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
14
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
15 # The tree on the plot is hierarchical clustering of GO categories based on
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
16 # shared genes. Categories with no branch length between them are subsets of each other.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
17
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
18 # The fraction next to GO category name indicates the fracton of "good" genes
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
19 # in it; "good" genes being the ones exceeding the arbitrary absValue cutoff
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
20 # (option in gomwuPlot). For Fisher's based test, specify absValue = 0.5.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
21 # This value does not affect statistics and is used for plotting only.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
22
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
23 # Stretch the plot manually to match tree to text
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
24
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
25 # Mikhail V. Matz, UT Austin, February 2015; matz@utexas.edu
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
26
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
27
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
28 # setup R error handling to go to stderr
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
29 options(show.error.messages = F, error = function() {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
30 cat(geterrmessage(), file = stderr())
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
31 q("no", 1, F)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
32 })
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
33
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
34 # we need that to not crash galaxy with an UTF8 error on German LC settings.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
36
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
37 library("getopt")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
38 library("tools")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
40 args <- commandArgs(trailingOnly = TRUE)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
41
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
42 # get options, using the spec as defined by the enclosed list.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
43 # we read the options from the default: commandArgs(TRUE).
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
44 spec <- matrix(c(
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
45 "help", "h", 0, "logical",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
46 "scriptdir", "s", 1, "character",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
47 "input", "i", 1, "character",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
48 "goAnnotations", "a", 1, "character",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
49 "goDatabase", "g", 1, "character",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
50 "goDivision", "d", 1, "character",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
51 "largest", "o", 1, "numeric",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
52 "smallest", "m", 1, "numeric",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
53 "clusterheight", "c", 1, "numeric",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
54 "pcut", "p", 1, "numeric",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
55 "hcut", "t", 1, "numeric",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
56 "version", "v", 0, "character"
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
57 ), byrow = TRUE, ncol = 4)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
58 opt <- getopt(spec)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
59
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
60 # if help was asked for print a friendly message
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
61 # and exit with a non-zero error code
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
62 if (!is.null(opt$help)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
63 cat(getopt(spec, usage = TRUE))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
64 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
65 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
66
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
67 if (!is.null(opt$version)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
68 cat("0.1.0\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
69 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
70 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
71
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
72 # enforce the following required arguments
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
73 if (is.null(opt$scriptdir)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
74 cat("'scriptdir' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
75 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
76 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
77 if (is.null(opt$input)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
78 cat("'input' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
79 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
80 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
81 if (is.null(opt$goDatabase)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
82 cat("'goDatabase' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
83 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
84 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
85 if (is.null(opt$goAnnotations)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
86 cat("'goAnnotations' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
87 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
88 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
89 if (is.null(opt$goDivision)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
90 cat("'goDivision' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
91 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
92 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
93 if (is.null(opt$clusterheight)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
94 cat("'clusterheight' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
95 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
96 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
97 if (is.null(opt$largest)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
98 cat("'largest' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
99 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
100 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
101 if (is.null(opt$smallest)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
102 cat("'smallest' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
103 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
104 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
105 if (is.null(opt$pcut)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
106 cat("'pcut' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
107 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
108 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
109 if (is.null(opt$hcut)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
110 cat("'hcut' is required\n")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
111 q(status = 1)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
112 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
113
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
114 source_local <- function(fname) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
115 # argv <- commandArgs(trailingOnly = FALSE)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
116 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
117 base_dir <- opt$scriptdir
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
118 source(paste(base_dir, fname, sep = "/"))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
119 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
120
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
121 source_local("gomwu.functions.R")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
122
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
123 # It might take a few minutes for MF and BP. Do not rerun it if you just want
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
124 # to replot the data with different cutoffs, go straight to gomwuPlot. If you
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
125 # change any of the numeric values below, delete the files that were generated
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
126 # in previos runs first.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
127
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
128 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
129 # replace with full path to perl executable if it is not in your system's PATH already
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
130 perlPath = "perl",
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
131 # a GO category will not be considered if it contains more than this fraction of the total number of genes
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
132 largest = opt$largest,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
133 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
134 clusterCutHeight = opt$clusterheight, # threshold for merging similar (gene-sharing) terms. See README for details.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
135 # Alternative = "g" # by default the MWU test is two-tailed;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
136 # specify "g" or "l" of you want to test for "greater" or "less" instead.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
137 # Module = TRUE,Alternative="g" # un-remark this if you are analyzing a
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
138 # SIGNED WGCNA module (values: 0 for not in module genes, kME for in-module genes).
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
139 # In the call to gomwuPlot below, specify absValue=0.001 (count number of "good genes" that fall into the module)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
140 # Module = TRUE # un-remark this if you are analyzing an UNSIGNED WGCNA module
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
141 )
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
142 # do not continue if the printout shows that no GO terms pass 10% FDR.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
143
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
144
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
145 # ----------- Plotting results
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
146
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
147 # change this to a pdf output
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
148 pdf()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
149
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
150 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
151 # genes with the measure value exceeding this will be counted as "good genes".
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
152 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
153 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes").
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
154 absValue = -log(0.05, 10),
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
155 # absValue = 1, # un-remark this if you are using log2-fold changes
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
156 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
157 level1 = 0.1,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
158 level2 = 0.05, # FDR cutoff to print in regular (not italic) font.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
159 level3 = 0.01, # FDR cutoff to print in large bold font.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
160 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
161 # for better "word cloud" effect
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
162 txtsize = 1.2,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
163 treeHeight = 0.5, # height of the hierarchical clustering tree
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
164 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
165 # these are default colors, uncomment and change if needed
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
166 )
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
167 # manually rescale the plot so the tree matches the text
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
168 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
169
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
170 # text representation of results, with actual adjusted p-values
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
171 write.table(results[[1]], "results.tsv", sep = "\t")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
172
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
173
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
174 # ------- extracting representative GOs
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
175
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
176 # this module chooses GO terms that best represent *independent* groups of significant GO terms
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
177
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
178 pcut <- opt$pcut
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
179 hcut <- opt$hcut
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
180
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
181 # plotting the GO tree with the cut level (uncomment the next two lines to plot)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
182 # plot(results[[2]],cex = 0.6)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
183 # abline(h = hcut,col="red")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
184
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
185 # cutting
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
186 ct <- cutree(results[[2]], h = hcut)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
187 annots <- c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
188 ci <- 1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
189 for (ci in unique(ct)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
190 message(ci)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
191 rn <- names(ct)[ct == ci]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
192 obs <- grep("obsolete", rn)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
193 if (length(obs) > 0) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
194 rn <- rn[-obs]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
195 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
196 if (length(rn) == 0) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
197 next
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
198 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
199 rr <- results[[1]][rn, ]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
200 bestrr <- rr[which(rr$pval == min(rr$pval)), ]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
201 best <- 1
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
202 if (nrow(bestrr) > 1) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
203 nns <- sub(" .+", "", row.names(bestrr))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
204 fr <- c()
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
205 for (i in 1:length(nns)) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
206 fr <- c(fr, eval(parse(text = nns[i])))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
207 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
208 best <- which(fr == max(fr))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
209 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
210 if (bestrr$pval[best] <= pcut) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
211 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best]))
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
212 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
213 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
214
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
215 mwus <- read.table(paste("MWU", opt$goDivision, opt$input, sep = "_"), header = T)
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
216 best_GOs <- mwus[mwus$name %in% annots, ]
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
217 write.table(best_GOs, "best_go.tsv", sep = "\t")
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
218 dev.off()