Mercurial > repos > cristian > rbgoa
annotate GO_MWU.R @ 1:f7287f82602f draft
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author | cristian |
---|---|
date | Tue, 19 Apr 2022 08:28:43 +0000 |
parents | 91261b42c07e |
children | 5acf9dfdfa27 |
rev | line source |
---|---|
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
2 # GO_MWU uses continuous measure of significance (such as fold-change or |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
3 # -log(p-value) ) to identify GO categories that are significantly enriches |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
4 # with either up- or down-regulated genes. The advantage - no need to impose |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
5 # arbitrary significance cutoff. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
6 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
7 # If the measure is binary (0 or 1) the script will perform a typical "GO |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
8 # enrichment" analysis based Fisher's exact test: it will show GO categories |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
9 # over-represented among the genes that have 1 as their measure. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
10 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
11 # On the plot, different fonts are used to indicate significance and color |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
12 # indicates enrichment with either up (red) or down (blue) regulated genes. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
13 # No colors are shown for binary measure analysis. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
14 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
15 # The tree on the plot is hierarchical clustering of GO categories based on |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
16 # shared genes. Categories with no branch length between them are subsets of each other. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
17 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
18 # The fraction next to GO category name indicates the fracton of "good" genes |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
19 # in it; "good" genes being the ones exceeding the arbitrary absValue cutoff |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
20 # (option in gomwuPlot). For Fisher's based test, specify absValue = 0.5. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
21 # This value does not affect statistics and is used for plotting only. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
22 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
23 # Stretch the plot manually to match tree to text |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
24 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
25 # Mikhail V. Matz, UT Austin, February 2015; matz@utexas.edu |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
26 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
27 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
28 # setup R error handling to go to stderr |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
29 options(show.error.messages = F, error = function() { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
30 cat(geterrmessage(), file = stderr()) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
31 q("no", 1, F) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
32 }) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
33 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
34 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
35 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
36 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
37 library("getopt") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
38 library("tools") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
39 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
40 args <- commandArgs(trailingOnly = TRUE) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
41 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
42 # get options, using the spec as defined by the enclosed list. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
43 # we read the options from the default: commandArgs(TRUE). |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
44 spec <- matrix(c( |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
45 "help", "h", 0, "logical", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
46 "scriptdir", "s", 1, "character", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
47 "input", "i", 1, "character", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
48 "goAnnotations", "a", 1, "character", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
49 "goDatabase", "g", 1, "character", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
50 "goDivision", "d", 1, "character", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
51 "largest", "o", 1, "numeric", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
52 "smallest", "m", 1, "numeric", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
53 "clusterheight", "c", 1, "numeric", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
54 "pcut", "p", 1, "numeric", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
55 "hcut", "t", 1, "numeric", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
56 "version", "v", 0, "character" |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
57 ), byrow = TRUE, ncol = 4) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
58 opt <- getopt(spec) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
59 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
60 # if help was asked for print a friendly message |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
61 # and exit with a non-zero error code |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
62 if (!is.null(opt$help)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
63 cat(getopt(spec, usage = TRUE)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
64 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
65 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
66 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
67 if (!is.null(opt$version)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
68 cat("0.1.0\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
69 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
70 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
71 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
72 # enforce the following required arguments |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
73 if (is.null(opt$scriptdir)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
74 cat("'scriptdir' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
75 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
76 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
77 if (is.null(opt$input)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
78 cat("'input' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
79 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
80 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
81 if (is.null(opt$goDatabase)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
82 cat("'goDatabase' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
83 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
84 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
85 if (is.null(opt$goAnnotations)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
86 cat("'goAnnotations' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
87 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
88 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
89 if (is.null(opt$goDivision)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
90 cat("'goDivision' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
91 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
92 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
93 if (is.null(opt$clusterheight)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
94 cat("'clusterheight' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
95 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
96 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
97 if (is.null(opt$largest)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
98 cat("'largest' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
99 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
100 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
101 if (is.null(opt$smallest)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
102 cat("'smallest' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
103 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
104 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
105 if (is.null(opt$pcut)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
106 cat("'pcut' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
107 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
108 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
109 if (is.null(opt$hcut)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
110 cat("'hcut' is required\n") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
111 q(status = 1) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
112 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
113 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
114 source_local <- function(fname) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
115 # argv <- commandArgs(trailingOnly = FALSE) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
116 # base_dir <- dirname(substring(argv[grep("--file = ", argv)], 8)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
117 base_dir <- opt$scriptdir |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
118 source(paste(base_dir, fname, sep = "/")) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
119 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
120 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
121 source_local("gomwu.functions.R") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
122 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
123 |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
124 nn <- strsplit(opt$input, "[/.]") |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
125 if (length(nn[[1]]) == 3) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
126 dir <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
127 name <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
128 ext <- nn[[1]][3] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
129 } else if (length(nn[[1]]) == 2) { |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
130 dir <- "." |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
131 name <- nn[[1]][1] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
132 ext <- nn[[1]][2] |
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
133 } |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
134 # It might take a few minutes for MF and BP. Do not rerun it if you just want |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
135 # to replot the data with different cutoffs, go straight to gomwuPlot. If you |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
136 # change any of the numeric values below, delete the files that were generated |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
137 # in previos runs first. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
138 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
139 gomwuStats(opt$input, opt$goDatabase, opt$goAnnotations, opt$goDivision, opt$scriptdir, |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
140 # replace with full path to perl executable if it is not in your system's PATH already |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
141 perlPath = "perl", |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
142 # a GO category will not be considered if it contains more than this fraction of the total number of genes |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
143 largest = opt$largest, |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
144 smallest = opt$smallest, # a GO category should contain at least this many genes to be considered. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
145 clusterCutHeight = opt$clusterheight, # threshold for merging similar (gene-sharing) terms. See README for details. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
146 # Alternative = "g" # by default the MWU test is two-tailed; |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
147 # specify "g" or "l" of you want to test for "greater" or "less" instead. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
148 # Module = TRUE,Alternative="g" # un-remark this if you are analyzing a |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
149 # SIGNED WGCNA module (values: 0 for not in module genes, kME for in-module genes). |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
150 # In the call to gomwuPlot below, specify absValue=0.001 (count number of "good genes" that fall into the module) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
151 # Module = TRUE # un-remark this if you are analyzing an UNSIGNED WGCNA module |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
152 ) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
153 # do not continue if the printout shows that no GO terms pass 10% FDR. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
154 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
155 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
156 # ----------- Plotting results |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
157 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
158 # change this to a pdf output |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
159 pdf(paste0(dir,"/","Rplots.pdf")) |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
160 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
161 results <- gomwuPlot(opt$input, opt$goAnnotations, opt$goDivision, |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
162 # genes with the measure value exceeding this will be counted as "good genes". |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
163 # This setting is for signed log-pvalues. Specify absValue=0.001 if you are doing |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
164 # Fisher's exact test for standard GO enrichment or analyzing a WGCNA module (all non-zero genes = "good genes"). |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
165 absValue = -log(0.05, 10), |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
166 # absValue = 1, # un-remark this if you are using log2-fold changes |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
167 # FDR threshold for plotting. Specify level1=1 to plot all GO categories containing genes exceeding the absValue. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
168 level1 = 0.1, |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
169 level2 = 0.05, # FDR cutoff to print in regular (not italic) font. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
170 level3 = 0.01, # FDR cutoff to print in large bold font. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
171 # decrease to fit more on one page, or increase (after rescaling the plot so the tree fits the text) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
172 # for better "word cloud" effect |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
173 txtsize = 1.2, |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
174 treeHeight = 0.5, # height of the hierarchical clustering tree |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
175 # colors = c("dodgerblue2","firebrick1","skyblue2","lightcoral") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
176 # these are default colors, uncomment and change if needed |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
177 ) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
178 # manually rescale the plot so the tree matches the text |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
179 # if there are too many categories displayed, try make it more stringent with level1 = 0.05,level2=0.01,level3=0.001. |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
180 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
181 # text representation of results, with actual adjusted p-values |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
182 write.table(results[[1]], paste0(dir, "/", "results.tsv"), sep = "\t") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
183 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
184 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
185 # ------- extracting representative GOs |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
186 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
187 # this module chooses GO terms that best represent *independent* groups of significant GO terms |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
188 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
189 pcut <- opt$pcut |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
190 hcut <- opt$hcut |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
191 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
192 # plotting the GO tree with the cut level (uncomment the next two lines to plot) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
193 # plot(results[[2]],cex = 0.6) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
194 # abline(h = hcut,col="red") |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
195 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
196 # cutting |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
197 ct <- cutree(results[[2]], h = hcut) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
198 annots <- c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
199 ci <- 1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
200 for (ci in unique(ct)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
201 message(ci) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
202 rn <- names(ct)[ct == ci] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
203 obs <- grep("obsolete", rn) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
204 if (length(obs) > 0) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
205 rn <- rn[-obs] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
206 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
207 if (length(rn) == 0) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
208 next |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
209 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
210 rr <- results[[1]][rn, ] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
211 bestrr <- rr[which(rr$pval == min(rr$pval)), ] |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
212 best <- 1 |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
213 if (nrow(bestrr) > 1) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
214 nns <- sub(" .+", "", row.names(bestrr)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
215 fr <- c() |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
216 for (i in 1:length(nns)) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
217 fr <- c(fr, eval(parse(text = nns[i]))) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
218 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
219 best <- which(fr == max(fr)) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
220 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
221 if (bestrr$pval[best] <= pcut) { |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
222 annots <- c(annots, sub("\\d+\\/\\d+ ", "", row.names(bestrr)[best])) |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
223 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
224 } |
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
225 |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
226 mwus <- read.table(paste0(dir,"/",paste("MWU", opt$goDivision, name, sep = "_"), ".", ext), header = T) |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
227 best_GOs <- mwus[mwus$name %in% annots, ] |
1
f7287f82602f
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
cristian
parents:
0
diff
changeset
|
228 write.table(best_GOs, paste0(dir, "/","best_go.tsv"), sep = "\t") |
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
229 dev.off() |