comparison maaslin-4450aa4ecc84/src/lib/Misc.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
comparison
equal deleted inserted replaced
0:e0b5980139d9 1:a87d5a5f2776
1 #####################################################################################
2 #Copyright (C) <2012>
3 #
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
5 #this software and associated documentation files (the "Software"), to deal in the
6 #Software without restriction, including without limitation the rights to use, copy,
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8 #and to permit persons to whom the Software is furnished to do so, subject to
9 #the following conditions:
10 #
11 #The above copyright notice and this permission notice shall be included in all copies
12 #or substantial portions of the Software.
13 #
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 #
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
22 # authored by the Huttenhower lab at the Harvard School of Public Health
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
24 #####################################################################################
25
26 ### Modified Code
27 ### This code is from the package agricolae by Felipe de Mendiburu
28 ### Modifications here are minimal and allow one to use the p.values from the post hoc comparisons
29 ### Authors do not claim credit for this solution only needed to modify code to use the output.
30 kruskal <- function (y, trt, alpha = 0.05, p.adj = c("none", "holm", "hochberg",
31 "bonferroni", "BH", "BY", "fdr"), group = TRUE, main = NULL)
32 {
33 dfComparisons=NULL
34 dfMeans=NULL
35 dntStudent=NULL
36 dLSD=NULL
37 dHMean=NULL
38 name.y <- paste(deparse(substitute(y)))
39 name.t <- paste(deparse(substitute(trt)))
40 p.adj <- match.arg(p.adj)
41 junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
42 N <- nrow(junto)
43 junto[, 1] <- rank(junto[, 1])
44 means <- tapply.stat(junto[, 1], junto[, 2], stat = "sum")
45 sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")
46 nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")
47 means <- data.frame(means, replication = nn[, 2])
48 names(means)[1:2] <- c(name.t, name.y)
49 ntr <- nrow(means)
50 nk <- choose(ntr, 2)
51 DFerror <- N - ntr
52 rs <- 0
53 U <- 0
54 for (i in 1:ntr) {
55 rs <- rs + means[i, 2]^2/means[i, 3]
56 U <- U + 1/means[i, 3]
57 }
58 S <- (sum(junto[, 1]^2) - (N * (N + 1)^2)/4)/(N - 1)
59 H <- (rs - (N * (N + 1)^2)/4)/S
60 # cat("\nStudy:", main)
61 # cat("\nKruskal-Wallis test's\nTies or no Ties\n")
62 # cat("\nValue:", H)
63 # cat("\ndegrees of freedom:", ntr - 1)
64 p.chisq <- 1 - pchisq(H, ntr - 1)
65 # cat("\nPvalue chisq :", p.chisq, "\n\n")
66 DFerror <- N - ntr
67 Tprob <- qt(1 - alpha/2, DFerror)
68 MSerror <- S * ((N - 1 - H)/(N - ntr))
69 means[, 2] <- means[, 2]/means[, 3]
70 # cat(paste(name.t, ",", sep = ""), " means of the ranks\n\n")
71 dfMeans=data.frame(row.names = means[, 1], means[, -1])
72 if (p.adj != "none") {
73 # cat("\nP value adjustment method:", p.adj)
74 a <- 1e-06
75 b <- 1
76 for (i in 1:100) {
77 x <- (b + a)/2
78 xr <- rep(x, nk)
79 d <- p.adjust(xr, p.adj)[1] - alpha
80 ar <- rep(a, nk)
81 fa <- p.adjust(ar, p.adj)[1] - alpha
82 if (d * fa < 0)
83 b <- x
84 if (d * fa > 0)
85 a <- x
86 }
87 Tprob <- qt(1 - x/2, DFerror)
88 }
89 nr <- unique(means[, 3])
90 if (group) {
91 Tprob <- qt(1 - alpha/2, DFerror)
92 # cat("\nt-Student:", Tprob)
93 # cat("\nAlpha :", alpha)
94 dntStudent=Tprob
95 dAlpha=alpha
96 if (length(nr) == 1) {
97 LSD <- Tprob * sqrt(2 * MSerror/nr)
98 # cat("\nLSD :", LSD, "\n")
99 dLSD=LSD
100 }
101 else {
102 nr1 <- 1/mean(1/nn[, 2])
103 LSD1 <- Tprob * sqrt(2 * MSerror/nr1)
104 # cat("\nLSD :", LSD1, "\n")
105 dLSD =LSD1
106 # cat("\nHarmonic Mean of Cell Sizes ", nr1)
107 dHMean=nr1
108 }
109 # cat("\nMeans with the same letter are not significantly different\n")
110 # cat("\nGroups, Treatments and mean of the ranks\n")
111 output <- order.group(means[, 1], means[, 2], means[,
112 3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
113 3]))
114 dfComparisons=order.group(means[, 1], means[, 2], means[,
115 3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
116 3]))
117 }
118 if (!group) {
119 comb <- combn(ntr, 2)
120 nn <- ncol(comb)
121 dif <- rep(0, nn)
122 LCL <- dif
123 UCL <- dif
124 pvalue <- dif
125 sdtdif <- dif
126 for (k in 1:nn) {
127 i <- comb[1, k]
128 j <- comb[2, k]
129 if (means[i, 2] < means[j, 2]) {
130 comb[1, k] <- j
131 comb[2, k] <- i
132 }
133 dif[k] <- abs(means[i, 2] - means[j, 2])
134 sdtdif[k] <- sqrt(S * ((N - 1 - H)/(N - ntr)) * (1/means[i,
135 3] + 1/means[j, 3]))
136 pvalue[k] <- 2 * round(1 - pt(dif[k]/sdtdif[k], DFerror),
137 6)
138 }
139 if (p.adj != "none")
140 pvalue <- round(p.adjust(pvalue, p.adj), 6)
141 LCL <- dif - Tprob * sdtdif
142 UCL <- dif + Tprob * sdtdif
143 sig <- rep(" ", nn)
144 for (k in 1:nn) {
145 if (pvalue[k] <= 0.001)
146 sig[k] <- "***"
147 else if (pvalue[k] <= 0.01)
148 sig[k] <- "**"
149 else if (pvalue[k] <= 0.05)
150 sig[k] <- "*"
151 else if (pvalue[k] <= 0.1)
152 sig[k] <- "."
153 }
154 tr.i <- means[comb[1, ], 1]
155 tr.j <- means[comb[2, ], 1]
156 dfComparisons <- data.frame(Difference = dif, p.value = pvalue,
157 sig, LCL, UCL)
158 rownames(dfComparisons) <- paste(tr.i, tr.j, sep = " - ")
159 # cat("\nComparison between treatments mean of the ranks\n\n")
160 # print(output)
161 dfMeans <- data.frame(trt = means[, 1], means = means[,
162 2], M = "", N = means[, 3])
163 }
164 # invisible(output)
165 invisible(list(study=main,test="Kruskal-Wallis test",value=H,df=(ntr - 1),chisq.p.value=p.chisq,p.adj.method=p.adj,ntStudent=dntStudent,alpha=alpha,LSD=dLSD,Harmonic.mean=dHMean,comparisons=dfComparisons,means=dfMeans))
166 }
167
168 ### This function is NOT original code but is from the gamlss package.
169 ### It is written here in an effort to over write the gamlss object summary method
170 ### so that I can return information of interest.
171 estimatesgamlss<-function (object, Qr, p1, coef.p,
172 est.disp , df.r,
173 digits = max(3, getOption("digits") - 3),
174 covmat.unscaled , ...)
175 {
176 #covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
177 dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))
178 covmat <- covmat.unscaled #in glm is=dispersion * covmat.unscaled, but here is already multiplied by the dispersion
179 var.cf <- diag(covmat)
180 s.err <- sqrt(var.cf)
181 tvalue <- coef.p/s.err
182 dn <- c("Estimate", "Std. Error")
183 if (!est.disp)
184 {
185 pvalue <- 2 * pnorm(-abs(tvalue))
186 coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
187 dimnames(coef.table) <- list(names(coef.p), c(dn, "z value","Pr(>|z|)"))
188 } else if (df.r > 0) {
189 pvalue <- 2 * pt(-abs(tvalue), df.r)
190 coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
191 dimnames(coef.table) <- list(names(coef.p), c(dn, "t value","Pr(>|t|)"))
192 } else {
193 coef.table <- cbind(coef.p, Inf)
194 dimnames(coef.table) <- list(names(coef.p), dn)
195 }
196 return(coef.table)
197 }
198
199 ### This function is NOT original code but is from the gamlss package.
200 ### It is written here in an effort to over write the gamlss object summary method
201 ### so that I can return information of interest.
202 summary.gamlss<- function (object, type = c("vcov", "qr"), save = FALSE, ...)
203 {
204 return(as.data.frame(estimatesgamlss(object=object,Qr=object$mu.qr, p1=1:(object$mu.df-object$mu.nl.df),
205 coef.p=object$mu.coefficients[object$mu.qr$pivot[1:(object$mu.df-object$mu.nl.df)]],
206 est.disp =TRUE, df.r=(object$noObs - object$mu.df),
207 covmat.unscaled=chol2inv(object$mu.qr$qr[1:(object$mu.df-object$mu.nl.df), 1:(object$mu.df-object$mu.nl.df), drop = FALSE]) )) )
208 }