0
|
1 #####################################################################################
|
|
2 #Copyright (C) <2012>
|
|
3 #
|
|
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
5 #this software and associated documentation files (the "Software"), to deal in the
|
|
6 #Software without restriction, including without limitation the rights to use, copy,
|
|
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
8 #and to permit persons to whom the Software is furnished to do so, subject to
|
|
9 #the following conditions:
|
|
10 #
|
|
11 #The above copyright notice and this permission notice shall be included in all copies
|
|
12 #or substantial portions of the Software.
|
|
13 #
|
|
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
20 #
|
|
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
|
|
22 # authored by the Huttenhower lab at the Harvard School of Public Health
|
|
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
|
|
24 #####################################################################################
|
|
25
|
|
26 ### Modified Code
|
|
27 ### This code is from the package agricolae by Felipe de Mendiburu
|
|
28 ### Modifications here are minimal and allow one to use the p.values from the post hoc comparisons
|
|
29 ### Authors do not claim credit for this solution only needed to modify code to use the output.
|
|
30 kruskal <- function (y, trt, alpha = 0.05, p.adj = c("none", "holm", "hochberg",
|
|
31 "bonferroni", "BH", "BY", "fdr"), group = TRUE, main = NULL)
|
|
32 {
|
|
33 dfComparisons=NULL
|
|
34 dfMeans=NULL
|
|
35 dntStudent=NULL
|
|
36 dLSD=NULL
|
|
37 dHMean=NULL
|
|
38 name.y <- paste(deparse(substitute(y)))
|
|
39 name.t <- paste(deparse(substitute(trt)))
|
|
40 p.adj <- match.arg(p.adj)
|
|
41 junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
|
|
42 N <- nrow(junto)
|
|
43 junto[, 1] <- rank(junto[, 1])
|
|
44 means <- tapply.stat(junto[, 1], junto[, 2], stat = "sum")
|
|
45 sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")
|
|
46 nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")
|
|
47 means <- data.frame(means, replication = nn[, 2])
|
|
48 names(means)[1:2] <- c(name.t, name.y)
|
|
49 ntr <- nrow(means)
|
|
50 nk <- choose(ntr, 2)
|
|
51 DFerror <- N - ntr
|
|
52 rs <- 0
|
|
53 U <- 0
|
|
54 for (i in 1:ntr) {
|
|
55 rs <- rs + means[i, 2]^2/means[i, 3]
|
|
56 U <- U + 1/means[i, 3]
|
|
57 }
|
|
58 S <- (sum(junto[, 1]^2) - (N * (N + 1)^2)/4)/(N - 1)
|
|
59 H <- (rs - (N * (N + 1)^2)/4)/S
|
|
60 # cat("\nStudy:", main)
|
|
61 # cat("\nKruskal-Wallis test's\nTies or no Ties\n")
|
|
62 # cat("\nValue:", H)
|
|
63 # cat("\ndegrees of freedom:", ntr - 1)
|
|
64 p.chisq <- 1 - pchisq(H, ntr - 1)
|
|
65 # cat("\nPvalue chisq :", p.chisq, "\n\n")
|
|
66 DFerror <- N - ntr
|
|
67 Tprob <- qt(1 - alpha/2, DFerror)
|
|
68 MSerror <- S * ((N - 1 - H)/(N - ntr))
|
|
69 means[, 2] <- means[, 2]/means[, 3]
|
|
70 # cat(paste(name.t, ",", sep = ""), " means of the ranks\n\n")
|
|
71 dfMeans=data.frame(row.names = means[, 1], means[, -1])
|
|
72 if (p.adj != "none") {
|
|
73 # cat("\nP value adjustment method:", p.adj)
|
|
74 a <- 1e-06
|
|
75 b <- 1
|
|
76 for (i in 1:100) {
|
|
77 x <- (b + a)/2
|
|
78 xr <- rep(x, nk)
|
|
79 d <- p.adjust(xr, p.adj)[1] - alpha
|
|
80 ar <- rep(a, nk)
|
|
81 fa <- p.adjust(ar, p.adj)[1] - alpha
|
|
82 if (d * fa < 0)
|
|
83 b <- x
|
|
84 if (d * fa > 0)
|
|
85 a <- x
|
|
86 }
|
|
87 Tprob <- qt(1 - x/2, DFerror)
|
|
88 }
|
|
89 nr <- unique(means[, 3])
|
|
90 if (group) {
|
|
91 Tprob <- qt(1 - alpha/2, DFerror)
|
|
92 # cat("\nt-Student:", Tprob)
|
|
93 # cat("\nAlpha :", alpha)
|
|
94 dntStudent=Tprob
|
|
95 dAlpha=alpha
|
|
96 if (length(nr) == 1) {
|
|
97 LSD <- Tprob * sqrt(2 * MSerror/nr)
|
|
98 # cat("\nLSD :", LSD, "\n")
|
|
99 dLSD=LSD
|
|
100 }
|
|
101 else {
|
|
102 nr1 <- 1/mean(1/nn[, 2])
|
|
103 LSD1 <- Tprob * sqrt(2 * MSerror/nr1)
|
|
104 # cat("\nLSD :", LSD1, "\n")
|
|
105 dLSD =LSD1
|
|
106 # cat("\nHarmonic Mean of Cell Sizes ", nr1)
|
|
107 dHMean=nr1
|
|
108 }
|
|
109 # cat("\nMeans with the same letter are not significantly different\n")
|
|
110 # cat("\nGroups, Treatments and mean of the ranks\n")
|
|
111 output <- order.group(means[, 1], means[, 2], means[,
|
|
112 3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
|
|
113 3]))
|
|
114 dfComparisons=order.group(means[, 1], means[, 2], means[,
|
|
115 3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
|
|
116 3]))
|
|
117 }
|
|
118 if (!group) {
|
|
119 comb <- combn(ntr, 2)
|
|
120 nn <- ncol(comb)
|
|
121 dif <- rep(0, nn)
|
|
122 LCL <- dif
|
|
123 UCL <- dif
|
|
124 pvalue <- dif
|
|
125 sdtdif <- dif
|
|
126 for (k in 1:nn) {
|
|
127 i <- comb[1, k]
|
|
128 j <- comb[2, k]
|
|
129 if (means[i, 2] < means[j, 2]) {
|
|
130 comb[1, k] <- j
|
|
131 comb[2, k] <- i
|
|
132 }
|
|
133 dif[k] <- abs(means[i, 2] - means[j, 2])
|
|
134 sdtdif[k] <- sqrt(S * ((N - 1 - H)/(N - ntr)) * (1/means[i,
|
|
135 3] + 1/means[j, 3]))
|
|
136 pvalue[k] <- 2 * round(1 - pt(dif[k]/sdtdif[k], DFerror),
|
|
137 6)
|
|
138 }
|
|
139 if (p.adj != "none")
|
|
140 pvalue <- round(p.adjust(pvalue, p.adj), 6)
|
|
141 LCL <- dif - Tprob * sdtdif
|
|
142 UCL <- dif + Tprob * sdtdif
|
|
143 sig <- rep(" ", nn)
|
|
144 for (k in 1:nn) {
|
|
145 if (pvalue[k] <= 0.001)
|
|
146 sig[k] <- "***"
|
|
147 else if (pvalue[k] <= 0.01)
|
|
148 sig[k] <- "**"
|
|
149 else if (pvalue[k] <= 0.05)
|
|
150 sig[k] <- "*"
|
|
151 else if (pvalue[k] <= 0.1)
|
|
152 sig[k] <- "."
|
|
153 }
|
|
154 tr.i <- means[comb[1, ], 1]
|
|
155 tr.j <- means[comb[2, ], 1]
|
|
156 dfComparisons <- data.frame(Difference = dif, p.value = pvalue,
|
|
157 sig, LCL, UCL)
|
|
158 rownames(dfComparisons) <- paste(tr.i, tr.j, sep = " - ")
|
|
159 # cat("\nComparison between treatments mean of the ranks\n\n")
|
|
160 # print(output)
|
|
161 dfMeans <- data.frame(trt = means[, 1], means = means[,
|
|
162 2], M = "", N = means[, 3])
|
|
163 }
|
|
164 # invisible(output)
|
|
165 invisible(list(study=main,test="Kruskal-Wallis test",value=H,df=(ntr - 1),chisq.p.value=p.chisq,p.adj.method=p.adj,ntStudent=dntStudent,alpha=alpha,LSD=dLSD,Harmonic.mean=dHMean,comparisons=dfComparisons,means=dfMeans))
|
|
166 }
|
|
167
|
|
168 ### This function is NOT original code but is from the gamlss package.
|
|
169 ### It is written here in an effort to over write the gamlss object summary method
|
|
170 ### so that I can return information of interest.
|
|
171 estimatesgamlss<-function (object, Qr, p1, coef.p,
|
|
172 est.disp , df.r,
|
|
173 digits = max(3, getOption("digits") - 3),
|
|
174 covmat.unscaled , ...)
|
|
175 {
|
|
176 #covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
|
|
177 dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))
|
|
178 covmat <- covmat.unscaled #in glm is=dispersion * covmat.unscaled, but here is already multiplied by the dispersion
|
|
179 var.cf <- diag(covmat)
|
|
180 s.err <- sqrt(var.cf)
|
|
181 tvalue <- coef.p/s.err
|
|
182 dn <- c("Estimate", "Std. Error")
|
|
183 if (!est.disp)
|
|
184 {
|
|
185 pvalue <- 2 * pnorm(-abs(tvalue))
|
|
186 coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
|
|
187 dimnames(coef.table) <- list(names(coef.p), c(dn, "z value","Pr(>|z|)"))
|
|
188 } else if (df.r > 0) {
|
|
189 pvalue <- 2 * pt(-abs(tvalue), df.r)
|
|
190 coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
|
|
191 dimnames(coef.table) <- list(names(coef.p), c(dn, "t value","Pr(>|t|)"))
|
|
192 } else {
|
|
193 coef.table <- cbind(coef.p, Inf)
|
|
194 dimnames(coef.table) <- list(names(coef.p), dn)
|
|
195 }
|
|
196 return(coef.table)
|
|
197 }
|
|
198
|
|
199 ### This function is NOT original code but is from the gamlss package.
|
|
200 ### It is written here in an effort to over write the gamlss object summary method
|
|
201 ### so that I can return information of interest.
|
|
202 summary.gamlss<- function (object, type = c("vcov", "qr"), save = FALSE, ...)
|
|
203 {
|
|
204 return(as.data.frame(estimatesgamlss(object=object,Qr=object$mu.qr, p1=1:(object$mu.df-object$mu.nl.df),
|
|
205 coef.p=object$mu.coefficients[object$mu.qr$pivot[1:(object$mu.df-object$mu.nl.df)]],
|
|
206 est.disp =TRUE, df.r=(object$noObs - object$mu.df),
|
|
207 covmat.unscaled=chol2inv(object$mu.qr$qr[1:(object$mu.df-object$mu.nl.df), 1:(object$mu.df-object$mu.nl.df), drop = FALSE]) )) )
|
|
208 } |