Mercurial > repos > george-weingart > maaslin
comparison src/lib/Misc.R @ 8:e9677425c6c3 default tip
Updated the structure of the libraries
author | george.weingart@gmail.com |
---|---|
date | Mon, 09 Feb 2015 12:17:40 -0500 |
parents | e0b5980139d9 |
children |
comparison
equal
deleted
inserted
replaced
7:c72e14eabb08 | 8:e9677425c6c3 |
---|---|
1 ##################################################################################### | |
2 #Copyright (C) <2012> | |
3 # | |
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
5 #this software and associated documentation files (the "Software"), to deal in the | |
6 #Software without restriction, including without limitation the rights to use, copy, | |
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
8 #and to permit persons to whom the Software is furnished to do so, subject to | |
9 #the following conditions: | |
10 # | |
11 #The above copyright notice and this permission notice shall be included in all copies | |
12 #or substantial portions of the Software. | |
13 # | |
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
20 # | |
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), | |
22 # authored by the Huttenhower lab at the Harvard School of Public Health | |
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu). | |
24 ##################################################################################### | |
25 | |
26 ### Modified Code | |
27 ### This code is from the package agricolae by Felipe de Mendiburu | |
28 ### Modifications here are minimal and allow one to use the p.values from the post hoc comparisons | |
29 ### Authors do not claim credit for this solution only needed to modify code to use the output. | |
30 kruskal <- function (y, trt, alpha = 0.05, p.adj = c("none", "holm", "hochberg", | |
31 "bonferroni", "BH", "BY", "fdr"), group = TRUE, main = NULL) | |
32 { | |
33 dfComparisons=NULL | |
34 dfMeans=NULL | |
35 dntStudent=NULL | |
36 dLSD=NULL | |
37 dHMean=NULL | |
38 name.y <- paste(deparse(substitute(y))) | |
39 name.t <- paste(deparse(substitute(trt))) | |
40 p.adj <- match.arg(p.adj) | |
41 junto <- subset(data.frame(y, trt), is.na(y) == FALSE) | |
42 N <- nrow(junto) | |
43 junto[, 1] <- rank(junto[, 1]) | |
44 means <- tapply.stat(junto[, 1], junto[, 2], stat = "sum") | |
45 sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd") | |
46 nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length") | |
47 means <- data.frame(means, replication = nn[, 2]) | |
48 names(means)[1:2] <- c(name.t, name.y) | |
49 ntr <- nrow(means) | |
50 nk <- choose(ntr, 2) | |
51 DFerror <- N - ntr | |
52 rs <- 0 | |
53 U <- 0 | |
54 for (i in 1:ntr) { | |
55 rs <- rs + means[i, 2]^2/means[i, 3] | |
56 U <- U + 1/means[i, 3] | |
57 } | |
58 S <- (sum(junto[, 1]^2) - (N * (N + 1)^2)/4)/(N - 1) | |
59 H <- (rs - (N * (N + 1)^2)/4)/S | |
60 # cat("\nStudy:", main) | |
61 # cat("\nKruskal-Wallis test's\nTies or no Ties\n") | |
62 # cat("\nValue:", H) | |
63 # cat("\ndegrees of freedom:", ntr - 1) | |
64 p.chisq <- 1 - pchisq(H, ntr - 1) | |
65 # cat("\nPvalue chisq :", p.chisq, "\n\n") | |
66 DFerror <- N - ntr | |
67 Tprob <- qt(1 - alpha/2, DFerror) | |
68 MSerror <- S * ((N - 1 - H)/(N - ntr)) | |
69 means[, 2] <- means[, 2]/means[, 3] | |
70 # cat(paste(name.t, ",", sep = ""), " means of the ranks\n\n") | |
71 dfMeans=data.frame(row.names = means[, 1], means[, -1]) | |
72 if (p.adj != "none") { | |
73 # cat("\nP value adjustment method:", p.adj) | |
74 a <- 1e-06 | |
75 b <- 1 | |
76 for (i in 1:100) { | |
77 x <- (b + a)/2 | |
78 xr <- rep(x, nk) | |
79 d <- p.adjust(xr, p.adj)[1] - alpha | |
80 ar <- rep(a, nk) | |
81 fa <- p.adjust(ar, p.adj)[1] - alpha | |
82 if (d * fa < 0) | |
83 b <- x | |
84 if (d * fa > 0) | |
85 a <- x | |
86 } | |
87 Tprob <- qt(1 - x/2, DFerror) | |
88 } | |
89 nr <- unique(means[, 3]) | |
90 if (group) { | |
91 Tprob <- qt(1 - alpha/2, DFerror) | |
92 # cat("\nt-Student:", Tprob) | |
93 # cat("\nAlpha :", alpha) | |
94 dntStudent=Tprob | |
95 dAlpha=alpha | |
96 if (length(nr) == 1) { | |
97 LSD <- Tprob * sqrt(2 * MSerror/nr) | |
98 # cat("\nLSD :", LSD, "\n") | |
99 dLSD=LSD | |
100 } | |
101 else { | |
102 nr1 <- 1/mean(1/nn[, 2]) | |
103 LSD1 <- Tprob * sqrt(2 * MSerror/nr1) | |
104 # cat("\nLSD :", LSD1, "\n") | |
105 dLSD =LSD1 | |
106 # cat("\nHarmonic Mean of Cell Sizes ", nr1) | |
107 dHMean=nr1 | |
108 } | |
109 # cat("\nMeans with the same letter are not significantly different\n") | |
110 # cat("\nGroups, Treatments and mean of the ranks\n") | |
111 output <- order.group(means[, 1], means[, 2], means[, | |
112 3], MSerror, Tprob, std.err = sqrt(MSerror/means[, | |
113 3])) | |
114 dfComparisons=order.group(means[, 1], means[, 2], means[, | |
115 3], MSerror, Tprob, std.err = sqrt(MSerror/means[, | |
116 3])) | |
117 } | |
118 if (!group) { | |
119 comb <- combn(ntr, 2) | |
120 nn <- ncol(comb) | |
121 dif <- rep(0, nn) | |
122 LCL <- dif | |
123 UCL <- dif | |
124 pvalue <- dif | |
125 sdtdif <- dif | |
126 for (k in 1:nn) { | |
127 i <- comb[1, k] | |
128 j <- comb[2, k] | |
129 if (means[i, 2] < means[j, 2]) { | |
130 comb[1, k] <- j | |
131 comb[2, k] <- i | |
132 } | |
133 dif[k] <- abs(means[i, 2] - means[j, 2]) | |
134 sdtdif[k] <- sqrt(S * ((N - 1 - H)/(N - ntr)) * (1/means[i, | |
135 3] + 1/means[j, 3])) | |
136 pvalue[k] <- 2 * round(1 - pt(dif[k]/sdtdif[k], DFerror), | |
137 6) | |
138 } | |
139 if (p.adj != "none") | |
140 pvalue <- round(p.adjust(pvalue, p.adj), 6) | |
141 LCL <- dif - Tprob * sdtdif | |
142 UCL <- dif + Tprob * sdtdif | |
143 sig <- rep(" ", nn) | |
144 for (k in 1:nn) { | |
145 if (pvalue[k] <= 0.001) | |
146 sig[k] <- "***" | |
147 else if (pvalue[k] <= 0.01) | |
148 sig[k] <- "**" | |
149 else if (pvalue[k] <= 0.05) | |
150 sig[k] <- "*" | |
151 else if (pvalue[k] <= 0.1) | |
152 sig[k] <- "." | |
153 } | |
154 tr.i <- means[comb[1, ], 1] | |
155 tr.j <- means[comb[2, ], 1] | |
156 dfComparisons <- data.frame(Difference = dif, p.value = pvalue, | |
157 sig, LCL, UCL) | |
158 rownames(dfComparisons) <- paste(tr.i, tr.j, sep = " - ") | |
159 # cat("\nComparison between treatments mean of the ranks\n\n") | |
160 # print(output) | |
161 dfMeans <- data.frame(trt = means[, 1], means = means[, | |
162 2], M = "", N = means[, 3]) | |
163 } | |
164 # invisible(output) | |
165 invisible(list(study=main,test="Kruskal-Wallis test",value=H,df=(ntr - 1),chisq.p.value=p.chisq,p.adj.method=p.adj,ntStudent=dntStudent,alpha=alpha,LSD=dLSD,Harmonic.mean=dHMean,comparisons=dfComparisons,means=dfMeans)) | |
166 } | |
167 | |
168 ### This function is NOT original code but is from the gamlss package. | |
169 ### It is written here in an effort to over write the gamlss object summary method | |
170 ### so that I can return information of interest. | |
171 estimatesgamlss<-function (object, Qr, p1, coef.p, | |
172 est.disp , df.r, | |
173 digits = max(3, getOption("digits") - 3), | |
174 covmat.unscaled , ...) | |
175 { | |
176 #covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) | |
177 dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p)) | |
178 covmat <- covmat.unscaled #in glm is=dispersion * covmat.unscaled, but here is already multiplied by the dispersion | |
179 var.cf <- diag(covmat) | |
180 s.err <- sqrt(var.cf) | |
181 tvalue <- coef.p/s.err | |
182 dn <- c("Estimate", "Std. Error") | |
183 if (!est.disp) | |
184 { | |
185 pvalue <- 2 * pnorm(-abs(tvalue)) | |
186 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) | |
187 dimnames(coef.table) <- list(names(coef.p), c(dn, "z value","Pr(>|z|)")) | |
188 } else if (df.r > 0) { | |
189 pvalue <- 2 * pt(-abs(tvalue), df.r) | |
190 coef.table <- cbind(coef.p, s.err, tvalue, pvalue) | |
191 dimnames(coef.table) <- list(names(coef.p), c(dn, "t value","Pr(>|t|)")) | |
192 } else { | |
193 coef.table <- cbind(coef.p, Inf) | |
194 dimnames(coef.table) <- list(names(coef.p), dn) | |
195 } | |
196 return(coef.table) | |
197 } | |
198 | |
199 ### This function is NOT original code but is from the gamlss package. | |
200 ### It is written here in an effort to over write the gamlss object summary method | |
201 ### so that I can return information of interest. | |
202 summary.gamlss<- function (object, type = c("vcov", "qr"), save = FALSE, ...) | |
203 { | |
204 return(as.data.frame(estimatesgamlss(object=object,Qr=object$mu.qr, p1=1:(object$mu.df-object$mu.nl.df), | |
205 coef.p=object$mu.coefficients[object$mu.qr$pivot[1:(object$mu.df-object$mu.nl.df)]], | |
206 est.disp =TRUE, df.r=(object$noObs - object$mu.df), | |
207 covmat.unscaled=chol2inv(object$mu.qr$qr[1:(object$mu.df-object$mu.nl.df), 1:(object$mu.df-object$mu.nl.df), drop = FALSE]) )) ) | |
208 } |