maaslin: maaslin-4450aa4ecc84/src/lib/Misc.R comparison

comparison maaslin-4450aa4ecc84/src/lib/Misc.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server

author	george-weingart
date	Sun, 08 Feb 2015 23:08:38 -0500
parents
children

comparison

equal deleted inserted replaced

-:e0b5980139d9
+:a87d5a5f2776
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
+# authored by the Huttenhower lab at the Harvard School of Public Health
+# (contact Timothy Tickle, ttickle@hsph.harvard.edu).
+#####################################################################################
+### Modified Code
+### This code is from the package agricolae by Felipe de Mendiburu
+### Modifications here are minimal and allow one to use the p.values from the post hoc comparisons
+### Authors do not claim credit for this solution only needed to modify code to use the output.
+kruskal <- function (y, trt, alpha = 0.05, p.adj = c("none", "holm", "hochberg",
+"bonferroni", "BH", "BY", "fdr"), group = TRUE, main = NULL)
+{
+dfComparisons=NULL
+dfMeans=NULL
+dntStudent=NULL
+dLSD=NULL
+dHMean=NULL
+name.y <- paste(deparse(substitute(y)))
+name.t <- paste(deparse(substitute(trt)))
+p.adj <- match.arg(p.adj)
+junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
+N <- nrow(junto)
+junto[, 1] <- rank(junto[, 1])
+means <- tapply.stat(junto[, 1], junto[, 2], stat = "sum")
+sds <- tapply.stat(junto[, 1], junto[, 2], stat = "sd")
+nn <- tapply.stat(junto[, 1], junto[, 2], stat = "length")
+means <- data.frame(means, replication = nn[, 2])
+names(means)[1:2] <- c(name.t, name.y)
+ntr <- nrow(means)
+nk <- choose(ntr, 2)
+DFerror <- N - ntr
+rs <- 0
+U <- 0
+for (i in 1:ntr) {
+rs <- rs + means[i, 2]^2/means[i, 3]
+U <- U + 1/means[i, 3]
+}
+S <- (sum(junto[, 1]^2) - (N * (N + 1)^2)/4)/(N - 1)
+H <- (rs - (N * (N + 1)^2)/4)/S
+#    cat("\nStudy:", main)
+#    cat("\nKruskal-Wallis test's\nTies or no Ties\n")
+#    cat("\nValue:", H)
+#    cat("\ndegrees of freedom:", ntr - 1)
+p.chisq <- 1 - pchisq(H, ntr - 1)
+#    cat("\nPvalue chisq  :", p.chisq, "\n\n")
+DFerror <- N - ntr
+Tprob <- qt(1 - alpha/2, DFerror)
+MSerror <- S * ((N - 1 - H)/(N - ntr))
+means[, 2] <- means[, 2]/means[, 3]
+#    cat(paste(name.t, ",", sep = ""), " means of the ranks\n\n")
+dfMeans=data.frame(row.names = means[, 1], means[, -1])
+if (p.adj != "none") {
+#        cat("\nP value adjustment method:", p.adj)
+a <- 1e-06
+b <- 1
+for (i in 1:100) {
+x <- (b + a)/2
+xr <- rep(x, nk)
+d <- p.adjust(xr, p.adj)[1] - alpha
+ar <- rep(a, nk)
+fa <- p.adjust(ar, p.adj)[1] - alpha
+if (d * fa < 0)
+b <- x
+if (d * fa > 0)
+a <- x
+}
+Tprob <- qt(1 - x/2, DFerror)
+}
+nr <- unique(means[, 3])
+if (group) {
+Tprob <- qt(1 - alpha/2, DFerror)
+#        cat("\nt-Student:", Tprob)
+#        cat("\nAlpha    :", alpha)
+dntStudent=Tprob
+dAlpha=alpha
+if (length(nr) == 1) {
+LSD <- Tprob * sqrt(2 * MSerror/nr)
+#            cat("\nLSD      :", LSD, "\n")
+dLSD=LSD
+}
+else {
+nr1 <- 1/mean(1/nn[, 2])
+LSD1 <- Tprob * sqrt(2 * MSerror/nr1)
+#            cat("\nLSD      :", LSD1, "\n")
+dLSD =LSD1
+#            cat("\nHarmonic Mean of Cell Sizes ", nr1)
+dHMean=nr1
+}
+#        cat("\nMeans with the same letter are not significantly different\n")
+#        cat("\nGroups, Treatments and mean of the ranks\n")
+output <- order.group(means[, 1], means[, 2], means[,
+3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
+3]))
+dfComparisons=order.group(means[, 1], means[, 2], means[,
+3], MSerror, Tprob, std.err = sqrt(MSerror/means[,
+3]))
+}
+if (!group) {
+comb <- combn(ntr, 2)
+nn <- ncol(comb)
+dif <- rep(0, nn)
+LCL <- dif
+UCL <- dif
+pvalue <- dif
+sdtdif <- dif
+for (k in 1:nn) {
+i <- comb[1, k]
+j <- comb[2, k]
+if (means[i, 2] < means[j, 2]) {
+comb[1, k] <- j
+comb[2, k] <- i
+}
+dif[k] <- abs(means[i, 2] - means[j, 2])
+sdtdif[k] <- sqrt(S * ((N - 1 - H)/(N - ntr)) * (1/means[i,
+3] + 1/means[j, 3]))
+pvalue[k] <- 2 * round(1 - pt(dif[k]/sdtdif[k], DFerror),
+6)
+}
+if (p.adj != "none")
+pvalue <- round(p.adjust(pvalue, p.adj), 6)
+LCL <- dif - Tprob * sdtdif
+UCL <- dif + Tprob * sdtdif
+sig <- rep(" ", nn)
+for (k in 1:nn) {
+if (pvalue[k] <= 0.001)
+sig[k] <- "***"
+else if (pvalue[k] <= 0.01)
+sig[k] <- "**"
+else if (pvalue[k] <= 0.05)
+sig[k] <- "*"
+else if (pvalue[k] <= 0.1)
+sig[k] <- "."
+}
+tr.i <- means[comb[1, ], 1]
+tr.j <- means[comb[2, ], 1]
+dfComparisons <- data.frame(Difference = dif, p.value = pvalue,
+sig, LCL, UCL)
+rownames(dfComparisons) <- paste(tr.i, tr.j, sep = " - ")
+#        cat("\nComparison between treatments mean of the ranks\n\n")
+#        print(output)
+dfMeans <- data.frame(trt = means[, 1], means = means[,
+2], M = "", N = means[, 3])
+}
+#    invisible(output)
+invisible(list(study=main,test="Kruskal-Wallis test",value=H,df=(ntr - 1),chisq.p.value=p.chisq,p.adj.method=p.adj,ntStudent=dntStudent,alpha=alpha,LSD=dLSD,Harmonic.mean=dHMean,comparisons=dfComparisons,means=dfMeans))
+}
+### This function is NOT original code but is from the gamlss package.
+### It is written here in an effort to over write the gamlss object summary method
+### so that I can return information of interest.
+estimatesgamlss<-function (object, Qr, p1, coef.p,
+est.disp , df.r,
+digits = max(3, getOption("digits") - 3),
+covmat.unscaled , ...)
+{
+#covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
+dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))
+covmat <- covmat.unscaled #in glm is=dispersion * covmat.unscaled, but here is already multiplied by the dispersion
+var.cf <- diag(covmat)
+s.err <- sqrt(var.cf)
+tvalue <- coef.p/s.err
+dn <- c("Estimate", "Std. Error")
+if (!est.disp)
+{
+pvalue <- 2 * pnorm(-abs(tvalue))
+coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+dimnames(coef.table) <- list(names(coef.p), c(dn, "z value","Pr(>|z|)"))
+} else if (df.r > 0) {
+pvalue <- 2 * pt(-abs(tvalue), df.r)
+coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+dimnames(coef.table) <- list(names(coef.p), c(dn, "t value","Pr(>|t|)"))
+} else {
+coef.table <- cbind(coef.p, Inf)
+dimnames(coef.table) <- list(names(coef.p), dn)
+}
+return(coef.table)
+}
+### This function is NOT original code but is from the gamlss package.
+### It is written here in an effort to over write the gamlss object summary method
+### so that I can return information of interest.
+summary.gamlss<- function (object, type = c("vcov", "qr"), save = FALSE, ...)
+{
+return(as.data.frame(estimatesgamlss(object=object,Qr=object$mu.qr, p1=1:(object$mu.df-object$mu.nl.df),
+coef.p=object$mu.coefficients[object$mu.qr$pivot[1:(object$mu.df-object$mu.nl.df)]],
+est.disp =TRUE, df.r=(object$noObs - object$mu.df),
+covmat.unscaled=chol2inv(object$mu.qr$qr[1:(object$mu.df-object$mu.nl.df), 1:(object$mu.df-object$mu.nl.df), drop = FALSE]) )) )
+}

Mercurial > repos > george-weingart > maaslin

comparison maaslin-4450aa4ecc84/src/lib/Misc.R @ 1:a87d5a5f2776