annotate maaslin-4450aa4ecc84/src/lib/AnalysisModules.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
3 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
9 #the following conditions:
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
10 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
13 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
20 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
24 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
25
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
26 inlinedocs <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
28 ##description<< Allows one to plug in new modules to perform analysis (univariate or multivariate), regularization, and data (response) transformation.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
29 ) { return( pArgs ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
30
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
31 # Libraries
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
32 suppressMessages(library( agricolae, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
33 # Needed for the pot-hoc Kruskal wallis comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
34 suppressMessages(library( penalized, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
35 # Needed for stepAIC
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
36 suppressMessages(library( MASS, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
37 # Needed for na action behavior
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
38 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
39 # Needed for boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
40 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
41 # Needed for LASSO
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
42 suppressMessages(library( glmnet, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
43 # Needed for mixed models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
44 #suppressMessages(library( lme4, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
45 suppressMessages(library( nlme, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
46
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
47 # Needed for zero inflated models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
48 #suppressMessages(library( MCMCglmm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
49 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
50 suppressMessages(library( gamlss, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
51 # Do not use #suppressMessages(library( glmmADMB, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
52
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
53 fAddBack = TRUE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
54 dUnevenMax = .9
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
55
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
56
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
57 ### Helper functions
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
58 # OK
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
59 funcMakeContrasts <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
60 ### Makes univariate contrasts of all predictors in the model formula with the response.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
61 (strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
62 ### lm style string defining reponse and predictors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
63 strRandomFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
64 ### mixed model string defining the fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
65 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
66 ### The data frame to find predictor data in
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
67 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
68 ### Taxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
69 functionContrast,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
70 ### functionContrast The univariate test to perform
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
71 lsQCCounts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
72 ### QC info
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
73 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
74 #TODO are we updating the QCCounts?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
75 lsSig = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
76 ### Holds all the significance results from the tests
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
77 adP = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
78 ### Holds the p-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
79 sCurDataName = names(frmeTmp)[iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
80 ### The name of the taxon (data row) that is being associated (always assumed to be numeric)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
81 #Get test comparisons (predictor names from formula string)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
82 asComparisons = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
83
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
84 #Change metadata in formula to univariate comparisons
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
85 for(sComparison in asComparisons)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
86 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
87 # Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
88 vxTest = frmeTmp[[sComparison]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
89
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
90 # Get the levels in the comparison
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
91 # Can ignore the first level because it is the reference level
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
92 asLevels = sComparison
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
93 if(is.factor(vxTest)){asLevels = levels(vxTest)[2:length(vxTest)]}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
94
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
95 lasComparisonResults = functionContrast(x=sComparison, adCur=frmeTmp[[sCurDataName]], dfData=frmeTmp)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
96 for(asComparison in lasComparisonResults)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
97 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
98 if( is.na( asComparison$p.value ) ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
99 # Get pvalue
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
100 adP = c(adP, asComparison$p.value)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
101 # Get SD, if not available, give SD of covariate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
102 dSTD = asComparison$SD
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
103 # TODO Is using sd on factor and binary data correct?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
104 if(is.na(dSTD) || is.null(dSTD)){dSTD = sd(vxTest)}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
105
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
106 lsSig[[length( lsSig ) + 1]] <- list(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
107 #Current metadata name (string column name) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
108 name = sComparison,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
109 #Current metadatda name (string, as a factor level if existing as such) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
110 orig = asComparison$name,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
111 #Taxon feature name (string) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
112 taxon = colnames( frmeTmp )[iTaxon],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
113 #Taxon data / response (double vector) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
114 data = frmeTmp[,iTaxon],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
115 #Name of column ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
116 factors = sComparison,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
117 #Metadata values (metadata as a factor or raw numeric) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
118 metadata = vxTest,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
119 #Current coefficient value (named coef value with level name (from coefs) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
120 value = asComparison$coef,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
121 #Standard deviation (numeric) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
122 std = dSTD,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
123 #Model coefficients (output from coefs with intercept) ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
124 allCoefs = asComparison$coef)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
125 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
126 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
127 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
128 ### Returns a list of p-value, standard deviation, and comparison which produced the p-value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
129 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
130
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
131 #Ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
132 funcGetStepPredictors <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
133 ### Retrieve the predictors of the reduced model after stepwise model selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
134 (lmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
135 ### Linear model resulting from step-wise model selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
136 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
137 strLog
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
138 ### File to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
139 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
140 #Document
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
141 funcWrite( "#model", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
142 funcWrite( lmod$fit, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
143 #TODO funcWrite( lmod$train.error, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
144 #TODO funcWrite( lmod$Terms, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
145 funcWrite( "#summary-gbm", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
146 funcWrite( summary(lmod), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
147
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
148 #Get Names from coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
149 asStepCoefsFactors = coefficients(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
150 astrCoefNames = setdiff(names(asStepCoefsFactors[as.vector(!is.na(asStepCoefsFactors))==TRUE]),"(Intercept)")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
151 asStepCoefsFactors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
152
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
153 if(length(asStepCoefsFactors)<1){ return(NA) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
154 return( asStepCoefsFactors )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
155 ### Vector of string predictor names
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
156 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
157
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
158 funcGetUnivariateResults <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
159 ### Reduce the list of list of results to the correct format
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
160 ( llmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
161 ### The list of univariate models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
162 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
163 ### Data analysis is performed on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
164 liTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
165 ### The response id
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
166 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
167 ### Significance level for q-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
168 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
169 ### List of pvalues from all associations performed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
170 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
171 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
172 strLog,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
173 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
174 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
175 ### Records of counts associated with quality control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
176 lastrCols,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
177 ### Predictors used in the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
178 asSuppressCovariates=c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
179 ### Vector of covariates to suppress and not give results for
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
180 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
181 adP = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
182 lsSig = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
183 for(lmod in llmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
184 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
185 adP = c(adP,lmod$adP)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
186 lsSig = c(lsSig,lmod$lsSig)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
187 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
188 return(list(adP=adP, lsSig=lsSig, lsQCCounts=llmod[length(llmod)]$lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
189 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
190
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
191 # OK
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
192 funcGetLMResults <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
193 ### Reduce the lm object return to just the data needed for further analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
194 ( llmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
195 ### The result from a linear model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
196 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
197 ### Data analysis is performed on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
198 liTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
199 ### The response id
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
200 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
201 ### Significance level for q-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
202 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
203 ### List of pvalues from all associations performed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
204 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
205 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
206 strLog,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
207 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
208 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
209 ### Records of counts associated with quality control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
210 lastrCols,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
211 ### Predictors used in the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
212 asSuppressCovariates=c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
213 ### Vector of covariates to suppress and not give results for
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
214 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
215 ilmodIndex = 0
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
216 for( lmod in llmod )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
217 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
218 ilmodIndex = ilmodIndex + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
219 lmod = llmod[[ ilmodIndex ]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
220 iTaxon = liTaxon[[ ilmodIndex ]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
221 astrCols = lastrCols[[ ilmodIndex ]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
222
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
223 #Exclude none and errors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
224 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
225 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
226 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
227 iPValuePosition = 4
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
228
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
229 #Get the column name of the iTaxon index
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
230 #frmeTmp needs to be what?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
231 strTaxon = colnames( frmeData )[iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
232 #Get summary information from the linear model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
233 lsSum = try( summary( lmod ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
234 #The following can actually happen when the stranger regressors return broken results
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
235 if( class( lsSum ) == "try-error" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
236 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
237 next
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
238 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
239
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
240 #Write summary information to log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
241 funcWrite( "#model", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
242 funcWrite( lmod, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
243 funcWrite( "#summary", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
244 #Unbelievably, some of the more unusual regression methods crash out when _printing_ their results
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
245 try( funcWrite( lsSum, strLog ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
246
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
247 #Get the coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
248 #This should work for linear models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
249 frmeCoefs <- try( coefficients( lsSum ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
250
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
251 if( ( class(frmeCoefs ) == "try-error" ) || is.null( frmeCoefs ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
252 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
253 adCoefs = try( coefficients( lmod ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
254 if(class( adCoefs ) == "try-error")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
255 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
256 adCoefs = coef(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
257 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
258 frmeCoefs <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
259 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
260 if( class( frmeCoefs ) == "list" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
261 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
262 frmeCoefs <- frmeCoefs$count
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
263 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
264 adCoefs = frmeCoefs[,1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
265 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
266
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
267 #Go through each coefficient
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
268 astrRows <- names( adCoefs )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
269
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
270 ##lmm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
271 if( is.null( astrRows ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
272 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
273 astrRows = rownames( lsSum$tTable )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
274 frmeCoefs = lsSum$tTable
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
275 iPValuePosition = 5
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
276 adCoefs = frmeCoefs[,1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
277 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
278
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
279 for( iMetadata in 1:length( astrRows ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
280 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
281 #Current coef which is being evaluated
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
282 strOrig = astrRows[ iMetadata ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
283 #Skip y interscept
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
284 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
285 #Skip suppressed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
286 if( funcCoef2Col( strOrig, frmeData ) %in% asSuppressCovariates){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
287
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
288 #Extract pvalue and std in standard model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
289 dP = frmeCoefs[ strOrig, iPValuePosition ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
290 dStd = frmeCoefs[ strOrig, 2 ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
291
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
292 #Attempt to extract the pvalue and std in mixed effects summary
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
293 #Could not get the pvalue so skip result
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
294 if( is.nan( dP ) || is.na( dP ) || is.null( dP ) ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
295
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
296 dCoef = adCoefs[ iMetadata ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
297
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
298 #Setting adMetadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
299 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
300 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
301 if( is.na( strMetadata ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
302 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
303 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
304 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
305 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
306 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
307 adMetadata <- frmeData[, strMetadata ]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
308
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
309 # Store (factor level modified) p-value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
310 # Store general results for each coef
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
311 adP <- c( adP, dP )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
312
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
313 # Append to the list of information about associations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
314 lsSig[[ length( lsSig ) + 1 ]] <- list(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
315 # Current metadata name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
316 name = strMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
317 # Current metadatda name (as a factor level if existing as such)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
318 orig = strOrig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
319 # Taxon feature name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
320 taxon = strTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
321 # Taxon data / response
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
322 data = frmeData[, iTaxon ],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
323 # All levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
324 factors = c( strMetadata ),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
325 # Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
326 metadata = adMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
327 # Current coeficient value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
328 value = dCoef,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
329 # Standard deviation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
330 std = dStd,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
331 # Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
332 allCoefs = adCoefs )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
333 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
334 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
335 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
336 return( list( adP = adP, lsSig = lsSig, lsQCCounts = lsQCCounts ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
337 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
338 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
339
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
340 funcGetZeroInflatedResults <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
341 ### Reduce the lm object return to just the data needed for further analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
342 ( llmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
343 ### The result from a linear model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
344 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
345 ### Data analysis is performed on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
346 liTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
347 ### The response id
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
348 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
349 ### Significance level for q-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
350 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
351 ### List of pvalues from all associations performed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
352 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
353 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
354 strLog,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
355 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
356 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
357 ### Records of counts associated with quality control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
358 lastrCols,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
359 ### Predictors used in the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
360 asSuppressCovariates=c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
361 ### Vector of covariates to suppress and not give results for
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
362 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
363 ilmodIndex = 0
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
364 for(lmod in llmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
365 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
366 ilmodIndex = ilmodIndex + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
367 lmod = llmod[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
368 iTaxon = liTaxon[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
369 astrCols = lastrCols[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
370
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
371 #Exclude none and errors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
372 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
373 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
374 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
375 iPValuePosition = 4
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
376
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
377 #Get the column name of the iTaxon index
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
378 #frmeTmp needs to be what?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
379 strTaxon = colnames( frmeData )[iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
380
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
381 #Write summary information to log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
382 funcWrite( "#model", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
383 funcWrite( lmod, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
384
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
385 #Get the coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
386 #This should work for linear models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
387 frmeCoefs <- summary(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
388 if(! is.null( frmeCoefs$coefficients$count ) ) # Needed for zeroinfl
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
389 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
390 frmeCoefs = frmeCoefs$coefficients$count
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
391 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
392 adCoefs = frmeCoefs[,1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
393 names(adCoefs) = row.names(frmeCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
394
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
395 funcWrite( "#Coefs", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
396 funcWrite( frmeCoefs, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
397
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
398 #Go through each coefficient
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
399 astrRows <- row.names( frmeCoefs )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
400
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
401 for( iMetadata in 1:length( astrRows ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
402 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
403 #Current coef which is being evaluated
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
404 strOrig = astrRows[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
405 #Skip y interscept
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
406 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
407 #Skip suppressed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
408 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
409
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
410 #Extract pvalue and std in standard model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
411 dP = frmeCoefs[strOrig, iPValuePosition]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
412 if(is.nan(dP)){next}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
413 dStd = frmeCoefs[strOrig,2]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
414
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
415 dCoef = adCoefs[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
416
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
417 #Setting adMetadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
418 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
419 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
420 if( is.na( strMetadata ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
421 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
422 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
423 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
424 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
425 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
426 adMetadata <- frmeData[,strMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
427
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
428 #Store (factor level modified) p-value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
429 #Store general results for each coef
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
430 adP <- c(adP, dP)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
431 lsSig[[length( lsSig ) + 1]] <- list(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
432 #Current metadata name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
433 name = strMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
434 #Current metadatda name (as a factor level if existing as such)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
435 orig = strOrig,#
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
436 #Taxon feature name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
437 taxon = strTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
438 #Taxon data / response
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
439 data = frmeData[,iTaxon],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
440 #All levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
441 factors = c(strMetadata),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
442 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
443 metadata = adMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
444 #Current coeficient value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
445 value = dCoef,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
446 #Standard deviation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
447 std = dStd,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
448 #Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
449 allCoefs = adCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
450 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
451 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
452 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
453 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
454 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
455 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
456
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
457 oldfuncGetZeroInflatedResults <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
458 ### Reduce the lm object return to just the data needed for further analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
459 ( llmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
460 ### The result from a linear model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
461 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
462 ### Data analysis is performed on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
463 liTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
464 ### The response id
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
465 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
466 ### Significance level for q-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
467 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
468 ### List of pvalues from all associations performed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
469 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
470 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
471 strLog,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
472 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
473 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
474 ### Records of counts associated with quality control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
475 lastrCols,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
476 ### Predictors used in the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
477 asSuppressCovariates=c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
478 ### Vector of covariates to suppress and not give results for
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
479 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
480 ilmodIndex = 0
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
481 for(lmod in llmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
482 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
483 ilmodIndex = ilmodIndex + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
484 lmod = llmod[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
485 iTaxon = liTaxon[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
486 astrCols = lastrCols[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
487
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
488 #Exclude none and errors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
489 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
490 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
491 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
492 iPValuePosition = 4
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
493
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
494 #Get the column name of the iTaxon index
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
495 #frmeTmp needs to be what?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
496 strTaxon = colnames( frmeData )[iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
497
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
498 #Write summary information to log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
499 funcWrite( "#model", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
500 funcWrite( lmod, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
501
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
502 #Get the coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
503 #This should work for linear models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
504 frmeCoefs <- summary(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
505 adCoefs = frmeCoefs[,1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
506 names(adCoefs) = row.names(frmeCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
507
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
508 #Go through each coefficient
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
509 astrRows <- row.names( frmeCoefs )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
510
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
511 for( iMetadata in 1:length( astrRows ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
512 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
513 #Current coef which is being evaluated
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
514 strOrig = astrRows[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
515 #Skip y interscept
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
516 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
517 #Skip suppressed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
518 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
519
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
520 #Extract pvalue and std in standard model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
521 dP = frmeCoefs[strOrig, iPValuePosition]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
522 dStd = frmeCoefs[strOrig,2]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
523
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
524 dCoef = adCoefs[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
525
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
526 #Setting adMetadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
527 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
528 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
529 if( is.na( strMetadata ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
530 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
531 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
532 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
533 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
534 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
535 adMetadata <- frmeData[,strMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
536
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
537 #Store (factor level modified) p-value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
538 #Store general results for each coef
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
539 adP <- c(adP, dP)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
540 lsSig[[length( lsSig ) + 1]] <- list(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
541 #Current metadata name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
542 name = strMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
543 #Current metadatda name (as a factor level if existing as such)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
544 orig = strOrig,#
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
545 #Taxon feature name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
546 taxon = strTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
547 #Taxon data / response
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
548 data = frmeData[,iTaxon],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
549 #All levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
550 factors = c(strMetadata),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
551 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
552 metadata = adMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
553 #Current coeficient value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
554 value = dCoef,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
555 #Standard deviation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
556 std = dStd,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
557 #Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
558 allCoefs = adCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
559 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
560 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
561 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
562 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
563 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
564 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
565
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
566
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
567 notfuncGetZeroInflatedResults <- function
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
568 ### Reduce the lm object return to just the data needed for further analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
569 ( llmod,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
570 ### The result from a linear model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
571 frmeData,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
572 ### Data analysis is performed on
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
573 liTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
574 ### The response id
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
575 dSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
576 ### Significance level for q-values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
577 adP,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
578 ### List of pvalues from all associations performed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
579 lsSig,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
580 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
581 strLog,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
582 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
583 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
584 ### Records of counts associated with quality control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
585 lastrCols,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
586 ### Predictors used in the association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
587 asSuppressCovariates=c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
588 ### Vector of covariates to suppress and not give results for
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
589 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
590 ilmodIndex = 0
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
591 for(lmod in llmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
592 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
593 ilmodIndex = ilmodIndex + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
594 lmod = llmod[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
595 iTaxon = liTaxon[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
596 astrCols = lastrCols[[ilmodIndex]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
597
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
598 #Exclude none and errors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
599 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
600 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
601 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
602 iPValuePosition = 4
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
603
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
604 #Get the column name of the iTaxon index
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
605 #frmeTmp needs to be what?
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
606 strTaxon = colnames( frmeData )[iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
607
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
608 #Write summary information to log file
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
609 funcWrite( "#model", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
610 funcWrite( lmod, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
611
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
612 #Get the coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
613 #This should work for linear models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
614 frmeCoefs <- summary(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
615 frmeCoefs = frmeCoefs$coefficients$count
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
616 funcWrite( "#Coefs", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
617 funcWrite( frmeCoefs, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
618
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
619 adCoefs = frmeCoefs[,1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
620 names(adCoefs) = row.names(frmeCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
621
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
622 #Go through each coefficient
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
623 astrRows <- row.names( frmeCoefs )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
624
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
625 for( iMetadata in 1:length( astrRows ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
626 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
627 #Current coef which is being evaluated
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
628 strOrig = astrRows[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
629
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
630 #Skip y interscept
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
631 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
632 #Skip suppressed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
633 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
634
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
635 #Extract pvalue and std in standard model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
636 dP = frmeCoefs[strOrig, iPValuePosition]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
637 if(is.nan(dP)){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
638 dStd = frmeCoefs[strOrig,2]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
639 dCoef = adCoefs[iMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
640
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
641 #Setting adMetadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
642 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
643 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
644 if( is.na( strMetadata ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
645 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
646 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
647 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
648 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
649 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
650 adMetadata <- frmeData[,strMetadata]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
651
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
652 #Store (factor level modified) p-value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
653 #Store general results for each coef
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
654 adP <- c(adP, dP)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
655 lsSig[[length( lsSig ) + 1]] <- list(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
656 #Current metadata name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
657 name = strMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
658 #Current metadatda name (as a factor level if existing as such)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
659 orig = strOrig,#
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
660 #Taxon feature name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
661 taxon = strTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
662 #Taxon data / response
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
663 data = frmeData[,iTaxon],
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
664 #All levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
665 factors = c(strMetadata),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
666 #Metadata values
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
667 metadata = adMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
668 #Current coeficient value
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
669 value = dCoef,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
670 #Standard deviation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
671 std = dStd,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
672 #Model coefficients
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
673 allCoefs = adCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
674 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
675 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
676 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
677 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
678 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
679 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
680
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
681
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
682 ### Options for variable selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
683 # OK
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
684 funcBoostModel <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
685 ### Perform model selection / regularization with boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
686 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
687 ### The formula of the full model before boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
688 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
689 ### The data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
690 adCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
691 ### The response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
692 lsParameters,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
693 ### User controlled parameters needed specific to boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
694 lsForcedParameters = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
695 ### Force these predictors to be in the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
696 strLog
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
697 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
698 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
699 funcWrite( c("#Boost formula", strFormula), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
700 lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="laplace", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
701 #TODO# lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="gaussian", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
702
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
703 astrTerms <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
704 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
705 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
706 #Get boosting summary results
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
707 lsSum <- summary( lmod, plotit = FALSE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
708
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
709 #Document
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
710 funcWrite( "#model-gbm", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
711 funcWrite( lmod$fit, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
712 funcWrite( lmod$train.error, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
713 funcWrite( lmod$Terms, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
714 funcWrite( "#summary-gbm", strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
715 funcWrite( lsSum, strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
716
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
717 # Uneven metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
718 vstrUneven = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
719 # Kept metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
720 vstrKeepMetadata = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
721
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
722 #Select model predictors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
723 #Check the frequency of selection and skip if not selected more than set threshold dFreq
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
724 for( strMetadata in lmod$var.names )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
725 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
726 #Get the name of the metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
727 strTerm <- funcCoef2Col( strMetadata, frmeTmp, c(astrMetadata, astrGenetics) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
728
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
729 #Add back in uneven metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
730 if(fAddBack)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
731 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
732 ldMetadata = frmeTmp[[strMetadata]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
733 if(length(which(table(ldMetadata)/length(ldMetadata)>dUnevenMax))>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
734 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
735 astrTerms <- c(astrTerms, strTerm)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
736 vstrUneven = c(vstrUneven,strMetadata)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
737 next
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
738 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
739 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
740
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
741 #If the selprob is less than a certain frequency, skip
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
742 dSel <- lsSum$rel.inf[which( lsSum$var == strMetadata )] / 100
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
743 if( is.na(dSel) || ( dSel <= lsParameters$dFreq ) ){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
744 #TODO# if( is.na(dSel) || ( dSel < lsParameters$dFreq ) ){ next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
745
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
746 #If you should ignore the metadata, continue
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
747 if( is.null( strTerm ) ) { next }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
748
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
749 #If you cant find the metadata name, write
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
750 if( is.na( strTerm ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
751 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
752 c_logrMaaslin$error( "Unknown coefficient: %s", strMetadata )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
753 next
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
754 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
755
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
756 #Collect metadata names
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
757 astrTerms <- c(astrTerms, strTerm)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
758 vstrKeepMetadata = c(vstrKeepMetadata,strTerm)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
759 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
760 } else { astrTerms = lsForcedParameters }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
761
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
762 # funcBoostInfluencePlot(vdRelInf=lsSum$rel.inf, sFeature=lsParameters$sBugName, vsPredictorNames=lsSum$var, vstrKeepMetadata=vstrKeepMetadata, vstrUneven=vstrUneven)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
763
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
764 return(unique(c(astrTerms,lsForcedParameters)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
765 ### Return a vector of predictor names to use in a reduced model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
766 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
767
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
768 #Glmnet default is to standardize the variables.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
769 #used as an example for implementation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
770 #http://r.789695.n4.nabble.com/estimating-survival-times-with-glmnet-and-coxph-td4614225.html
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
771 funcPenalizedModel <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
772 ### Perform penalized regularization for variable selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
773 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
774 ### The formula of the full model before boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
775 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
776 ### The data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
777 adCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
778 ### The response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
779 lsParameters,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
780 ### User controlled parameters needed specific to boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
781 lsForcedParameters = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
782 ### Force these predictors to be in the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
783 strLog
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
784 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
785 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
786 #Convert the data frame to a model matrix
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
787 mtrxDesign = model.matrix(as.formula(strFormula), data=frmeTmp)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
788
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
789 #Cross validate the lambda
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
790 cvRet = cv.glmnet(x=mtrxDesign,y=adCur,alpha=lsParameters$dPAlpha)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
791
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
792 #Perform lasso
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
793 glmnetMod = glmnet(x=mtrxDesign,y=adCur,family=lsParameters$family,alpha=lsParameters$dPAlpha,lambda=cvRet$lambda.min)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
794
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
795 #Get non zero beta and return column names for covariate names.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
796 ldBeta = glmnetMod$beta[,which.max(glmnetMod$dev.ratio)]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
797 ldBeta = names(ldBeta[which(abs(ldBeta)>0)])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
798 return(sapply(ldBeta,funcCoef2Col,frmeData=frmeTmp))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
799 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
800
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
801 # OK
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
802 funcForwardModel <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
803 ### Perform model selection with forward stepwise selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
804 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
805 ### lm style string defining reposne and predictors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
806 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
807 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
808 adCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
809 ### Response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
810 lsParameters,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
811 ### User controlled parameters needed specific to boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
812 lsForcedParameters = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
813 ### Force these predictors to be in the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
814 strLog
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
815 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
816 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
817 funcWrite( c("#Forward formula", strFormula), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
818
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
819 strNullFormula = "adCur ~ 1"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
820 if(!is.null(lsForcedParameters))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
821 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
822 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
823 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
824 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
825 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
826 if(!("try-error" %in% c(class( lmodNull ),class( lmodFull ))))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
827 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
828 lmod = stepAIC(lmodNull, scope=list(lower=lmodNull,upper=lmodFull), direction="forward", trace=0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
829 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
830 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
831 return( lsForcedParameters )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
832 ### Return a vector of predictor names to use in a reduced model or NA on error
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
833 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
834
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
835 # OK
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
836 # Select model with backwards selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
837 funcBackwardsModel <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
838 ### Perform model selection with backwards stepwise selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
839 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
840 ### lm style string defining reponse and predictors
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
841 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
842 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
843 adCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
844 ### Response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
845 lsParameters,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
846 ### User controlled parameters needed specific to boosting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
847 lsForcedParameters = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
848 ### Force these predictors to be in the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
849 strLog
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
850 ### File to which to document logging
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
851 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
852 funcWrite( c("#Backwards formula", strFormula), strLog )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
853
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
854 strNullFormula = "adCur ~ 1"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
855 if(!is.null(lsForcedParameters))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
856 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
857 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
858 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
859
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
860 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
861 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
862
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
863 if(! class( lmodFull ) == "try-error" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
864 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
865 lmod = stepAIC(lmodFull, scope=list(lower=lmodNull, upper=lmodFull), direction="backward")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
866 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
867 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
868 return( lsForcedParameters ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
869 ### Return a vector of predictor names to use in a reduced model or NA on error
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
870 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
871
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
872 ### Analysis methods
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
873 ### Univariate options
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
874
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
875 # Sparse Dir. Model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
876 #TODO# Implement in sfle
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
877
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
878 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
879 # Correlation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
880 # NOTE: Ignores the idea of random and fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
881 funcSpearman <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
882 ### Perform multiple univariate comparisons producing spearman correlations for association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
883 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
884 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
885 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
886 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
887 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
888 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
889 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
890 ### List recording anything important to QC
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
891 strRandomFormula = NULL
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
892 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
893 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
894 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
895 functionContrast=function(x,adCur,dfData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
896 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
897 retList = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
898 ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
899 #Returning rho for the coef in a named vector
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
900 vdCoef = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
901 vdCoef[[x]]=ret$estimate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
902 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
903 return(retList)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
904 }, lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
905 ### List of contrast information, pvalue, contrast and std per univariate test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
906 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
907
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
908 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
909 # Wilcoxon (T-Test)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
910 # NOTE: Ignores the idea of random and fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
911 funcWilcoxon <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
912 ### Perform multiple univariate comparisons performing wilcoxon tests on discontinuous data with 2 levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
913 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
914 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
915 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
916 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
917 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
918 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
919 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
920 ### List recording anything important to QC
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
921 strRandomFormula = NULL
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
922 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
923 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
924 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
925 functionContrast=function(x,adCur,dfData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
926 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
927 retList = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
928 ret = wilcox.test(as.formula(paste("adCur",x,sep=" ~ ")), data=dfData, na.action=c_strNA_Action)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
929 #Returning NA for the coef in a named vector
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
930 vdCoef = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
931 vdCoef[[x]]=ret$statistic
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
932 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
933 return(retList)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
934 }, lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
935 ### List of contrast information, pvalue, contrast and std per univariate test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
936 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
937
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
938 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
939 # Kruskal.Wallis (Nonparameteric anova)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
940 # NOTE: Ignores the idea of random and fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
941 funcKruskalWallis <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
942 ### Perform multiple univariate comparisons performing Kruskal wallis rank sum tests on discontuous data with more than 2 levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
943 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
944 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
945 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
946 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
947 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
948 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
949 lsQCCounts,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
950 ### List recording anything important to QC
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
951 strRandomFormula = NULL
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
952 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
953 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
954 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
955 functionContrast=function(x,adCur,dfData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
956 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
957 retList = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
958 lmodKW = kruskal(adCur,dfData[[x]],group=FALSE,p.adj="holm")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
959
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
960 asLevels = levels(dfData[[x]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
961 # The names of the generated comparisons, sometimes the control is first sometimes it is not so
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
962 # We will just check which is in the names and use that
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
963 asComparisons = row.names(lmodKW$comparisons)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
964 #Get the comparison with the control
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
965 for(sLevel in asLevels[2:length(asLevels)])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
966 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
967 sComparison = intersect(c(paste(asLevels[1],sLevel,sep=" - "),paste(sLevel,asLevels[1],sep=" - ")),asComparisons)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
968 #Returning NA for the coef in a named vector
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
969 vdCoef = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
970 vdCoef[[paste(x,sLevel,sep="")]]=lmodKW$comparisons[sComparison,"Difference"]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
971 # vdCoef[[paste(x,sLevel,sep="")]]=NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
972 retList[[length(retList)+1]]=list(p.value=lmodKW$comparisons[sComparison,"p.value"],SD=1.0,name=paste(x,sLevel,sep=""),coef=vdCoef)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
973 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
974 return(retList)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
975 }, lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
976 ### List of contrast information, pvalue, contrast and std per univariate test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
977 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
978
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
979 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
980 # NOTE: Ignores the idea of random and fixed covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
981 funcDoUnivariate <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
982 ### Perform multiple univariate comparisons producing spearman correlations for association
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
983 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
984 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
985 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
986 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
987 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
988 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
989 lsHistory,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
990 ### List recording p-values, association information, and QC counts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
991 strRandomFormula = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
992 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
993 fZeroInflate = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
994 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
995 if(fZeroInflate)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
996 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
997 throw("There are no zero-inflated univariate models to perform your analysis.")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
998 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
999
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1000 # Get covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1001 astrCovariates = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1002
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1003 # For each covariate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1004 for(sCovariate in astrCovariates)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1005 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1006 ## Check to see if it is discrete
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1007 axData = frmeTmp[[sCovariate]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1008 lsRet = NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1009 if(is.factor(axData) || is.logical(axData))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1010 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1011 ## If discrete check how many levels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1012 lsDataLevels = levels(axData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1013 ## If 2 levels do wilcoxon test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1014 if(length(lsDataLevels) < 3)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1015 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1016 lsRet = funcWilcoxon(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1017 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1018 ## If 3 or more levels do kruskal wallis test
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1019 lsRet = funcKruskalWallis(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1020 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1021 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1022 ## If not discrete do spearman test (list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1023 lsRet = funcSpearman(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1024 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1025 lsHistory[["adP"]] = c(lsHistory[["adP"]], lsRet[["adP"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1026 lsHistory[["lsSig"]] = c(lsHistory[["lsSig"]], lsRet[["lsSig"]])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1027 lsHistory[["lsQCCounts"]] = lsRet[["lsQCCounts"]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1028 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1029 return(lsHistory)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1030 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1031
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1032 ### Multivariate
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1033
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1034 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1035 funcLM <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1036 ### Perform vanilla linear regression
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1037 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1038 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1039 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1040 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1041 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1042 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1043 lsHistory,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1044 ### List recording p-values, association information, and QC counts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1045 strRandomFormula = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1046 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1047 fZeroInflated = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1048 ### Turns on the zero inflated model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1049 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1050 adCur = frmeTmp[,iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1051 if(fZeroInflated)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1052 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1053 return(try(gamlss(formula=as.formula(strFormula), family=BEZI, data=frmeTmp))) # gamlss
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1054 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1055 if(!is.null(strRandomFormula))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1056 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1057 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=gaussian(link="identity"), data=frmeTmp)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1058 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1059 #return(try( lmer(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1060 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1061 return(try( lm(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1062 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1063 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1064 ### lmod result object from lm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1065 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1066
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1067 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1068 funcBinomialMult <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1069 ### Perform linear regression with negative binomial link
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1070 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1071 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1072 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1073 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1074 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1075 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1076 lsHistory,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1077 ### List recording p-values, association information, and QC counts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1078 strRandomFormula = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1079 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1080 fZeroInflated = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1081 ### Turns on the zero inflated model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1082 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1083 adCur = frmeTmp[,iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1084 if(fZeroInflated)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1085 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1086 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="negbin"))) # pscl
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1087 # return(try(gamlss(as.formula(strFormula), family=ZINBI, data=frmeTmp))) # pscl
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1088 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1089 if(!is.null(strRandomFormula))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1090 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1091 throw("This analysis flow is not completely developed, please choose an option other than negative bionomial with random covariates")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1092 #TODO need to estimate the theta
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1093 #return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=negative.binomial(theta = 2, link=log), data=frmeTmp)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1094 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1095 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1096 return(try( glm.nb(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action )))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1097 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1098 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1099 ### lmod result object from lm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1100 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1101
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1102 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1103 funcQuasiMult <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1104 ### Perform linear regression with quasi-poisson link
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1105 strFormula,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1106 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1107 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1108 ### Data on which to perform analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1109 iTaxon,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1110 ### Index of the response data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1111 lsHistory,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1112 ### List recording p-values, association information, and QC counts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1113 strRandomFormula = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1114 ### Has the formula for random covariates
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1115 fZeroInflated = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1116 ### Turns on a zero infalted model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1117 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1118 adCur = frmeTmp[,iTaxon]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1119 if(fZeroInflated)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1120 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1121 # return(try(gamlss(formula=as.formula(strFormula), family=ZIP, data=frmeTmp))) # gamlss
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1122 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="poisson"))) # pscl
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1123 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1124 #Check to see if | is in the model, if so use a lmm otherwise the standard glm is ok
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1125 if(!is.null(strRandomFormula))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1126 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1127 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family= quasipoisson, data=frmeTmp)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1128 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1129 #return(try ( glmer(as.formula(strFormula), data=frmeTmp, family=quasipoisson, na.action=c_strNA_Action) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1130 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1131 return(try( glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1132 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1133 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1134 ### lmod result object from lm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1135 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1136
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1137 ### Transformations
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1138 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1139 funcArcsinSqrt <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1140 # Transform data with arcsin sqrt transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1141 aData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1142 ### The data on which to perform the transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1143 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1144 return(asin(sqrt(aData)))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1145 ### Transformed data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1146 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1147
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1148 funcSquareSin <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1149 # Transform data with square sin transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1150 # Opposite of the funcArcsinSqrt
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1151 aData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1152 ### The data on which to perform the transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1153 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1154 return(sin(aData)^2)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1155 ### Transformed data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1156 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1157
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1158 # Tested
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1159 funcNoTransform <-function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1160 ### Pass data without transform
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1161 aData
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1162 ### The data on which to perform the transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1163 ### Only given here to preserve the pattern, not used.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1164 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1165 return(aData)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1166 ### Transformed data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1167 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1168
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1169 funcGetAnalysisMethods <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1170 ### Returns the appropriate functions for regularization, analysis, data transformation, and analysis object inspection.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1171 ### This allows modular customization per analysis step.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1172 ### To add a new method insert an entry in the switch for either the selection, transform, or method
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1173 ### Insert them by using the pattern optparse_keyword_without_quotes = function_in_AnalysisModules
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1174 ### Order in the return listy is currently set and expected to be selection, transforms/links, analysis method
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1175 ### none returns null
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1176 sModelSelectionKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1177 ### Keyword defining the method of model selection
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1178 sTransformKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1179 ### Keyword defining the method of data transformation
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1180 sMethodKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1181 ### Keyword defining the method of analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1182 fZeroInflated = FALSE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1183 ### Indicates if using zero inflated models
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1184 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1185 lRetMethods = list()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1186 #Insert selection methods here
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1187 lRetMethods[[c_iSelection]] = switch(sModelSelectionKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1188 boost = funcBoostModel,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1189 penalized = funcPenalizedModel,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1190 forward = funcForwardModel,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1191 backward = funcBackwardsModel,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1192 none = NA)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1193
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1194 #Insert transforms
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1195 lRetMethods[[c_iTransform]] = switch(sTransformKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1196 asinsqrt = funcArcsinSqrt,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1197 none = funcNoTransform)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1198
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1199 #Insert untransform
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1200 lRetMethods[[c_iUnTransform]] = switch(sTransformKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1201 asinsqrt = funcNoTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1202 none = funcNoTransform)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1203
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1204 #Insert analysis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1205 lRetMethods[[c_iAnalysis]] = switch(sMethodKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1206 neg_binomial = funcBinomialMult,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1207 quasi = funcQuasiMult,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1208 univariate = funcDoUnivariate,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1209 lm = funcLM,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1210 none = NA)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1211
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1212 # If a univariate method is used it is required to set this to true
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1213 # For correct handling.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1214 lRetMethods[[c_iIsUnivariate]]=sMethodKey=="univariate"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1215
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1216 #Insert method to get results
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1217 if(fZeroInflated)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1218 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1219 lRetMethods[[c_iResults]] = switch(sMethodKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1220 neg_binomial = funcGetZeroInflatedResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1221 quasi = funcGetZeroInflatedResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1222 univariate = funcGetUnivariateResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1223 lm = funcGetZeroInflatedResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1224 none = NA)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1225 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1226 lRetMethods[[c_iResults]] = switch(sMethodKey,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1227 neg_binomial = funcGetLMResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1228 quasi = funcGetLMResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1229 univariate = funcGetUnivariateResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1230 lm = funcGetLMResults,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1231 none = NA)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1232 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1233
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1234 return(lRetMethods)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1235 ### Returns a list of functions to be passed for regularization, data transformation, analysis,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1236 ### and custom analysis results introspection functions to pull from return objects data of interest
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1237 }