comparison maaslin-4450aa4ecc84/src/lib/AnalysisModules.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
comparison
equal deleted inserted replaced
0:e0b5980139d9 1:a87d5a5f2776
1 #####################################################################################
2 #Copyright (C) <2012>
3 #
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
5 #this software and associated documentation files (the "Software"), to deal in the
6 #Software without restriction, including without limitation the rights to use, copy,
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8 #and to permit persons to whom the Software is furnished to do so, subject to
9 #the following conditions:
10 #
11 #The above copyright notice and this permission notice shall be included in all copies
12 #or substantial portions of the Software.
13 #
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 #
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
22 # authored by the Huttenhower lab at the Harvard School of Public Health
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
24 #####################################################################################
25
26 inlinedocs <- function(
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
28 ##description<< Allows one to plug in new modules to perform analysis (univariate or multivariate), regularization, and data (response) transformation.
29 ) { return( pArgs ) }
30
31 # Libraries
32 suppressMessages(library( agricolae, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
33 # Needed for the pot-hoc Kruskal wallis comparisons
34 suppressMessages(library( penalized, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
35 # Needed for stepAIC
36 suppressMessages(library( MASS, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
37 # Needed for na action behavior
38 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
39 # Needed for boosting
40 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
41 # Needed for LASSO
42 suppressMessages(library( glmnet, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
43 # Needed for mixed models
44 #suppressMessages(library( lme4, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
45 suppressMessages(library( nlme, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
46
47 # Needed for zero inflated models
48 #suppressMessages(library( MCMCglmm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
49 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
50 suppressMessages(library( gamlss, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
51 # Do not use #suppressMessages(library( glmmADMB, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE))
52
53 fAddBack = TRUE
54 dUnevenMax = .9
55
56
57 ### Helper functions
58 # OK
59 funcMakeContrasts <- function
60 ### Makes univariate contrasts of all predictors in the model formula with the response.
61 (strFormula,
62 ### lm style string defining reponse and predictors
63 strRandomFormula,
64 ### mixed model string defining the fixed covariates
65 frmeTmp,
66 ### The data frame to find predictor data in
67 iTaxon,
68 ### Taxon
69 functionContrast,
70 ### functionContrast The univariate test to perform
71 lsQCCounts
72 ### QC info
73 ){
74 #TODO are we updating the QCCounts?
75 lsSig = list()
76 ### Holds all the significance results from the tests
77 adP = c()
78 ### Holds the p-values
79 sCurDataName = names(frmeTmp)[iTaxon]
80 ### The name of the taxon (data row) that is being associated (always assumed to be numeric)
81 #Get test comparisons (predictor names from formula string)
82 asComparisons = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
83
84 #Change metadata in formula to univariate comparisons
85 for(sComparison in asComparisons)
86 {
87 # Metadata values
88 vxTest = frmeTmp[[sComparison]]
89
90 # Get the levels in the comparison
91 # Can ignore the first level because it is the reference level
92 asLevels = sComparison
93 if(is.factor(vxTest)){asLevels = levels(vxTest)[2:length(vxTest)]}
94
95 lasComparisonResults = functionContrast(x=sComparison, adCur=frmeTmp[[sCurDataName]], dfData=frmeTmp)
96 for(asComparison in lasComparisonResults)
97 {
98 if( is.na( asComparison$p.value ) ) { next }
99 # Get pvalue
100 adP = c(adP, asComparison$p.value)
101 # Get SD, if not available, give SD of covariate
102 dSTD = asComparison$SD
103 # TODO Is using sd on factor and binary data correct?
104 if(is.na(dSTD) || is.null(dSTD)){dSTD = sd(vxTest)}
105
106 lsSig[[length( lsSig ) + 1]] <- list(
107 #Current metadata name (string column name) ok
108 name = sComparison,
109 #Current metadatda name (string, as a factor level if existing as such) ok
110 orig = asComparison$name,
111 #Taxon feature name (string) ok
112 taxon = colnames( frmeTmp )[iTaxon],
113 #Taxon data / response (double vector) ok
114 data = frmeTmp[,iTaxon],
115 #Name of column ok
116 factors = sComparison,
117 #Metadata values (metadata as a factor or raw numeric) ok
118 metadata = vxTest,
119 #Current coefficient value (named coef value with level name (from coefs) ok
120 value = asComparison$coef,
121 #Standard deviation (numeric) ok
122 std = dSTD,
123 #Model coefficients (output from coefs with intercept) ok
124 allCoefs = asComparison$coef)
125 }
126 }
127 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
128 ### Returns a list of p-value, standard deviation, and comparison which produced the p-value
129 }
130
131 #Ok
132 funcGetStepPredictors <- function
133 ### Retrieve the predictors of the reduced model after stepwise model selection
134 (lmod,
135 ### Linear model resulting from step-wise model selection
136 frmeTmp,
137 strLog
138 ### File to document logging
139 ){
140 #Document
141 funcWrite( "#model", strLog )
142 funcWrite( lmod$fit, strLog )
143 #TODO funcWrite( lmod$train.error, strLog )
144 #TODO funcWrite( lmod$Terms, strLog )
145 funcWrite( "#summary-gbm", strLog )
146 funcWrite( summary(lmod), strLog )
147
148 #Get Names from coefficients
149 asStepCoefsFactors = coefficients(lmod)
150 astrCoefNames = setdiff(names(asStepCoefsFactors[as.vector(!is.na(asStepCoefsFactors))==TRUE]),"(Intercept)")
151 asStepCoefsFactors = unique(as.vector(sapply(astrCoefNames,funcCoef2Col, frmeData=frmeTmp)))
152
153 if(length(asStepCoefsFactors)<1){ return(NA) }
154 return( asStepCoefsFactors )
155 ### Vector of string predictor names
156 }
157
158 funcGetUnivariateResults <- function
159 ### Reduce the list of list of results to the correct format
160 ( llmod,
161 ### The list of univariate models
162 frmeData,
163 ### Data analysis is performed on
164 liTaxon,
165 ### The response id
166 dSig,
167 ### Significance level for q-values
168 adP,
169 ### List of pvalues from all associations performed
170 lsSig,
171 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
172 strLog,
173 ### File to which to document logging
174 lsQCCounts,
175 ### Records of counts associated with quality control
176 lastrCols,
177 ### Predictors used in the association
178 asSuppressCovariates=c()
179 ### Vector of covariates to suppress and not give results for
180 ){
181 adP = c()
182 lsSig = list()
183 for(lmod in llmod)
184 {
185 adP = c(adP,lmod$adP)
186 lsSig = c(lsSig,lmod$lsSig)
187 }
188 return(list(adP=adP, lsSig=lsSig, lsQCCounts=llmod[length(llmod)]$lsQCCounts))
189 }
190
191 # OK
192 funcGetLMResults <- function
193 ### Reduce the lm object return to just the data needed for further analysis
194 ( llmod,
195 ### The result from a linear model
196 frmeData,
197 ### Data analysis is performed on
198 liTaxon,
199 ### The response id
200 dSig,
201 ### Significance level for q-values
202 adP,
203 ### List of pvalues from all associations performed
204 lsSig,
205 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
206 strLog,
207 ### File to which to document logging
208 lsQCCounts,
209 ### Records of counts associated with quality control
210 lastrCols,
211 ### Predictors used in the association
212 asSuppressCovariates=c()
213 ### Vector of covariates to suppress and not give results for
214 ){
215 ilmodIndex = 0
216 for( lmod in llmod )
217 {
218 ilmodIndex = ilmodIndex + 1
219 lmod = llmod[[ ilmodIndex ]]
220 iTaxon = liTaxon[[ ilmodIndex ]]
221 astrCols = lastrCols[[ ilmodIndex ]]
222
223 #Exclude none and errors
224 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
225 {
226 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
227 iPValuePosition = 4
228
229 #Get the column name of the iTaxon index
230 #frmeTmp needs to be what?
231 strTaxon = colnames( frmeData )[iTaxon]
232 #Get summary information from the linear model
233 lsSum = try( summary( lmod ) )
234 #The following can actually happen when the stranger regressors return broken results
235 if( class( lsSum ) == "try-error" )
236 {
237 next
238 }
239
240 #Write summary information to log file
241 funcWrite( "#model", strLog )
242 funcWrite( lmod, strLog )
243 funcWrite( "#summary", strLog )
244 #Unbelievably, some of the more unusual regression methods crash out when _printing_ their results
245 try( funcWrite( lsSum, strLog ) )
246
247 #Get the coefficients
248 #This should work for linear models
249 frmeCoefs <- try( coefficients( lsSum ) )
250
251 if( ( class(frmeCoefs ) == "try-error" ) || is.null( frmeCoefs ) )
252 {
253 adCoefs = try( coefficients( lmod ))
254 if(class( adCoefs ) == "try-error")
255 {
256 adCoefs = coef(lmod)
257 }
258 frmeCoefs <- NA
259 } else {
260 if( class( frmeCoefs ) == "list" )
261 {
262 frmeCoefs <- frmeCoefs$count
263 }
264 adCoefs = frmeCoefs[,1]
265 }
266
267 #Go through each coefficient
268 astrRows <- names( adCoefs )
269
270 ##lmm
271 if( is.null( astrRows ) )
272 {
273 astrRows = rownames( lsSum$tTable )
274 frmeCoefs = lsSum$tTable
275 iPValuePosition = 5
276 adCoefs = frmeCoefs[,1]
277 }
278
279 for( iMetadata in 1:length( astrRows ) )
280 {
281 #Current coef which is being evaluated
282 strOrig = astrRows[ iMetadata ]
283 #Skip y interscept
284 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
285 #Skip suppressed covariates
286 if( funcCoef2Col( strOrig, frmeData ) %in% asSuppressCovariates){ next }
287
288 #Extract pvalue and std in standard model
289 dP = frmeCoefs[ strOrig, iPValuePosition ]
290 dStd = frmeCoefs[ strOrig, 2 ]
291
292 #Attempt to extract the pvalue and std in mixed effects summary
293 #Could not get the pvalue so skip result
294 if( is.nan( dP ) || is.na( dP ) || is.null( dP ) ) { next }
295
296 dCoef = adCoefs[ iMetadata ]
297
298 #Setting adMetadata
299 #Metadata values
300 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
301 if( is.na( strMetadata ) )
302 {
303 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
304 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
305 }
306 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
307 adMetadata <- frmeData[, strMetadata ]
308
309 # Store (factor level modified) p-value
310 # Store general results for each coef
311 adP <- c( adP, dP )
312
313 # Append to the list of information about associations
314 lsSig[[ length( lsSig ) + 1 ]] <- list(
315 # Current metadata name
316 name = strMetadata,
317 # Current metadatda name (as a factor level if existing as such)
318 orig = strOrig,
319 # Taxon feature name
320 taxon = strTaxon,
321 # Taxon data / response
322 data = frmeData[, iTaxon ],
323 # All levels
324 factors = c( strMetadata ),
325 # Metadata values
326 metadata = adMetadata,
327 # Current coeficient value
328 value = dCoef,
329 # Standard deviation
330 std = dStd,
331 # Model coefficients
332 allCoefs = adCoefs )
333 }
334 }
335 }
336 return( list( adP = adP, lsSig = lsSig, lsQCCounts = lsQCCounts ) )
337 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
338 }
339
340 funcGetZeroInflatedResults <- function
341 ### Reduce the lm object return to just the data needed for further analysis
342 ( llmod,
343 ### The result from a linear model
344 frmeData,
345 ### Data analysis is performed on
346 liTaxon,
347 ### The response id
348 dSig,
349 ### Significance level for q-values
350 adP,
351 ### List of pvalues from all associations performed
352 lsSig,
353 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
354 strLog,
355 ### File to which to document logging
356 lsQCCounts,
357 ### Records of counts associated with quality control
358 lastrCols,
359 ### Predictors used in the association
360 asSuppressCovariates=c()
361 ### Vector of covariates to suppress and not give results for
362 ){
363 ilmodIndex = 0
364 for(lmod in llmod)
365 {
366 ilmodIndex = ilmodIndex + 1
367 lmod = llmod[[ilmodIndex]]
368 iTaxon = liTaxon[[ilmodIndex]]
369 astrCols = lastrCols[[ilmodIndex]]
370
371 #Exclude none and errors
372 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
373 {
374 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
375 iPValuePosition = 4
376
377 #Get the column name of the iTaxon index
378 #frmeTmp needs to be what?
379 strTaxon = colnames( frmeData )[iTaxon]
380
381 #Write summary information to log file
382 funcWrite( "#model", strLog )
383 funcWrite( lmod, strLog )
384
385 #Get the coefficients
386 #This should work for linear models
387 frmeCoefs <- summary(lmod)
388 if(! is.null( frmeCoefs$coefficients$count ) ) # Needed for zeroinfl
389 {
390 frmeCoefs = frmeCoefs$coefficients$count
391 }
392 adCoefs = frmeCoefs[,1]
393 names(adCoefs) = row.names(frmeCoefs)
394
395 funcWrite( "#Coefs", strLog )
396 funcWrite( frmeCoefs, strLog )
397
398 #Go through each coefficient
399 astrRows <- row.names( frmeCoefs )
400
401 for( iMetadata in 1:length( astrRows ) )
402 {
403 #Current coef which is being evaluated
404 strOrig = astrRows[iMetadata]
405 #Skip y interscept
406 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
407 #Skip suppressed covariates
408 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
409
410 #Extract pvalue and std in standard model
411 dP = frmeCoefs[strOrig, iPValuePosition]
412 if(is.nan(dP)){next}
413 dStd = frmeCoefs[strOrig,2]
414
415 dCoef = adCoefs[iMetadata]
416
417 #Setting adMetadata
418 #Metadata values
419 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
420 if( is.na( strMetadata ) )
421 {
422 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
423 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
424 }
425 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
426 adMetadata <- frmeData[,strMetadata]
427
428 #Store (factor level modified) p-value
429 #Store general results for each coef
430 adP <- c(adP, dP)
431 lsSig[[length( lsSig ) + 1]] <- list(
432 #Current metadata name
433 name = strMetadata,
434 #Current metadatda name (as a factor level if existing as such)
435 orig = strOrig,#
436 #Taxon feature name
437 taxon = strTaxon,
438 #Taxon data / response
439 data = frmeData[,iTaxon],
440 #All levels
441 factors = c(strMetadata),
442 #Metadata values
443 metadata = adMetadata,
444 #Current coeficient value
445 value = dCoef,
446 #Standard deviation
447 std = dStd,
448 #Model coefficients
449 allCoefs = adCoefs)
450 }
451 }
452 }
453 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
454 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
455 }
456
457 oldfuncGetZeroInflatedResults <- function
458 ### Reduce the lm object return to just the data needed for further analysis
459 ( llmod,
460 ### The result from a linear model
461 frmeData,
462 ### Data analysis is performed on
463 liTaxon,
464 ### The response id
465 dSig,
466 ### Significance level for q-values
467 adP,
468 ### List of pvalues from all associations performed
469 lsSig,
470 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
471 strLog,
472 ### File to which to document logging
473 lsQCCounts,
474 ### Records of counts associated with quality control
475 lastrCols,
476 ### Predictors used in the association
477 asSuppressCovariates=c()
478 ### Vector of covariates to suppress and not give results for
479 ){
480 ilmodIndex = 0
481 for(lmod in llmod)
482 {
483 ilmodIndex = ilmodIndex + 1
484 lmod = llmod[[ilmodIndex]]
485 iTaxon = liTaxon[[ilmodIndex]]
486 astrCols = lastrCols[[ilmodIndex]]
487
488 #Exclude none and errors
489 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
490 {
491 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
492 iPValuePosition = 4
493
494 #Get the column name of the iTaxon index
495 #frmeTmp needs to be what?
496 strTaxon = colnames( frmeData )[iTaxon]
497
498 #Write summary information to log file
499 funcWrite( "#model", strLog )
500 funcWrite( lmod, strLog )
501
502 #Get the coefficients
503 #This should work for linear models
504 frmeCoefs <- summary(lmod)
505 adCoefs = frmeCoefs[,1]
506 names(adCoefs) = row.names(frmeCoefs)
507
508 #Go through each coefficient
509 astrRows <- row.names( frmeCoefs )
510
511 for( iMetadata in 1:length( astrRows ) )
512 {
513 #Current coef which is being evaluated
514 strOrig = astrRows[iMetadata]
515 #Skip y interscept
516 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
517 #Skip suppressed covariates
518 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
519
520 #Extract pvalue and std in standard model
521 dP = frmeCoefs[strOrig, iPValuePosition]
522 dStd = frmeCoefs[strOrig,2]
523
524 dCoef = adCoefs[iMetadata]
525
526 #Setting adMetadata
527 #Metadata values
528 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
529 if( is.na( strMetadata ) )
530 {
531 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
532 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
533 }
534 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
535 adMetadata <- frmeData[,strMetadata]
536
537 #Store (factor level modified) p-value
538 #Store general results for each coef
539 adP <- c(adP, dP)
540 lsSig[[length( lsSig ) + 1]] <- list(
541 #Current metadata name
542 name = strMetadata,
543 #Current metadatda name (as a factor level if existing as such)
544 orig = strOrig,#
545 #Taxon feature name
546 taxon = strTaxon,
547 #Taxon data / response
548 data = frmeData[,iTaxon],
549 #All levels
550 factors = c(strMetadata),
551 #Metadata values
552 metadata = adMetadata,
553 #Current coeficient value
554 value = dCoef,
555 #Standard deviation
556 std = dStd,
557 #Model coefficients
558 allCoefs = adCoefs)
559 }
560 }
561 }
562 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
563 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
564 }
565
566
567 notfuncGetZeroInflatedResults <- function
568 ### Reduce the lm object return to just the data needed for further analysis
569 ( llmod,
570 ### The result from a linear model
571 frmeData,
572 ### Data analysis is performed on
573 liTaxon,
574 ### The response id
575 dSig,
576 ### Significance level for q-values
577 adP,
578 ### List of pvalues from all associations performed
579 lsSig,
580 ### List of information from the lm containing, metadata name, metadatda name (as a factor level if existing as such), Taxon feature name, Taxon data / response, All levels, Metadata values, Current coeficient value, Standard deviation, Model coefficients
581 strLog,
582 ### File to which to document logging
583 lsQCCounts,
584 ### Records of counts associated with quality control
585 lastrCols,
586 ### Predictors used in the association
587 asSuppressCovariates=c()
588 ### Vector of covariates to suppress and not give results for
589 ){
590 ilmodIndex = 0
591 for(lmod in llmod)
592 {
593 ilmodIndex = ilmodIndex + 1
594 lmod = llmod[[ilmodIndex]]
595 iTaxon = liTaxon[[ilmodIndex]]
596 astrCols = lastrCols[[ilmodIndex]]
597
598 #Exclude none and errors
599 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
600 {
601 #holds the location of the pvlaues if an lm, if lmm is detected this will be changed
602 iPValuePosition = 4
603
604 #Get the column name of the iTaxon index
605 #frmeTmp needs to be what?
606 strTaxon = colnames( frmeData )[iTaxon]
607
608 #Write summary information to log file
609 funcWrite( "#model", strLog )
610 funcWrite( lmod, strLog )
611
612 #Get the coefficients
613 #This should work for linear models
614 frmeCoefs <- summary(lmod)
615 frmeCoefs = frmeCoefs$coefficients$count
616 funcWrite( "#Coefs", strLog )
617 funcWrite( frmeCoefs, strLog )
618
619 adCoefs = frmeCoefs[,1]
620 names(adCoefs) = row.names(frmeCoefs)
621
622 #Go through each coefficient
623 astrRows <- row.names( frmeCoefs )
624
625 for( iMetadata in 1:length( astrRows ) )
626 {
627 #Current coef which is being evaluated
628 strOrig = astrRows[iMetadata]
629
630 #Skip y interscept
631 if( strOrig %in% c("(Intercept)", "Intercept", "Log(theta)") ) { next }
632 #Skip suppressed covariates
633 if( funcCoef2Col(strOrig,frmeData) %in% asSuppressCovariates){next}
634
635 #Extract pvalue and std in standard model
636 dP = frmeCoefs[strOrig, iPValuePosition]
637 if(is.nan(dP)){ next }
638 dStd = frmeCoefs[strOrig,2]
639 dCoef = adCoefs[iMetadata]
640
641 #Setting adMetadata
642 #Metadata values
643 strMetadata = funcCoef2Col( strOrig, frmeData, astrCols )
644 if( is.na( strMetadata ) )
645 {
646 if( substring( strOrig, nchar( strOrig ) - 1 ) == "NA" ) { next }
647 c_logrMaaslin$error( "Unknown coefficient: %s", strOrig )
648 }
649 if( substring( strOrig, nchar( strMetadata ) + 1 ) == "NA" ) { next }
650 adMetadata <- frmeData[,strMetadata]
651
652 #Store (factor level modified) p-value
653 #Store general results for each coef
654 adP <- c(adP, dP)
655 lsSig[[length( lsSig ) + 1]] <- list(
656 #Current metadata name
657 name = strMetadata,
658 #Current metadatda name (as a factor level if existing as such)
659 orig = strOrig,#
660 #Taxon feature name
661 taxon = strTaxon,
662 #Taxon data / response
663 data = frmeData[,iTaxon],
664 #All levels
665 factors = c(strMetadata),
666 #Metadata values
667 metadata = adMetadata,
668 #Current coeficient value
669 value = dCoef,
670 #Standard deviation
671 std = dStd,
672 #Model coefficients
673 allCoefs = adCoefs)
674 }
675 }
676 }
677 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
678 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data
679 }
680
681
682 ### Options for variable selection
683 # OK
684 funcBoostModel <- function(
685 ### Perform model selection / regularization with boosting
686 strFormula,
687 ### The formula of the full model before boosting
688 frmeTmp,
689 ### The data on which to perform analysis
690 adCur,
691 ### The response data
692 lsParameters,
693 ### User controlled parameters needed specific to boosting
694 lsForcedParameters = NULL,
695 ### Force these predictors to be in the model
696 strLog
697 ### File to which to document logging
698 ){
699 funcWrite( c("#Boost formula", strFormula), strLog )
700 lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="laplace", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
701 #TODO# lmod = try( gbm( as.formula( strFormula ), data=frmeTmp, distribution="gaussian", verbose=FALSE, n.minobsinnode=min(10, round(0.1 * nrow( frmeTmp ) ) ), n.trees=1000 ) )
702
703 astrTerms <- c()
704 if( !is.na( lmod ) && ( class( lmod ) != "try-error" ) )
705 {
706 #Get boosting summary results
707 lsSum <- summary( lmod, plotit = FALSE )
708
709 #Document
710 funcWrite( "#model-gbm", strLog )
711 funcWrite( lmod$fit, strLog )
712 funcWrite( lmod$train.error, strLog )
713 funcWrite( lmod$Terms, strLog )
714 funcWrite( "#summary-gbm", strLog )
715 funcWrite( lsSum, strLog )
716
717 # Uneven metadata
718 vstrUneven = c()
719 # Kept metadata
720 vstrKeepMetadata = c()
721
722 #Select model predictors
723 #Check the frequency of selection and skip if not selected more than set threshold dFreq
724 for( strMetadata in lmod$var.names )
725 {
726 #Get the name of the metadata
727 strTerm <- funcCoef2Col( strMetadata, frmeTmp, c(astrMetadata, astrGenetics) )
728
729 #Add back in uneven metadata
730 if(fAddBack)
731 {
732 ldMetadata = frmeTmp[[strMetadata]]
733 if(length(which(table(ldMetadata)/length(ldMetadata)>dUnevenMax))>0)
734 {
735 astrTerms <- c(astrTerms, strTerm)
736 vstrUneven = c(vstrUneven,strMetadata)
737 next
738 }
739 }
740
741 #If the selprob is less than a certain frequency, skip
742 dSel <- lsSum$rel.inf[which( lsSum$var == strMetadata )] / 100
743 if( is.na(dSel) || ( dSel <= lsParameters$dFreq ) ){ next }
744 #TODO# if( is.na(dSel) || ( dSel < lsParameters$dFreq ) ){ next }
745
746 #If you should ignore the metadata, continue
747 if( is.null( strTerm ) ) { next }
748
749 #If you cant find the metadata name, write
750 if( is.na( strTerm ) )
751 {
752 c_logrMaaslin$error( "Unknown coefficient: %s", strMetadata )
753 next
754 }
755
756 #Collect metadata names
757 astrTerms <- c(astrTerms, strTerm)
758 vstrKeepMetadata = c(vstrKeepMetadata,strTerm)
759 }
760 } else { astrTerms = lsForcedParameters }
761
762 # funcBoostInfluencePlot(vdRelInf=lsSum$rel.inf, sFeature=lsParameters$sBugName, vsPredictorNames=lsSum$var, vstrKeepMetadata=vstrKeepMetadata, vstrUneven=vstrUneven)
763
764 return(unique(c(astrTerms,lsForcedParameters)))
765 ### Return a vector of predictor names to use in a reduced model
766 }
767
768 #Glmnet default is to standardize the variables.
769 #used as an example for implementation
770 #http://r.789695.n4.nabble.com/estimating-survival-times-with-glmnet-and-coxph-td4614225.html
771 funcPenalizedModel <- function(
772 ### Perform penalized regularization for variable selection
773 strFormula,
774 ### The formula of the full model before boosting
775 frmeTmp,
776 ### The data on which to perform analysis
777 adCur,
778 ### The response data
779 lsParameters,
780 ### User controlled parameters needed specific to boosting
781 lsForcedParameters = NULL,
782 ### Force these predictors to be in the model
783 strLog
784 ### File to which to document logging
785 ){
786 #Convert the data frame to a model matrix
787 mtrxDesign = model.matrix(as.formula(strFormula), data=frmeTmp)
788
789 #Cross validate the lambda
790 cvRet = cv.glmnet(x=mtrxDesign,y=adCur,alpha=lsParameters$dPAlpha)
791
792 #Perform lasso
793 glmnetMod = glmnet(x=mtrxDesign,y=adCur,family=lsParameters$family,alpha=lsParameters$dPAlpha,lambda=cvRet$lambda.min)
794
795 #Get non zero beta and return column names for covariate names.
796 ldBeta = glmnetMod$beta[,which.max(glmnetMod$dev.ratio)]
797 ldBeta = names(ldBeta[which(abs(ldBeta)>0)])
798 return(sapply(ldBeta,funcCoef2Col,frmeData=frmeTmp))
799 }
800
801 # OK
802 funcForwardModel <- function(
803 ### Perform model selection with forward stepwise selection
804 strFormula,
805 ### lm style string defining reposne and predictors
806 frmeTmp,
807 ### Data on which to perform analysis
808 adCur,
809 ### Response data
810 lsParameters,
811 ### User controlled parameters needed specific to boosting
812 lsForcedParameters = NULL,
813 ### Force these predictors to be in the model
814 strLog
815 ### File to which to document logging
816 ){
817 funcWrite( c("#Forward formula", strFormula), strLog )
818
819 strNullFormula = "adCur ~ 1"
820 if(!is.null(lsForcedParameters))
821 {
822 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
823 }
824 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
825 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
826 if(!("try-error" %in% c(class( lmodNull ),class( lmodFull ))))
827 {
828 lmod = stepAIC(lmodNull, scope=list(lower=lmodNull,upper=lmodFull), direction="forward", trace=0)
829 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
830 }
831 return( lsForcedParameters )
832 ### Return a vector of predictor names to use in a reduced model or NA on error
833 }
834
835 # OK
836 # Select model with backwards selection
837 funcBackwardsModel <- function(
838 ### Perform model selection with backwards stepwise selection
839 strFormula,
840 ### lm style string defining reponse and predictors
841 frmeTmp,
842 ### Data on which to perform analysis
843 adCur,
844 ### Response data
845 lsParameters,
846 ### User controlled parameters needed specific to boosting
847 lsForcedParameters = NULL,
848 ### Force these predictors to be in the model
849 strLog
850 ### File to which to document logging
851 ){
852 funcWrite( c("#Backwards formula", strFormula), strLog )
853
854 strNullFormula = "adCur ~ 1"
855 if(!is.null(lsForcedParameters))
856 {
857 strNullFormula = paste( "adCur ~", paste( sprintf( "`%s`", lsForcedParameters ), collapse = " + " ))
858 }
859
860 lmodNull <- try( lm(as.formula( strNullFormula ), data=frmeTmp))
861 lmodFull <- try( lm(as.formula( strFormula ), data=frmeTmp ))
862
863 if(! class( lmodFull ) == "try-error" )
864 {
865 lmod = stepAIC(lmodFull, scope=list(lower=lmodNull, upper=lmodFull), direction="backward")
866 return(funcGetStepPredictors(lmod, frmeTmp, strLog))
867 } else {
868 return( lsForcedParameters ) }
869 ### Return a vector of predictor names to use in a reduced model or NA on error
870 }
871
872 ### Analysis methods
873 ### Univariate options
874
875 # Sparse Dir. Model
876 #TODO# Implement in sfle
877
878 # Tested
879 # Correlation
880 # NOTE: Ignores the idea of random and fixed covariates
881 funcSpearman <- function(
882 ### Perform multiple univariate comparisons producing spearman correlations for association
883 strFormula,
884 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
885 frmeTmp,
886 ### Data on which to perform analysis
887 iTaxon,
888 ### Index of the response data
889 lsQCCounts,
890 ### List recording anything important to QC
891 strRandomFormula = NULL
892 ### Has the formula for random covariates
893 ){
894 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
895 functionContrast=function(x,adCur,dfData)
896 {
897 retList = list()
898 ret = cor.test(as.formula(paste("~",x,"+ adCur")), data=dfData, method="spearman", na.action=c_strNA_Action)
899 #Returning rho for the coef in a named vector
900 vdCoef = c()
901 vdCoef[[x]]=ret$estimate
902 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
903 return(retList)
904 }, lsQCCounts))
905 ### List of contrast information, pvalue, contrast and std per univariate test
906 }
907
908 # Tested
909 # Wilcoxon (T-Test)
910 # NOTE: Ignores the idea of random and fixed covariates
911 funcWilcoxon <- function(
912 ### Perform multiple univariate comparisons performing wilcoxon tests on discontinuous data with 2 levels
913 strFormula,
914 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
915 frmeTmp,
916 ### Data on which to perform analysis
917 iTaxon,
918 ### Index of the response data
919 lsQCCounts,
920 ### List recording anything important to QC
921 strRandomFormula = NULL
922 ### Has the formula for random covariates
923 ){
924 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
925 functionContrast=function(x,adCur,dfData)
926 {
927 retList = list()
928 ret = wilcox.test(as.formula(paste("adCur",x,sep=" ~ ")), data=dfData, na.action=c_strNA_Action)
929 #Returning NA for the coef in a named vector
930 vdCoef = c()
931 vdCoef[[x]]=ret$statistic
932 retList[[1]]=list(p.value=ret$p.value,SD=sd(dfData[[x]]),name=x,coef=vdCoef)
933 return(retList)
934 }, lsQCCounts))
935 ### List of contrast information, pvalue, contrast and std per univariate test
936 }
937
938 # Tested
939 # Kruskal.Wallis (Nonparameteric anova)
940 # NOTE: Ignores the idea of random and fixed covariates
941 funcKruskalWallis <- function(
942 ### Perform multiple univariate comparisons performing Kruskal wallis rank sum tests on discontuous data with more than 2 levels
943 strFormula,
944 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
945 frmeTmp,
946 ### Data on which to perform analysis
947 iTaxon,
948 ### Index of the response data
949 lsQCCounts,
950 ### List recording anything important to QC
951 strRandomFormula = NULL
952 ### Has the formula for random covariates
953 ){
954 return(funcMakeContrasts(strFormula=strFormula, strRandomFormula=strRandomFormula, frmeTmp=frmeTmp, iTaxon=iTaxon,
955 functionContrast=function(x,adCur,dfData)
956 {
957 retList = list()
958 lmodKW = kruskal(adCur,dfData[[x]],group=FALSE,p.adj="holm")
959
960 asLevels = levels(dfData[[x]])
961 # The names of the generated comparisons, sometimes the control is first sometimes it is not so
962 # We will just check which is in the names and use that
963 asComparisons = row.names(lmodKW$comparisons)
964 #Get the comparison with the control
965 for(sLevel in asLevels[2:length(asLevels)])
966 {
967 sComparison = intersect(c(paste(asLevels[1],sLevel,sep=" - "),paste(sLevel,asLevels[1],sep=" - ")),asComparisons)
968 #Returning NA for the coef in a named vector
969 vdCoef = c()
970 vdCoef[[paste(x,sLevel,sep="")]]=lmodKW$comparisons[sComparison,"Difference"]
971 # vdCoef[[paste(x,sLevel,sep="")]]=NA
972 retList[[length(retList)+1]]=list(p.value=lmodKW$comparisons[sComparison,"p.value"],SD=1.0,name=paste(x,sLevel,sep=""),coef=vdCoef)
973 }
974 return(retList)
975 }, lsQCCounts))
976 ### List of contrast information, pvalue, contrast and std per univariate test
977 }
978
979 # Tested
980 # NOTE: Ignores the idea of random and fixed covariates
981 funcDoUnivariate <- function(
982 ### Perform multiple univariate comparisons producing spearman correlations for association
983 strFormula,
984 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
985 frmeTmp,
986 ### Data on which to perform analysis
987 iTaxon,
988 ### Index of the response data
989 lsHistory,
990 ### List recording p-values, association information, and QC counts
991 strRandomFormula = NULL,
992 ### Has the formula for random covariates
993 fZeroInflate = FALSE
994 ){
995 if(fZeroInflate)
996 {
997 throw("There are no zero-inflated univariate models to perform your analysis.")
998 }
999
1000 # Get covariates
1001 astrCovariates = unique(c(funcFormulaStrToList(strFormula),funcFormulaStrToList(strRandomFormula)))
1002
1003 # For each covariate
1004 for(sCovariate in astrCovariates)
1005 {
1006 ## Check to see if it is discrete
1007 axData = frmeTmp[[sCovariate]]
1008 lsRet = NA
1009 if(is.factor(axData) || is.logical(axData))
1010 {
1011 ## If discrete check how many levels
1012 lsDataLevels = levels(axData)
1013 ## If 2 levels do wilcoxon test
1014 if(length(lsDataLevels) < 3)
1015 {
1016 lsRet = funcWilcoxon(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
1017 } else {
1018 ## If 3 or more levels do kruskal wallis test
1019 lsRet = funcKruskalWallis(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
1020 }
1021 } else {
1022 ## If not discrete do spearman test (list(adP=adP, lsSig=lsSig, lsQCCounts=lsQCCounts))
1023 lsRet = funcSpearman(strFormula=paste("adCur",sCovariate,sep=" ~ "), frmeTmp=frmeTmp, iTaxon=iTaxon, lsQCCounts=lsHistory$lsQCCounts)
1024 }
1025 lsHistory[["adP"]] = c(lsHistory[["adP"]], lsRet[["adP"]])
1026 lsHistory[["lsSig"]] = c(lsHistory[["lsSig"]], lsRet[["lsSig"]])
1027 lsHistory[["lsQCCounts"]] = lsRet[["lsQCCounts"]]
1028 }
1029 return(lsHistory)
1030 }
1031
1032 ### Multivariate
1033
1034 # Tested
1035 funcLM <- function(
1036 ### Perform vanilla linear regression
1037 strFormula,
1038 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
1039 frmeTmp,
1040 ### Data on which to perform analysis
1041 iTaxon,
1042 ### Index of the response data
1043 lsHistory,
1044 ### List recording p-values, association information, and QC counts
1045 strRandomFormula = NULL,
1046 ### Has the formula for random covariates
1047 fZeroInflated = FALSE
1048 ### Turns on the zero inflated model
1049 ){
1050 adCur = frmeTmp[,iTaxon]
1051 if(fZeroInflated)
1052 {
1053 return(try(gamlss(formula=as.formula(strFormula), family=BEZI, data=frmeTmp))) # gamlss
1054 } else {
1055 if(!is.null(strRandomFormula))
1056 {
1057 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=gaussian(link="identity"), data=frmeTmp)))
1058 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
1059 #return(try( lmer(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
1060 } else {
1061 return(try( lm(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action) ))
1062 }
1063 }
1064 ### lmod result object from lm
1065 }
1066
1067 # Tested
1068 funcBinomialMult <- function(
1069 ### Perform linear regression with negative binomial link
1070 strFormula,
1071 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
1072 frmeTmp,
1073 ### Data on which to perform analysis
1074 iTaxon,
1075 ### Index of the response data
1076 lsHistory,
1077 ### List recording p-values, association information, and QC counts
1078 strRandomFormula = NULL,
1079 ### Has the formula for random covariates
1080 fZeroInflated = FALSE
1081 ### Turns on the zero inflated model
1082 ){
1083 adCur = frmeTmp[,iTaxon]
1084 if(fZeroInflated)
1085 {
1086 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="negbin"))) # pscl
1087 # return(try(gamlss(as.formula(strFormula), family=ZINBI, data=frmeTmp))) # pscl
1088 } else {
1089 if(!is.null(strRandomFormula))
1090 {
1091 throw("This analysis flow is not completely developed, please choose an option other than negative bionomial with random covariates")
1092 #TODO need to estimate the theta
1093 #return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family=negative.binomial(theta = 2, link=log), data=frmeTmp)))
1094 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
1095 } else {
1096 return(try( glm.nb(as.formula(strFormula), data=frmeTmp, na.action=c_strNA_Action )))
1097 }
1098 }
1099 ### lmod result object from lm
1100 }
1101
1102 # Tested
1103 funcQuasiMult <- function(
1104 ### Perform linear regression with quasi-poisson link
1105 strFormula,
1106 ### lm style string defining reponse and predictors, for mixed effects models this holds the fixed variables
1107 frmeTmp,
1108 ### Data on which to perform analysis
1109 iTaxon,
1110 ### Index of the response data
1111 lsHistory,
1112 ### List recording p-values, association information, and QC counts
1113 strRandomFormula = NULL,
1114 ### Has the formula for random covariates
1115 fZeroInflated = FALSE
1116 ### Turns on a zero infalted model
1117 ){
1118 adCur = frmeTmp[,iTaxon]
1119 if(fZeroInflated)
1120 {
1121 # return(try(gamlss(formula=as.formula(strFormula), family=ZIP, data=frmeTmp))) # gamlss
1122 return(try(zeroinfl(as.formula(strFormula), data=frmeTmp, dist="poisson"))) # pscl
1123 } else {
1124 #Check to see if | is in the model, if so use a lmm otherwise the standard glm is ok
1125 if(!is.null(strRandomFormula))
1126 {
1127 return(try(glmmPQL(fixed=as.formula(strFormula), random=as.formula(strRandomFormula), family= quasipoisson, data=frmeTmp)))
1128 #lme4 package but does not have pvalues for the fixed variables (have to use a mcmcsamp/pvals.fnc function which are currently disabled)
1129 #return(try ( glmer(as.formula(strFormula), data=frmeTmp, family=quasipoisson, na.action=c_strNA_Action) ))
1130 } else {
1131 return(try( glm(as.formula(strFormula), family=quasipoisson, data=frmeTmp, na.action=c_strNA_Action) ))
1132 }
1133 }
1134 ### lmod result object from lm
1135 }
1136
1137 ### Transformations
1138 # Tested
1139 funcArcsinSqrt <- function(
1140 # Transform data with arcsin sqrt transformation
1141 aData
1142 ### The data on which to perform the transformation
1143 ){
1144 return(asin(sqrt(aData)))
1145 ### Transformed data
1146 }
1147
1148 funcSquareSin <- function(
1149 # Transform data with square sin transformation
1150 # Opposite of the funcArcsinSqrt
1151 aData
1152 ### The data on which to perform the transformation
1153 ){
1154 return(sin(aData)^2)
1155 ### Transformed data
1156 }
1157
1158 # Tested
1159 funcNoTransform <-function(
1160 ### Pass data without transform
1161 aData
1162 ### The data on which to perform the transformation
1163 ### Only given here to preserve the pattern, not used.
1164 ){
1165 return(aData)
1166 ### Transformed data
1167 }
1168
1169 funcGetAnalysisMethods <- function(
1170 ### Returns the appropriate functions for regularization, analysis, data transformation, and analysis object inspection.
1171 ### This allows modular customization per analysis step.
1172 ### To add a new method insert an entry in the switch for either the selection, transform, or method
1173 ### Insert them by using the pattern optparse_keyword_without_quotes = function_in_AnalysisModules
1174 ### Order in the return listy is currently set and expected to be selection, transforms/links, analysis method
1175 ### none returns null
1176 sModelSelectionKey,
1177 ### Keyword defining the method of model selection
1178 sTransformKey,
1179 ### Keyword defining the method of data transformation
1180 sMethodKey,
1181 ### Keyword defining the method of analysis
1182 fZeroInflated = FALSE
1183 ### Indicates if using zero inflated models
1184 ){
1185 lRetMethods = list()
1186 #Insert selection methods here
1187 lRetMethods[[c_iSelection]] = switch(sModelSelectionKey,
1188 boost = funcBoostModel,
1189 penalized = funcPenalizedModel,
1190 forward = funcForwardModel,
1191 backward = funcBackwardsModel,
1192 none = NA)
1193
1194 #Insert transforms
1195 lRetMethods[[c_iTransform]] = switch(sTransformKey,
1196 asinsqrt = funcArcsinSqrt,
1197 none = funcNoTransform)
1198
1199 #Insert untransform
1200 lRetMethods[[c_iUnTransform]] = switch(sTransformKey,
1201 asinsqrt = funcNoTransform,
1202 none = funcNoTransform)
1203
1204 #Insert analysis
1205 lRetMethods[[c_iAnalysis]] = switch(sMethodKey,
1206 neg_binomial = funcBinomialMult,
1207 quasi = funcQuasiMult,
1208 univariate = funcDoUnivariate,
1209 lm = funcLM,
1210 none = NA)
1211
1212 # If a univariate method is used it is required to set this to true
1213 # For correct handling.
1214 lRetMethods[[c_iIsUnivariate]]=sMethodKey=="univariate"
1215
1216 #Insert method to get results
1217 if(fZeroInflated)
1218 {
1219 lRetMethods[[c_iResults]] = switch(sMethodKey,
1220 neg_binomial = funcGetZeroInflatedResults,
1221 quasi = funcGetZeroInflatedResults,
1222 univariate = funcGetUnivariateResults,
1223 lm = funcGetZeroInflatedResults,
1224 none = NA)
1225 } else {
1226 lRetMethods[[c_iResults]] = switch(sMethodKey,
1227 neg_binomial = funcGetLMResults,
1228 quasi = funcGetLMResults,
1229 univariate = funcGetUnivariateResults,
1230 lm = funcGetLMResults,
1231 none = NA)
1232 }
1233
1234 return(lRetMethods)
1235 ### Returns a list of functions to be passed for regularization, data transformation, analysis,
1236 ### and custom analysis results introspection functions to pull from return objects data of interest
1237 }