Mercurial > repos > george-weingart > maaslin
annotate maaslin-4450aa4ecc84/src/lib/BoostGLM.R @ 1:a87d5a5f2776
Uploaded the version running on the prod server
author | george-weingart |
---|---|
date | Sun, 08 Feb 2015 23:08:38 -0500 |
parents | |
children |
rev | line source |
---|---|
1
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
1 ##################################################################################### |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
2 #Copyright (C) <2012> |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
3 # |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
5 #this software and associated documentation files (the "Software"), to deal in the |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
6 #Software without restriction, including without limitation the rights to use, copy, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
8 #and to permit persons to whom the Software is furnished to do so, subject to |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
9 #the following conditions: |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
10 # |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
11 #The above copyright notice and this permission notice shall be included in all copies |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
12 #or substantial portions of the Software. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
13 # |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
20 # |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models), |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
22 # authored by the Huttenhower lab at the Harvard School of Public Health |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu). |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
24 ##################################################################################### |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
25 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
26 inlinedocs <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu> |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
28 ##description<< Manages the quality control of data and the performance of analysis (univariate or multivariate), regularization, and data (response) transformation. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
29 ) { return( pArgs ) } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
30 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
31 ### Load libraries quietly |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
32 suppressMessages(library( gam, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
33 suppressMessages(library( gbm, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
34 suppressMessages(library( logging, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
35 suppressMessages(library( outliers, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
36 suppressMessages(library( robustbase, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
37 suppressMessages(library( pscl, warn.conflicts=FALSE, quietly=TRUE, verbose=FALSE)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
38 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
39 ### Get constants |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
40 #source(file.path("input","maaslin","src","Constants.R")) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
41 #source("Constants.R") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
42 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
43 ## Get logger |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
44 c_logrMaaslin <- getLogger( "maaslin" ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
45 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
46 funcDoGrubbs <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
47 ### Use the Grubbs Test to identify outliers |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
48 iData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
49 ### Column index in the data frame to test |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
50 frmeData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
51 ### The data frame holding the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
52 dPOutlier, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
53 ### P-value threshold to indicate an outlier is significant |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
54 lsQC |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
55 ### List holding the QC info of the cleaning step. Which indices are outliers is added. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
56 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
57 adData <- frmeData[,iData] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
58 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
59 # Original number of NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
60 viNAOrig = which(is.na(adData)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
61 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
62 while( TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
63 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
64 lsTest <- try( grubbs.test( adData ), silent = TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
65 if( ( class( lsTest ) == "try-error" ) || is.na( lsTest$p.value ) || ( lsTest$p.value > dPOutlier ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
66 {break} |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
67 viOutliers = outlier( adData, logical = TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
68 adData[viOutliers] <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
69 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
70 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
71 # Record removed data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
72 viNAAfter = which(is.na(adData)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
73 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
74 # If all were set to NA then ignore the filtering |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
75 if(length(adData)==length(viNAAfter)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
76 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
77 viNAAfter = viNAOrig |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
78 adData = frmeData[,iData] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
79 c_logrMaaslin$info( paste("Grubbs Test:: Identifed all data as outliers so was inactived for index=",iData," data=",paste(as.vector(frmeData[,iData]),collapse=","), "number zeros=", length(which(frmeData[,iData]==0)), sep = " " )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
80 } else if(mean(adData, na.rm=TRUE) == 0) { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
81 viNAAfter = viNAOrig |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
82 adData = frmeData[,iData] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
83 c_logrMaaslin$info( paste("Grubbs Test::Removed all values but 0, ignored. Index=",iData,".",sep=" " ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
84 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
85 # Document removal |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
86 if( sum( is.na( adData ) ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
87 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
88 c_logrMaaslin$info( "Grubbs Test::Removing %d outliers from %s", sum( is.na( adData ) ), colnames(frmeData)[iData] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
89 c_logrMaaslin$info( format( rownames( frmeData )[is.na( adData )] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
90 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
91 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
92 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
93 return(list(data=adData,outliers=length(viNAAfter)-length(viNAOrig),indices=setdiff(viNAAfter,viNAOrig))) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
94 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
95 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
96 funcDoFenceTest <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
97 ### Use a threshold based on the quartiles of the data to identify outliers |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
98 iData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
99 ### Column index in the data frame to test |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
100 frmeData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
101 ### The data frame holding the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
102 dFence |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
103 ### The fence outside the first and third quartiles to use as a threshold for cutt off. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
104 ### This many times the interquartile range +/- to the 3rd/1st quartiles |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
105 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
106 # Establish fence |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
107 adData <- frmeData[,iData] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
108 adQ <- quantile( adData, c(0.25, 0.5, 0.75), na.rm = TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
109 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
110 dIQR <- adQ[3] - adQ[1] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
111 if(!dIQR) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
112 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
113 dIQR = sd(adData,na.rm = TRUE) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
114 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
115 dUF <- adQ[3] + ( dFence * dIQR ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
116 dLF <- adQ[1] - ( dFence * dIQR ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
117 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
118 # Record indices of values outside of fence to remove and remove. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
119 aiRemove <- c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
120 for( j in 1:length( adData ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
121 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
122 d <- adData[j] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
123 if( !is.na( d ) && ( ( d < dLF ) || ( d > dUF ) ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
124 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
125 aiRemove <- c(aiRemove, j) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
126 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
127 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
128 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
129 if(length(aiRemove)==length(adData)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
130 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
131 aiRemove = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
132 c_logrMaaslin$info( "OutliersByFence:: Identified all data as outlier so was inactivated for index=", iData,"data=", paste(as.vector(frmeData[,iData]),collapse=","), "number zeros=", length(which(frmeData[,iData]==0)), sep=" " ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
133 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
134 adData[aiRemove] <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
135 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
136 # Document to screen |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
137 if( length( aiRemove ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
138 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
139 c_logrMaaslin$info( "OutliersByFence::Removing %d outliers from %s", length( aiRemove ), colnames(frmeData)[iData] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
140 c_logrMaaslin$info( format( rownames( frmeData )[aiRemove] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
141 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
142 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
143 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
144 return(list(data=adData,outliers=length(aiRemove),indices=aiRemove)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
145 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
146 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
147 funcZerosAreUneven = function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
148 ### |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
149 vdRawData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
150 ### Raw data to be checked during transformation |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
151 funcTransform, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
152 ### Data transform to perform |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
153 vsStratificationFeatures, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
154 ### Groupings to check for unevenness |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
155 dfData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
156 ### Data frame holding the features |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
157 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
158 # Return indicator of unevenness |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
159 fUneven = FALSE |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
160 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
161 # Transform the data to compare |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
162 vdTransformed = funcTransform( vdRawData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
163 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
164 # Go through each stratification of data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
165 for( sStratification in vsStratificationFeatures ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
166 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
167 # Current stratification |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
168 vFactorStrats = dfData[[ sStratification ]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
169 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
170 # If the metadata is not a factor then skip |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
171 # Only binned data can be evaluated this way. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
172 if( !is.factor( vFactorStrats )){ next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
173 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
174 viZerosCountsRaw = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
175 for( sLevel in levels( vFactorStrats ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
176 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
177 vdTest = vdRawData[ which( vFactorStrats == sLevel ) ] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
178 viZerosCountsRaw = c( viZerosCountsRaw, length(which(vdTest == 0))) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
179 vdTest = vdTransformed[ which( vFactorStrats == sLevel ) ] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
180 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
181 dExpectation = 1 / length( viZerosCountsRaw ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
182 dMin = dExpectation / 2 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
183 dMax = dExpectation + dMin |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
184 viZerosCountsRaw = viZerosCountsRaw / sum( viZerosCountsRaw ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
185 if( ( length( which( viZerosCountsRaw <= dMin ) ) > 0 ) || ( length( which( viZerosCountsRaw >= dMax ) ) > 0 ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
186 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
187 return( TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
188 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
189 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
190 return( fUneven ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
191 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
192 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
193 funcTransformIncreasesOutliers = function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
194 ### Checks if a data transform increases outliers in a distribution |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
195 vdRawData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
196 ### Raw data to check for outlier zeros |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
197 funcTransform |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
198 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
199 iUnOutliers = length( boxplot( vdRawData, plot = FALSE )$out ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
200 iTransformedOutliers = length( boxplot( funcTransform( vdRawData ), plot = FALSE )$out ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
201 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
202 return( iUnOutliers <= iTransformedOutliers ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
203 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
204 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
205 funcClean <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
206 ### Properly clean / get data ready for analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
207 ### Includes custom analysis from the custom R script if it exists |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
208 frmeData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
209 ### Data frame, input data to be acted on |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
210 funcDataProcess, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
211 ### Custom script that can be given to perform specialized processing before MaAsLin does. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
212 aiMetadata, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
213 ### Indices of columns in frmeData which are metadata for analysis. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
214 aiData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
215 ### Indices of column in frmeData which are (abundance) data for analysis. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
216 lsQCCounts, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
217 ### List that will hold the quality control information which is written in the output directory. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
218 astrNoImpute = c(), |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
219 ### An array of column names of frmeData not to impute. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
220 dMinSamp, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
221 ### Minimum number of samples |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
222 dMinAbd, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
223 # Minimum sample abundance |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
224 dFence, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
225 ### How many quartile ranges defines the fence to define outliers. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
226 funcTransform, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
227 ### The data transformation function or a dummy function that does not affect the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
228 dPOutlier = 0.05 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
229 ### The significance threshold for the grubbs test to identify an outlier. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
230 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
231 # Call the custom script and set current data and indicies to the processes data and indicies. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
232 c_logrMaaslin$debug( "Start Clean") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
233 if( !is.null( funcDataProcess ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
234 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
235 c_logrMaaslin$debug("Additional preprocess function attempted.") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
236 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
237 pTmp <- funcDataProcess( frmeData=frmeData, aiMetadata=aiMetadata, aiData=aiData) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
238 frmeData = pTmp$frmeData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
239 aiMetadata = pTmp$aiMetadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
240 aiData = pTmp$aiData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
241 lsQCCounts$lsQCCustom = pTmp$lsQCCounts |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
242 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
243 # Set data indicies after custom QC process. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
244 lsQCCounts$aiAfterPreprocess = aiData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
245 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
246 # Remove missing data, remove any sample that has less than dMinSamp * the number of data or low abundance |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
247 aiRemove = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
248 aiRemoveLowAbundance = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
249 for( iCol in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
250 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
251 adCol = frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
252 adCol[!is.finite( adCol )] <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
253 if( ( sum( !is.na( adCol ) ) < ( dMinSamp * length( adCol ) ) ) || |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
254 ( length( unique( na.omit( adCol ) ) ) < 2 ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
255 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
256 aiRemove = c(aiRemove, iCol) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
257 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
258 if( sum(adCol > dMinAbd, na.rm=TRUE ) < (dMinSamp * length( adCol))) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
259 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
260 aiRemoveLowAbundance = c(aiRemoveLowAbundance, iCol) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
261 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
262 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
263 # Remove and document |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
264 aiData = setdiff( aiData, aiRemove ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
265 aiData = setdiff( aiData, aiRemoveLowAbundance ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
266 lsQCCounts$iMissingData = aiRemove |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
267 lsQCCounts$iLowAbundanceData = aiRemoveLowAbundance |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
268 if(length(aiRemove)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
269 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
270 c_logrMaaslin$info( "Removing the following for data lower bound.") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
271 c_logrMaaslin$info( format( colnames( frmeData )[aiRemove] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
272 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
273 if(length(aiRemoveLowAbundance)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
274 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
275 c_logrMaaslin$info( "Removing the following for too many low abundance bugs.") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
276 c_logrMaaslin$info( format( colnames( frmeData )[aiRemoveLowAbundance] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
277 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
278 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
279 #Transform data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
280 iTransformed = 0 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
281 viNotTransformedData = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
282 for(aiDatum in aiData) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
283 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
284 adValues = frmeData[,aiDatum] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
285 # if( ! funcTransformIncreasesOutliers( adValues, funcTransform ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
286 # { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
287 frmeData[,aiDatum] = funcTransform( adValues ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
288 # iTransformed = iTransformed + 1 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
289 # } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
290 # viNotTransformedData = c( viNotTransformedData, aiDatum ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
291 # } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
292 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
293 c_logrMaaslin$info(paste("Number of features transformed = ",iTransformed)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
294 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
295 # Metadata: Properly factorize all logical data and integer and number data with less than iNonFactorLevelThreshold |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
296 # Also record which are numeric metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
297 aiNumericMetadata = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
298 for( i in aiMetadata ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
299 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
300 if( ( class( frmeData[,i] ) %in% c("integer", "numeric", "logical") ) && |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
301 ( length( unique( frmeData[,i] ) ) < c_iNonFactorLevelThreshold ) ) { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
302 c_logrMaaslin$debug(paste("Changing metadatum from numeric/integer/logical to factor",colnames(frmeData)[i],sep="=")) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
303 frmeData[,i] = factor( frmeData[,i] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
304 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
305 if( class( frmeData[,i] ) %in% c("integer","numeric") ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
306 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
307 aiNumericMetadata = c(aiNumericMetadata,i) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
308 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
309 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
310 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
311 # Remove outliers |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
312 # If the dFence Value is set use the method of defining the outllier as |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
313 # dFence * the interquartile range + or - the 3rd and first quartile respectively. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
314 # If not the gibbs test is used. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
315 lsQCCounts$aiDataSumOutlierPerDatum = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
316 lsQCCounts$aiMetadataSumOutlierPerDatum = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
317 lsQCCounts$liOutliers = list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
318 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
319 if( dFence > 0.0 ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
320 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
321 # For data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
322 for( iData in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
323 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
324 lOutlierInfo <- funcDoFenceTest(iData=iData,frmeData=frmeData,dFence=dFence) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
325 frmeData[,iData] <- lOutlierInfo[["data"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
326 lsQCCounts$aiDataSumOutlierPerDatum <- c(lsQCCounts$aiDataSumOutlierPerDatum,lOutlierInfo[["outliers"]]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
327 if(lOutlierInfo[["outliers"]]>0) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
328 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
329 lsQCCounts$liOutliers[[paste(iData,sep="")]] <- lOutlierInfo[["indices"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
330 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
331 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
332 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
333 # Remove outlier non-factor metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
334 for( iMetadata in aiNumericMetadata ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
335 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
336 lOutlierInfo <- funcDoFenceTest(iData=iMetadata,frmeData=frmeData,dFence=dFence) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
337 frmeData[,iMetadata] <- lOutlierInfo[["data"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
338 lsQCCounts$aiMetadataSumOutlierPerDatum <- c(lsQCCounts$aiMetadataSumOutlierPerDatum,lOutlierInfo[["outliers"]]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
339 if(lOutlierInfo[["outliers"]]>0) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
340 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
341 lsQCCounts$liOutliers[[paste(iMetadata,sep="")]] <- lOutlierInfo[["indices"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
342 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
343 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
344 #Do not use the fence, use the Grubbs test |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
345 } else if(dPOutlier!=0.0){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
346 # For data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
347 for( iData in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
348 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
349 lOutlierInfo <- funcDoGrubbs(iData=iData,frmeData=frmeData,dPOutlier=dPOutlier) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
350 frmeData[,iData] <- lOutlierInfo[["data"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
351 lsQCCounts$aiDataSumOutlierPerDatum <- c(lsQCCounts$aiDataSumOutlierPerDatum,lOutlierInfo[["outliers"]]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
352 if(lOutlierInfo[["outliers"]]>0) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
353 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
354 lsQCCounts$liOutliers[[paste(iData,sep="")]] <- lOutlierInfo[["indices"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
355 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
356 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
357 for( iMetadata in aiNumericMetadata ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
358 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
359 lOutlierInfo <- funcDoGrubbs(iData=iMetadata,frmeData=frmeData,dPOutlier=dPOutlier) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
360 frmeData[,iMetadata] <- lOutlierInfo[["data"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
361 lsQCCounts$aiMetadataSumOutlierPerDatum <- c(lsQCCounts$aiMetadataSumOutlierPerDatum,lOutlierInfo[["outliers"]]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
362 if(lOutlierInfo[["outliers"]]>0) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
363 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
364 lsQCCounts$liOutliers[[paste(iMetadata,sep="")]] <- lOutlierInfo[["indices"]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
365 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
366 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
367 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
368 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
369 # Metadata: Remove missing data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
370 # This is defined as if there is only one non-NA value or |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
371 # if the number of NOT NA data is less than a percentage of the data defined by dMinSamp |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
372 aiRemove = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
373 for( iCol in c(aiMetadata) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
374 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
375 adCol = frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
376 if( ( sum( !is.na( adCol ) ) < ( dMinSamp * length( adCol ) ) ) || |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
377 ( length( unique( na.omit( adCol ) ) ) < 2 ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
378 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
379 aiRemove = c(aiRemove, iCol) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
380 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
381 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
382 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
383 # Remove metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
384 aiMetadata = setdiff( aiMetadata, aiRemove ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
385 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
386 # Update the data which was removed. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
387 lsQCCounts$iMissingMetadata = aiRemove |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
388 if(length(aiRemove)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
389 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
390 c_logrMaaslin$info("Removing the following metadata for too much missing data or only one data value outside of NA.") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
391 c_logrMaaslin$info(format(colnames( frmeData )[aiRemove])) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
392 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
393 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
394 # Keep track of factor levels in a list for later use |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
395 lslsFactors <- list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
396 for( iCol in c(aiMetadata) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
397 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
398 aCol <- frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
399 if( class( aCol ) == "factor" ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
400 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
401 lslsFactors[[length( lslsFactors ) + 1]] <- list(iCol, levels( aCol )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
402 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
403 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
404 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
405 # Replace missing data values by the mean of the data column. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
406 # Remove samples that were all NA from the cleaning and so could not be imputed. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
407 aiRemoveData = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
408 for( iCol in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
409 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
410 adCol <- frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
411 adCol[is.infinite( adCol )] <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
412 adCol[is.na( adCol )] <- mean( adCol[which(adCol>0)], na.rm = TRUE ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
413 frmeData[,iCol] <- adCol |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
414 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
415 if(length(which(is.na(frmeData[,iCol]))) == length(frmeData[,iCol])) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
416 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
417 c_logrMaaslin$info( paste("Removing data", iCol, "for being all NA after QC")) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
418 aiRemoveData = c(aiRemoveData,iCol) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
419 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
420 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
421 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
422 # Remove and document |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
423 aiData = setdiff( aiData, aiRemoveData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
424 lsQCCounts$iMissingData = c(lsQCCounts$iMissingData,aiRemoveData) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
425 if(length(aiRemoveData)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
426 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
427 c_logrMaaslin$info( "Removing the following for having only NAs after cleaning (maybe due to only having NA after outlier testing).") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
428 c_logrMaaslin$info( format( colnames( frmeData )[aiRemoveData] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
429 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
430 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
431 #Use na.gam.replace to manage NA metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
432 aiTmp <- setdiff( aiMetadata, which( colnames( frmeData ) %in% astrNoImpute ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
433 # Keep tack of NAs so the may not be plotted later. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
434 liNaIndices = list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
435 lsNames = names(frmeData) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
436 for( i in aiTmp) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
437 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
438 liNaIndices[[lsNames[i]]] = which(is.na(frmeData[,i])) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
439 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
440 frmeData[,aiTmp] <- na.gam.replace( frmeData[,aiTmp] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
441 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
442 #If NA is a value in factor data, set the NA as a level. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
443 for( lsFactor in lslsFactors ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
444 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
445 iCol <- lsFactor[[1]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
446 aCol <- frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
447 if( "NA" %in% levels( aCol ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
448 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
449 if(! lsNames[iCol] %in% astrNoImpute) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
450 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
451 liNaIndices[[lsNames[iCol]]] = union(which(is.na(frmeData[,iCol])),which(frmeData[,iCol]=="NA")) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
452 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
453 frmeData[,iCol] <- factor( aCol, levels = c(lsFactor[[2]], "NA") ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
454 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
455 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
456 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
457 # Make sure there is a minimum number of non-0 measurements |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
458 aiRemove = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
459 for( iCol in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
460 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
461 adCol = frmeData[,iCol] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
462 if(length( which(adCol!=0)) < ( dMinSamp * length( adCol ) ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
463 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
464 aiRemove = c(aiRemove, iCol) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
465 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
466 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
467 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
468 # Remove and document |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
469 aiData = setdiff( aiData, aiRemove) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
470 lsQCCounts$iZeroDominantData = aiRemove |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
471 if(length(aiRemove)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
472 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
473 c_logrMaaslin$info( "Removing the following for having not enough non-zero measurments for analysis.") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
474 c_logrMaaslin$info( format( colnames( frmeData )[aiRemove] )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
475 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
476 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
477 c_logrMaaslin$debug("End FuncClean") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
478 return( list(frmeData = frmeData, aiMetadata = aiMetadata, aiData = aiData, lsQCCounts = lsQCCounts, liNaIndices=liNaIndices, viNotTransformedData = viNotTransformedData) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
479 ### Return list of |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
480 ### frmeData: The Data after cleaning |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
481 ### aiMetadata: The indices of the metadata still being used after filtering |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
482 ### aiData: The indices of the data still being used after filtering |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
483 ### lsQCCOunts: QC info |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
484 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
485 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
486 funcBugs <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
487 ### Run analysis of all data features against all metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
488 frmeData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
489 ### Cleaned data including metadata, and data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
490 lsData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
491 ### This list is a general container for data as the analysis occurs, think about it as a cache for the analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
492 aiMetadata, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
493 ### Indices of metadata used in analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
494 aiData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
495 ### Indices of response data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
496 aiNotTransformedData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
497 ### Indicies of the data not transformed |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
498 strData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
499 ### Log file name |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
500 dSig, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
501 ### Significance threshold for the qvalue cut off |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
502 fInvert=FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
503 ### Invert images to have a black background |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
504 strDirOut = NA, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
505 ### Output project directory |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
506 funcReg=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
507 ### Function for regularization |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
508 funcTransform=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
509 ### Function used to transform the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
510 funcUnTransform=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
511 ### If a transform is used the opposite of that transfor must be used on the residuals in the partial residual plots |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
512 lsNonPenalizedPredictors=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
513 ### These predictors will not be penalized in the feature (model) selection step |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
514 funcAnalysis=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
515 ### Function to perform association analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
516 lsRandomCovariates=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
517 ### List of string names of metadata which will be treated as random covariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
518 funcGetResults=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
519 ### Function to unpack results from analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
520 fDoRPlot=TRUE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
521 ### Plot residuals |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
522 fOmitLogFile = FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
523 ### Stops the creation of the log file |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
524 fAllvAll=FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
525 ### Flag to turn on all against all comparisons |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
526 liNaIndices = list(), |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
527 ### Indicies of imputed NA data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
528 lxParameters=list(), |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
529 ### List holds parameters for different variable selection techniques |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
530 strTestingCorrection = "BH", |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
531 ### Correction for multiple testing |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
532 fIsUnivariate = FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
533 ### Indicates if the function is univariate |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
534 fZeroInflated = FALSE |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
535 ### Indicates to use a zero infalted model |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
536 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
537 c_logrMaaslin$debug("Start funcBugs") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
538 # If no output directory is indicated |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
539 # Then make it the current directory |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
540 if( is.na( strDirOut ) || is.null( strDirOut ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
541 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
542 if( !is.na( strData ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
543 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
544 strDirOut <- paste( dirname( strData ), "/", sep = "" ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
545 } else { strDirOut = "" } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
546 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
547 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
548 # Make th log file and output file names based on the log file name |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
549 strLog = NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
550 strBase = "" |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
551 if(!is.na(strData)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
552 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
553 strBaseOut <- paste( strDirOut, sub( "\\.([^.]+)$", "", basename(strData) ), sep = "/" ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
554 strLog <- paste( strBaseOut,c_sLogFileSuffix, ".txt", sep = "" ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
555 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
556 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
557 # If indicated, stop the creation of the log file |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
558 # Otherwise delete the log file if it exists and log |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
559 if(fOmitLogFile){ strLog = NA } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
560 if(!is.na(strLog)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
561 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
562 c_logrMaaslin$info( "Outputting to: %s", strLog ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
563 unlink( strLog ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
564 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
565 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
566 # Will contain pvalues |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
567 adP = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
568 adPAdj = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
569 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
570 # List of lists with association information |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
571 lsSig <- list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
572 # Go through each data that was not previously removed and perform inference |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
573 for( iTaxon in aiData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
574 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
575 # Log to screen progress per 10 associations. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
576 # Can be thown off if iTaxon is missing a mod 10 value |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
577 # So the taxons may not be logged every 10 but not a big deal |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
578 if( !( iTaxon %% 10 ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
579 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
580 c_logrMaaslin$info( "Taxon %d/%d", iTaxon, max( aiData ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
581 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
582 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
583 # Call analysis method |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
584 lsOne <- funcBugHybrid( iTaxon=iTaxon, frmeData=frmeData, lsData=lsData, aiMetadata=aiMetadata, dSig=dSig, adP=adP, lsSig=lsSig, funcTransform=funcTransform, funcUnTransform=funcUnTransform, strLog=strLog, funcReg=funcReg, lsNonPenalizedPredictors=lsNonPenalizedPredictors, funcAnalysis=funcAnalysis, lsRandomCovariates=lsRandomCovariates, funcGetResult=funcGetResults, fAllvAll=fAllvAll, fIsUnivariate=fIsUnivariate, lxParameters=lxParameters, fZeroInflated=fZeroInflated, fIsTransformed= ! iTaxon %in% aiNotTransformedData ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
585 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
586 # If you get a NA (happens when the lmm gets all random covariates) move on |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
587 if( is.na( lsOne ) ){ next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
588 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
589 # The updating of the following happens in the inference method call in the funcBugHybrid call |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
590 # New pvalue array |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
591 adP <- lsOne$adP |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
592 # New lsSig contains data about significant feature v metadata comparisons |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
593 lsSig <- lsOne$lsSig |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
594 # New qc data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
595 lsData$lsQCCounts = lsOne$lsQCCounts |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
596 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
597 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
598 # Log the QC info |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
599 c_logrMaaslin$debug("lsData$lsQCCounts") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
600 c_logrMaaslin$debug(format(lsData$lsQCCounts)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
601 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
602 if( is.null( adP ) ) { return( NULL ) } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
603 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
604 # Perform bonferonni corrections on factor data (for levels), calculate the number of tests performed, and FDR adjust for multiple hypotheses |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
605 # Perform Bonferonni adjustment on factor data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
606 for( iADIndex in 1:length( adP ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
607 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
608 # Only perform on factor data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
609 if( is.factor( lsSig[[ iADIndex ]]$metadata ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
610 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
611 adPAdj = c( adPAdj, funcBonferonniCorrectFactorData( dPvalue = adP[ iADIndex ], vsFactors = lsSig[[ iADIndex ]]$metadata, fIgnoreNAs = length(liNaIndices)>0) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
612 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
613 adPAdj = c( adPAdj, adP[ iADIndex ] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
614 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
615 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
616 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
617 iTests = funcCalculateTestCounts(iDataCount = length(aiData), asMetadata = intersect( lsData$astrMetadata, colnames( frmeData )[aiMetadata] ), asForced = lsNonPenalizedPredictors, asRandom = lsRandomCovariates, fAllvAll = fAllvAll) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
618 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
619 #Get indices of sorted data after the factor correction but before the multiple hypothesis corrections. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
620 aiSig <- sort.list( adPAdj ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
621 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
622 # Perform FDR BH |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
623 adQ = p.adjust(adPAdj, method=strTestingCorrection, n=max(length(adPAdj), iTests)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
624 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
625 # Find all covariates that had significant associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
626 astrNames <- c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
627 for( i in 1:length( lsSig ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
628 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
629 astrNames <- c(astrNames, lsSig[[i]]$name) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
630 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
631 astrNames <- unique( astrNames ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
632 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
633 # Sets up named label return for global plotting |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
634 lsReturnTaxa <- list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
635 for( j in aiSig ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
636 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
637 if( adQ[j] > dSig ) { next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
638 strTaxon <- lsSig[[j]]$taxon |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
639 if(strTaxon %in% names(lsReturnTaxa)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
640 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
641 lsReturnTaxa[[strTaxon]] = min(lsReturnTaxa[[strTaxon]],adQ[j]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
642 } else { lsReturnTaxa[[strTaxon]] = adQ[j]} |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
643 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
644 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
645 # For each covariate with significant associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
646 # Write out a file with association information |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
647 for( strName in astrNames ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
648 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
649 strFileTXT <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
650 strFilePDF <- NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
651 for( j in aiSig ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
652 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
653 lsCur <- lsSig[[j]] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
654 strCur <- lsCur$name |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
655 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
656 if( strCur != strName ) { next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
657 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
658 strTaxon <- lsCur$taxon |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
659 adData <- lsCur$data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
660 astrFactors <- lsCur$factors |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
661 adCur <- lsCur$metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
662 adY <- adData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
663 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
664 if( is.na( strData ) ) { next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
665 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
666 ## If the text file output is not written to yet |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
667 ## make the file names, and delete any previous file output |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
668 if( is.na( strFileTXT ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
669 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
670 strFileTXT <- sprintf( "%s-%s.txt", strBaseOut, strName ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
671 unlink(strFileTXT) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
672 funcWrite( c("Variable", "Feature", "Value", "Coefficient", "N", "N not 0", "P-value", "Q-value"), strFileTXT ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
673 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
674 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
675 ## Write text output |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
676 funcWrite( c(strName, strTaxon, lsCur$orig, lsCur$value, length( adData ), sum( adData > 0 ), adP[j], adQ[j]), strFileTXT ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
677 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
678 ## If the significance meets the threshold |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
679 ## Write PDF file output |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
680 if( adQ[j] > dSig ) { next } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
681 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
682 # Do not make residuals plots if univariate is selected |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
683 strFilePDF = funcPDF( frmeTmp=frmeData, lsCur=lsCur, curPValue=adP[j], curQValue=adQ[j], strFilePDF=strFilePDF, strBaseOut=strBaseOut, strName=strName, funcUnTransform= funcUnTransform, fDoResidualPlot=fDoRPlot, fInvert=fInvert, liNaIndices=liNaIndices ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
684 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
685 if( dev.cur( ) != 1 ) { dev.off( ) } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
686 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
687 aiTmp <- aiData |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
688 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
689 logdebug("End funcBugs", c_logMaaslin) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
690 return(list(lsReturnBugs=lsReturnTaxa, lsQCCounts=lsData$lsQCCounts)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
691 ### List of data features successfully associated without error and quality control data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
692 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
693 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
694 #Lightly Tested |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
695 ### Performs analysis for 1 feature |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
696 ### iTaxon: integer Taxon index to be associated with data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
697 ### frmeData: Data frame The full data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
698 ### lsData: List of all associated data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
699 ### aiMetadata: Numeric vector of indices |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
700 ### dSig: Numeric significance threshold for q-value cut off |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
701 ### adP: List of pvalues from associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
702 ### lsSig: List which serves as a cache of data about significant associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
703 ### strLog: String file to log to |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
704 funcBugHybrid <- function( |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
705 ### Performs analysis for 1 feature |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
706 iTaxon, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
707 ### integer Taxon index to be associated with data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
708 frmeData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
709 ### Data frame, the full data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
710 lsData, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
711 ### List of all associated data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
712 aiMetadata, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
713 ### Numeric vector of indices |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
714 dSig, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
715 ### Numeric significance threshold for q-value cut off |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
716 adP, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
717 ### List of pvalues from associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
718 lsSig, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
719 ### List which serves as a cache of data about significant associations |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
720 funcTransform, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
721 ### The tranform used on the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
722 funcUnTransform, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
723 ### The reverse transform on the data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
724 strLog = NA, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
725 ### String, file to which to log |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
726 funcReg=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
727 ### Function to perform regularization |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
728 lsNonPenalizedPredictors=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
729 ### These predictors will not be penalized in the feature (model) selection step |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
730 funcAnalysis=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
731 ### Function to perform association analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
732 lsRandomCovariates=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
733 ### List of string names of metadata which will be treated as random covariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
734 funcGetResult=NULL, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
735 ### Function to unpack results from analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
736 fAllvAll=FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
737 ### Flag to turn on all against all comparisons |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
738 fIsUnivariate = FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
739 ### Indicates the analysis function is univariate |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
740 lxParameters=list(), |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
741 ### List holds parameters for different variable selection techniques |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
742 fZeroInflated = FALSE, |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
743 ### Indicates if to use a zero infalted model |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
744 fIsTransformed = TRUE |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
745 ### Indicates that the bug is transformed |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
746 ){ |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
747 #dTime00 <- proc.time()[3] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
748 #Get metadata column names |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
749 astrMetadata = intersect( lsData$astrMetadata, colnames( frmeData )[aiMetadata] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
750 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
751 #Get data measurements that are not NA |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
752 aiRows <- which( !is.na( frmeData[,iTaxon] ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
753 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
754 #Get the dataframe of non-na data measurements |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
755 frmeTmp <- frmeData[aiRows,] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
756 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
757 #Set the min boosting selection frequency to a default if not given |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
758 if( is.na( lxParameters$dFreq ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
759 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
760 lxParameters$dFreq <- 0.5 / length( c(astrMetadata) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
761 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
762 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
763 # Get the full data for the bug feature |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
764 adCur = frmeTmp[,iTaxon] |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
765 lxParameters$sBugName = names(frmeTmp[iTaxon]) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
766 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
767 # This can run multiple models so some of the results are held in lists and some are not |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
768 llmod = list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
769 liTaxon = list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
770 lastrTerms = list() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
771 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
772 # Build formula for simple mixed effects models |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
773 # Removes random covariates from variable selection |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
774 astrMetadata = setdiff(astrMetadata, lsRandomCovariates) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
775 strFormula <- paste( "adCur ~", paste( sprintf( "`%s`", astrMetadata ), collapse = " + " ), sep = " " ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
776 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
777 # Document the model |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
778 funcWrite( c("#taxon", colnames( frmeTmp )[iTaxon]), strLog ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
779 funcWrite( c("#metadata", astrMetadata), strLog ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
780 funcWrite( c("#samples", rownames( frmeTmp )), strLog ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
781 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
782 #Model terms |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
783 astrTerms <- c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
784 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
785 # Attempt feature (model) selection |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
786 if(!is.na(funcReg)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
787 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
788 #Count model selection method attempts |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
789 lsData$lsQCCounts$iBoosts = lsData$lsQCCounts$iBoosts + 1 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
790 #Perform model selection |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
791 astrTerms <- funcReg(strFormula=strFormula, frmeTmp=frmeTmp, adCur=adCur, lsParameters=lxParameters, lsForcedParameters=lsNonPenalizedPredictors, strLog=strLog) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
792 #If the feature selection function is set to None, set all terms of the model to all the metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
793 } else { astrTerms = astrMetadata } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
794 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
795 # Get look through the boosting results to get a model |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
796 # Holds the predictors in the predictors in the model that were selected by the boosting |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
797 if(is.null( astrTerms )){lsData$lsQCCounts$iBoostErrors = lsData$lsQCCounts$iBoostErrors + 1} |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
798 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
799 # Get the indices that are transformed |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
800 # Of those indices check for uneven metadata |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
801 # Untransform any of the metadata that failed |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
802 # Failed means true for uneven occurences of zeros |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
803 # if( fIsTransformed ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
804 # { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
805 # vdUnevenZeroCheck = funcUnTransform( frmeData[[ iTaxon ]] ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
806 # if( funcZerosAreUneven( vdRawData=vdUnevenZeroCheck, funcTransform=funcTransform, vsStratificationFeatures=astrTerms, dfData=frmeData ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
807 # { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
808 # frmeData[[ iTaxon ]] = vdUnevenZeroCheck |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
809 # c_logrMaaslin$debug( paste( "Taxon transformation reversed due to unevenness of zero distribution.", iTaxon ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
810 # } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
811 # } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
812 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
813 # Run association analysis if predictors exist and an analysis function is specified |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
814 # Run analysis |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
815 if(!is.na(funcAnalysis) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
816 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
817 #If there are selected and forced fixed covariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
818 if( length( astrTerms ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
819 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
820 #Count the association attempt |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
821 lsData$lsQCCounts$iLms = lsData$lsQCCounts$iLms + 1 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
822 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
823 #Make the lm formula |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
824 #Build formula for simple mixed effects models using random covariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
825 strRandomCovariatesFormula = NULL |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
826 #Random covariates are forced |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
827 if(length(lsRandomCovariates)>0) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
828 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
829 #Format for lme |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
830 #Needed for changes to not allowing random covariates through the boosting process |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
831 strRandomCovariatesFormula <- paste( "adCur ~ ", paste( sprintf( "1|`%s`", lsRandomCovariates), collapse = " + " )) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
832 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
833 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
834 #Set up a list of formula containing selected fixed variables changing and the forced fixed covariates constant |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
835 vstrFormula = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
836 #Set up suppressing forced covariates in a all v all scenario only |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
837 asSuppress = c() |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
838 #Enable all against all comparisons |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
839 if(fAllvAll && !fIsUnivariate) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
840 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
841 lsVaryingCovariates = setdiff(astrTerms,lsNonPenalizedPredictors) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
842 lsConstantCovariates = setdiff(lsNonPenalizedPredictors,lsRandomCovariates) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
843 strConstantFormula = paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
844 asSuppress = lsConstantCovariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
845 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
846 if(length(lsVaryingCovariates)==0L) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
847 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
848 vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", lsConstantCovariates ), collapse = " + " )) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
849 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
850 for( sVarCov in lsVaryingCovariates ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
851 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
852 strTempFormula = paste( "adCur ~ `", sVarCov,"`",sep="") |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
853 if(length(lsConstantCovariates)>0){ strTempFormula = paste(strTempFormula,strConstantFormula,sep=" + ") } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
854 vstrFormula <- c( vstrFormula, strTempFormula ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
855 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
856 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
857 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
858 #This is either the multivariate case formula for all covariates in an lm or fixed covariates in the lmm |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
859 vstrFormula <- c( paste( "adCur ~ ", paste( sprintf( "`%s`", astrTerms ), collapse = " + " )) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
860 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
861 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
862 #Run the association |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
863 for( strAnalysisFormula in vstrFormula ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
864 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
865 i = length(llmod)+1 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
866 llmod[[i]] = funcAnalysis(strFormula=strAnalysisFormula, frmeTmp=frmeTmp, iTaxon=iTaxon, lsHistory=list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts), strRandomFormula=strRandomCovariatesFormula, fZeroInflated=fZeroInflated) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
867 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
868 liTaxon[[i]] = iTaxon |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
869 lastrTerms[[i]] = funcFormulaStrToList(strAnalysisFormula) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
870 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
871 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
872 #If there are no selected or forced fixed covariates |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
873 lsData$lsQCCounts$iNoTerms = lsData$lsQCCounts$iNoTerms + 1 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
874 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
875 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
876 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
877 |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
878 #Call funcBugResults and return it's return |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
879 if(!is.na(funcGetResult)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
880 { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
881 #Format the results to a consistent expected result. |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
882 return( funcGetResult( llmod=llmod, frmeData=frmeData, liTaxon=liTaxon, dSig=dSig, adP=adP, lsSig=lsSig, strLog=strLog, lsQCCounts=lsData$lsQCCounts, lastrCols=lastrTerms, asSuppressCovariates=asSuppress ) ) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
883 } else { |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
884 return(list(adP=adP, lsSig=lsSig, lsQCCounts=lsData$lsQCCounts)) |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
885 } |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
886 ### List containing a list of pvalues, a list of significant data per association, and a list of QC data |
a87d5a5f2776
Uploaded the version running on the prod server
george-weingart
parents:
diff
changeset
|
887 } |