annotate maaslin-4450aa4ecc84/src/lib/MaaslinPlots.R @ 1:a87d5a5f2776

Uploaded the version running on the prod server
author george-weingart
date Sun, 08 Feb 2015 23:08:38 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
1 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
2 #Copyright (C) <2012>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
3 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
4 #Permission is hereby granted, free of charge, to any person obtaining a copy of
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
5 #this software and associated documentation files (the "Software"), to deal in the
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
6 #Software without restriction, including without limitation the rights to use, copy,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
7 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
8 #and to permit persons to whom the Software is furnished to do so, subject to
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
9 #the following conditions:
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
10 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
11 #The above copyright notice and this permission notice shall be included in all copies
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
12 #or substantial portions of the Software.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
13 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
14 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
15 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
16 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
17 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
18 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
19 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
20 #
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
21 # This file is a component of the MaAsLin (Multivariate Associations Using Linear Models),
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
22 # authored by the Huttenhower lab at the Harvard School of Public Health
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
23 # (contact Timothy Tickle, ttickle@hsph.harvard.edu).
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
24 #####################################################################################
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
25
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
26 inlinedocs <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
27 ##author<< Curtis Huttenhower <chuttenh@hsph.harvard.edu> and Timothy Tickle <ttickle@hsph.harvard.edu>
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
28 ##description<< Holds MaAsLin related plotting
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
29 ) { return( pArgs ) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
30
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
31 funcPDF <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
32 ### Function to plot raw data with linear model information.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
33 ### Continuous and integer variables are plotted with a line of best fit.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
34 ### Other data is plotted as boxplots.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
35 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
36 lsCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
37 ### Linear model information
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
38 curPValue,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
39 ### Pvalue to display
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
40 curQValue,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
41 ### Qvalue to display
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
42 strFilePDF,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
43 ### PDF file to create or to which to append
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
44 strBaseOut,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
45 ### Project directory to place pdf in
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
46 strName,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
47 ### Name of taxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
48 funcUnTransform=NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
49 ### If a transform is used the appropriate of that transfor must be used on the residuals in the partial residual plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
50 fDoResidualPlot = TRUE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
51 ### Plot the residual plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
52 fInvert = FALSE,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
53 ### Invert the figure so the background is black
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
54 liNaIndices = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
55 ### Indices of NA data that was imputed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
56 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
57 if( is.na( strFilePDF ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
58 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
59 strFilePDF <- sprintf( "%s-%s.pdf", strBaseOut, strName )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
60 pdf( strFilePDF, width = 11, useDingbats=FALSE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
61 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
62
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
63 #Invert plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
64 adColorMin <- c(1, 0, 0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
65 adColorMax <- c(0, 1, 0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
66 adColorMed <- c(0, 0, 0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
67 if( fInvert )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
68 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
69 par( bg = "black", fg = "white", col.axis = "white", col.lab = "white", col.main = "white", col.sub = "white" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
70 adColorMin <- c(1, 1, 0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
71 adColorMax <- c(0, 1, 1)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
72 adColorMed <- c(1, 1, 1)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
73 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
74
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
75 #Create linear model title data string
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
76 strTitle <- sprintf( "%s (%.3g sd %.3g, p=%.3g, q=%.3g)", lsCur$orig, lsCur$value, lsCur$std, curPValue, curQValue )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
77 adMar <- c(5, 4, 4, 2) + 0.1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
78 dLine <- NA
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
79 strTaxon <- lsCur$taxon
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
80 if( nchar( strTaxon ) > 80 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
81 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
82 dCEX <- 0.75
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
83 iLen <- nchar( strTaxon )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
84 if( iLen > 120 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
85 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
86 dLine <- 2.5
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
87 i <- round( iLen / 2 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
88 strTaxon <- paste( substring( strTaxon, 0, i ), substring( strTaxon, i + 1 ), sep = "\n" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
89 adMar[2] <- adMar[2] + 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
90 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
91 } else { dCEX = 1 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
92
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
93 #Plot 1x2 graphs per page
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
94 if(fDoResidualPlot){par(mfrow=c(1,2))}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
95
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
96 # Plot factor data as boxplot if is descrete data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
97 # Otherwise plot as a line
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
98 adCur <- lsCur$metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
99 adY <- lsCur$data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
100
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
101 # Remove NAs from data visualization if set to do so (if liNaIndices is not empty)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
102 if(lsCur$name %in% names(liNaIndices)&&(length(liNaIndices[[lsCur$name]])>0))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
103 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
104 adY <- adY[-1*liNaIndices[[lsCur$name]]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
105 adCur = adCur[-1*liNaIndices[[lsCur$name]]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
106 if(class(adCur)=="factor")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
107 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
108 adCur = factor(adCur)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
109 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
110 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
111
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
112 # Set the factor levels to include NA if they still exist
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
113 # This is so if something is not imputed, then if there are NAs they will be plotted (to show no imputing)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
114 if( class( lsCur$metadata ) == "factor" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
115 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
116 sCurLevels = levels(adCur)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
117 adCur = (as.character(adCur))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
118 aiCurNAs = which(is.na(adCur))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
119 if(length(aiCurNAs) > 0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
120 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
121 adCur[aiCurNAs]="NA"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
122 sCurLevels = c(sCurLevels,"NA")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
123 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
124 adCur = factor(adCur, levels = sCurLevels)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
125 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
126
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
127 if( class( lsCur$metadata ) == "factor" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
128 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
129 astrNames <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
130 astrColors <- c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
131 dMed <- median( adY[adCur == levels( adCur )[1]], na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
132 adIQR <- quantile( adY, probs = c(0.25, 0.75), na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
133 dIQR <- adIQR[2] - adIQR[1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
134 if( dIQR <= 0 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
135 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
136 dIQR <- sd( adY, na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
137 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
138 dIQR <- dIQR / 2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
139
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
140 #Print boxplots/strip charts of raw data. Add model data to it.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
141 for( strLevel in levels( adCur ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
142 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
143 c_iLen <- 32
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
144 strLength <- strLevel
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
145 if( nchar( strLength ) > c_iLen )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
146 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
147 iTmp <- ( c_iLen / 2 ) - 2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
148 strLength <- paste( substr( strLength, 1, iTmp ), substring( strLength, nchar( strLength ) - iTmp ), sep = "..." )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
149 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
150 astrNames <- c(astrNames, sprintf( "%s (%d)", strLength, sum( adCur == strLevel, na.rm = TRUE ) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
151 astrColors <- c(astrColors, sprintf( "%sAA", funcColor( ( median( adY[adCur == strLevel], na.rm = TRUE ) - dMed ) /
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
152 dIQR, dMax = 3, dMin = -3, adMax = adColorMin, adMin = adColorMax, adMed = adColorMed ) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
153 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
154 #Controls boxplot labels
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
155 #(When there are many factor levels some are skipped and not plotted
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
156 #So this must be reduced)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
157 dBoxPlotLabelCex = dCEX
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
158 if(length(astrNames)>8)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
159 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
160 dBoxPlotLabelCex = dBoxPlotLabelCex * 1.5/(length(astrNames)/8)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
161 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
162 par(cex.axis = dBoxPlotLabelCex)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
163 boxplot( adY ~ adCur, notch = TRUE, names = astrNames, mar = adMar, col = astrColors,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
164 main = strTitle, cex.main = 48/nchar(strTitle), xlab = lsCur$name, ylab = NA, cex.lab = dCEX, outpch = 4, outcex = 0.5 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
165 par(cex.axis = dCEX)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
166 stripchart( adY ~ adCur, add = TRUE, col = astrColors, method = "jitter", vertical = TRUE, pch = 20 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
167 title( ylab = strTaxon, cex.lab = dCEX, line = dLine )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
168 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
169 #Plot continuous data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
170 plot( adCur, adY, mar = adMar, main = strTitle, xlab = lsCur$name, pch = 20,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
171 col = sprintf( "%s99", funcGetColor( ) ), ylab = NA, xaxt = "s" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
172 title( ylab = strTaxon, cex.lab = dCEX )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
173 lmod <- lm( adY ~ adCur )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
174 dColor <- lmod$coefficients[2] * mean( adCur, na.rm = TRUE ) / mean( adY, na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
175 strColor <- sprintf( "%sDD", funcColor( dColor, adMax = adColorMin, adMin = adColorMax, adMed = adColorMed ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
176 abline( reg = lmod, col = strColor, lwd = 3 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
177 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
178 ### Plot the residual plot
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
179 if(fDoResidualPlot){funcResidualPlot(lsCur=lsCur, frmeTmp=frmeTmp, adColorMin=adColorMin, adColorMax=adColorMax, adColorMed=adColorMed, adMar, funcUnTransform=funcUnTransform, liNaIndices)}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
180 return(strFilePDF)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
181 ### File to which the pdf was written
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
182 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
183
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
184 ### Plot 1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
185 # axis 1 gene expression (one bug)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
186 # axis 2 PC1 (bugs and metadata)(MFA)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
187 # over plot real data vs the residuals (real data verses the prediction)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
188 # remove all but 1 bug + metadata
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
189 ### Plot 2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
190 #residuals (y) PCL1 (1 bug + metadata)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
191 #Plot 3
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
192 ### Can plot the residuals against all the variables in a grid/lattic
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
193 funcGetFactorBoxColors = function(adCur,adY,adColorMin,adColorMax,adColorMed)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
194 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
195 astrColors = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
196
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
197 if( class( adCur ) == "factor" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
198 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
199 if( "NA" %in% levels( adCur ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
200 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
201 afNA = adCur == "NA"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
202 adY = adY[!afNA]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
203 adCur = adCur[!afNA]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
204 adCur = factor( adCur, levels = setdiff( levels( adCur ), "NA" ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
205 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
206 dMed = median( adY[adCur == levels( adCur )[1]], na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
207 adIQR = quantile( adY, probs = c(0.25, 0.75), na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
208 dIQR = adIQR[2] - adIQR[1]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
209 if( dIQR <= 0 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
210 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
211 dIQR <- sd( adY, na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
212 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
213 dIQR <- dIQR / 2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
214
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
215 for( strLevel in levels( adCur ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
216 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
217 astrColors <- c(astrColors, sprintf( "%sAA", funcColor( ( median( adY[adCur == strLevel], na.rm = TRUE ) - dMed ) /
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
218 dIQR, dMax = 3, dMin = -3, adMax = adColorMin, adMin = adColorMax, adMed = adColorMed ) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
219 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
220 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
221 return(astrColors)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
222 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
223
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
224 funcResidualPlotHelper <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
225 frmTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
226 ### The dataframe to plot from
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
227 sResponseFeature,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
228 lsFullModelCovariateNames,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
229 ### All covariates in lm (Column Names)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
230 lsCovariateToControlForNames,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
231 ### Y Axis: All the covariate which will be plotted together respectively * their beta with the residuals of the full model added. (These should dummied names for factor data; not column names).
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
232 sCovariateOfInterest,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
233 ### X Axis: raw data of the covariate of interest. (Column Name)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
234 adColorMin,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
235 ### Min color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
236 adColorMax,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
237 ### Max color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
238 adColorMed,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
239 ### Medium color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
240 adMar,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
241 ### Standardized margins
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
242 funcUnTransform = NULL,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
243 ### If a transform is used the opposite of that transfor must be used on the residuals in the partial residual plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
244 liNaIndices = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
245 ### Indices of NA data that was imputed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
246 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
247 # Get model matrix (raw data)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
248 adCur = frmTmp[[sResponseFeature]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
249
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
250 # Make a formula to calculate the new model to get the full model residuals
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
251 strFormula = paste("adCur",paste(sprintf( "`%s`", lsFullModelCovariateNames ),sep="", collapse="+"),sep="~")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
252
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
253 # Calculate lm
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
254 lmod = (lm(as.formula(strFormula),frmTmp))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
255
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
256 # Get all coefficient data in the new model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
257 dfdCoefs = coefficients(lmod)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
258
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
259 # Get all coefficient names in the new model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
260 asAllCoefNames = names(dfdCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
261
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
262 # Get Y
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
263 # For each covariate that is being plotted on the y axis
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
264 # Convert the coefficient name to the column name
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
265 # If they are equal then the data is not discontinuous and you can use the raw data as is and multiply it by the coefficient in the model
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
266 # If they are not equal than the data is discontinuous, get the value for the data, set all but the levels equal to it to zero and multiply by the ceofficient from the model.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
267 vY = rep(coefficients(lmod)[["(Intercept)"]],dim(frmTmp)[1])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
268 # vY = rep(0,dim(frmTmp)[1])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
269
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
270 #Here we are not dealing with column names but, if factor data, the coefficient names
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
271 for(iCvIndex in 1:length(lsCovariateToControlForNames))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
272 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
273 sCurrentCovariate = lsCovariateToControlForNames[iCvIndex]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
274 #Get the column name of the current covariate (this must be used to compare to other column names)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
275 sCurCovariateColumnName = funcCoef2Col(sCurrentCovariate, frmTmp)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
276
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
277 #This is continuous data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
278 if(sCurrentCovariate == sCurCovariateColumnName)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
279 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
280 vY = vY + dfdCoefs[sCurrentCovariate]*frmTmp[[sCurCovariateColumnName]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
281 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
282 #Discontinuous data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
283 # Get level
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
284 xLevel = substr(sCurrentCovariate,nchar(sCurCovariateColumnName)+1,nchar(sCurrentCovariate))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
285
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
286 # Get locations where the data = level
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
287 aiLevelIndices = rep(0,dim(frmTmp)[1])
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
288 aiLevelIndices[which(frmTmp[sCurCovariateColumnName] == xLevel)]=1
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
289 sCurrentCovariateBeta = dfdCoefs[sCurrentCovariate]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
290 if(is.na(sCurrentCovariateBeta)){sCurrentCovariateBeta=0}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
291 vY = vY + sCurrentCovariateBeta * aiLevelIndices
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
292 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
293 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
294 #TODO based on transform vY = vY+sin(residuals(lmod))^2
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
295 if(!is.null(funcUnTransform))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
296 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
297 vY = vY + funcUnTransform(residuals(lmod))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
298 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
299 vY = vY + residuals(lmod) }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
300
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
301 # Plot x, raw data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
302 ## y label
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
303 sYLabel = paste(paste("B",lsCovariateToControlForNames,sep="*",collapse="+"),"e",sep="+")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
304 sTitle = "Partial Residual Plot"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
305
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
306 adCurXValues = frmTmp[[sCovariateOfInterest]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
307
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
308 # If not plotting metadata that was originally NA then remove the imputed values here
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
309 if(sCovariateOfInterest %in% names(liNaIndices)&&(length(liNaIndices[[sCovariateOfInterest]])>0))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
310 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
311 adCurXValues = adCurXValues[-1*liNaIndices[[sCovariateOfInterest]]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
312 vY <- vY[-1*liNaIndices[[sCovariateOfInterest]]]
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
313 if(is.factor(adCurXValues)){adCurXValues = factor(adCurXValues)}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
314 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
315
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
316 # Set the factor levels to include NA if they still exist
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
317 # This is so if something is not imputed, then if there are NAs they will be plotted (to show no imputing)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
318 # Do not forget to keep te level order incase it was changed by the custom scripts.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
319 if( class( adCurXValues ) == "factor" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
320 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
321 vsLevels = levels(adCurXValues)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
322 if(sum(is.na(adCurXValues))>0)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
323 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
324 adCurXValues = as.character(adCurXValues)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
325 adCurXValues[is.na(adCurXValues)]="NA"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
326 adCurXValues = factor(adCurXValues, levels=c(vsLevels,"NA"))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
327 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
328 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
329
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
330 # Scale to the original range
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
331 if(!(class( adCurXValues ) == "factor" ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
332 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
333 vY = vY + mean(adCurXValues,rm.na=TRUE)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
334 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
335
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
336 # Plot Partial Residual Plot
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
337 # If we are printing discontinuous data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
338 # Get the color of the box plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
339 # Plot box plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
340 # Plot data as strip charts
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
341 if(is.factor(adCurXValues))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
342 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
343 # adCurXValues = factor(adCurXValues)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
344 astrColors = funcGetFactorBoxColors(adCurXValues,vY,adColorMin,adColorMax,adColorMed)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
345 asNames = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
346 for(sLevel in levels(adCurXValues))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
347 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
348 asNames = c(asNames,sprintf( "%s (%d)", sLevel, sum( adCurXValues == sLevel, na.rm = TRUE ) ))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
349 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
350
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
351 plot(adCurXValues, vY, xlab=sCovariateOfInterest, ylab=sYLabel, names=asNames, notch = TRUE,mar = adMar,col = astrColors, main=sTitle, outpch = 4, outcex = 0.5 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
352 stripchart( vY ~ adCurXValues, add = TRUE, col = astrColors, method = "jitter", vertical = TRUE, pch = 20 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
353
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
354 } else {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
355 plot( adCurXValues, vY, mar = adMar, main = sTitle, xlab=sCovariateOfInterest, col = sprintf( "%s99", funcGetColor( ) ), pch = 20,ylab = sYLabel, xaxt = "s" )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
356
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
357 lmodLine = lm(vY~adCurXValues)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
358
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
359 dColor <- lmodLine$coefficients[2] * mean( adCurXValues, na.rm = TRUE ) / mean( vY, na.rm = TRUE )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
360 strColor <- sprintf( "%sDD", funcColor( dColor, adMax = adColorMin, adMin = adColorMax, adMed = adColorMed ) )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
361 abline( reg =lmodLine, col = strColor, lwd = 3 )
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
362 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
363 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
364
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
365 funcBoostInfluencePlot <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
366 # Plot to show the rel.inf from boosting, what to know if the rank order is correct, better ranks for spiked data.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
367 # Show the cut off and features identified as uneven.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
368 vdRelInf,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
369 sFeature,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
370 vsPredictorNames,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
371 vstrKeepMetadata,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
372 vstrUneven = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
373 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
374 vsCol = rep("black",length(vdRelInf))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
375 vsCol[which(vsPredictorNames %in% vstrKeepMetadata)]="green"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
376 vsCol[which(vsPredictorNames %in% vstrUneven)] = "orange"
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
377 plot(vdRelInf, col=vsCol, main=sFeature, xlab="Index", ylab="Relative Influence")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
378 legend("topright", pch = paste(1:length(vsPredictorNames)), legend= vsPredictorNames, text.col=vsCol, col=vsCol)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
379 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
380
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
381 funcResidualPlot <- function(
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
382 ### Plot to data after confounding.
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
383 ### That is, in a linear model with significant coefficient b1 for variable x1,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
384 ### that's been sparsified to some subset of terms: y = b0 + b1*x1 + sum(bi*xi)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
385 ### Plot x1 on the X axis, and instead of y on the Y axis, instead plot:
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
386 ### y' = b0 + sum(bi*xi)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
387 lsCur,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
388 ### Assocation to plot
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
389 frmeTmp,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
390 ### Data frame of orginal data
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
391 adColorMin,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
392 ### Min color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
393 adColorMax,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
394 ### Max color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
395 adColorMed,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
396 ### Medium color in color range for markers
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
397 adMar,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
398 ### Standardized margins
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
399 funcUnTransform,
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
400 ### If a transform is used the opporite of that transfor must be used on the residuals in the partial residual plots
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
401 liNaIndices = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
402 ### Indices of NA data that was imputed
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
403 ){
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
404 #Now plot residual hat plot
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
405 #Get coefficient names
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
406 asAllCoefs = setdiff(names(lsCur$allCoefs),c("(Intercept)"))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
407 asAllColNames = c()
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
408 for(sCoef in asAllCoefs)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
409 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
410 asAllColNames = c(asAllColNames,funcCoef2Col(sCoef,frmeData=frmeTmp))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
411 }
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
412 asAllColNames = unique(asAllColNames)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
413
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
414 # All coefficients except for the one of interest
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
415 lsOtherCoefs = setdiff(asAllColNames, c(lsCur$name))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
416
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
417 lsCovariatesToPlot = NULL
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
418 if(is.factor(lsCur$metadata))
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
419 {
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
420 lsCovariatesToPlot = paste(lsCur$name,levels(lsCur$metadata),sep="")
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
421 }else{lsCovariatesToPlot=c(lsCur$orig)}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
422
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
423 # If there are no other coefficients then skip plot
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
424 # if(!length(lsOtherCoefs)){return()}
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
425
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
426 # Plot residuals
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
427 funcResidualPlotHelper(frmTmp=frmeTmp, sResponseFeature=lsCur$taxon, lsFullModelCovariateNames=asAllColNames, lsCovariateToControlForNames=lsCovariatesToPlot, sCovariateOfInterest=lsCur$name, adColorMin=adColorMin, adColorMax=adColorMax, adColorMed=adColorMed, adMar=adMar, funcUnTransform=funcUnTransform, liNaIndices=liNaIndices)
a87d5a5f2776 Uploaded the version running on the prod server
george-weingart
parents:
diff changeset
428 }