|
0
|
1 #***************************************************************************************************************************************
|
|
|
2 # Functions: Start
|
|
|
3 #***************************************************************************************************************************************
|
|
|
4
|
|
|
5 #===============================================================================
|
|
|
6 # PCA
|
|
|
7 #===============================================================================
|
|
|
8 multisample_PCA = function(df, sampleinfo_df, outfile)
|
|
|
9 {
|
|
|
10 tempdf = df[,-1];
|
|
|
11 tempcol = colnames(tempdf);
|
|
|
12 tempgrp = sampleinfo_df[tempcol,2];
|
|
|
13 tempdf = t(tempdf) %>% as.data.frame();
|
|
|
14 tempdf[is.na(tempdf)] = 0;
|
|
|
15 tempdf$Group = tempgrp;
|
|
|
16 png(outfile, width = 6, height = 6, units = 'in', res=300);
|
|
|
17 # bitmap(outfile, "png16m");
|
|
|
18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3);
|
|
|
19 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
20 plot(g);
|
|
|
21 dev.off();
|
|
|
22 }
|
|
|
23
|
|
|
24 #===============================================================================
|
|
|
25 # Regression and Cook's distance
|
|
|
26 #===============================================================================
|
|
|
27 singlesample_regression = function(PE_TE_data,htmloutfile, append=TRUE)
|
|
|
28 {
|
|
|
29 rownames(PE_TE_data) = PE_TE_data$PE_ID;
|
|
|
30 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data);
|
|
|
31 regmodel_summary = summary(regmodel);
|
|
|
32
|
|
|
33 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n",
|
|
|
34 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n",
|
|
|
35 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
|
|
|
36 file = htmloutfile, append = TRUE);
|
|
|
37
|
|
|
38 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n",
|
|
|
39 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n",
|
|
|
40 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n",
|
|
|
41 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n",
|
|
|
42 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n",
|
|
|
43 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n",
|
|
|
44 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n",
|
|
|
45 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n",
|
|
|
46 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n",
|
|
|
47 file = htmloutfile, append = TRUE);
|
|
|
48
|
|
|
49 cat("</table>\n", file = htmloutfile, append = TRUE);
|
|
|
50
|
|
|
51 cat(
|
|
|
52 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n",
|
|
|
53 file = htmloutfile, append = TRUE);
|
|
|
54
|
|
|
55 outplot = paste(outdir,"/PE_TE_lm_1.png",sep="",collapse="");
|
|
|
56 png(outplot, width = 10, height = 10, units = 'in',res=300);
|
|
|
57 # bitmap(outplot, "png16m");
|
|
|
58 par(mfrow=c(1,1));
|
|
|
59 plot(regmodel, 1, cex.lab=1.5);
|
|
|
60 dev.off();
|
|
|
61
|
|
|
62 g <- autoplot(regmodel, label = FALSE)[[1]] +
|
|
|
63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)),
|
|
|
64 shape = 1, size = .1, stroke = .2) +
|
|
|
65 theme_light()
|
|
|
66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
67
|
|
|
68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse="");
|
|
|
69 png(outplot,width = 10, height = 10, units = 'in', res=300);
|
|
|
70 # bitmap(outplot, "png16m");
|
|
|
71 par(mfrow=c(1,1));
|
|
|
72 g <- plot(regmodel, 2, cex.lab=1.5);
|
|
|
73 ggplotly(g)
|
|
|
74 dev.off();
|
|
|
75
|
|
|
76 g <- autoplot(regmodel, label = FALSE)[[2]] +
|
|
|
77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)),
|
|
|
78 shape = 1, size = .1) +
|
|
|
79 theme_light()
|
|
|
80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
81
|
|
|
82
|
|
|
83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse="");
|
|
|
84 png(outplot, width = 10, height = 10, units = 'in',res=300);
|
|
|
85 # bitmap(outplot, "png16m");
|
|
|
86 par(mfrow=c(1,1));
|
|
|
87 plot(regmodel, 5, cex.lab=1.5);
|
|
|
88 dev.off();
|
|
|
89
|
|
|
90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)}
|
|
|
91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)}
|
|
|
92
|
|
|
93 g <- autoplot(regmodel, label = FALSE)[[4]] +
|
|
|
94 aes(label = PE_TE_data$PE_ID) +
|
|
|
95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) +
|
|
|
96 theme_light()
|
|
|
97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
98
|
|
|
99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
|
|
|
100
|
|
|
101 cat(
|
|
|
102 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n",
|
|
|
103 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n',
|
|
|
104 file = htmloutfile, append = TRUE);
|
|
|
105
|
|
|
106 cat(
|
|
|
107 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600>',
|
|
|
108 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div),
|
|
|
109 '</td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600>',
|
|
|
110 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div),
|
|
|
111 '</td></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
112
|
|
|
113 cat(
|
|
|
114 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n',
|
|
|
115 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n',
|
|
|
116 file = htmloutfile, append = TRUE);
|
|
|
117
|
|
|
118
|
|
|
119 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
120 # Residuals data
|
|
|
121 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
122 res_all = regmodel$residuals;
|
|
|
123 res_mean = mean(res_all);
|
|
|
124 res_sd = sd(res_all);
|
|
|
125 res_diff = (res_all-res_mean);
|
|
|
126 res_zscore = res_diff/res_sd;
|
|
|
127 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))]
|
|
|
128
|
|
|
129
|
|
|
130 tempind = which((res_zscore > 2)|(res_zscore < -2));
|
|
|
131 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,];
|
|
|
132 res_PE_TE_data_no_outlier$residuals = res_all[-tempind];
|
|
|
133 res_PE_TE_data_outlier = PE_TE_data[tempind,];
|
|
|
134 res_PE_TE_data_outlier$residuals = res_all[tempind];
|
|
|
135
|
|
|
136 # Save the complete table for download (influential_observations)
|
|
|
137 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals)
|
|
|
138 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
|
|
|
139 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse="");
|
|
|
140 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
|
|
|
141
|
|
|
142
|
|
|
143 # Save the complete table for download (non influential_observations)
|
|
|
144 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all)
|
|
|
145 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
|
|
|
146 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse="");
|
|
|
147 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F);
|
|
|
148
|
|
|
149
|
|
|
150 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n',
|
|
|
151 file = htmloutfile, append = TRUE);
|
|
|
152 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
|
|
|
153 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n',
|
|
|
154 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
|
|
|
155 file = htmloutfile, append = TRUE);
|
|
|
156
|
|
|
157 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n",
|
|
|
158 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n",
|
|
|
159 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href="PE_TE_outliers_residuals.txt" target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n',
|
|
|
160 '<tr><td colspan=2 align=center>',
|
|
|
161 '<font size=4>(<b><a href="PE_TE_abundance_residuals.txt" target="_blank">Download the complete residuals data here</a></b>)</font>',
|
|
|
162 "</td></tr>\n</table><br><br>\n",
|
|
|
163 file = htmloutfile, append = TRUE);
|
|
|
164
|
|
|
165 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
166
|
|
|
167
|
|
|
168 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
|
|
|
169
|
|
|
170 cat(
|
|
|
171 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n',
|
|
|
172 file = htmloutfile, append = TRUE);
|
|
|
173
|
|
|
174 cat(
|
|
|
175 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600>',
|
|
|
176 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div)
|
|
|
177 , '</td></tr>\n',
|
|
|
178 file = htmloutfile, append = TRUE);
|
|
|
179
|
|
|
180 cat(
|
|
|
181 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n',
|
|
|
182 file = htmloutfile, append = TRUE);
|
|
|
183
|
|
|
184
|
|
|
185
|
|
|
186 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
|
187 # Cook's Distance
|
|
|
188 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
|
189 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n',
|
|
|
190 file = htmloutfile, append = TRUE);
|
|
|
191 cat(
|
|
|
192 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n',
|
|
|
193 file = htmloutfile, append = TRUE);
|
|
|
194
|
|
|
195 cooksd <- cooks.distance(regmodel);
|
|
|
196
|
|
|
197 outplot = paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse="");
|
|
|
198 png(outplot, width = 10, height = 10, units = 'in', res=300);
|
|
|
199 # bitmap(outplot, "png16m");
|
|
|
200 par(mfrow=c(1,1));
|
|
|
201 plot(cooksd, main="Influential Obs. by Cook\'s distance", ylab="Cook\'s distance", xlab="Observations", type="n") # plot cooks distance
|
|
|
202 sel_outlier=which(cooksd>=as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))
|
|
|
203 sel_nonoutlier=which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))
|
|
|
204 points(sel_outlier, cooksd[sel_outlier],pch="*", cex=2, cex.lab=1.5, col="red")
|
|
|
205 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black")
|
|
|
206 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line
|
|
|
207 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels
|
|
|
208 dev.off();
|
|
|
209
|
|
|
210 cooksd_df <- data.frame(cooksd)
|
|
|
211 cooksd_df$genes <- row.names(cooksd_df)
|
|
|
212 cooksd_df$index <- 1:nrow(cooksd_df)
|
|
|
213 cooksd_df$colors <- "black"
|
|
|
214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)
|
|
|
215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red"
|
|
|
216
|
|
|
217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors),
|
|
|
218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) +
|
|
|
219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") +
|
|
|
220 #xlim(0, 3000) + ylim(0, .15) +
|
|
|
221 scale_shape_discrete(solid=F) +
|
|
|
222 geom_point(size = 2, shape = 8) +
|
|
|
223 geom_hline(yintercept = cutoff,
|
|
|
224 linetype = "dashed", color = "red") +
|
|
|
225 scale_color_manual(values = c("black" = "black", "red" = "red")) +
|
|
|
226 theme_light() + theme(legend.position="none")
|
|
|
227 saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
228
|
|
|
229 cat(
|
|
|
230 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>',
|
|
|
231 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
|
|
|
232 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>',
|
|
|
233 file = htmloutfile, append = TRUE);
|
|
|
234
|
|
|
235 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T));
|
|
|
236 PE_TE_data_no_outlier = PE_TE_data[-tempind,];
|
|
|
237 PE_TE_data_no_outlier$cooksd = cooksd[-tempind];
|
|
|
238 PE_TE_data_outlier = PE_TE_data[tempind,];
|
|
|
239 PE_TE_data_outlier$cooksd = cooksd[tempind];
|
|
|
240 a = sort(PE_TE_data_outlier$cooksd, decreasing=T, index.return=T);
|
|
|
241 PE_TE_data_outlier_sorted = PE_TE_data_outlier[a$ix,];
|
|
|
242
|
|
|
243 cat(
|
|
|
244 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
|
|
|
245 file = htmloutfile, append = TRUE);
|
|
|
246
|
|
|
247 # Save the complete table for download (influential_observations)
|
|
|
248 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd)
|
|
|
249 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
|
|
|
250 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse="");
|
|
|
251 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
|
|
|
252
|
|
|
253
|
|
|
254 # Save the complete table for download (non influential_observations)
|
|
|
255 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd)
|
|
|
256 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
|
|
|
257 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse="");
|
|
|
258 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
|
|
|
259
|
|
|
260
|
|
|
261 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n",
|
|
|
262 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n",
|
|
|
263
|
|
|
264 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n",
|
|
|
265 "</table><br><br>\n",
|
|
|
266 file = htmloutfile, append = TRUE);
|
|
|
267
|
|
|
268
|
|
|
269 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
270 # Scatter plot after removal of influential points
|
|
|
271 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
272 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
|
|
|
273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
|
|
|
274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
|
|
|
275 png(outplot, width = 10, height = 10, units = 'in', res=300);
|
|
|
276 # bitmap(outplot,"png16m");
|
|
|
277 g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
|
|
|
278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
|
|
|
279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
|
|
|
280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
|
|
|
281 PE_ID, TE_abundance, PE_abundance)))
|
|
|
282 suppressMessages(plot(g));
|
|
|
283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot))));
|
|
|
284 dev.off();
|
|
|
285
|
|
|
286
|
|
|
287 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
288 # Before
|
|
|
289 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->",
|
|
|
290 '<img src="TE_PE_scatter.png" width=600 height=600>',
|
|
|
291 gsub('id="html', 'id="secondhtml"',
|
|
|
292 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)),
|
|
|
293 '</td>\n',
|
|
|
294 file = htmloutfile, append = TRUE);
|
|
|
295
|
|
|
296 # After
|
|
|
297 cat("<td align=center>\n",
|
|
|
298 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600>',
|
|
|
299 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div),
|
|
|
300
|
|
|
301 '</td></tr>\n',
|
|
|
302 file = htmloutfile, append = TRUE);
|
|
|
303 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
|
304
|
|
|
305
|
|
|
306 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson");
|
|
|
307 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman");
|
|
|
308 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall");
|
|
|
309
|
|
|
310 cat('<tr><td>\n', file = htmloutfile, append=TRUE);
|
|
|
311 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
|
|
|
312 cat('</td>\n', file = htmloutfile, append=TRUE);
|
|
|
313
|
|
|
314
|
|
|
315 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n',
|
|
|
316 file = htmloutfile, append = TRUE);
|
|
|
317
|
|
|
318 cat(
|
|
|
319 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n",
|
|
|
320 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n",
|
|
|
321 file = htmloutfile, append = TRUE)
|
|
|
322 cat("</table></td></tr></table>\n", file = htmloutfile, append = TRUE)
|
|
|
323
|
|
|
324
|
|
|
325
|
|
|
326 if(dim(PE_TE_data_outlier)[1]<10)
|
|
|
327 {
|
|
|
328 tab_n_row = dim(PE_TE_data_outlier)[1];
|
|
|
329 }else{
|
|
|
330 tab_n_row = 10;
|
|
|
331 }
|
|
|
332
|
|
|
333 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font> ",
|
|
|
334 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n",
|
|
|
335 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n',
|
|
|
336 file = htmloutfile, append = TRUE);
|
|
|
337
|
|
|
338 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE);
|
|
|
339 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n",
|
|
|
340 file = htmloutfile, append = TRUE);
|
|
|
341
|
|
|
342
|
|
|
343 for(i in 1:tab_n_row)
|
|
|
344 {
|
|
|
345 cat(
|
|
|
346 '<tr>','<td>',as.character(PE_TE_data_outlier_sorted[i,1]),'</td>\n',
|
|
|
347 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n',
|
|
|
348 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n',
|
|
|
349 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n',
|
|
|
350 file = htmloutfile, append = TRUE);
|
|
|
351 }
|
|
|
352 cat('</table><br><br>\n',file = htmloutfile, append = TRUE);
|
|
|
353
|
|
|
354
|
|
|
355 }
|
|
|
356
|
|
|
357
|
|
|
358
|
|
|
359 #===============================================================================
|
|
|
360 # Heatmap
|
|
|
361 #===============================================================================
|
|
|
362 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){
|
|
|
363 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n',
|
|
|
364 file = htmloutfile, append = TRUE);
|
|
|
365
|
|
|
366 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")])))
|
|
|
367 hm_cluster = cutree(hc,k=hm_nclust);
|
|
|
368
|
|
|
369 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
|
|
|
370 png(outplot, width = 10, height = 10, units = 'in', res=300);
|
|
|
371 # bitmap(outplot, "png16m");
|
|
|
372 par(mfrow=c(1,1));
|
|
|
373 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]),
|
|
|
374 trace="none", cexCol=1, col=greenred(100),Colv=F,
|
|
|
375 labCol=c("Proteins","Transcripts"), scale="col",
|
|
|
376 hclustfun = hclust, distfun = dist);
|
|
|
377
|
|
|
378 dev.off();
|
|
|
379
|
|
|
380
|
|
|
381 p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col",
|
|
|
382 dendrogram = "row", colors = greenred(100),
|
|
|
383 hclustfun = hclust, distfun = dist,
|
|
|
384 show_grid = FALSE)
|
|
|
385 saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
386
|
|
|
387 cat('<tr><td align=center colspan="2">',
|
|
|
388 '<img src="PE_TE_heatmap.png" width=800 height=800>',
|
|
|
389 gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
|
|
|
390 '</td></tr>\n',
|
|
|
391 file = htmloutfile, append = TRUE);
|
|
|
392
|
|
|
393
|
|
|
394 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster);
|
|
|
395 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)")
|
|
|
396 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse="");
|
|
|
397 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
|
|
|
398
|
|
|
399
|
|
|
400 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n',
|
|
|
401 file = htmloutfile, append = TRUE);
|
|
|
402 }
|
|
|
403
|
|
|
404
|
|
|
405 #===============================================================================
|
|
|
406 # K-means clustering
|
|
|
407 #===============================================================================
|
|
|
408 singlesample_kmeans=function(PE_TE_data, htmloutfile, nclust){
|
|
|
409 PE_TE_data_kdata = PE_TE_data;
|
|
|
410 k1 = kmeans(PE_TE_data_kdata[,c("PE_abundance","TE_abundance")], nclust);
|
|
|
411 outplot = paste(outdir,"/PE_TE_kmeans.png",sep="",collapse="");
|
|
|
412 png(outplot, width = 10, height = 10, units = 'in', res=300);
|
|
|
413 # bitmap(outplot, "png16m");
|
|
|
414 par(mfrow=c(1,1));
|
|
|
415 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
|
|
|
416 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8)
|
|
|
417 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8)
|
|
|
418
|
|
|
419 ind=which(k1$cluster==1);
|
|
|
420 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16);
|
|
|
421 ind=which(k1$cluster==2);
|
|
|
422 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16);
|
|
|
423 ind=which(k1$cluster==3);
|
|
|
424 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="blue", pch=16);
|
|
|
425 ind=which(k1$cluster==4);
|
|
|
426 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="cyan", pch=16);
|
|
|
427 ind=which(k1$cluster==5);
|
|
|
428 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="black", pch=16);
|
|
|
429 ind=which(k1$cluster==6);
|
|
|
430 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="brown", pch=16);
|
|
|
431 ind=which(k1$cluster==7);
|
|
|
432 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="gold", pch=16);
|
|
|
433 ind=which(k1$cluster==8);
|
|
|
434 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="thistle", pch=16);
|
|
|
435 ind=which(k1$cluster==9);
|
|
|
436 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16);
|
|
|
437 ind=which(k1$cluster==10);
|
|
|
438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16);
|
|
|
439 dev.off();
|
|
|
440
|
|
|
441 # Interactive plot for k-means clustering
|
|
|
442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data),
|
|
|
443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f",
|
|
|
444 PE_ID, TE_abundance, PE_abundance),
|
|
|
445 color=as.factor(k1$cluster))) +
|
|
|
446 xlab("Transcript Abundance") + ylab("Protein Abundance") +
|
|
|
447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) +
|
|
|
448 geom_point(size = 1, shape = 8) +
|
|
|
449 theme_light() + theme(legend.position="none")
|
|
|
450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
|
|
|
451
|
|
|
452 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n',
|
|
|
453 file = htmloutfile, append = TRUE);
|
|
|
454
|
|
|
455 tempind = order(k1$cluster);
|
|
|
456 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse="");
|
|
|
457 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
|
|
|
458
|
|
|
459 #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
|
|
|
460 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800>',
|
|
|
461 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '</td></tr>\n',
|
|
|
462 file = htmloutfile, append = TRUE);
|
|
|
463 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n',
|
|
|
464 file = htmloutfile, append = TRUE);
|
|
|
465
|
|
|
466 }
|
|
|
467
|
|
|
468 #===============================================================================
|
|
|
469 # scatter plot
|
|
|
470 #===============================================================================
|
|
|
471 singlesample_scatter = function(PE_TE_data, outfile)
|
|
|
472 {
|
|
|
473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
|
|
|
474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
|
|
|
475 png(outfile, width = 10, height = 10, units = 'in', res=300);
|
|
|
476 # bitmap(outfile, "png16m");
|
|
|
477 g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
|
|
|
478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
|
|
|
479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
|
|
|
480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
|
|
|
481 PE_ID, TE_abundance, PE_abundance)),
|
|
|
482 size = .5)
|
|
|
483 suppressMessages(plot(g));
|
|
|
484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile))))
|
|
|
485 dev.off();
|
|
|
486 }
|
|
|
487
|
|
|
488 #===============================================================================
|
|
|
489 # Correlation table
|
|
|
490 #===============================================================================
|
|
|
491 singlesample_cor = function(PE_TE_data, htmloutfile, append=TRUE)
|
|
|
492 {
|
|
|
493 cor_result_pearson = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "pearson");
|
|
|
494 cor_result_spearman = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "spearman");
|
|
|
495 cor_result_kendall = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "kendall");
|
|
|
496
|
|
|
497 cat(
|
|
|
498 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n',
|
|
|
499 file = htmloutfile, append = TRUE);
|
|
|
500
|
|
|
501 cat(
|
|
|
502 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n",
|
|
|
503 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n",
|
|
|
504 file = htmloutfile, append = TRUE)
|
|
|
505 cat("</table>\n", file = htmloutfile, append = TRUE);
|
|
|
506
|
|
|
507 }
|
|
|
508
|
|
|
509 #===============================================================================
|
|
|
510 # Boxplot
|
|
|
511 #===============================================================================
|
|
|
512 multisample_boxplot = function(df, sampleinfo_df, outfile, fill_leg, user_xlab, user_ylab)
|
|
|
513 {
|
|
|
514 tempdf = df[,-1, drop=FALSE];
|
|
|
515 tempdf = t(tempdf) %>% as.data.frame();
|
|
|
516 tempdf[is.na(tempdf)] = 0;
|
|
|
517 tempdf$Sample = rownames(tempdf);
|
|
|
518 tempdf1 = melt(tempdf, id.vars = "Sample");
|
|
|
519
|
|
|
520 if("Gene" %in% colnames(df)){
|
|
|
521 tempdf1$Name = df$Gene;
|
|
|
522 } else if ("Protein" %in% colnames(df)){
|
|
|
523 tempdf1$Name = df$Protein;
|
|
|
524 } else if ("Genes" %in% colnames(df)){
|
|
|
525 tempdf1$Name = df$Genes;
|
|
|
526 }
|
|
|
527
|
|
|
528 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2];
|
|
|
529 png(outplot, width = 6, height = 6, units = 'in', res=300);
|
|
|
530 # bitmap(outplot, "png16m");
|
|
|
531 if(fill_leg=="No"){
|
|
|
532 tempdf1$Group = c("case", "control")
|
|
|
533 }
|
|
|
534
|
|
|
535 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) +
|
|
|
536 geom_boxplot()+
|
|
|
537 labs(x=user_xlab) + labs(y=user_ylab)
|
|
|
538 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile)))
|
|
|
539 plot(g);
|
|
|
540 dev.off();
|
|
|
541 }
|
|
|
542
|
|
|
543 ## A wrapper to saveWidget which compensates for arguable BUG in
|
|
|
544 ## saveWidget which requires `file` to be in current working
|
|
|
545 ## directory.
|
|
|
546 saveWidget <- function (widget,file,...) {
|
|
|
547 wd<-getwd()
|
|
|
548 on.exit(setwd(wd))
|
|
|
549 outDir<-dirname(file)
|
|
|
550 file<-basename(file)
|
|
|
551 setwd(outDir);
|
|
|
552 htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE)
|
|
|
553 }
|
|
|
554
|
|
|
555 #===============================================================================
|
|
|
556 # Mean or Median of Replicates
|
|
|
557 #===============================================================================
|
|
|
558
|
|
|
559 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method)
|
|
|
560 {
|
|
|
561 grps = unique(sampleinfo_df[,2]);
|
|
|
562
|
|
|
563 TE_df_merged <<- sapply(grps, function(x){
|
|
|
564 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
|
|
|
565 if(length(tempsample)!=1){
|
|
|
566 apply(TE_df[,tempsample],1,method);
|
|
|
567 }else{
|
|
|
568 return(TE_df[,tempsample]);
|
|
|
569 }
|
|
|
570 });
|
|
|
571 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged);
|
|
|
572 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps);
|
|
|
573
|
|
|
574 PE_df_merged <<- sapply(grps, function(x){
|
|
|
575 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
|
|
|
576 if(length(tempsample)!=1){
|
|
|
577 apply(PE_df[,tempsample],1,method);
|
|
|
578 }else{
|
|
|
579 return(PE_df[,tempsample]);
|
|
|
580 }
|
|
|
581 });
|
|
|
582
|
|
|
583 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged);
|
|
|
584 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps);
|
|
|
585
|
|
|
586 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F);
|
|
|
587 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F);
|
|
|
588
|
|
|
589 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged));
|
|
|
590 }
|
|
|
591
|
|
|
592 #===============================================================================
|
|
|
593 # (T-Test or Wilcoxon ranksum test) and Volcano Plot
|
|
|
594 #===============================================================================
|
|
|
595
|
|
|
596 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with)
|
|
|
597 {
|
|
|
598
|
|
|
599 PE_colnames = colnames(PE_df_data);
|
|
|
600 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
|
|
|
601 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
|
|
|
602 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
|
|
|
603 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
|
|
|
604
|
|
|
605 if(method=="mean"){
|
|
|
606 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
|
|
|
607 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
|
|
|
608 }else{
|
|
|
609 if(method=="median"){
|
|
|
610 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
|
|
|
611 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
|
|
|
612 }
|
|
|
613 }
|
|
|
614 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval))
|
|
|
615
|
|
|
616
|
|
|
617 TE_colnames = colnames(TE_df_data);
|
|
|
618 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
|
|
|
619 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
|
|
|
620 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
|
|
|
621 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
|
|
|
622
|
|
|
623 if(method=="mean"){
|
|
|
624 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
|
|
|
625 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
|
|
|
626 }else{
|
|
|
627 if(method=="median"){
|
|
|
628 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
|
|
|
629 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
|
|
|
630 }
|
|
|
631 }
|
|
|
632 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval))
|
|
|
633
|
|
|
634
|
|
|
635 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval);
|
|
|
636 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)");
|
|
|
637 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse="");
|
|
|
638 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F);
|
|
|
639 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n",
|
|
|
640 file = htmloutfile, append = TRUE);
|
|
|
641
|
|
|
642 if(length(condition_ind)!=1)
|
|
|
643 {
|
|
|
644 # Volcano Plot
|
|
|
645
|
|
|
646 if(volc_with=="adj_pval")
|
|
|
647 {
|
|
|
648 PE_pval = PE_adj_pval
|
|
|
649 TE_pval = TE_adj_pval
|
|
|
650 volc_ylab = "-log10 Adjusted p-value";
|
|
|
651 }else{
|
|
|
652 if(volc_with=="pval")
|
|
|
653 {
|
|
|
654 volc_ylab = "-log10 p-value";
|
|
|
655 }
|
|
|
656 }
|
|
|
657 outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse="");
|
|
|
658 png(outplot_PE, width = 10, height = 10, units = 'in', res=300);
|
|
|
659 # bitmap(outplot, "png16m");
|
|
|
660 par(mfrow=c(1,1));
|
|
|
661
|
|
|
662 plot(PE_df_logfold$LogFold, -log10(PE_pval),
|
|
|
663 xlab="log2 fold change", ylab=volc_ylab,
|
|
|
664 type="n")
|
|
|
665 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
|
|
|
666 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black")
|
|
|
667 PE_df_logfold$color <- "black"
|
|
|
668 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
|
|
|
669 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
|
|
|
670 sel1 <- which(PE_pval<=0.05)
|
|
|
671 sel=intersect(sel,sel1)
|
|
|
672 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red")
|
|
|
673 PE_df_logfold[sel,]$color <- "red"
|
|
|
674 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
|
|
|
675 sel1 <- which(PE_pval>0.05)
|
|
|
676 sel=intersect(sel,sel1)
|
|
|
677 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue")
|
|
|
678 PE_df_logfold[sel,]$color <- "blue"
|
|
|
679 abline(h = -log(0.05,base=10), col="red", lty=2)
|
|
|
680 abline(v = log(2,base=2), col="red", lty=2)
|
|
|
681 abline(v = log(0.5,base=2), col="red", lty=2)
|
|
|
682 dev.off();
|
|
|
683
|
|
|
684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color),
|
|
|
685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
|
|
|
686 Genes, LogFold, -log10(PE_pval), PE_pval))) +
|
|
|
687 xlab("log2 fold change") + ylab("-log10 p-value") +
|
|
|
688 geom_point(shape=1, size = 1.5, stroke = .2) +
|
|
|
689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
|
|
|
690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
|
|
|
691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
|
|
|
692 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
|
|
|
693 theme_light() + theme(legend.position="none")
|
|
|
694 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE)))
|
|
|
695
|
|
|
696 outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse="");
|
|
|
697 png(outplot_TE, width = 10, height = 10, units = 'in', res=300);
|
|
|
698 # bitmap(outplot, "png16m");
|
|
|
699 par(mfrow=c(1,1));
|
|
|
700
|
|
|
701 plot(TE_df_logfold$LogFold, -log10(TE_pval),
|
|
|
702 xlab="log2 fold change", ylab=volc_ylab,
|
|
|
703 type="n")
|
|
|
704
|
|
|
705 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
|
|
|
706 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black")
|
|
|
707 TE_df_logfold$color <- "black"
|
|
|
708 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
|
|
|
709 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
|
|
|
710 sel1 <- which(TE_pval<=0.05)
|
|
|
711 sel=intersect(sel,sel1)
|
|
|
712 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red")
|
|
|
713 TE_df_logfold[sel,]$color <- "red"
|
|
|
714 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
|
|
|
715 sel1 <- which(TE_pval>0.05)
|
|
|
716 sel=intersect(sel,sel1)
|
|
|
717 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue")
|
|
|
718 TE_df_logfold[sel,]$color <- "blue"
|
|
|
719 abline(h = -log(0.05,base=10), col="red", lty=2)
|
|
|
720 abline(v = log(2,base=2), col="red", lty=2)
|
|
|
721 abline(v = log(0.5,base=2), col="red", lty=2)
|
|
|
722 dev.off();
|
|
|
723
|
|
|
724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color),
|
|
|
725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
|
|
|
726 Genes, LogFold, -log10(TE_pval), TE_pval))) +
|
|
|
727 xlab("log2 fold change") + ylab("-log10 p-value") +
|
|
|
728 geom_point(shape=1, size = 1.5, stroke = .2) +
|
|
|
729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
|
|
|
730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
|
|
|
731 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
|
|
|
732 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
|
|
|
733 theme_light() + theme(legend.position="none")
|
|
|
734 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE)))
|
|
|
735
|
|
|
736
|
|
|
737 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
738 cat("<tr><td align=center>",
|
|
|
739 '<img src="TE_volcano.png" width=600 height=600>',
|
|
|
740 extractWidgetCode(outplot_TE)$widget_div,
|
|
|
741 '</td>\n', file = htmloutfile, append = TRUE);
|
|
|
742 cat("<td align=center>",
|
|
|
743 '<img src="PE_volcano.png" width=600 height=600>',
|
|
|
744 extractWidgetCode(outplot_PE)$widget_div,
|
|
|
745 '</td></tr></table><br>\n',
|
|
|
746 file = htmloutfile, append = TRUE);
|
|
|
747
|
|
|
748
|
|
|
749 }else{
|
|
|
750 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>',
|
|
|
751 file = htmloutfile, append = TRUE);
|
|
|
752 }
|
|
|
753
|
|
|
754 }
|
|
|
755
|
|
|
756
|
|
|
757 #***************************************************************************************************************************************
|
|
|
758 # Functions: End
|
|
|
759 #***************************************************************************************************************************************
|
|
|
760
|
|
|
761
|
|
|
762 #===============================================================================
|
|
|
763 # Arguments
|
|
|
764 #===============================================================================
|
|
|
765 noargs = 12;
|
|
|
766 args = commandArgs(trailingOnly = TRUE);
|
|
|
767 if(length(args) != noargs)
|
|
|
768 {
|
|
|
769 stop(paste("Please check usage. Number of arguments is not equal to ",noargs,sep="",collapse=""));
|
|
|
770 }
|
|
|
771
|
|
|
772 mode = args[1]; # "multiple" or "logfold"
|
|
|
773 method = args[2]; # "mean" or "median"
|
|
|
774 sampleinfo_file = args[3];
|
|
|
775 proteome_file = args[4];
|
|
|
776 transcriptome_file = args[5];
|
|
|
777 correction_method = args[6];
|
|
|
778 cookdist_upper_cutoff = args[7];
|
|
|
779 numCluster = args[8];
|
|
|
780 hm_nclust = args[9];
|
|
|
781 volc_with = args[10];
|
|
|
782
|
|
|
783 htmloutfile = args[11]; # html output file
|
|
|
784 outdir = args[12]; # html supporting files
|
|
|
785
|
|
|
786 #===============================================================================
|
|
|
787 # Check for file existance
|
|
|
788 #===============================================================================
|
|
|
789 if(! file.exists(proteome_file))
|
|
|
790 {
|
|
|
791 stop(paste("Proteome Data file does not exists. Path given: ",proteome_file,sep="",collapse=""));
|
|
|
792 }
|
|
|
793 if(! file.exists(transcriptome_file))
|
|
|
794 {
|
|
|
795 stop(paste("Transcriptome Data file does not exists. Path given: ",transcriptome_file,sep="",collapse=""));
|
|
|
796 }
|
|
|
797
|
|
|
798 #===============================================================================
|
|
|
799 # Load library
|
|
|
800 #===============================================================================
|
|
|
801 options(warn=-1);
|
|
|
802
|
|
|
803 suppressPackageStartupMessages(library(dplyr));
|
|
|
804 suppressPackageStartupMessages(library(data.table));
|
|
|
805 suppressPackageStartupMessages(library(gplots));
|
|
|
806 suppressPackageStartupMessages(library(ggplot2));
|
|
|
807 suppressPackageStartupMessages(library(ggfortify));
|
|
|
808 suppressPackageStartupMessages(library(plotly));
|
|
|
809 suppressPackageStartupMessages(library(d3heatmap));
|
|
|
810
|
|
|
811 #===============================================================================
|
|
|
812 # Select mode and parse experiment design file
|
|
|
813 #===============================================================================
|
|
|
814 if(mode=="multiple")
|
|
|
815 {
|
|
|
816 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame();
|
|
|
817 expDesign_cc = expDesign[1:2,];
|
|
|
818
|
|
|
819 sampleinfo_df = expDesign[3:nrow(expDesign),];
|
|
|
820 rownames(sampleinfo_df)=1:nrow(sampleinfo_df);
|
|
|
821 colnames(sampleinfo_df) = c("Sample","Group");
|
|
|
822
|
|
|
823 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1];
|
|
|
824 condition_g_name = "case";
|
|
|
825 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1];
|
|
|
826 control_g_name = "control";
|
|
|
827 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case";
|
|
|
828 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control";
|
|
|
829 sampleinfo_df_orig = sampleinfo_df;
|
|
|
830 }
|
|
|
831
|
|
|
832 if(mode=="logfold")
|
|
|
833 {
|
|
|
834 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
|
|
|
835 }
|
|
|
836
|
|
|
837 #===============================================================================
|
|
|
838 # Parse Transcriptome data
|
|
|
839 #===============================================================================
|
|
|
840 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
|
|
|
841 if(mode=="multiple")
|
|
|
842 {
|
|
|
843 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)];
|
|
|
844 }
|
|
|
845 if(mode=="logfold")
|
|
|
846 {
|
|
|
847 TE_df = TE_df_orig;
|
|
|
848 colnames(TE_df) = c("Genes", "LogFold");
|
|
|
849 }
|
|
|
850 #===============================================================================
|
|
|
851 # Parse Proteome data
|
|
|
852 #===============================================================================
|
|
|
853 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
|
|
|
854 if(mode=="multiple")
|
|
|
855 {
|
|
|
856 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)];
|
|
|
857 }
|
|
|
858 if(mode=="logfold")
|
|
|
859 {
|
|
|
860 PE_df = PE_df_orig;
|
|
|
861 colnames(PE_df) = c("Genes", "LogFold");
|
|
|
862 }
|
|
|
863
|
|
|
864 #=============================================================================================================
|
|
|
865 # Create directory structures and then set the working directory to output directory
|
|
|
866 #=============================================================================================================
|
|
|
867 if(! file.exists(outdir))
|
|
|
868 {
|
|
|
869 dir.create(outdir);
|
|
|
870 }
|
|
|
871 #===============================================================================
|
|
|
872 # Write initial data summary in html outfile
|
|
|
873 #===============================================================================
|
|
|
874 cat("<html><head></head><body>\n", file = htmloutfile);
|
|
|
875
|
|
|
876 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n",
|
|
|
877 "<font><h3>Input data summary</h3></font>\n",
|
|
|
878 "<ul>\n",
|
|
|
879 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n",
|
|
|
880 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n",
|
|
|
881 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n",
|
|
|
882 file = htmloutfile, append = TRUE);
|
|
|
883
|
|
|
884 cat("<h3 id=table_of_content>Table of Contents:</h3>\n",
|
|
|
885 "<ul>\n",
|
|
|
886 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
|
|
|
887 "<li><a href=#corr_data>Correlation</a></li>\n",
|
|
|
888 "<li><a href=#regression_data>Regression analysis</a></li>\n",
|
|
|
889 "<li><a href=#inf_obs>Influential observations</a></li>\n",
|
|
|
890 "<li><a href=#cluster_data>Cluster analysis</a></li></ul><hr/>\n",
|
|
|
891 file = htmloutfile, append = TRUE);
|
|
|
892 #===============================================================================
|
|
|
893 # Find common samples
|
|
|
894 #===============================================================================
|
|
|
895 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]);
|
|
|
896
|
|
|
897 if(length(common_samples)==0)
|
|
|
898 {
|
|
|
899 stop("No common samples found ");
|
|
|
900 cat("<b>Please check your experiment design file. Sample names (column names) in the Transcriptome and the Proteome data do not match. </b>\n",file = htmloutfile, append = TRUE);
|
|
|
901 }
|
|
|
902
|
|
|
903 #===============================================================================
|
|
|
904 # Create subsets based on common samples
|
|
|
905 #===============================================================================
|
|
|
906 TE_df = select(TE_df, 1, common_samples);
|
|
|
907 PE_df = select(PE_df, 1, common_samples);
|
|
|
908 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples);
|
|
|
909 rownames(sampleinfo_df) = sampleinfo_df[,1];
|
|
|
910
|
|
|
911 #===============================================================================
|
|
|
912 # Check for number of rows similarity
|
|
|
913 #===============================================================================
|
|
|
914 if(nrow(TE_df) != nrow(PE_df))
|
|
|
915 {
|
|
|
916 stop("Number of rows in Transcriptome and Proteome data are not same i.e. they are not paired");
|
|
|
917 cat("<b>The correlation analysis expects paired TE and PE data i.e. (i)th gene/transcript of TE file should correspond to (i)th protein of PE file. In the current input provided there is mismatch in terms of number of rows of TE and PE file. Please make sure you provide paired data.</b>\n",file = htmloutfile, append = TRUE);
|
|
|
918 }
|
|
|
919
|
|
|
920 #===============================================================================
|
|
|
921 # Number of groups
|
|
|
922 #===============================================================================
|
|
|
923 ngrps = unique(sampleinfo_df[,2]) %>% length();
|
|
|
924 grps = unique(sampleinfo_df[,2]);
|
|
|
925 names(grps) = grps;
|
|
|
926
|
|
|
927 #===============================================================================
|
|
|
928 # Change column1 name
|
|
|
929 #===============================================================================
|
|
|
930 colnames(TE_df)[1] = "Gene";
|
|
|
931 colnames(PE_df)[1] = "Protein";
|
|
|
932
|
|
|
933 #===============================================================================
|
|
|
934 # Treat missing values
|
|
|
935 #===============================================================================
|
|
|
936 TE_nacount = sum(is.na(TE_df));
|
|
|
937 PE_nacount = sum(is.na(PE_df));
|
|
|
938
|
|
|
939 TE_df[is.na(TE_df)] = 0;
|
|
|
940 PE_df[is.na(PE_df)] = 0;
|
|
|
941
|
|
|
942 #===============================================================================
|
|
|
943 # Obtain JS/HTML lines for interactive visualization
|
|
|
944 #===============================================================================
|
|
|
945 extractWidgetCode = function(outplot){
|
|
|
946 lines <- readLines(gsub("\\.png", "\\.html", outplot))
|
|
|
947 return(list(
|
|
|
948 'prescripts' = c('',
|
|
|
949 gsub('script', 'script',
|
|
|
950 lines[grep('<head>',lines) + 3
|
|
|
951 :grep('</head>' ,lines) - 5]),
|
|
|
952 ''),
|
|
|
953 'widget_div' = paste('',
|
|
|
954 gsub('width:100%;height:400px',
|
|
|
955 'width:500px;height:500px',
|
|
|
956 lines[grep(lines, pattern='html-widget')]),
|
|
|
957 '', sep=''),
|
|
|
958 'postscripts' = paste('',
|
|
|
959 gsub('script', 'script',
|
|
|
960 lines[grep(lines, pattern='<script type')]),
|
|
|
961 '', sep='')));
|
|
|
962 }
|
|
|
963 prescripts <- list()
|
|
|
964 postscripts <- list()
|
|
|
965
|
|
|
966
|
|
|
967 #===============================================================================
|
|
|
968 # Decide based on analysis mode
|
|
|
969 #===============================================================================
|
|
|
970 if(mode=="logfold")
|
|
|
971 {
|
|
|
972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
|
|
|
973 file = htmloutfile, append = TRUE);
|
|
|
974
|
|
|
975 # TE Boxplot
|
|
|
976 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
|
|
|
977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
|
|
|
978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
|
|
|
979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
|
|
|
980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
|
|
|
981
|
|
|
982 # PE Boxplot
|
|
|
983 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
|
|
|
984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
|
|
|
985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data");
|
|
|
986
|
|
|
987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
|
|
|
988 file = htmloutfile, append = TRUE);
|
|
|
989
|
|
|
990 # TE PE scatter
|
|
|
991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
|
|
|
992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
993 singlesample_scatter(PE_TE_data, outplot);
|
|
|
994 lines <- extractWidgetCode(outplot);
|
|
|
995 postscripts <- c(postscripts, lines$postscripts);
|
|
|
996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
997 PE_TE_data = data.frame(PE_df, TE_df);
|
|
|
998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
|
|
|
999
|
|
|
1000 # TE PE Cor
|
|
|
1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE);
|
|
|
1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
|
|
|
1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
|
|
|
1004 file = htmloutfile, append = TRUE);
|
|
|
1005 cat('</td></table>',
|
|
|
1006 file = htmloutfile, append = TRUE);
|
|
|
1007
|
|
|
1008 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
|
|
|
1009 file = htmloutfile, append = TRUE);
|
|
|
1010
|
|
|
1011 # TE PE Regression
|
|
|
1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
|
|
|
1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
|
|
|
1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
|
|
|
1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
|
|
|
1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
|
|
|
1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts));
|
|
|
1018
|
|
|
1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
|
|
|
1020 file = htmloutfile, append = TRUE);
|
|
|
1021
|
|
|
1022 # TE PE Heatmap
|
|
|
1023 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
|
|
|
1024 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
|
|
|
1025 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1026 prescripts <- c(prescripts, lines$prescripts)
|
|
|
1027
|
|
|
1028
|
|
|
1029 # TE PE Clustering (kmeans)
|
|
|
1030 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
|
|
|
1031 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts)
|
|
|
1032
|
|
|
1033 }else{
|
|
|
1034 if(mode=="multiple")
|
|
|
1035 {
|
|
|
1036 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
|
|
|
1037 file = htmloutfile, append = TRUE);
|
|
|
1038
|
|
|
1039 # TE Boxplot
|
|
|
1040 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape="");
|
|
|
1041 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
|
|
|
1042 colnames(temp_df_te_data) = colnames(TE_df);
|
|
|
1043 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)");
|
|
|
1044 lines <- extractWidgetCode(outplot)
|
|
|
1045 prescripts <- c(prescripts, lines$prescripts)
|
|
|
1046 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1047 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
|
|
|
1048 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
|
|
|
1049 "<tr><td align=center>", file = htmloutfile, append = TRUE);
|
|
|
1050 cat('<img src="Box_TE_all_rep.png" width=500 height=500>',
|
|
|
1051 lines$widget_div, '</td>', file = htmloutfile, append = TRUE);
|
|
|
1052
|
|
|
1053 # PE Boxplot
|
|
|
1054 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape="");
|
|
|
1055 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
|
|
|
1056 colnames(temp_df_pe_data) = colnames(PE_df);
|
|
|
1057 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)");
|
|
|
1058 lines <- extractWidgetCode(outplot)
|
|
|
1059 #prescripts <- c(prescripts, lines$prescripts)
|
|
|
1060 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1061 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500>',
|
|
|
1062 lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
|
|
|
1063
|
|
|
1064 # Calc TE PCA
|
|
|
1065 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape="");
|
|
|
1066 multisample_PCA(TE_df, sampleinfo_df, outplot);
|
|
|
1067 PCA_TE <- extractWidgetCode(outplot)
|
|
|
1068 postscripts <- c(postscripts, PCA_TE$postscripts)
|
|
|
1069
|
|
|
1070 # Calc PE PCA
|
|
|
1071 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape="");
|
|
|
1072 multisample_PCA(PE_df, sampleinfo_df, outplot);
|
|
|
1073 PCA_PE <- extractWidgetCode(outplot)
|
|
|
1074 postscripts <- c(postscripts, PCA_PE$postscripts)
|
|
|
1075
|
|
|
1076 # Replicate mode
|
|
|
1077 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method);
|
|
|
1078 TE_df = templist$TE_df_merged;
|
|
|
1079 PE_df = templist$PE_df_merged;
|
|
|
1080 sampleinfo_df = templist$sampleinfo_df_merged;
|
|
|
1081 rownames(sampleinfo_df) = sampleinfo_df[,1];
|
|
|
1082
|
|
|
1083 # TE Boxplot
|
|
|
1084 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape="");
|
|
|
1085 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
|
|
|
1086 colnames(temp_df_te_data) = colnames(TE_df);
|
|
|
1087 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)");
|
|
|
1088 lines <- extractWidgetCode(outplot)
|
|
|
1089 #prescripts <- c(prescripts, lines$prescripts)
|
|
|
1090 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1091 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
|
|
|
1092 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
|
|
|
1093
|
|
|
1094 # PE Boxplot
|
|
|
1095 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape="");
|
|
|
1096 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
|
|
|
1097 colnames(temp_df_pe_data) = colnames(PE_df);
|
|
|
1098 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)");
|
|
|
1099 lines <- extractWidgetCode(outplot)
|
|
|
1100 #prescripts <- c(prescripts, lines$prescripts)
|
|
|
1101 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1102 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500>', lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
|
|
|
1103
|
|
|
1104 #===============================================================================
|
|
|
1105 # Calculating log fold change and running the "single" code part
|
|
|
1106 #===============================================================================
|
|
|
1107
|
|
|
1108 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
|
|
|
1109 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
|
|
|
1110
|
|
|
1111 #===============================================================================
|
|
|
1112 # Treat missing values
|
|
|
1113 #===============================================================================
|
|
|
1114
|
|
|
1115 TE_df[is.infinite(TE_df[,2]),2] = NA;
|
|
|
1116 PE_df[is.infinite(PE_df[,2]),2] = NA;
|
|
|
1117 TE_df[is.na(TE_df)] = 0;
|
|
|
1118 PE_df[is.na(PE_df)] = 0;
|
|
|
1119
|
|
|
1120 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
|
|
|
1121 #===============================================================================
|
|
|
1122 # Find common samples
|
|
|
1123 #===============================================================================
|
|
|
1124
|
|
|
1125 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]);
|
|
|
1126 TE_df = select(TE_df, 1, common_samples);
|
|
|
1127 PE_df = select(PE_df, 1, common_samples);
|
|
|
1128 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples);
|
|
|
1129 rownames(sampleinfo_df) = sampleinfo_df[,1];
|
|
|
1130
|
|
|
1131 # TE Boxplot
|
|
|
1132 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
|
|
|
1133 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)");
|
|
|
1134 lines <- extractWidgetCode(outplot)
|
|
|
1135 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1136 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE);
|
|
|
1137 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
|
|
|
1138 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
|
|
|
1139
|
|
|
1140 # PE Boxplot
|
|
|
1141 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
|
|
|
1142 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)");
|
|
|
1143 lines <- extractWidgetCode(outplot)
|
|
|
1144 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1145 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,'</td></tr></table>\n', file = htmloutfile, append = TRUE);
|
|
|
1146
|
|
|
1147
|
|
|
1148 # Log Fold Data
|
|
|
1149 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with)
|
|
|
1150 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/TE_volcano.png",sep="",collapse=""))$postscripts)
|
|
|
1151 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_volcano.png",sep="",collapse=""))$postscripts)
|
|
|
1152
|
|
|
1153 # Print PCA
|
|
|
1154
|
|
|
1155 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n',
|
|
|
1156 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500>', PCA_TE$widget_div, '</td>\n',
|
|
|
1157 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500>', PCA_PE$widget_div, '</td></tr></table>\n',
|
|
|
1158 file = htmloutfile, append = TRUE);
|
|
|
1159
|
|
|
1160
|
|
|
1161
|
|
|
1162 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
|
|
|
1163 file = htmloutfile, append = TRUE);
|
|
|
1164
|
|
|
1165 PE_TE_data = data.frame(PE_df, TE_df);
|
|
|
1166 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
|
|
|
1167
|
|
|
1168 # TE PE scatter
|
|
|
1169 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
|
|
|
1170 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
|
|
|
1171 singlesample_scatter(PE_TE_data, outplot);
|
|
|
1172 lines <- extractWidgetCode(outplot);
|
|
|
1173 postscripts <- c(postscripts, lines$postscripts);
|
|
|
1174 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div),
|
|
|
1175 '</td>\n', file = htmloutfile, append = TRUE);
|
|
|
1176
|
|
|
1177 # TE PE Cor
|
|
|
1178 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE);
|
|
|
1179 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
|
|
|
1180 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
|
|
|
1181 file = htmloutfile, append = TRUE);
|
|
|
1182 cat('</td></table>',
|
|
|
1183 file = htmloutfile, append = TRUE);
|
|
|
1184
|
|
|
1185 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
|
|
|
1186 file = htmloutfile, append = TRUE);
|
|
|
1187
|
|
|
1188 # TE PE Regression
|
|
|
1189 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
|
|
|
1190 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
|
|
|
1191 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
|
|
|
1192 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
|
|
|
1193 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
|
|
|
1194 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts,
|
|
|
1195 gsub('data-for="html', 'data-for="secondhtml"',
|
|
|
1196 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts)));
|
|
|
1197
|
|
|
1198 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
|
|
|
1199 file = htmloutfile, append = TRUE);
|
|
|
1200
|
|
|
1201 #TE PE Heatmap
|
|
|
1202 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
|
|
|
1203 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
|
|
|
1204 postscripts <- c(postscripts, lines$postscripts)
|
|
|
1205 prescripts <- c(prescripts, lines$prescripts)
|
|
|
1206
|
|
|
1207 #TE PE Clustering (kmeans)
|
|
|
1208 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
|
|
|
1209 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts);
|
|
|
1210 }
|
|
|
1211 }
|
|
|
1212 cat("<h3>Go To:</h3>\n",
|
|
|
1213 "<ul>\n",
|
|
|
1214 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
|
|
|
1215 "<li><a href=#corr_data>Correlation</a></li>\n",
|
|
|
1216 "<li><a href=#regression_data>Regression analysis</a></li>\n",
|
|
|
1217 "<li><a href=#inf_obs>Influential observations</a></li>\n",
|
|
|
1218 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n",
|
|
|
1219 "<br><a href=#>TOP</a>",
|
|
|
1220 file = htmloutfile, append = TRUE);
|
|
|
1221 cat("</body></html>\n", file = htmloutfile, append = TRUE);
|
|
|
1222
|
|
|
1223
|
|
|
1224 #===============================================================================
|
|
|
1225 # Add masked-javascripts tags to HTML file in the head and end
|
|
|
1226 #===============================================================================
|
|
|
1227
|
|
|
1228 htmllines <- readLines(htmloutfile)
|
|
|
1229 htmllines[1] <- paste('<html>\n<head>\n', paste(prescripts, collapse='\n'), '\n</head>\n<body>')
|
|
|
1230 cat(paste(htmllines, collapse='\n'), file = htmloutfile)
|
|
|
1231 cat('\n', paste(postscripts, collapse='\n'), "\n",
|
|
|
1232 "</body>\n</html>\n", file = htmloutfile, append = TRUE);
|