annotate quantp.r @ 1:6d7b1287c449 draft default tip

planemo upload
author rsajulga
date Thu, 20 Dec 2018 15:16:01 -0500
parents 0072d7fe861a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1 #***************************************************************************************************************************************
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
2 # Functions: Start
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
3 #***************************************************************************************************************************************
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
4
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
5 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
6 # PCA
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
7 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
8 multisample_PCA = function(df, sampleinfo_df, outfile)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
9 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
10 tempdf = df[,-1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
11 tempcol = colnames(tempdf);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
12 tempgrp = sampleinfo_df[tempcol,2];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
13 tempdf = t(tempdf) %>% as.data.frame();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
14 tempdf[is.na(tempdf)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
15 tempdf$Group = tempgrp;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
16 png(outfile, width = 6, height = 6, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
17 # bitmap(outfile, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
19 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
20 plot(g);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
21 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
22 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
23
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
24 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
25 # Regression and Cook's distance
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
26 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
27 singlesample_regression = function(PE_TE_data,htmloutfile, append=TRUE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
28 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
29 rownames(PE_TE_data) = PE_TE_data$PE_ID;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
30 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
31 regmodel_summary = summary(regmodel);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
32
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
33 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
34 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
35 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
36 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
37
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
38 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
39 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
40 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
41 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
42 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
43 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
44 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
45 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
46 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
47 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
48
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
49 cat("</table>\n", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
50
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
51 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
52 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
53 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
54
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
55 outplot = paste(outdir,"/PE_TE_lm_1.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
56 png(outplot, width = 10, height = 10, units = 'in',res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
57 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
58 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
59 plot(regmodel, 1, cex.lab=1.5);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
60 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
61
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
62 g <- autoplot(regmodel, label = FALSE)[[1]] +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
64 shape = 1, size = .1, stroke = .2) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
65 theme_light()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
67
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
69 png(outplot,width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
70 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
71 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
72 g <- plot(regmodel, 2, cex.lab=1.5);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
73 ggplotly(g)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
74 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
75
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
76 g <- autoplot(regmodel, label = FALSE)[[2]] +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
78 shape = 1, size = .1) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
79 theme_light()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
81
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
82
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
84 png(outplot, width = 10, height = 10, units = 'in',res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
85 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
86 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
87 plot(regmodel, 5, cex.lab=1.5);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
88 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
89
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)}
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)}
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
92
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
93 g <- autoplot(regmodel, label = FALSE)[[4]] +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
94 aes(label = PE_TE_data$PE_ID) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
96 theme_light()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
98
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
100
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
101 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
102 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
103 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
104 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
105
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
106 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
107 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
108 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
109 '</td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
110 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
111 '</td></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
112
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
113 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
114 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
115 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
116 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
117
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
118
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
119 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
120 # Residuals data
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
121 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
122 res_all = regmodel$residuals;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
123 res_mean = mean(res_all);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
124 res_sd = sd(res_all);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
125 res_diff = (res_all-res_mean);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
126 res_zscore = res_diff/res_sd;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
127 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))]
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
128
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
129
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
130 tempind = which((res_zscore > 2)|(res_zscore < -2));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
131 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
132 res_PE_TE_data_no_outlier$residuals = res_all[-tempind];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
133 res_PE_TE_data_outlier = PE_TE_data[tempind,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
134 res_PE_TE_data_outlier$residuals = res_all[tempind];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
135
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
136 # Save the complete table for download (influential_observations)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
137 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
138 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
139 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
140 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
141
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
142
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
143 # Save the complete table for download (non influential_observations)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
144 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
145 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
146 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
147 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
148
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
149
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
150 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
151 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
152 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
153 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
154 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
155 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
156
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
157 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
158 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
159 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href="PE_TE_outliers_residuals.txt" target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
160 '<tr><td colspan=2 align=center>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
161 '<font size=4>(<b><a href="PE_TE_abundance_residuals.txt" target="_blank">Download the complete residuals data here</a></b>)</font>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
162 "</td></tr>\n</table><br><br>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
163 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
164
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
165 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
166
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
167
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
168 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
169
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
170 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
171 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
172 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
173
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
174 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
175 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
176 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
177 , '</td></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
178 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
179
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
180 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
181 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
182 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
183
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
184
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
185
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
186 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
187 # Cook's Distance
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
188 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
189 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
190 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
191 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
192 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
193 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
194
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
195 cooksd <- cooks.distance(regmodel);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
196
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
197 outplot = paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
198 png(outplot, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
199 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
200 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
201 plot(cooksd, main="Influential Obs. by Cook\'s distance", ylab="Cook\'s distance", xlab="Observations", type="n") # plot cooks distance
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
202 sel_outlier=which(cooksd>=as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
203 sel_nonoutlier=which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
204 points(sel_outlier, cooksd[sel_outlier],pch="*", cex=2, cex.lab=1.5, col="red")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
205 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
206 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
207 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
208 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
209
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
210 cooksd_df <- data.frame(cooksd)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
211 cooksd_df$genes <- row.names(cooksd_df)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
212 cooksd_df$index <- 1:nrow(cooksd_df)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
213 cooksd_df$colors <- "black"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
216
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
220 #xlim(0, 3000) + ylim(0, .15) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
221 scale_shape_discrete(solid=F) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
222 geom_point(size = 2, shape = 8) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
223 geom_hline(yintercept = cutoff,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
224 linetype = "dashed", color = "red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
225 scale_color_manual(values = c("black" = "black", "red" = "red")) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
226 theme_light() + theme(legend.position="none")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
227 saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
228
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
229 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
230 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
231 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
232 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
233 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
234
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
235 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
236 PE_TE_data_no_outlier = PE_TE_data[-tempind,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
237 PE_TE_data_no_outlier$cooksd = cooksd[-tempind];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
238 PE_TE_data_outlier = PE_TE_data[tempind,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
239 PE_TE_data_outlier$cooksd = cooksd[tempind];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
240 a = sort(PE_TE_data_outlier$cooksd, decreasing=T, index.return=T);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
241 PE_TE_data_outlier_sorted = PE_TE_data_outlier[a$ix,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
242
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
243 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
244 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
245 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
246
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
247 # Save the complete table for download (influential_observations)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
248 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
249 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
250 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
251 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
252
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
253
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
254 # Save the complete table for download (non influential_observations)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
255 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
256 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
257 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
258 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
259
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
260
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
261 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
262 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
263
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
264 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
265 "</table><br><br>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
266 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
267
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
268
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
269 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
270 # Scatter plot after removal of influential points
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
271 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
272 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
275 png(outplot, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
276 # bitmap(outplot,"png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
277 g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
281 PE_ID, TE_abundance, PE_abundance)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
282 suppressMessages(plot(g));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot))));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
284 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
285
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
286
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
287 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
288 # Before
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
289 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
290 '<img src="TE_PE_scatter.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
291 gsub('id="html', 'id="secondhtml"',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
292 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
293 '</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
294 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
295
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
296 # After
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
297 cat("<td align=center>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
298 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
299 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
300
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
301 '</td></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
302 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
303 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
304
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
305
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
306 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
307 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
308 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
309
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
310 cat('<tr><td>\n', file = htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
311 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
312 cat('</td>\n', file = htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
313
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
314
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
315 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
316 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
317
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
318 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
319 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
320 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
321 file = htmloutfile, append = TRUE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
322 cat("</table></td></tr></table>\n", file = htmloutfile, append = TRUE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
323
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
324
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
325
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
326 if(dim(PE_TE_data_outlier)[1]<10)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
327 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
328 tab_n_row = dim(PE_TE_data_outlier)[1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
329 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
330 tab_n_row = 10;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
331 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
332
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
333 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font>&nbsp;&nbsp;&nbsp;&nbsp;",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
334 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
335 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
336 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
337
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
338 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
339 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
340 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
341
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
342
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
343 for(i in 1:tab_n_row)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
344 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
345 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
346 '<tr>','<td>',as.character(PE_TE_data_outlier_sorted[i,1]),'</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
347 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
348 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
349 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
350 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
351 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
352 cat('</table><br><br>\n',file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
353
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
354
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
355 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
356
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
357
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
358
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
359 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
360 # Heatmap
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
361 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
362 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
363 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
364 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
365
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
366 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")])))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
367 hm_cluster = cutree(hc,k=hm_nclust);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
368
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
369 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
370 png(outplot, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
371 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
372 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
373 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
374 trace="none", cexCol=1, col=greenred(100),Colv=F,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
375 labCol=c("Proteins","Transcripts"), scale="col",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
376 hclustfun = hclust, distfun = dist);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
377
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
378 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
379
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
380
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
381 p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
382 dendrogram = "row", colors = greenred(100),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
383 hclustfun = hclust, distfun = dist,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
384 show_grid = FALSE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
385 saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
386
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
387 cat('<tr><td align=center colspan="2">',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
388 '<img src="PE_TE_heatmap.png" width=800 height=800>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
389 gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
390 '</td></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
391 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
392
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
393
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
394 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
395 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
396 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
397 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
398
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
399
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
400 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
401 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
402 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
403
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
404
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
405 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
406 # K-means clustering
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
407 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
408 singlesample_kmeans=function(PE_TE_data, htmloutfile, nclust){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
409 PE_TE_data_kdata = PE_TE_data;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
410 k1 = kmeans(PE_TE_data_kdata[,c("PE_abundance","TE_abundance")], nclust);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
411 outplot = paste(outdir,"/PE_TE_kmeans.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
412 png(outplot, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
413 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
414 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
415 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
416 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
417 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
418
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
419 ind=which(k1$cluster==1);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
420 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
421 ind=which(k1$cluster==2);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
422 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
423 ind=which(k1$cluster==3);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
424 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="blue", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
425 ind=which(k1$cluster==4);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
426 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="cyan", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
427 ind=which(k1$cluster==5);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
428 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="black", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
429 ind=which(k1$cluster==6);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
430 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="brown", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
431 ind=which(k1$cluster==7);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
432 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="gold", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
433 ind=which(k1$cluster==8);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
434 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="thistle", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
435 ind=which(k1$cluster==9);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
436 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
437 ind=which(k1$cluster==10);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
439 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
440
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
441 # Interactive plot for k-means clustering
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
444 PE_ID, TE_abundance, PE_abundance),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
445 color=as.factor(k1$cluster))) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
446 xlab("Transcript Abundance") + ylab("Protein Abundance") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
448 geom_point(size = 1, shape = 8) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
449 theme_light() + theme(legend.position="none")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
451
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
452 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
453 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
454
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
455 tempind = order(k1$cluster);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
456 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
457 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
458
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
459 #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
460 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
461 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '</td></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
462 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
463 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
464 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
465
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
466 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
467
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
468 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
469 # scatter plot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
470 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
471 singlesample_scatter = function(PE_TE_data, outfile)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
472 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
475 png(outfile, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
476 # bitmap(outfile, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
477 g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
481 PE_ID, TE_abundance, PE_abundance)),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
482 size = .5)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
483 suppressMessages(plot(g));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile))))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
485 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
486 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
487
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
488 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
489 # Correlation table
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
490 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
491 singlesample_cor = function(PE_TE_data, htmloutfile, append=TRUE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
492 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
493 cor_result_pearson = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "pearson");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
494 cor_result_spearman = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "spearman");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
495 cor_result_kendall = cor.test(PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, method = "kendall");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
496
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
497 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
498 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
499 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
500
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
501 cat(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
502 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
503 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
504 file = htmloutfile, append = TRUE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
505 cat("</table>\n", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
506
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
507 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
508
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
509 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
510 # Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
511 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
512 multisample_boxplot = function(df, sampleinfo_df, outfile, fill_leg, user_xlab, user_ylab)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
513 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
514 tempdf = df[,-1, drop=FALSE];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
515 tempdf = t(tempdf) %>% as.data.frame();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
516 tempdf[is.na(tempdf)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
517 tempdf$Sample = rownames(tempdf);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
518 tempdf1 = melt(tempdf, id.vars = "Sample");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
519
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
520 if("Gene" %in% colnames(df)){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
521 tempdf1$Name = df$Gene;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
522 } else if ("Protein" %in% colnames(df)){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
523 tempdf1$Name = df$Protein;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
524 } else if ("Genes" %in% colnames(df)){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
525 tempdf1$Name = df$Genes;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
526 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
527
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
528 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
529 png(outplot, width = 6, height = 6, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
530 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
531 if(fill_leg=="No"){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
532 tempdf1$Group = c("case", "control")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
533 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
534
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
535 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
536 geom_boxplot()+
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
537 labs(x=user_xlab) + labs(y=user_ylab)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
538 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
539 plot(g);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
540 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
541 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
542
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
543 ## A wrapper to saveWidget which compensates for arguable BUG in
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
544 ## saveWidget which requires `file` to be in current working
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
545 ## directory.
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
546 saveWidget <- function (widget,file,...) {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
547 wd<-getwd()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
548 on.exit(setwd(wd))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
549 outDir<-dirname(file)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
550 file<-basename(file)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
551 setwd(outDir);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
552 htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
553 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
554
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
555 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
556 # Mean or Median of Replicates
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
557 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
558
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
559 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
560 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
561 grps = unique(sampleinfo_df[,2]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
562
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
563 TE_df_merged <<- sapply(grps, function(x){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
564 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
565 if(length(tempsample)!=1){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
566 apply(TE_df[,tempsample],1,method);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
567 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
568 return(TE_df[,tempsample]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
569 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
570 });
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
571 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
572 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
573
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
574 PE_df_merged <<- sapply(grps, function(x){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
575 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1]
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
576 if(length(tempsample)!=1){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
577 apply(PE_df[,tempsample],1,method);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
578 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
579 return(PE_df[,tempsample]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
580 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
581 });
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
582
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
583 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
584 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
585
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
586 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
587 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
588
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
589 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
590 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
591
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
592 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
593 # (T-Test or Wilcoxon ranksum test) and Volcano Plot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
594 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
595
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
596 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
597 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
598
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
599 PE_colnames = colnames(PE_df_data);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
600 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
601 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
602 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
603 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)});
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
604
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
605 if(method=="mean"){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
606 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
607 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
608 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
609 if(method=="median"){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
610 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
611 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
612 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
613 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
614 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
615
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
616
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
617 TE_colnames = colnames(TE_df_data);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
618 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
619 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
620 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
621 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)});
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
622
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
623 if(method=="mean"){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
624 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
625 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
626 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
627 if(method=="median"){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
628 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}})
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
629 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
630 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
631 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
632 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
633
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
634
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
635 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
636 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
637 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
638 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
639 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
640 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
641
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
642 if(length(condition_ind)!=1)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
643 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
644 # Volcano Plot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
645
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
646 if(volc_with=="adj_pval")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
647 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
648 PE_pval = PE_adj_pval
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
649 TE_pval = TE_adj_pval
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
650 volc_ylab = "-log10 Adjusted p-value";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
651 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
652 if(volc_with=="pval")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
653 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
654 volc_ylab = "-log10 p-value";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
655 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
656 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
657 outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
658 png(outplot_PE, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
659 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
660 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
661
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
662 plot(PE_df_logfold$LogFold, -log10(PE_pval),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
663 xlab="log2 fold change", ylab=volc_ylab,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
664 type="n")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
665 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
666 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
667 PE_df_logfold$color <- "black"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
668 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
669 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
670 sel1 <- which(PE_pval<=0.05)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
671 sel=intersect(sel,sel1)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
672 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
673 PE_df_logfold[sel,]$color <- "red"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
674 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
675 sel1 <- which(PE_pval>0.05)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
676 sel=intersect(sel,sel1)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
677 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
678 PE_df_logfold[sel,]$color <- "blue"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
679 abline(h = -log(0.05,base=10), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
680 abline(v = log(2,base=2), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
681 abline(v = log(0.5,base=2), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
682 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
683
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
686 Genes, LogFold, -log10(PE_pval), PE_pval))) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
687 xlab("log2 fold change") + ylab("-log10 p-value") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
688 geom_point(shape=1, size = 1.5, stroke = .2) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
692 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
693 theme_light() + theme(legend.position="none")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
694 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
695
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
696 outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
697 png(outplot_TE, width = 10, height = 10, units = 'in', res=300);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
698 # bitmap(outplot, "png16m");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
699 par(mfrow=c(1,1));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
700
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
701 plot(TE_df_logfold$LogFold, -log10(TE_pval),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
702 xlab="log2 fold change", ylab=volc_ylab,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
703 type="n")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
704
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
705 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
706 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
707 TE_df_logfold$color <- "black"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
708 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
709 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
710 sel1 <- which(TE_pval<=0.05)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
711 sel=intersect(sel,sel1)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
712 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
713 TE_df_logfold[sel,]$color <- "red"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
714 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
715 sel1 <- which(TE_pval>0.05)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
716 sel=intersect(sel,sel1)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
717 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
718 TE_df_logfold[sel,]$color <- "blue"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
719 abline(h = -log(0.05,base=10), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
720 abline(v = log(2,base=2), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
721 abline(v = log(0.5,base=2), col="red", lty=2)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
722 dev.off();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
723
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
726 Genes, LogFold, -log10(TE_pval), TE_pval))) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
727 xlab("log2 fold change") + ylab("-log10 p-value") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
728 geom_point(shape=1, size = 1.5, stroke = .2) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
731 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
732 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") +
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
733 theme_light() + theme(legend.position="none")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
734 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE)))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
735
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
736
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
737 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
738 cat("<tr><td align=center>",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
739 '<img src="TE_volcano.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
740 extractWidgetCode(outplot_TE)$widget_div,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
741 '</td>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
742 cat("<td align=center>",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
743 '<img src="PE_volcano.png" width=600 height=600>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
744 extractWidgetCode(outplot_PE)$widget_div,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
745 '</td></tr></table><br>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
746 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
747
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
748
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
749 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
750 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
751 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
752 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
753
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
754 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
755
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
756
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
757 #***************************************************************************************************************************************
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
758 # Functions: End
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
759 #***************************************************************************************************************************************
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
760
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
761
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
762 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
763 # Arguments
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
764 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
765 noargs = 12;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
766 args = commandArgs(trailingOnly = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
767 if(length(args) != noargs)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
768 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
769 stop(paste("Please check usage. Number of arguments is not equal to ",noargs,sep="",collapse=""));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
770 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
771
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
772 mode = args[1]; # "multiple" or "logfold"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
773 method = args[2]; # "mean" or "median"
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
774 sampleinfo_file = args[3];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
775 proteome_file = args[4];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
776 transcriptome_file = args[5];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
777 correction_method = args[6];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
778 cookdist_upper_cutoff = args[7];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
779 numCluster = args[8];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
780 hm_nclust = args[9];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
781 volc_with = args[10];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
782
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
783 htmloutfile = args[11]; # html output file
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
784 outdir = args[12]; # html supporting files
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
785
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
786 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
787 # Check for file existance
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
788 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
789 if(! file.exists(proteome_file))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
790 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
791 stop(paste("Proteome Data file does not exists. Path given: ",proteome_file,sep="",collapse=""));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
792 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
793 if(! file.exists(transcriptome_file))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
794 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
795 stop(paste("Transcriptome Data file does not exists. Path given: ",transcriptome_file,sep="",collapse=""));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
796 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
797
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
798 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
799 # Load library
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
800 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
801 options(warn=-1);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
802
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
803 suppressPackageStartupMessages(library(dplyr));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
804 suppressPackageStartupMessages(library(data.table));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
805 suppressPackageStartupMessages(library(gplots));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
806 suppressPackageStartupMessages(library(ggplot2));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
807 suppressPackageStartupMessages(library(ggfortify));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
808 suppressPackageStartupMessages(library(plotly));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
809 suppressPackageStartupMessages(library(d3heatmap));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
810
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
811 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
812 # Select mode and parse experiment design file
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
813 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
814 if(mode=="multiple")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
815 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
816 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
817 expDesign_cc = expDesign[1:2,];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
818
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
819 sampleinfo_df = expDesign[3:nrow(expDesign),];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
820 rownames(sampleinfo_df)=1:nrow(sampleinfo_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
821 colnames(sampleinfo_df) = c("Sample","Group");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
822
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
823 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
824 condition_g_name = "case";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
825 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
826 control_g_name = "control";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
827 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
828 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
829 sampleinfo_df_orig = sampleinfo_df;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
830 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
831
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
832 if(mode=="logfold")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
833 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
834 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
835 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
836
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
837 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
838 # Parse Transcriptome data
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
839 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
840 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
841 if(mode=="multiple")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
842 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
843 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
844 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
845 if(mode=="logfold")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
846 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
847 TE_df = TE_df_orig;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
848 colnames(TE_df) = c("Genes", "LogFold");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
849 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
850 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
851 # Parse Proteome data
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
852 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
853 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
854 if(mode=="multiple")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
855 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
856 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
857 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
858 if(mode=="logfold")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
859 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
860 PE_df = PE_df_orig;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
861 colnames(PE_df) = c("Genes", "LogFold");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
862 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
863
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
864 #=============================================================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
865 # Create directory structures and then set the working directory to output directory
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
866 #=============================================================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
867 if(! file.exists(outdir))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
868 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
869 dir.create(outdir);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
870 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
871 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
872 # Write initial data summary in html outfile
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
873 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
874 cat("<html><head></head><body>\n", file = htmloutfile);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
875
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
876 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
877 "<font><h3>Input data summary</h3></font>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
878 "<ul>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
879 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
880 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
881 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
882 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
883
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
884 cat("<h3 id=table_of_content>Table of Contents:</h3>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
885 "<ul>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
886 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
887 "<li><a href=#corr_data>Correlation</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
888 "<li><a href=#regression_data>Regression analysis</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
889 "<li><a href=#inf_obs>Influential observations</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
890 "<li><a href=#cluster_data>Cluster analysis</a></li></ul><hr/>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
891 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
892 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
893 # Find common samples
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
894 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
895 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
896
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
897 if(length(common_samples)==0)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
898 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
899 stop("No common samples found ");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
900 cat("<b>Please check your experiment design file. Sample names (column names) in the Transcriptome and the Proteome data do not match. </b>\n",file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
901 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
902
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
903 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
904 # Create subsets based on common samples
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
905 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
906 TE_df = select(TE_df, 1, common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
907 PE_df = select(PE_df, 1, common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
908 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
909 rownames(sampleinfo_df) = sampleinfo_df[,1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
910
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
911 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
912 # Check for number of rows similarity
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
913 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
914 if(nrow(TE_df) != nrow(PE_df))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
915 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
916 stop("Number of rows in Transcriptome and Proteome data are not same i.e. they are not paired");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
917 cat("<b>The correlation analysis expects paired TE and PE data i.e. (i)th gene/transcript of TE file should correspond to (i)th protein of PE file. In the current input provided there is mismatch in terms of number of rows of TE and PE file. Please make sure you provide paired data.</b>\n",file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
918 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
919
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
920 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
921 # Number of groups
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
922 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
923 ngrps = unique(sampleinfo_df[,2]) %>% length();
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
924 grps = unique(sampleinfo_df[,2]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
925 names(grps) = grps;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
926
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
927 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
928 # Change column1 name
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
929 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
930 colnames(TE_df)[1] = "Gene";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
931 colnames(PE_df)[1] = "Protein";
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
932
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
933 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
934 # Treat missing values
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
935 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
936 TE_nacount = sum(is.na(TE_df));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
937 PE_nacount = sum(is.na(PE_df));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
938
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
939 TE_df[is.na(TE_df)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
940 PE_df[is.na(PE_df)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
941
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
942 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
943 # Obtain JS/HTML lines for interactive visualization
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
944 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
945 extractWidgetCode = function(outplot){
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
946 lines <- readLines(gsub("\\.png", "\\.html", outplot))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
947 return(list(
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
948 'prescripts' = c('',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
949 gsub('script', 'script',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
950 lines[grep('<head>',lines) + 3
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
951 :grep('</head>' ,lines) - 5]),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
952 ''),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
953 'widget_div' = paste('',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
954 gsub('width:100%;height:400px',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
955 'width:500px;height:500px',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
956 lines[grep(lines, pattern='html-widget')]),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
957 '', sep=''),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
958 'postscripts' = paste('',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
959 gsub('script', 'script',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
960 lines[grep(lines, pattern='<script type')]),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
961 '', sep='')));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
962 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
963 prescripts <- list()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
964 postscripts <- list()
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
965
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
966
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
967 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
968 # Decide based on analysis mode
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
969 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
970 if(mode=="logfold")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
971 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
973 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
974
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
975 # TE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
976 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
981
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
982 # PE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
983 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
986
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
988 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
989
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
990 # TE PE scatter
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
993 singlesample_scatter(PE_TE_data, outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
994 lines <- extractWidgetCode(outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
995 postscripts <- c(postscripts, lines$postscripts);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
997 PE_TE_data = data.frame(PE_df, TE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
999
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1000 # TE PE Cor
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1004 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1005 cat('</td></table>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1006 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1007
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1008 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1009 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1010
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1011 # TE PE Regression
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1018
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1020 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1021
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1022 # TE PE Heatmap
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1023 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1024 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1025 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1026 prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1027
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1028
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1029 # TE PE Clustering (kmeans)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1030 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1031 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1032
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1033 }else{
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1034 if(mode=="multiple")
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1035 {
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1036 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1037 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1038
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1039 # TE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1040 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1041 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1042 colnames(temp_df_te_data) = colnames(TE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1043 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1044 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1045 prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1046 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1047 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1048 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1049 "<tr><td align=center>", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1050 cat('<img src="Box_TE_all_rep.png" width=500 height=500>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1051 lines$widget_div, '</td>', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1052
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1053 # PE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1054 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1055 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1056 colnames(temp_df_pe_data) = colnames(PE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1057 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1058 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1059 #prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1060 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1061 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1062 lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1063
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1064 # Calc TE PCA
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1065 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1066 multisample_PCA(TE_df, sampleinfo_df, outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1067 PCA_TE <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1068 postscripts <- c(postscripts, PCA_TE$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1069
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1070 # Calc PE PCA
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1071 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1072 multisample_PCA(PE_df, sampleinfo_df, outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1073 PCA_PE <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1074 postscripts <- c(postscripts, PCA_PE$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1075
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1076 # Replicate mode
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1077 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1078 TE_df = templist$TE_df_merged;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1079 PE_df = templist$PE_df_merged;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1080 sampleinfo_df = templist$sampleinfo_df_merged;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1081 rownames(sampleinfo_df) = sampleinfo_df[,1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1082
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1083 # TE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1084 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1085 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)]));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1086 colnames(temp_df_te_data) = colnames(TE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1087 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1088 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1089 #prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1090 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1091 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1092 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1093
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1094 # PE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1095 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1096 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)]));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1097 colnames(temp_df_pe_data) = colnames(PE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1098 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1099 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1100 #prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1101 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1102 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500>', lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1103
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1104 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1105 # Calculating log fold change and running the "single" code part
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1106 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1107
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1108 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1109 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2)));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1110
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1111 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1112 # Treat missing values
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1113 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1114
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1115 TE_df[is.infinite(TE_df[,2]),2] = NA;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1116 PE_df[is.infinite(PE_df[,2]),2] = NA;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1117 TE_df[is.na(TE_df)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1118 PE_df[is.na(PE_df)] = 0;
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1119
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1120 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change"))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1121 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1122 # Find common samples
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1123 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1124
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1125 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1126 TE_df = select(TE_df, 1, common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1127 PE_df = select(PE_df, 1, common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1128 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1129 rownames(sampleinfo_df) = sampleinfo_df[,1];
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1130
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1131 # TE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1132 outplot = paste(outdir,"/Box_TE.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1133 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1134 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1135 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1136 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1137 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1138 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1139
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1140 # PE Boxplot
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1141 outplot = paste(outdir,"/Box_PE.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1142 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1143 lines <- extractWidgetCode(outplot)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1144 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1145 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,'</td></tr></table>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1146
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1147
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1148 # Log Fold Data
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1149 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1150 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/TE_volcano.png",sep="",collapse=""))$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1151 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_volcano.png",sep="",collapse=""))$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1152
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1153 # Print PCA
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1154
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1155 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1156 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500>', PCA_TE$widget_div, '</td>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1157 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500>', PCA_PE$widget_div, '</td></tr></table>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1158 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1159
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1160
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1161
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1162 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1163 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1164
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1165 PE_TE_data = data.frame(PE_df, TE_df);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1166 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1167
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1168 # TE PE scatter
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1169 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape="");
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1170 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1171 singlesample_scatter(PE_TE_data, outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1172 lines <- extractWidgetCode(outplot);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1173 postscripts <- c(postscripts, lines$postscripts);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1174 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div),
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1175 '</td>\n', file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1176
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1177 # TE PE Cor
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1178 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1179 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1180 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1181 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1182 cat('</td></table>',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1183 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1184
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1185 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1186 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1187
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1188 # TE PE Regression
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1189 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1190 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1191 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1192 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1193 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1194 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts,
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1195 gsub('data-for="html', 'data-for="secondhtml"',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1196 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts)));
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1197
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1198 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n',
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1199 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1200
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1201 #TE PE Heatmap
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1202 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1203 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1204 postscripts <- c(postscripts, lines$postscripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1205 prescripts <- c(prescripts, lines$prescripts)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1206
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1207 #TE PE Clustering (kmeans)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1208 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster))
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1209 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1210 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1211 }
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1212 cat("<h3>Go To:</h3>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1213 "<ul>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1214 "<li><a href=#sample_dist>Sample distribution</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1215 "<li><a href=#corr_data>Correlation</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1216 "<li><a href=#regression_data>Regression analysis</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1217 "<li><a href=#inf_obs>Influential observations</a></li>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1218 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1219 "<br><a href=#>TOP</a>",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1220 file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1221 cat("</body></html>\n", file = htmloutfile, append = TRUE);
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1222
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1223
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1224 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1225 # Add masked-javascripts tags to HTML file in the head and end
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1226 #===============================================================================
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1227
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1228 htmllines <- readLines(htmloutfile)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1229 htmllines[1] <- paste('<html>\n<head>\n', paste(prescripts, collapse='\n'), '\n</head>\n<body>')
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1230 cat(paste(htmllines, collapse='\n'), file = htmloutfile)
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1231 cat('\n', paste(postscripts, collapse='\n'), "\n",
0072d7fe861a planemo upload
rsajulga
parents:
diff changeset
1232 "</body>\n</html>\n", file = htmloutfile, append = TRUE);