# HG changeset patch
# User galaxyp
# Date 1545339965 18000
# Node ID bcc7a4c4cc299c4981c3cd353efb64e1531d3ad2
# Parent 75faf9a89f5bc83086fbcf0e38552aa54d9c0817
planemo upload commit 1887dff812162880d66b003a927867cd5000c98f
diff -r 75faf9a89f5b -r bcc7a4c4cc29 quantp.r
--- a/quantp.r Fri Sep 14 12:22:31 2018 -0400
+++ b/quantp.r Thu Dec 20 16:06:05 2018 -0500
@@ -16,6 +16,7 @@
png(outfile, width = 6, height = 6, units = 'in', res=300);
# bitmap(outfile, "png16m");
g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3);
+ saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot)))
plot(g);
dev.off();
}
@@ -30,20 +31,20 @@
regmodel_summary = summary(regmodel);
cat("Linear Regression model fit between Proteome and Transcriptome data
\n",
- "
Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.
\n", - 'Parameter | Value |
---|
Parameter | Value |
---|---|
Formula | ","PE_abundance~TE_abundance"," |
Coefficients | ","|
",names(regmodel$coefficients[1])," | ",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")"," |
",names(regmodel$coefficients[2])," | ",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")"," |
Model parameters | ","|
Residual standard error | ",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom) |
F-statistic | ",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom) |
R-squared | ",regmodel_summary$r.squared," |
Adjusted R-squared | ",regmodel_summary$adj.r.squared," |
Coefficients | ","|
",names(regmodel$coefficients[1])," | ",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")"," |
",names(regmodel$coefficients[2])," | ",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")"," |
Model parameters | ","|
Residual standard error | ",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom) |
F-statistic | ",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom) |
R-squared | ",regmodel_summary$r.squared," |
Adjusted R-squared | ",regmodel_summary$adj.r.squared," |
', "1) Residuals vs Fitted plot | \n",
'2) Normal Q-Q plot of residuals |
---|---|
', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div), + ' | ', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div), + ' |
This plot checks for linear relationship assumptions. If a horizontal line is observed without any distinct patterns, it indicates a linear relationship. | \n',
'This plot checks whether residuals are normally distributed or not. It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line. |
Residuals from Regression | |
---|---|
Parameter | Value |
Residuals from Regression | |
Parameter | Value |
Mean Residual value | ",res_mean," |
Standard deviation (Residuals) | ",res_sd," |
Total outliers (Residual value > 2 standard deviation from the mean) | ',length(tempind),' (Download these ',length(tempind),' data points with high residual values here) | \n', - '
(Download the complete residuals data here) | \n', - "
3) Residuals vs Leverage plot | |
---|---|
', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div) + , ' | |
Mean Cook\'s distance | ",mean(cooksd, na.rm=T)," |
Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance) | ",length(tempind)," | \n", - - "
Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance | ",length(which(cooksd |
Scatterplot: Before removal | Scatterplot: After removal |
---|---|
", ' | \n', file = htmloutfile, append = TRUE); - - # After - cat("\n", - ' |
Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance) | ",length(tempind)," | \n", + + "
Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance | ",length(which(cooksd |
Scatterplot: Before removal | Scatterplot: After removal | ||
---|---|---|---|
", + '', + gsub('id="html', 'id="secondhtml"', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)), + ' | \n', + file = htmloutfile, append = TRUE); + + # After + cat("\n", + '', + gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div), + + ' |
\n',file = htmloutfile, append = TRUE); - - + cat(' \n',file = htmloutfile, append = TRUE); + + } @@ -297,7 +361,7 @@ #=============================================================================== singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ cat('
Transcript Abundance: %.3f Protein Abundance: %.3f", + PE_ID, TE_abundance, PE_abundance), + color=as.factor(k1$cluster))) + + xlab("Transcript Abundance") + ylab("Protein Abundance") + + scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) + + geom_point(size = 1, shape = 8) + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot))) + cat('
\n', - file = htmloutfile, append = TRUE); + file = htmloutfile, append = TRUE); } @@ -385,9 +474,14 @@ max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); png(outfile, width = 10, height = 10, units = 'in', res=300); # bitmap(outfile, "png16m"); - g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); - suppressMessages(plot(g)); - # plot(g); + suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + + xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + + geom_point(aes(text=sprintf("Gene: %s Transcript Abundance (log fold-change): %.3f Protein Abundance (log fold-change): %.3f", + PE_ID, TE_abundance, PE_abundance)), + size = .5)) + suppressMessages(plot(g)) + suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile)))) dev.off(); } @@ -422,23 +516,41 @@ tempdf[is.na(tempdf)] = 0; tempdf$Sample = rownames(tempdf); tempdf1 = melt(tempdf, id.vars = "Sample"); + + if("Gene" %in% colnames(df)){ + tempdf1$Name = df$Gene; + } else if ("Protein" %in% colnames(df)){ + tempdf1$Name = df$Protein; + } else if ("Genes" %in% colnames(df)){ + tempdf1$Name = df$Genes; + } + tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; png(outplot, width = 6, height = 6, units = 'in', res=300); # bitmap(outplot, "png16m"); - if(fill_leg=="Yes") - { - g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) - }else{ - if(fill_leg=="No") - { - tempdf1$Group = c("case", "control") - g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) - } + if(fill_leg=="No"){ + tempdf1$Group = c("case", "control") } + + g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + + geom_boxplot()+ + labs(x=user_xlab) + labs(y=user_ylab) + saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile))) plot(g); dev.off(); } +## A wrapper to saveWidget which compensates for arguable BUG in +## saveWidget which requires `file` to be in current working +## directory. +saveWidget <- function (widget,file,...) { + wd<-getwd() + on.exit(setwd(wd)) + outDir<-dirname(file) + file<-basename(file) + setwd(outDir); + htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE) +} #=============================================================================== # Mean or Median of Replicates @@ -446,35 +558,35 @@ mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) { -grps = unique(sampleinfo_df[,2]); - -TE_df_merged <<- sapply(grps, function(x){ - tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] - if(length(tempsample)!=1){ - apply(TE_df[,tempsample],1,method); - }else{ - return(TE_df[,tempsample]); - } -}); -TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); -colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); - -PE_df_merged <<- sapply(grps, function(x){ - tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] - if(length(tempsample)!=1){ - apply(PE_df[,tempsample],1,method); - }else{ - return(PE_df[,tempsample]); - } -}); - -PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); -colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); - -#sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); -sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); - -return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); + grps = unique(sampleinfo_df[,2]); + + TE_df_merged <<- sapply(grps, function(x){ + tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] + if(length(tempsample)!=1){ + apply(TE_df[,tempsample],1,method); + }else{ + return(TE_df[,tempsample]); + } + }); + TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); + colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); + + PE_df_merged <<- sapply(grps, function(x){ + tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] + if(length(tempsample)!=1){ + apply(PE_df[,tempsample],1,method); + }else{ + return(PE_df[,tempsample]); + } + }); + + PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); + colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); + + #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); + sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); + + return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); } #=============================================================================== @@ -483,127 +595,162 @@ perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) { - -PE_colnames = colnames(PE_df_data); -control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; -control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); -condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; -condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); - -if(method=="mean"){ - #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); - PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) -}else{ -if(method=="median"){ - PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) - # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + + PE_colnames = colnames(PE_df_data); + control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; + control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); + condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; + condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); + + if(method=="mean"){ + #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); + PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + }else{ + if(method=="median"){ + PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + } } -} -PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) - - -TE_colnames = colnames(TE_df_data); -control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; -control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); -condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; -condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); - -if(method=="mean"){ - # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); - TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) -}else{ - if(method=="median"){ - TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) - # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) + + + TE_colnames = colnames(TE_df_data); + control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; + control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); + condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; + condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); + + if(method=="mean"){ + # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); + TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + }else{ + if(method=="median"){ + TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) + # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); + } } -} -TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) - - -PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); -colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); -outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); -write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); -cat(" Download the complete fold change data here \n", - file = htmloutfile, append = TRUE); - - if(length(condition_ind)!=1) - { - # Volcano Plot - - if(volc_with=="adj_pval") - { - PE_pval = PE_adj_pval - TE_pval = TE_adj_pval - volc_ylab = "-log10 Adjusted p-value"; - }else{ - if(volc_with=="pval") - { - volc_ylab = "-log10 p-value"; - } - } - outplot = paste(outdir,"/PE_volcano.png",sep="",collapse=""); - png(outplot, width = 10, height = 10, units = 'in', res=300); - # bitmap(outplot, "png16m"); - par(mfrow=c(1,1)); + TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) + + + PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); + colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); + outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); + write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); + cat(" Download the complete fold change data here \n", + file = htmloutfile, append = TRUE); - plot(PE_df_logfold$LogFold, -log10(PE_pval), - xlab="log2 fold change", ylab=volc_ylab, - type="n") - sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use - points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") - #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold Log2 Fold-Change: %.3f -log10 p-value: %.3f p-value: %.3f", + Genes, LogFold, -log10(PE_pval), PE_pval))) + + xlab("log2 fold change") + ylab("-log10 p-value") + + geom_point(shape=1, size = 1.5, stroke = .2) + + scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + + geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + + geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + + geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE))) + + outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse=""); + png(outplot_TE, width = 10, height = 10, units = 'in', res=300); + # bitmap(outplot, "png16m"); + par(mfrow=c(1,1)); + + plot(TE_df_logfold$LogFold, -log10(TE_pval), + xlab="log2 fold change", ylab=volc_ylab, + type="n") + + sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use + points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") + TE_df_logfold$color <- "black" + #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold Log2 Fold-Change: %.3f -log10 p-value: %.3f p-value: %.3f", + Genes, LogFold, -log10(TE_pval), TE_pval))) + + xlab("log2 fold change") + ylab("-log10 p-value") + + geom_point(shape=1, size = 1.5, stroke = .2) + + scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + + geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + + geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + + geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + + theme_light() + theme(legend.position="none") + saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE))) + + + cat('
\n', + file = htmloutfile, append = TRUE); + + + }else{ + cat(' !!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!! ', + file = htmloutfile, append = TRUE); + } - - - outplot = paste(outdir,"/TE_volcano.png",sep="",collapse=""); - png(outplot, width = 10, height = 10, units = 'in', res=300); - # bitmap(outplot, "png16m"); - par(mfrow=c(1,1)); - - plot(TE_df_logfold$LogFold, -log10(TE_pval), - xlab="log2 fold change", ylab=volc_ylab, - type="n") - sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use - points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") - #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold
\n', - file = htmloutfile, append = TRUE); - }else{ - cat(' !!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!! ', - file = htmloutfile, append = TRUE); - } - } @@ -612,8 +759,6 @@ #*************************************************************************************************************************************** - - #=============================================================================== # Arguments #=============================================================================== @@ -638,7 +783,6 @@ htmloutfile = args[11]; # html output file outdir = args[12]; # html supporting files - #=============================================================================== # Check for file existance #=============================================================================== @@ -661,31 +805,33 @@ suppressPackageStartupMessages(library(gplots)); suppressPackageStartupMessages(library(ggplot2)); suppressPackageStartupMessages(library(ggfortify)); +suppressPackageStartupMessages(library(plotly)); +suppressPackageStartupMessages(library(d3heatmap)); #=============================================================================== # Select mode and parse experiment design file #=============================================================================== if(mode=="multiple") { - expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); - expDesign_cc = expDesign[1:2,]; - - sampleinfo_df = expDesign[3:nrow(expDesign),]; - rownames(sampleinfo_df)=1:nrow(sampleinfo_df); - colnames(sampleinfo_df) = c("Sample","Group"); - - condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; - condition_g_name = "case"; - control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; - control_g_name = "control"; - sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; - sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; - sampleinfo_df_orig = sampleinfo_df; + expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); + expDesign_cc = expDesign[1:2,]; + + sampleinfo_df = expDesign[3:nrow(expDesign),]; + rownames(sampleinfo_df)=1:nrow(sampleinfo_df); + colnames(sampleinfo_df) = c("Sample","Group"); + + condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; + condition_g_name = "case"; + control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; + control_g_name = "control"; + sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; + sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; + sampleinfo_df_orig = sampleinfo_df; } if(mode=="logfold") { - sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) + sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) } #=============================================================================== @@ -694,12 +840,12 @@ TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); if(mode=="multiple") { - TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; + TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; } if(mode=="logfold") { - TE_df = TE_df_orig; - colnames(TE_df) = c("Genes", "LogFold"); + TE_df = TE_df_orig; + colnames(TE_df) = c("Genes", "LogFold"); } #=============================================================================== # Parse Proteome data @@ -707,12 +853,12 @@ PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); if(mode=="multiple") { - PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; + PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; } if(mode=="logfold") { - PE_df = PE_df_orig; - colnames(PE_df) = c("Genes", "LogFold"); + PE_df = PE_df_orig; + colnames(PE_df) = c("Genes", "LogFold"); } #============================================================================================================= @@ -725,17 +871,17 @@ #=============================================================================== # Write initial data summary in html outfile #=============================================================================== - cat("\n", file = htmloutfile); - - cat(" QuanTP: Association between abundance ratios of transcript and protein\n", +cat("\n", file = htmloutfile); + +cat(" QuanTP: Association between abundance ratios of transcript and protein\n", " Input data summary\n", "
\n", " \n", file = htmloutfile, append = TRUE); - - cat(" Table of Contents:\n", + +cat("Table of Contents:\n", "
|