diff protein_rna_correlation.r @ 5:6bf0203ee17e draft

planemo upload
author pravs
date Wed, 20 Jun 2018 20:43:56 -0400
parents 412403eec79c
children 8e9428eca82c
line wrap: on
line diff
--- a/protein_rna_correlation.r	Sun Jun 17 05:06:18 2018 -0400
+++ b/protein_rna_correlation.r	Wed Jun 20 20:43:56 2018 -0400
@@ -80,8 +80,8 @@
 # ............................SCRIPT STARTS FROM HERE .............................
 #==================================================================================
 # Warning Off
-oldw <- getOption("warn")
-options(warn = -1)
+# oldw <- getOption("warn")
+# options(warn = -1)
 #=============================================================================================================
 # Functions
 #=============================================================================================================
@@ -411,10 +411,10 @@
 		
 			cat(
 		"<font color='blue'><h3>Download mapped unmapped data</h3></font>",
-		"<ul><li>Protein mapped data: ", '<a href="',paste(outdir,"/",PE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
-		"<li>Protein unmapped data: ", '<a href="',paste(outdir,"/",PE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
-		"<li>Transcript mapped data: ", '<a href="',paste(outdir,"/",GE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
-		"<li>Transcript unmapped data: ", '<a href="',paste(outdir,"/",GE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+		"<ul><li>Protein mapped data: ", '<a href="',paste(PE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+		"<li>Protein unmapped data: ", '<a href="',paste(PE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+		"<li>Transcript mapped data: ", '<a href="',paste(GE_outfile_mapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+		"<li>Transcript unmapped data: ", '<a href="',paste(GE_outfile_unmapped,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
 		file = htmloutfile, append = TRUE);
 	}
 	
@@ -422,8 +422,8 @@
 	write.table(GE_df[rowpair[,2], c(GE_idcolno,GE_expcolno)], file=paste(outdir,"/",GE_expfile,sep="",collapse=""), row.names=F, quote=F, sep="\t", eol="\n")
 	
 		cat(
-	"<li>Protein abundance data: ", '<a href="',paste(outdir,"/",PE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
-	"<li>Transcript abundance data: ", '<a href="',paste(outdir,"/",GE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
+	"<li>Protein abundance data: ", '<a href="',paste(PE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+	"<li>Transcript abundance data: ", '<a href="',paste(GE_expfile,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
 	file = htmloutfile, append = TRUE);
 	
 #==========================================================================================
@@ -470,8 +470,8 @@
 		
 		# Write excluded transcriptomics and proteomics data link to html file
 		cat(
-		"<ul><li>Protein excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",PE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
-		"<li>Transcript excluded data with NA or Inf or -Inf: ", '<a href="',paste(outdir,"/",GE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
+		"<ul><li>Protein excluded data with NA or Inf or -Inf: ", '<a href="',paste(PE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li>",
+		"<li>Transcript excluded data with NA or Inf or -Inf: ", '<a href="',paste(GE_outfile_excluded_naInf,sep="",collapse=""),'" target="_blank"> Link</a>',"</li></ul>",
 		file = htmloutfile, append = TRUE);
 		
 		# Keep the unexcluded entries only
@@ -535,7 +535,7 @@
 # Distribution of proteome and transcriptome abundance (Box and Density plot)
 #=============================================================================================================
 	cat("Generating Box and Density plot\n",file=logfile, append=T);
-	outplot = paste("./",outdir,"/AbundancePlot.png",sep="",collapse="");
+	outplot = paste(outdir,"/AbundancePlot.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(2,2));
 	boxplot(proteome_df[,2], ylab="Abundance", main="Proteome abundance", cex.lab=1.5);
@@ -553,7 +553,7 @@
 # Scatter plot
 #=============================================================================================================
 	cat("Generating scatter plot\n",file=logfile, append=T);
-	outplot = paste("./",outdir,"/AbundancePlot_scatter.png",sep="",collapse="");
+	outplot = paste(outdir,"/AbundancePlot_scatter.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	scatter.smooth(transcriptome_df[,2], proteome_df[,2], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);	
@@ -620,7 +620,7 @@
 #=============================================================================================================
 # Plotting various regression diagnostics plots
 #=============================================================================================================
-	outplot1 = paste("./",outdir,"/PE_GE_modelfit.pdf",sep="",collapse="");
+	outplot1 = paste(outdir,"/PE_GE_modelfit.pdf",sep="",collapse="");
 	pdf(outplot1);
 	devnum = which(unlist(sapply(2:length(.Devices), function(x){attributes(.Devices[[x]])$filepath==outplot1})))+1
 	print(.Devices)
@@ -635,31 +635,31 @@
 	"<font color='blue'><h3>Plotting various regression diagnostics plots</h3></font>\n",
 	file = htmloutfile, append = TRUE);
 
-	outplot = paste("./",outdir,"/PE_GE_lm_1.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_1.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	plot(regmodel, 1, cex.lab=1.5);
 	dev.off();
 	
-	outplot = paste("./",outdir,"/PE_GE_lm_2.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_2.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	plot(regmodel, 2, cex.lab=1.5);
 	dev.off();
 	
-	outplot = paste("./",outdir,"/PE_GE_lm_3.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_3.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	plot(regmodel, 3, cex.lab=1.5);
 	dev.off();
 	
-	outplot = paste("./",outdir,"/PE_GE_lm_5.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_5.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	plot(regmodel, 5, cex.lab=1.5);
 	dev.off();
 	
-	outplot = paste("./",outdir,"/PE_GE_lm.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm.pdf",sep="",collapse="");
 	pdf(outplot);
 	plot(regmodel);
 	dev.off();
@@ -704,7 +704,7 @@
 	cooksd <- cooks.distance(regmodel);
 	
 	cat("Generating cooksd plot\n",file=logfile, append=T);
-	outplot = paste("./",outdir,"/PE_GE_lm_cooksd.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_cooksd.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	plot(cooksd, pch="*", cex=2, cex.lab=1.5,main="Influential Obs. by Cooks distance", ylab="Cook\'s distance", xlab="Observations")  # plot cooks distance
@@ -737,7 +737,7 @@
 	
 	cat("Writing influential observations\n",file=logfile, append=T);
 	
-	outdatafile = paste("./",outdir,"/PE_GE_influential_observation.tsv", sep="", collapse="");
+	outdatafile = paste(outdir,"/PE_GE_influential_observation.tsv", sep="", collapse="");
 	cat('<a href="',outdatafile, '" target="_blank">Download entire list</a>',file = htmloutfile, append = TRUE);	
 	write.table(PE_GE_data_outlier, file=outdatafile, row.names=F, sep="\t", quote=F);
 	
@@ -764,7 +764,7 @@
 	#=============================================================================================================
 	# Scatter plot
 	#=============================================================================================================
-	outplot = paste("./",outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
+	outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	scatter.smooth(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);	
@@ -806,10 +806,12 @@
 	file = htmloutfile, append = TRUE);
 	
 	cat("Generating heatmap plot\n",file=logfile, append=T);
-	outplot = paste("./",outdir,"/PE_GE_heatmap.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_heatmap.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
-	heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col");
+	#heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col");
+	my_palette <- colorRampPalette(c("green", "white", "red"))(299);
+	heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=my_palette ,Colv=F, labCol=c("PE","GE"), scale="col", dendrogram = "row");
 	dev.off();
 	
 	cat(
@@ -825,7 +827,7 @@
 	
 	k1 = kmeans(PE_GE_data_kdata[,c("PE_abundance","GE_abundance")], 5);
 	cat("Generating kmeans plot\n",file=logfile, append=T);
-	outplot = paste("./",outdir,"/PE_GE_kmeans.png",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_kmeans.png",sep="",collapse="");
 	png(outplot);
 	par(mfrow=c(1,1));
 	scatter.smooth(PE_GE_data_kdata[,"GE_abundance"], PE_GE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
@@ -864,7 +866,7 @@
 	# Linear regression with removal of outliers
 	regmodel_no_outlier = lm(PE_abundance~GE_abundance, data=PE_GE_data_no_outlier);
 	regmodel_no_outlier_predictedy = predict(regmodel_no_outlier, PE_GE_data_no_outlier);
-	outplot = paste("./",outdir,"/PE_GE_lm_without_outliers.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lm_without_outliers.pdf",sep="",collapse="");
 	plot(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Linear regression with removal of outliers");
 	points(PE_GE_data_no_outlier[,"GE_abundance"], regmodel_no_outlier_predictedy, col="red");
 	
@@ -876,7 +878,7 @@
 	# Resistant regression (lqs / least trimmed squares method)
 	regmodel_lqs = lqs(PE_abundance~GE_abundance, data=PE_GE_data);
 	regmodel_lqs_predictedy = predict(regmodel_lqs, PE_GE_data);
-	outplot = paste("./",outdir,"/PE_GE_lqs.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_lqs.pdf",sep="",collapse="");
 	pdf(outplot);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Resistant regression (lqs / least trimmed squares method)");
 	points(PE_GE_data[,"GE_abundance"], regmodel_lqs_predictedy, col="red");
@@ -890,7 +892,7 @@
 	# Robust regression (rlm /  Huber M-estimator method)
 	regmodel_rlm = rlm(PE_abundance~GE_abundance, data=PE_GE_data);
 	regmodel_rlm_predictedy = predict(regmodel_rlm, PE_GE_data);
-	outplot = paste("./",outdir,"/PE_GE_rlm.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_rlm.pdf",sep="",collapse="");
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Robust regression (rlm /  Huber M-estimator method)");
 	points(PE_GE_data[,"GE_abundance"], regmodel_rlm_predictedy, col="red");
 	pdf(outplot);
@@ -915,7 +917,7 @@
 	regmodel_poly5_predictedy = predict(regmodel_poly5, PE_GE_data);
 	regmodel_poly6_predictedy = predict(regmodel_poly6, PE_GE_data);
 	
-	outplot = paste("./",outdir,"/PE_GE_poly2.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_poly2.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 2");
 	points(PE_GE_data[,"GE_abundance"], regmodel_poly2_predictedy, col="red");
@@ -923,7 +925,7 @@
 	plot(regmodel_poly2);
 	dev.off();
 
-	outplot = paste("./",outdir,"/PE_GE_poly3.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_poly3.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 3");
 	points(PE_GE_data[,"GE_abundance"], regmodel_poly3_predictedy, col="red");
@@ -931,7 +933,7 @@
 	plot(regmodel_poly3);
 	dev.off();
 
-	outplot = paste("./",outdir,"/PE_GE_poly4.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_poly4.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 4");
 	points(PE_GE_data[,"GE_abundance"], regmodel_poly4_predictedy, col="red");
@@ -939,7 +941,7 @@
 	plot(regmodel_poly4);
 	dev.off();
 
-	outplot = paste("./",outdir,"/PE_GE_poly5.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_poly5.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 5");
 	points(PE_GE_data[,"GE_abundance"], regmodel_poly5_predictedy, col="red");
@@ -947,7 +949,7 @@
 	plot(regmodel_poly5);
 	dev.off();
 	
-	outplot = paste("./",outdir,"/PE_GE_poly6.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_poly6.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Polynomial regression with degree 6");
 	points(PE_GE_data[,"GE_abundance"], regmodel_poly6_predictedy, col="red");
@@ -960,7 +962,7 @@
 	regmodel_gam_predictedy = predict(regmodel_gam, PE_GE_data);
 	
 	regmodel_gam_metrics = regr.eval(PE_GE_data$PE_abundance, regmodel_gam$fitted.values)
-	outplot = paste("./",outdir,"/PE_GE_gam.pdf",sep="",collapse="");
+	outplot = paste(outdir,"/PE_GE_gam.pdf",sep="",collapse="");
 	dev.set(devnum);
 	plot(PE_GE_data[,"GE_abundance"], PE_GE_data[,"PE_abundance"], xlab="GE_abundance", ylab="PE_abundance",main="Generalized additive models");
 	points(PE_GE_data[,"GE_abundance"], regmodel_gam_predictedy, col="red");
@@ -1019,7 +1021,7 @@
 	
 	
 	# Warning On
-	options(warn = oldw)
+	# options(warn = oldw)
 	
 	
 	
\ No newline at end of file