comparison protein_rna_correlation.r @ 6:8e9428eca82c draft

planemo upload
author pravs
date Wed, 20 Jun 2018 21:09:54 -0400
parents 6bf0203ee17e
children 80892c607b1e
comparison
equal deleted inserted replaced
5:6bf0203ee17e 6:8e9428eca82c
534 #============================================================================================================= 534 #=============================================================================================================
535 # Distribution of proteome and transcriptome abundance (Box and Density plot) 535 # Distribution of proteome and transcriptome abundance (Box and Density plot)
536 #============================================================================================================= 536 #=============================================================================================================
537 cat("Generating Box and Density plot\n",file=logfile, append=T); 537 cat("Generating Box and Density plot\n",file=logfile, append=T);
538 outplot = paste(outdir,"/AbundancePlot.png",sep="",collapse=""); 538 outplot = paste(outdir,"/AbundancePlot.png",sep="",collapse="");
539 png(outplot); 539 #png(outplot);
540 bitmap(outplot, "png16m");
540 par(mfrow=c(2,2)); 541 par(mfrow=c(2,2));
541 boxplot(proteome_df[,2], ylab="Abundance", main="Proteome abundance", cex.lab=1.5); 542 boxplot(proteome_df[,2], ylab="Abundance", main="Proteome abundance", cex.lab=1.5);
542 plot(density(proteome_df[,2]), xlab="Protein Abundance", ylab="Density", main="Proteome abundance", cex.lab=1.5); 543 plot(density(proteome_df[,2]), xlab="Protein Abundance", ylab="Density", main="Proteome abundance", cex.lab=1.5);
543 boxplot(transcriptome_df[,2], ylab="Abundance", main="Transcriptome abundance", cex.lab=1.5); 544 boxplot(transcriptome_df[,2], ylab="Abundance", main="Transcriptome abundance", cex.lab=1.5);
544 plot(density(transcriptome_df[,2]), xlab="Transcript Abundance", ylab="Density", main="Transcriptome abundance", cex.lab=1.5); 545 plot(density(transcriptome_df[,2]), xlab="Transcript Abundance", ylab="Density", main="Transcriptome abundance", cex.lab=1.5);
552 #============================================================================================================= 553 #=============================================================================================================
553 # Scatter plot 554 # Scatter plot
554 #============================================================================================================= 555 #=============================================================================================================
555 cat("Generating scatter plot\n",file=logfile, append=T); 556 cat("Generating scatter plot\n",file=logfile, append=T);
556 outplot = paste(outdir,"/AbundancePlot_scatter.png",sep="",collapse=""); 557 outplot = paste(outdir,"/AbundancePlot_scatter.png",sep="",collapse="");
557 png(outplot); 558 #png(outplot);
559 bitmap(outplot,"png16m")
558 par(mfrow=c(1,1)); 560 par(mfrow=c(1,1));
559 scatter.smooth(transcriptome_df[,2], proteome_df[,2], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); 561 scatter.smooth(transcriptome_df[,2], proteome_df[,2], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
560 562
561 cat( 563 cat(
562 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n", 564 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n",
634 cat( 636 cat(
635 "<font color='blue'><h3>Plotting various regression diagnostics plots</h3></font>\n", 637 "<font color='blue'><h3>Plotting various regression diagnostics plots</h3></font>\n",
636 file = htmloutfile, append = TRUE); 638 file = htmloutfile, append = TRUE);
637 639
638 outplot = paste(outdir,"/PE_GE_lm_1.png",sep="",collapse=""); 640 outplot = paste(outdir,"/PE_GE_lm_1.png",sep="",collapse="");
639 png(outplot); 641 #png(outplot);
642 bitmap(outplot,"png16m");
640 par(mfrow=c(1,1)); 643 par(mfrow=c(1,1));
641 plot(regmodel, 1, cex.lab=1.5); 644 plot(regmodel, 1, cex.lab=1.5);
642 dev.off(); 645 dev.off();
643 646
644 outplot = paste(outdir,"/PE_GE_lm_2.png",sep="",collapse=""); 647 outplot = paste(outdir,"/PE_GE_lm_2.png",sep="",collapse="");
645 png(outplot); 648 #png(outplot);
649 bitmap(outplot,"png16m");
646 par(mfrow=c(1,1)); 650 par(mfrow=c(1,1));
647 plot(regmodel, 2, cex.lab=1.5); 651 plot(regmodel, 2, cex.lab=1.5);
648 dev.off(); 652 dev.off();
649 653
650 outplot = paste(outdir,"/PE_GE_lm_3.png",sep="",collapse=""); 654 outplot = paste(outdir,"/PE_GE_lm_3.png",sep="",collapse="");
651 png(outplot); 655 #png(outplot);
656 bitmap(outplot,"png16m");
652 par(mfrow=c(1,1)); 657 par(mfrow=c(1,1));
653 plot(regmodel, 3, cex.lab=1.5); 658 plot(regmodel, 3, cex.lab=1.5);
654 dev.off(); 659 dev.off();
655 660
656 outplot = paste(outdir,"/PE_GE_lm_5.png",sep="",collapse=""); 661 outplot = paste(outdir,"/PE_GE_lm_5.png",sep="",collapse="");
657 png(outplot); 662 #png(outplot);
663 bitmap(outplot,"png16m");
658 par(mfrow=c(1,1)); 664 par(mfrow=c(1,1));
659 plot(regmodel, 5, cex.lab=1.5); 665 plot(regmodel, 5, cex.lab=1.5);
660 dev.off(); 666 dev.off();
661 667
662 outplot = paste(outdir,"/PE_GE_lm.pdf",sep="",collapse=""); 668 outplot = paste(outdir,"/PE_GE_lm.pdf",sep="",collapse="");
703 709
704 cooksd <- cooks.distance(regmodel); 710 cooksd <- cooks.distance(regmodel);
705 711
706 cat("Generating cooksd plot\n",file=logfile, append=T); 712 cat("Generating cooksd plot\n",file=logfile, append=T);
707 outplot = paste(outdir,"/PE_GE_lm_cooksd.png",sep="",collapse=""); 713 outplot = paste(outdir,"/PE_GE_lm_cooksd.png",sep="",collapse="");
708 png(outplot); 714 #png(outplot);
715 bitmap(outplot,"png16m");
709 par(mfrow=c(1,1)); 716 par(mfrow=c(1,1));
710 plot(cooksd, pch="*", cex=2, cex.lab=1.5,main="Influential Obs. by Cooks distance", ylab="Cook\'s distance", xlab="Observations") # plot cooks distance 717 plot(cooksd, pch="*", cex=2, cex.lab=1.5,main="Influential Obs. by Cooks distance", ylab="Cook\'s distance", xlab="Observations") # plot cooks distance
711 abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line 718 abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line
712 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels 719 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels
713 dev.off(); 720 dev.off();
763 770
764 #============================================================================================================= 771 #=============================================================================================================
765 # Scatter plot 772 # Scatter plot
766 #============================================================================================================= 773 #=============================================================================================================
767 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); 774 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse="");
768 png(outplot); 775 #png(outplot);
776 bitmap(outplot,"png16m");
769 par(mfrow=c(1,1)); 777 par(mfrow=c(1,1));
770 scatter.smooth(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); 778 scatter.smooth(PE_GE_data_no_outlier[,"GE_abundance"], PE_GE_data_no_outlier[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
771 779
772 cat( 780 cat(
773 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance, after removal of outliers/influential observations</h3></font>\n", 781 "<font color='blue'><h3>Scatter plot between Proteome and Transcriptome Abundance, after removal of outliers/influential observations</h3></font>\n",
805 "<font color='blue'><h3>Heatmap of PE and GE abundance values</h3></font>\n", 813 "<font color='blue'><h3>Heatmap of PE and GE abundance values</h3></font>\n",
806 file = htmloutfile, append = TRUE); 814 file = htmloutfile, append = TRUE);
807 815
808 cat("Generating heatmap plot\n",file=logfile, append=T); 816 cat("Generating heatmap plot\n",file=logfile, append=T);
809 outplot = paste(outdir,"/PE_GE_heatmap.png",sep="",collapse=""); 817 outplot = paste(outdir,"/PE_GE_heatmap.png",sep="",collapse="");
810 png(outplot); 818 #png(outplot);
819 bitmap(outplot,"png16m");
811 par(mfrow=c(1,1)); 820 par(mfrow=c(1,1));
812 #heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col"); 821 #heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("PE","GE"), scale="col");
813 my_palette <- colorRampPalette(c("green", "white", "red"))(299); 822 my_palette <- colorRampPalette(c("green", "white", "red"))(299);
814 heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=my_palette ,Colv=F, labCol=c("PE","GE"), scale="col", dendrogram = "row"); 823 heatmap.2(as.matrix(PE_GE_data[,c("PE_abundance","GE_abundance")]), trace="none", cexCol=1, col=my_palette ,Colv=F, labCol=c("PE","GE"), scale="col", dendrogram = "row");
815 dev.off(); 824 dev.off();
826 835
827 836
828 k1 = kmeans(PE_GE_data_kdata[,c("PE_abundance","GE_abundance")], 5); 837 k1 = kmeans(PE_GE_data_kdata[,c("PE_abundance","GE_abundance")], 5);
829 cat("Generating kmeans plot\n",file=logfile, append=T); 838 cat("Generating kmeans plot\n",file=logfile, append=T);
830 outplot = paste(outdir,"/PE_GE_kmeans.png",sep="",collapse=""); 839 outplot = paste(outdir,"/PE_GE_kmeans.png",sep="",collapse="");
831 png(outplot); 840 #png(outplot);
841 bitmap(outplot,"png16m");
832 par(mfrow=c(1,1)); 842 par(mfrow=c(1,1));
833 scatter.smooth(PE_GE_data_kdata[,"GE_abundance"], PE_GE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); 843 scatter.smooth(PE_GE_data_kdata[,"GE_abundance"], PE_GE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5);
834 844
835 ind=which(k1$cluster==1); 845 ind=which(k1$cluster==1);
836 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="red", pch=16); 846 points(PE_GE_data_kdata[ind,"GE_abundance"], PE_GE_data_kdata[ind,"PE_abundance"], col="red", pch=16);