Mercurial > repos > iuc > mageck_test

diff test-data/out.count.Rnw @ 2:81bbbddcf285 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author: iuc
date: Wed, 04 Apr 2018 11:03:59 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out.count.Rnw	Wed Apr 04 11:03:59 2018 -0400
@@ -0,0 +1,237 @@
+% This is a template file for Sweave used in MAGeCK
+% Author: Wei Li, Shirley Liu lab
+% Do not modify lines beginning with "#__".
+\documentclass{article}
+
+\usepackage{amsmath}
+\usepackage{amscd}
+\usepackage[tableposition=top]{caption}
+\usepackage{ifthen}
+\usepackage{fullpage}
+\usepackage[utf8]{inputenc}
+% \usepackage{longtable}
+
+\begin{document}
+\setkeys{Gin}{width=0.9\textwidth}
+
+\title{MAGeCK Count Report}
+\author{Wei Li}
+
+\maketitle
+
+
+\tableofcontents
+
+\section{Summary}
+
+%Function definition
+<<label=funcdef,include=FALSE,echo=FALSE>>=
+genreporttable<-function(filelist,labellist,reads,mappedreads){
+  xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads);
+  colnames(xtb)=c("Label","Reads","Mapped","Percentage");
+  return (xtb);
+}
+genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){
+  xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini);
+  colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex");
+  return (xtb);
+}
+genreporttable3<-function(filelist,labellist){
+  xtb=data.frame(File=filelist,Label=labellist);
+  colnames(xtb)=c("File","Label");
+  return (xtb);
+}
+
+
+colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",  "#A65628", "#F781BF",
+          "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 
+          "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
+          "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F");
+
+
+
+genboxplot<-function(filename,...){
+  #slmed=read.table(filename,header=T)
+  slmed=read.table(filename,header=T)
+  slmat=as.matrix(slmed[,c(-1,-2)])
+  slmat_log=log2(slmat+1)
+
+  boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...)
+}
+
+
+genhistplot<-function(filename,isfile=T,...){
+  if(isfile){
+    slmed=read.table(filename,header=T)
+  }else{
+    slmed=filename;
+  }
+  tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1))
+  colnames(tabsmat)=colnames(slmed)[c(-1,-2)]
+  samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ]
+  if(ncol(tabsmat)>=1){
+    histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) })
+    xrange=range(unlist(lapply(histlist,function(X){X$mids})))
+    yrange=range(unlist(lapply(histlist,function(X){X$counts})))
+    hst1=histlist[[1]]
+    plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... )
+  }
+  if(ncol(tabsmat)>=2){ 
+    for(i in 2:ncol(tabsmat)){
+      hstn=histlist[[i]]
+      lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i])
+    }
+  }
+  legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol)
+}
+
+
+
+genclustering<-function(filename,...){
+  #slmed=read.table(filename,header=T)
+  slmed=read.table(filename,header=T)
+  slmat=as.matrix(slmed[,c(-1,-2)])
+  slmat_log=log2(slmat+1)
+
+  result=tryCatch({
+    library(gplots);
+    heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2)
+  }, error=function(e){
+    heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...)
+  });
+}
+
+ctfit_tx=0;
+
+
+panel.plot<-function(x,y,textnames=names(x),...){
+  par(new=TRUE)
+  m<-cbind(x,y)
+  plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...)
+  text(x,y,textnames,...)
+}
+
+
+genpcaplot<-function(filename,...){
+  #slmed=read.table(filename,header=T)
+  slmed=read.table(filename,header=T)
+  slmat=as.matrix(slmed[,c(-1,-2)])
+  slmat_log=log2(slmat+1)
+  ctfit_tx<<-prcomp(t(slmat_log),center=TRUE)
+  
+  # par(mfrow=c(2,1));
+  samplecol=colors[((1:ncol(slmat)) %% length(colors)) ]
+  # first 2 PCA
+  #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2]));
+  #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol);
+  # par(mfrow=c(1,1));
+  if(length(samplecol)>2){
+    pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol)
+  }else{
+    if(length(samplecol)>1){
+      pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol)
+   }
+  }
+
+
+}
+
+genpcavar<-function(){
+  # % variance 
+  varpca=ctfit_tx$sdev^2
+  varpca=varpca/sum(varpca)*100;
+  if(length(varpca)>10){
+    varpca=varpca[1:10];
+  }
+  plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained');
+}
+
+@
+
+%__FILE_SUMMARY__
+
+The statistics of comparisons are listed in Table 1 and Table 2.
+The corresponding fastq files in each row are listed in Table 3.
+
+<<label=tab1,echo=FALSE,results=tex>>=
+library(xtable)
+filelist=c("input_0.gz");
+labellist=c("test1_fastq_gz");
+reads=c(2500);
+mappedreads=c(1453);
+totalsgrnas=c(2550);
+zerocounts=c(1276);
+giniindex=c(0.5266899931488773);
+
+cptable=genreporttable(filelist,labellist,reads,mappedreads);
+print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one",
+    digits = c(0, 0, 0, 0,2),
+    align=c('c',  'c','c',  'c', 'c'),
+    table.placement = "tbp",
+    caption.placement = "top"))
+@
+
+<<label=tab2,echo=FALSE,results=tex>>=
+library(xtable)
+cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex);
+print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two",
+    digits = c(0, 0,0, 0,2),
+    align=c('c',  'c','c',  'c', 'c'),
+    table.placement = "tbp",
+    caption.placement = "top"))
+@
+
+
+
+
+
+<<label=tab3,echo=FALSE,results=tex>>=
+library(xtable)
+cptable=genreporttable3(filelist,labellist);
+print(xtable(cptable, caption = "Summary of samples", label = "tab:three",
+    digits = c(0,0, 0),
+    align=c('c', 'p{9cm}', 'c'),
+    table.placement = "tbp",
+    caption.placement = "top"))
+@
+
+
+
+
+The meanings of the columns are as follows.
+
+\begin{itemize}
+\item \textbf{Row}: The row number in the table;
+\item \textbf{File}: The filename of fastq file;
+\item \textbf{Label}: Assigned label;
+\item \textbf{Reads}: The total read count in the fastq file;
+\item \textbf{Mapped}: Reads that can be mapped to gRNA library;
+\item \textbf{Percentage}: The percentage of mapped reads;
+\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; 
+\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts;
+\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts.
+\end{itemize}
+
+
+
+\newpage\section{Normalized read count distribution of all samples}
+The following figure shows the distribution of median-normalized read counts in all samples.
+
+
+<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
+genboxplot("output.count_normalized.txt");
+@
+
+The following figure shows the histogram of median-normalized read counts in all samples.
+
+
+<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
+genhistplot("output.count_normalized.txt");
+@
+
+%__INDIVIDUAL_PAGE__
+
+
+
+\end{document}
+