mageck_test: test-data/out.count.Rnw comparison

comparison test-data/out.count.Rnw @ 2:81bbbddcf285 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2

author	iuc
date	Wed, 04 Apr 2018 11:03:59 -0400
parents
children

comparison

equal deleted inserted replaced

-:5e2a28bee02d
+:81bbbddcf285
+% This is a template file for Sweave used in MAGeCK
+% Author: Wei Li, Shirley Liu lab
+% Do not modify lines beginning with "#__".
+\documentclass{article}
+\usepackage{amsmath}
+\usepackage{amscd}
+\usepackage[tableposition=top]{caption}
+\usepackage{ifthen}
+\usepackage{fullpage}
+\usepackage[utf8]{inputenc}
+% \usepackage{longtable}
+\begin{document}
+\setkeys{Gin}{width=0.9\textwidth}
+\title{MAGeCK Count Report}
+\author{Wei Li}
+\maketitle
+\tableofcontents
+\section{Summary}
+%Function definition
+<<label=funcdef,include=FALSE,echo=FALSE>>=
+# Build the read-mapping summary table: one row per label with the total read
+# count, the mapped read count and the ratio mapped/total.
+# `filelist` is accepted but not used by this function.
+genreporttable <- function(filelist, labellist, reads, mappedreads) {
+  summary_tab <- data.frame(
+    Label = labellist,
+    Reads = reads,
+    MappedReads = mappedreads,
+    MappedPercentage = mappedreads / reads
+  )
+  # Final headers as they appear in the rendered table.
+  names(summary_tab) <- c("Label", "Reads", "Mapped", "Percentage")
+  summary_tab
+}
+# Build the sgRNA statistics table: one row per label with the sgRNA total,
+# the number of zero-count sgRNAs and the Gini index.
+# `filelist` is accepted but not used by this function.
+genreporttable2 <- function(filelist, labellist, sgrnas, zerocounts, gini) {
+  stats_tab <- data.frame(
+    Label = labellist,
+    TotalsgRNAs = sgrnas,
+    ZeroCounts = zerocounts,
+    GiniIndex = gini
+  )
+  # Final headers as they appear in the rendered table.
+  names(stats_tab) <- c("Label", "TotalsgRNA", "ZeroCounts", "GiniIndex")
+  stats_tab
+}
+# Build the file-to-label lookup table: one row per input file.
+genreporttable3 <- function(filelist, labellist) {
+  file_tab <- data.frame(File = filelist, Label = labellist)
+  names(file_tab) <- c("File", "Label")
+  file_tab
+}
+# Fixed palette of 28 hex colours; genhistplot() and genpcaplot() pick one
+# entry per sample from it.
+colors <- c(
+  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF",
+  "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
+  "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3",
+  "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"
+)
+# Draw one box per sample column of log2(count + 1).
+# The table is read from `filename` with a header row and its first two
+# columns are dropped before plotting. Extra arguments in `...` are
+# forwarded to boxplot().
+genboxplot <- function(filename, ...) {
+  count_table <- read.table(filename, header = TRUE)
+  log_counts <- log2(as.matrix(count_table[, -(1:2)]) + 1)
+  boxplot(log_counts, pch = ".", las = 2, ylab = "log2(read counts)",
+          cex.axis = 0.8, ...)
+}
+# Overlay the distribution of log2(count + 1) of every sample as one line
+# per sample, with a legend of the sample names.
+#
+# filename: path of the count table (read with a header row) or, when
+#           `isfile` is FALSE, an already loaded table.
+# isfile:   whether `filename` has to be read from disk.
+# ...:      extra graphical arguments for the initial plot() call.
+#
+# The first two columns of the table are dropped. Returns NULL invisibly when
+# no sample column is left, otherwise the value of the final legend() call.
+genhistplot <- function(filename, isfile = TRUE, ...) {
+  slmed <- if (isfile) read.table(filename, header = TRUE) else filename
+  # drop = FALSE keeps a single remaining sample column as a column.
+  tabsmat <- as.matrix(log2(slmed[, c(-1, -2), drop = FALSE] + 1))
+  colnames(tabsmat) <- colnames(slmed)[c(-1, -2)]
+  nsample <- ncol(tabsmat)
+  if (nsample < 1) {
+    # No sample columns: there is nothing to plot or to put in a legend.
+    return(invisible(NULL))
+  }
+  # Cycle through the palette with a 1-based wrap-around. A plain
+  # `i %% length(colors)` is 0 at every multiple of the palette length and R
+  # drops index 0, which would leave later samples with the wrong colour.
+  samplecol <- colors[((seq_len(nsample) - 1) %% length(colors)) + 1]
+  histlist <- lapply(seq_len(nsample), function(i) {
+    hist(tabsmat[, i], plot = FALSE, breaks = 40)
+  })
+  # Common axis limits so that every sample fits into the same frame.
+  xmax <- max(unlist(lapply(histlist, function(h) h$mids)))
+  ymax <- max(unlist(lapply(histlist, function(h) h$counts)))
+  hst1 <- histlist[[1]]
+  plot(hst1$mids, hst1$counts, type = 'b', pch = 20,
+       xlim = c(0, xmax * 1.2), ylim = c(0, ymax * 1.2),
+       xlab = 'log2(counts)', ylab = 'Frequency',
+       main = 'Distribution of read counts', col = samplecol[1], ...)
+  # Remaining samples are added on top of the first one.
+  for (i in seq_len(nsample)[-1]) {
+    hstn <- histlist[[i]]
+    lines(hstn$mids, hstn$counts, type = 'b', pch = 20, col = samplecol[i])
+  }
+  legend('topright', colnames(tabsmat), pch = 20, lwd = 1, col = samplecol)
+}
+# Draw a heatmap of the correlation between the sample columns of
+# log2(count + 1); the table is read from `filename` and its first two
+# columns are dropped.
+# heatmap.2() from gplots is tried first. If loading gplots or the call
+# itself raises an error, base heatmap() is drawn instead; only that fallback
+# receives the extra arguments in `...`.
+genclustering <- function(filename, ...) {
+  count_table <- read.table(filename, header = TRUE)
+  log_counts <- log2(as.matrix(count_table[, -(1:2)]) + 1)
+  draw_base_heatmap <- function(e) {
+    heatmap(cor(log_counts), scale = "none", cexRow = 0.8, cexCol = 0.8,
+            cex.axis = 0.8, ...)
+  }
+  result <- tryCatch(
+    {
+      library(gplots)
+      heatmap.2(cor(log_counts), trace = "none", density.info = "none",
+                cexRow = 0.8, cexCol = 0.8, offsetRow = -0.2, offsetCol = -0.2)
+    },
+    error = draw_base_heatmap
+  )
+  invisible(result)
+}
+# Global placeholder for the PCA fit: genpcaplot() overwrites it with a
+# prcomp() result and genpcavar() reads it.
+ctfit_tx <- 0
+# Panel function handed to pairs(): plots the points of one panel and writes
+# `textnames` at the point positions. The extra arguments in `...` are passed
+# to both plot() and text().
+panel.plot <- function(x, y, textnames = names(x), ...) {
+  # Let the plot() call below draw into the current frame.
+  par(new = TRUE)
+  widen <- function(v) range(v) * 1.1
+  plot(cbind(x, y), pch = 20, xlim = widen(x), ylim = widen(y), ...)
+  text(x, y, textnames, ...)
+}
+# Run a PCA over the samples of log2(count + 1) and draw a pairs() plot of
+# the first three (or two) principal components, labelled with sample names.
+#
+# filename: path of the count table (read with a header row); its first two
+#           columns are dropped.
+# ...:      accepted but not used.
+#
+# Side effect: the prcomp() fit is stored in the global `ctfit_tx`, which
+# genpcavar() reads. Nothing is drawn when fewer than two components or
+# samples are available.
+genpcaplot <- function(filename, ...) {
+  slmed <- read.table(filename, header = TRUE)
+  slmat <- as.matrix(slmed[, c(-1, -2)])
+  slmat_log <- log2(slmat + 1)
+  # Samples become rows so that each sample gets one score per component.
+  ctfit_tx <<- prcomp(t(slmat_log), center = TRUE)
+  nsample <- ncol(slmat)
+  # Cycle through the palette with a 1-based wrap-around. A plain
+  # `i %% length(colors)` is 0 at every multiple of the palette length and R
+  # drops index 0, which shortens the colour vector and misassigns colours.
+  samplecol <- colors[((seq_len(nsample) - 1) %% length(colors)) + 1]
+  pcs <- ctfit_tx$x
+  # Never ask for more components than the fit actually provides.
+  npc <- min(3, nsample, ncol(pcs))
+  if (npc >= 3) {
+    pairs(pcs[, 1:3], panel = panel.plot, textnames = rownames(pcs),
+          main = 'First 3 principle components', col = samplecol)
+  } else if (npc == 2) {
+    pairs(pcs[, 1:2], panel = panel.plot, textnames = rownames(pcs),
+          main = 'First 2 principle components', col = samplecol)
+  }
+}
+# Plot the percentage of variance explained by at most the first ten
+# components of the global PCA fit `ctfit_tx`.
+genpcavar <- function() {
+  component_var <- ctfit_tx$sdev^2
+  pct_var <- head(component_var / sum(component_var) * 100, 10)
+  plot(pct_var, type = "b", lwd = 2, pch = 20, xlab = "PCs",
+       ylab = "% Variance explained")
+}
+@
+%__FILE_SUMMARY__
+The statistics of comparisons are listed in Table 1 and Table 2.
+The corresponding fastq files in each row are listed in Table 3.
+<<label=tab1,echo=FALSE,results=tex>>=
+library(xtable)
+# Per-sample inputs of the report; the following table chunks reuse them.
+filelist <- c("input_0.gz")
+labellist <- c("test1_fastq_gz")
+reads <- c(2500)
+mappedreads <- c(1453)
+totalsgrnas <- c(2550)
+zerocounts <- c(1276)
+giniindex <- c(0.5266899931488773)
+cptable <- genreporttable(filelist, labellist, reads, mappedreads)
+# table.placement and caption.placement are options of the print method for
+# xtable objects, so they are given to print(); xtable() does not act on them.
+print(
+  xtable(cptable, caption = "Summary of comparisons", label = "tab:one",
+         digits = c(0, 0, 0, 0, 2),
+         align = c('c', 'c', 'c', 'c', 'c')),
+  table.placement = "tbp",
+  caption.placement = "top"
+)
+@
+<<label=tab2,echo=FALSE,results=tex>>=
+library(xtable)
+cptable <- genreporttable2(filelist, labellist, totalsgrnas, zerocounts, giniindex)
+# table.placement and caption.placement are options of the print method for
+# xtable objects, so they are given to print(); xtable() does not act on them.
+print(
+  xtable(cptable, caption = "Summary of comparisons", label = "tab:two",
+         digits = c(0, 0, 0, 0, 2),
+         align = c('c', 'c', 'c', 'c', 'c')),
+  table.placement = "tbp",
+  caption.placement = "top"
+)
+@
+<<label=tab3,echo=FALSE,results=tex>>=
+library(xtable)
+cptable <- genreporttable3(filelist, labellist)
+# table.placement and caption.placement are options of the print method for
+# xtable objects, so they are given to print(); xtable() does not act on them.
+print(
+  xtable(cptable, caption = "Summary of samples", label = "tab:three",
+         digits = c(0, 0, 0),
+         align = c('c', 'p{9cm}', 'c')),
+  table.placement = "tbp",
+  caption.placement = "top"
+)
+@
+The meanings of the columns are as follows.
+\begin{itemize}
+\item \textbf{Row}: The row number in the table;
+\item \textbf{File}: The filename of the fastq file;
+\item \textbf{Label}: Assigned label;
+\item \textbf{Reads}: The total read count in the fastq file;
+\item \textbf{Mapped}: The number of reads that can be mapped to the sgRNA library;
+\item \textbf{Percentage}: The fraction of reads that are mapped (Mapped divided by Reads), reported as a value between 0 and 1;
+\item \textbf{TotalsgRNA}: The number of sgRNAs in the library;
+\item \textbf{ZeroCounts}: The number of sgRNAs with zero read counts;
+\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts.
+\end{itemize}
+\newpage\section{Normalized read count distribution of all samples}
+The following figure shows the distribution of median-normalized read counts in all samples.
+<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
+# Box plot of the normalized count table, one box per sample.
+genboxplot(filename = "output.count_normalized.txt")
+@
+The following figure shows the histogram of median-normalized read counts in all samples.
+<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
+# Line histogram of the normalized count table, one line per sample.
+genhistplot(filename = "output.count_normalized.txt")
+@
+%__INDIVIDUAL_PAGE__
+\end{document}

Mercurial > repos > iuc > mageck_test

comparison test-data/out.count.Rnw @ 2:81bbbddcf285 draft