Repository 'mageck_mle'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/mageck_mle

Changeset 3:5dfc0e462f2a (2018-04-19)
Previous changeset 2:9cd937788131 (2018-04-04) Next changeset 4:b34c9d6373e0 (2018-06-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 95daf3f97e89989bae687e64cae8b129b3e2b7af
modified:
test-data/out.countsummary.txt
removed:
test-data/out.count.Rnw
test-data/out.count.log.txt
test-data/out.normcounts.txt
test-data/output_countsummary.Rnw
b
diff -r 9cd937788131 -r 5dfc0e462f2a test-data/out.count.Rnw
--- a/test-data/out.count.Rnw Wed Apr 04 11:03:44 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,237 +0,0 @@
-% This is a template file for Sweave used in MAGeCK
-% Author: Wei Li, Shirley Liu lab
-% Do not modify lines beginning with "#__".
-\documentclass{article}
-
-\usepackage{amsmath}
-\usepackage{amscd}
-\usepackage[tableposition=top]{caption}
-\usepackage{ifthen}
-\usepackage{fullpage}
-\usepackage[utf8]{inputenc}
-% \usepackage{longtable}
-
-\begin{document}
-\setkeys{Gin}{width=0.9\textwidth}
-
-\title{MAGeCK Count Report}
-\author{Wei Li}
-
-\maketitle
-
-
-\tableofcontents
-
-\section{Summary}
-
-%Function definition
-<<label=funcdef,include=FALSE,echo=FALSE>>=
-genreporttable<-function(filelist,labellist,reads,mappedreads){
-  xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads);
-  colnames(xtb)=c("Label","Reads","Mapped","Percentage");
-  return (xtb);
-}
-genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){
-  xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini);
-  colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex");
-  return (xtb);
-}
-genreporttable3<-function(filelist,labellist){
-  xtb=data.frame(File=filelist,Label=labellist);
-  colnames(xtb)=c("File","Label");
-  return (xtb);
-}
-
-
-colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",  "#A65628", "#F781BF",
-          "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 
-          "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
-          "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F");
-
-
-
-genboxplot<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-
-  boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...)
-}
-
-
-genhistplot<-function(filename,isfile=T,...){
-  if(isfile){
-    slmed=read.table(filename,header=T)
-  }else{
-    slmed=filename;
-  }
-  tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1))
-  colnames(tabsmat)=colnames(slmed)[c(-1,-2)]
-  samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ]
-  if(ncol(tabsmat)>=1){
-    histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) })
-    xrange=range(unlist(lapply(histlist,function(X){X$mids})))
-    yrange=range(unlist(lapply(histlist,function(X){X$counts})))
-    hst1=histlist[[1]]
-    plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... )
-  }
-  if(ncol(tabsmat)>=2){ 
-    for(i in 2:ncol(tabsmat)){
-      hstn=histlist[[i]]
-      lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i])
-    }
-  }
-  legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol)
-}
-
-
-
-genclustering<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-
-  result=tryCatch({
-    library(gplots);
-    heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2)
-  }, error=function(e){
-    heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...)
-  });
-}
-
-ctfit_tx=0;
-
-
-panel.plot<-function(x,y,textnames=names(x),...){
-  par(new=TRUE)
-  m<-cbind(x,y)
-  plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...)
-  text(x,y,textnames,...)
-}
-
-
-genpcaplot<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-  ctfit_tx<<-prcomp(t(slmat_log),center=TRUE)
-  
-  # par(mfrow=c(2,1));
-  samplecol=colors[((1:ncol(slmat)) %% length(colors)) ]
-  # first 2 PCA
-  #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2]));
-  #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol);
-  # par(mfrow=c(1,1));
-  if(length(samplecol)>2){
-    pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol)
-  }else{
-    if(length(samplecol)>1){
-      pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol)
-   }
-  }
-
-
-}
-
-genpcavar<-function(){
-  # % variance 
-  varpca=ctfit_tx$sdev^2
-  varpca=varpca/sum(varpca)*100;
-  if(length(varpca)>10){
-    varpca=varpca[1:10];
-  }
-  plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained');
-}
-
-@
-
-%__FILE_SUMMARY__
-
-The statistics of comparisons are listed in Table 1 and Table 2.
-The corresponding fastq files in each row are listed in Table 3.
-
-<<label=tab1,echo=FALSE,results=tex>>=
-library(xtable)
-filelist=c("input_0.gz");
-labellist=c("test1_fastq_gz");
-reads=c(2500);
-mappedreads=c(1453);
-totalsgrnas=c(2550);
-zerocounts=c(1276);
-giniindex=c(0.5266899931488773);
-
-cptable=genreporttable(filelist,labellist,reads,mappedreads);
-print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one",
-    digits = c(0, 0, 0, 0,2),
-    align=c('c',  'c','c',  'c', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-<<label=tab2,echo=FALSE,results=tex>>=
-library(xtable)
-cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex);
-print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two",
-    digits = c(0, 0,0, 0,2),
-    align=c('c',  'c','c',  'c', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-
-
-
-
-<<label=tab3,echo=FALSE,results=tex>>=
-library(xtable)
-cptable=genreporttable3(filelist,labellist);
-print(xtable(cptable, caption = "Summary of samples", label = "tab:three",
-    digits = c(0,0, 0),
-    align=c('c', 'p{9cm}', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-
-
-
-The meanings of the columns are as follows.
-
-\begin{itemize}
-\item \textbf{Row}: The row number in the table;
-\item \textbf{File}: The filename of fastq file;
-\item \textbf{Label}: Assigned label;
-\item \textbf{Reads}: The total read count in the fastq file;
-\item \textbf{Mapped}: Reads that can be mapped to gRNA library;
-\item \textbf{Percentage}: The percentage of mapped reads;
-\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; 
-\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts;
-\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts.
-\end{itemize}
-
-
-
-\newpage\section{Normalized read count distribution of all samples}
-The following figure shows the distribution of median-normalized read counts in all samples.
-
-
-<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
-genboxplot("output.count_normalized.txt");
-@
-
-The following figure shows the histogram of median-normalized read counts in all samples.
-
-
-<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
-genhistplot("output.count_normalized.txt");
-@
-
-%__INDIVIDUAL_PAGE__
-
-
-
-\end{document}
-
b
diff -r 9cd937788131 -r 5dfc0e462f2a test-data/out.count.log.txt
--- a/test-data/out.count.log.txt Wed Apr 04 11:03:44 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,43 +0,0 @@
-INFO  @ Sun, 25 Mar 2018 15:51:06: Parameters: /Users/doylemaria/miniconda3/envs/mulled-v1-5ed9647f14e9d3e99564d31bed2eb19cd32ee8b9da66a89bea59b64a8983b1d6/bin/mageck count -l /private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmp0EKzNL/files/000/dataset_2.dat --fastq input_0.gz --sample-label test1_fastq_gz -n output --pdf-report --keep-tmp --unmapped-to-file 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Welcome to MAGeCK v0.5.7. Command: count 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Loading 2550 predefined sgRNAs. 
-WARNING @ Sun, 25 Mar 2018 15:51:06: There are 0 sgRNAs with duplicated sequences. 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Parsing FASTQ file input_0.gz... 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Determining the trim-5 length of FASTQ file input_0.gz... 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Possible gRNA lengths:20 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Processing 0M reads ... 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Read length:30 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Total tested reads: 2500, mapped: 1453(0.5812) 
-INFO  @ Sun, 25 Mar 2018 15:51:06: --trim-5 test data: (trim_length reads fraction) 
-INFO  @ Sun, 25 Mar 2018 15:51:06: 0 1453 1.0 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Auto determination of trim5 results: 0 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Possible gRNA lengths:20 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Processing 0M reads .. 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Total: 2500. 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Mapped: 1453. 
-DEBUG @ Sun, 25 Mar 2018 15:51:06: Initial (total) size factor: 1.0 
-DEBUG @ Sun, 25 Mar 2018 15:51:06: Median factor: 2.0 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Final size factor: 2.0 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Summary of file input_0.gz: 
-INFO  @ Sun, 25 Mar 2018 15:51:06: label test1_fastq_gz 
-INFO  @ Sun, 25 Mar 2018 15:51:06: reads 2500 
-INFO  @ Sun, 25 Mar 2018 15:51:06: mappedreads 1453 
-INFO  @ Sun, 25 Mar 2018 15:51:06: totalsgrnas 2550 
-INFO  @ Sun, 25 Mar 2018 15:51:06: zerosgrnas 1276 
-INFO  @ Sun, 25 Mar 2018 15:51:06: giniindex 0.5266899931488773 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Loading Rnw template file: /Users/doylemaria/miniconda3/envs/mulled-v1-5ed9647f14e9d3e99564d31bed2eb19cd32ee8b9da66a89bea59b64a8983b1d6/lib/python3.6/site-packages/mageck/fastq_template.Rnw. 
-DEBUG @ Sun, 25 Mar 2018 15:51:06: Setting up the visualization module... 
-INFO  @ Sun, 25 Mar 2018 15:51:06: Running command: cd ./; Rscript output_countsummary.R 
-INFO  @ Sun, 25 Mar 2018 15:51:11: Command message: 
-INFO  @ Sun, 25 Mar 2018 15:51:11:   Writing to file output_countsummary.tex 
-INFO  @ Sun, 25 Mar 2018 15:51:11:   Processing code chunks with options ... 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    1 : keep.source term verbatim (label = funcdef, output_countsummary.Rnw:28) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    2 : keep.source term tex (label = tab1, output_countsummary.Rnw:156) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    3 : keep.source term tex (label = tab2, output_countsummary.Rnw:174) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    4 : keep.source term tex (label = tab3, output_countsummary.Rnw:188) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    5 : keep.source term verbatim pdf  (output_countsummary.Rnw:221) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    6 : keep.source term verbatim pdf  (output_countsummary.Rnw:228) 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    
-INFO  @ Sun, 25 Mar 2018 15:51:11:   You can now run (pdf)latex on ‘output_countsummary.tex’ 
-INFO  @ Sun, 25 Mar 2018 15:51:11:    
-INFO  @ Sun, 25 Mar 2018 15:51:11: End command message. 
b
diff -r 9cd937788131 -r 5dfc0e462f2a test-data/out.countsummary.txt
--- a/test-data/out.countsummary.txt Wed Apr 04 11:03:44 2018 -0400
+++ b/test-data/out.countsummary.txt Thu Apr 19 05:34:53 2018 -0400
b
@@ -1,2 +1,1 @@
 File Label Reads Mapped Percentage TotalsgRNAs Zerocounts GiniIndex NegSelQC NegSelQCPval NegSelQCPvalPermutation NegSelQCPvalPermutationFDR NegSelQCGene
-input_0.gz test1_fastq_gz 2500 1453 0.5812 2550 1276 0.5267 0 1 1 1 0.0
b
diff -r 9cd937788131 -r 5dfc0e462f2a test-data/out.normcounts.txt
--- a/test-data/out.normcounts.txt Wed Apr 04 11:03:44 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2551 +0,0 @@\n-sgRNA\tGene\ttest1_fastq_gz\n-s_47512\tRNF111\t2.0\n-s_24835\tHCFC1R1\t2.0\n-s_14784\tCYP4B1\t8.0\n-s_51146\tSLC18A1\t2.0\n-s_58960\tTRIM5\t2.0\n-s_48256\tRPRD2\t2.0\n-s_30297\tKRTAP5-5\t2.0\n-s_14555\tCYB5B\t2.0\n-s_39959\tPAAF1\t2.0\n-s_45293\tPUF60\t2.0\n-s_49358\tSCN8A\t2.0\n-s_64995\tZYG11A\t2.0\n-s_4029\tASTE1\t2.0\n-s_45554\tR3HDML\t2.0\n-s_34264\tMMRN1\t2.0\n-s_37459\tNOL6\t2.0\n-s_23990\tGPX7\t2.0\n-s_20268\tFANCC\t2.0\n-s_14157\tCTLA4\t2.0\n-s_36773\tNEURL4\t36.0\n-s_18804\tETFB\t2.0\n-s_782\tACSS1\t2.0\n-s_18272\tENPP2\t2.0\n-s_46620\tRCN1\t2.0\n-s_55436\tTAS2R3\t2.0\n-s_57947\tTMPRSS2\t2.0\n-s_6438\tC14orf159\t2.0\n-s_33846\tMGST2\t2.0\n-s_16328\tDNAH6\t2.0\n-s_17875\tEIF4G1\t2.0\n-s_2305\tANAPC11\t2.0\n-s_2500\tANKRD2\t2.0\n-s_82\tAARSD1\t2.0\n-s_55329\tTAL1\t2.0\n-s_57926\tTMPRSS11E\t16.0\n-s_38414\tNUP98\t8.0\n-s_50044\tSERPINF1\t2.0\n-s_9257\tCASR\t2.0\n-s_63396\tZNF182\t2.0\n-s_56478\tTHBS3\t2.0\n-s_17191\tDYRK1A\t2.0\n-s_11988\tCIR1\t2.0\n-s_43313\tPPARD\t2.0\n-s_44681\tPSMA4\t2.0\n-s_10387\tCD320\t2.0\n-s_64869\tZPBP\t2.0\n-s_54385\tSTK17B\t2.0\n-s_25423\tHIST1H4D\t2.0\n-s_54172\tST8SIA4\t18.0\n-s_1161\tADCY10\t2.0\n-s_29184\tKIAA0913\t2.0\n-s_42977\tPOLD3\t2.0\n-s_49449\tSCUBE1\t2.0\n-s_24181\tGRM4\t2.0\n-s_52507\tSMARCA5\t2.0\n-s_28674\tKCNJ10\t2.0\n-s_61074\tVAMP2\t8.0\n-s_3954\tASIC2\t2.0\n-s_2385\tANK1\t2.0\n-s_18397\tEPDR1\t2.0\n-s_18377\tEPB41L4B\t2.0\n-s_34580\tMRAP2\t2.0\n-s_48676\tRUFY3\t20.0\n-s_691\tACP1\t2.0\n-s_30460\tLAMP2\t2.0\n-s_42637\tPLRG1\t2.0\n-s_12695\tCNOT6\t2.0\n-s_33316\tMECOM\t4.0\n-s_35081\tMSRB2\t2.0\n-s_58512\tTPD52L2\t2.0\n-s_19912\tFAM22F\t2.0\n-s_45517\tQSOX2\t2.0\n-s_56705\tTINAG\t2.0\n-s_10946\tCDKL5\t2.0\n-s_57473\tTMEM211\t4.0\n-s_57657\tTMEM44\t2.0\n-s_43200\tPOT1\t2.0\n-s_19436\tFAM135A\t2.0\n-s_184\tABCB9\t2.0\n-s_30171\tKRT84\t2.0\n-s_44758\tPSMC3IP\t2.0\n-s_48313\tRPS3\t2.0\n-s_58142\tTNFSF12\t12.0\n-s_59718\tTTLL6\t14.0\n-s_9725\tCCDC43\t2.0\n-s_5135\tBCKDHA\t2.0\n-s_36539\tNDUFC2\t2.0\n-s_27251\tIL27RA\t2.0\n-s_48939\tSAMD10\t2.0\n-s_27343\tIL5RA\t2.0\n-s_28386\tKANK2\t2.0\n-s_27610\tINSRR\t2.0\n-s_2769\tAOC3\t4.0\n-s_58632\tTRA2B\t24.0\n-s_6674\tC16orf86\t2.0\n-s_22902\tGJD4\t2.0\n-s_48278\tRPS15A\t2.0\n-s_61998\tWIPF2\t2.0\n-s_4937\tBAIAP3\t4.0\n-s_54471\tSTOML1\t4.0\n-s_19157\tFABP12\t2.0\n-s_5434\tBIN1\t4.0\n-s_42042\tPIP5K1A\t2.0\n-s_7794\tC3orf18\t2.0\n-s_54846\tSVIL\t2.0\n-s_62273\tXPA\t2.0\n-s_45859\tRACGAP1\t2.0\n-s_53626\tSPOCK3\t2.0\n-s_43295\tPPAP2C\t14.0\n-s_11788\tCHRDL1\t4.0\n-s_50636\tSHQ1\t2.0\n-s_16705\tDPF1\t2.0\n-s_39741\tOTOF\t2.0\n-s_27505\tINHBE\t2.0\n-s_707\tACPL2\t2.0\n-s_15418\tDDX3Y\t12.0\n-s_56018\tTEAD4\t2.0\n-s_44367\tPRR12\t2.0\n-s_25875\tHOXB5\t2.0\n-s_49360\tSCN9A\t2.0\n-s_16244\tDMPK\t2.0\n-s_3909\tASCC2\t2.0\n-s_55088\tSYT6\t2.0\n-s_54311\tSTAU1\t2.0\n-s_53890\tSRP72\t2.0\n-s_11035\tCDX1\t2.0\n-s_18178\tEMR3\t4.0\n-s_16084\tDLD\t2.0\n-s_47207\tRHOBTB1\t2.0\n-s_40267\tPARK2\t12.0\n-s_43104\tPOLR3B\t2.0\n-s_2200\tAMDHD2\t2.0\n-s_12738\tCNRIP1\t2.0\n-s_17842\tEIF4A3\t2.0\n-s_57950\tTMPRSS3\t2.0\n-s_62146\tWRN\t2.0\n-s_11055\tCEACAM1\t2.0\n-s_54580\tSTX2\t2.0\n-s_29277\tKIAA1407\t2.0\n-s_33428\tMEF2A\t2.0\n-s_59797\tTUBB\t2.0\n-s_18113\tEME1\t2.0\n-s_29839\tKLHL8\t2.0\n-s_18058\tELP2\t2.0\n-s_49497\tSDCBP2\t6.0\n-s_16874\tDRP2\t2.0\n-s_13572\tCREBL2\t2.0\n-s_20540\tFBXO30\t2.0\n-s_64380\tZNF646\t2.0\n-s_50366\tSH2B1\t2.0\n-s_2548\tANKRD33B\t2.0\n-s_41183\tPDXP\t2.0\n-s_16315\tDNAH12\t2.0\n-s_19996\tFAM49B\t2.0\n-s_30751\tLDLRAD3\t2.0\n-s_36960\tNGEF\t2.0\n-s_39015\tOR2A2\t2.0\n-s_26302\tHSPB2\t2.0\n-s_64297\tZNF611\t10.0\n-s_730\tACSBG1\t2.0\n-s_50271\tSFXN4\t2.0\n-s_8592\tCA6\t4.0\n-s_13683\tCRMP1\t2.0\n-s_51103\tSLC16A7\t2.0\n-s_63785\tZNF384\t2.0\n-s_16339\tDNAH9\t2.0\n-s_55936\tTCTEX1D1\t2.0\n-s_14497\tCXorf40A\t2.0\n-s_1123\tADAT1\t2.0\n-s_41304\tPERP\t2.0\n-s_18719\tESAM\t2.0\n-s_35118\tMSX2\t2.0\n-s_30128\tKRT6A\t2.0\n-s_402\tABTB1\t2.0\n-s_32578\tMAP1LC3A\t2.0\n-s_45063\tPTMA\t2.0\n-s_43551\tPPP1R14D\t2.0\n-s_2538\tANKRD32\t2.0\n-s_40384\tPAX1\t2.0\n-s_29076\tKIAA0101\t2.0\n-s_40482\tPCDH10\t2.0\n-s_2348\tANGPT2\t2.0\n-s_59756\tTTYH3\t2.0\n-s_34330\tMOB4\t2.0\n-s_49331\tSCN2B\t2.0\n-s_54905\tSYDE1\t2.0\n-s_39101\tOR2T1\t2.0\n-s_36623\tNEDD4L\t2.0\n-s_40500\tPCDH15\t4.0\n-s_10660\tCDC42SE2\t2.0\n-s_30867\tLGALS13\t2.0\n-s_24322\tGSTK1\t4.0\n-s_59167\tTRPC1\t2.0\n-s_57440\tTMEM201\t2.0\n-s_50539\tSHC1\t2.0\n-s_37087\tNIT1\t2.0\n-s_56345\tTGFB2\t2.0\n-s_55388\tTARM1\t2.0\n-s_1224\tADD2\t2.0\n-s_5256\tBCOR\t4.0\n-s_51731\tSLC35B3\t2.0\n-s_12987\tCOL6A6\t2.0\n-s_56745\tTJP3\t2.0\n-s_19340\tFAM120AOS\t2.0\n-s_53904'..b'A\t0\n-s_57422\tTMEM198\t0\n-s_57429\tTMEM2\t0\n-s_57475\tTMEM212\t0\n-s_57531\tTMEM231\t0\n-s_57568\tTMEM245\t0\n-s_57700\tTMEM54\t0\n-s_57873\tTMF1\t0\n-s_57992\tTMUB1\t0\n-s_58180\tTNIP1\t0\n-s_58211\tTNKS2\t0\n-s_58237\tTNNT1\t0\n-s_58256\tTNPO2\t0\n-s_58259\tTNPO3\t0\n-s_58309\tTOM1\t0\n-s_58485\tTP73\t0\n-s_58503\tTPD52\t0\n-s_58533\tTPI1\t0\n-s_5857\tBSPRY\t0\n-s_58612\tTPSG1\t0\n-s_58633\tTRA2B\t0\n-s_58655\tTRAF3\t0\n-s_58668\tTRAF3IP2\t0\n-s_58690\tTRAK1\t0\n-s_58809\tTRIB2\t0\n-s_58962\tTRIM50\t0\n-s_58968\tTRIM52\t0\n-s_59050\tTRIO\t0\n-s_59107\tTRMT1L\t0\n-s_59133\tTRMT61B\t0\n-s_59160\tTROVE2\t0\n-s_59173\tTRPC4\t0\n-s_59196\tTRPM1\t0\n-s_59204\tTRPM3\t0\n-s_59311\tTSEN54\t0\n-s_59332\tTSHB\t0\n-s_59340\tTSHZ2\t0\n-s_59360\tTSNARE1\t0\n-s_5952\tBTG4\t0\n-s_59539\tTTC21A\t0\n-s_59602\tTTC39A\t0\n-s_59654\tTTC9C\t0\n-s_59717\tTTLL6\t0\n-s_5974\tBTN3A1\t0\n-s_59748\tTTYH1\t0\n-s_59807\tTUBB2B\t0\n-s_59859\tTULP1\t0\n-s_59870\tTULP3\t0\n-s_59955\tTXNDC8\t0\n-s_59983\tTXNRD2\t0\n-s_600\tACE\t0\n-s_60169\tUBE2H\t0\n-s_60209\tUBE2Q2\t0\n-s_60237\tUBE2V2\t0\n-s_60248\tUBE3A\t0\n-s_60250\tUBE3B\t0\n-s_60373\tUBXN6\t0\n-s_60396\tUCKL1\t0\n-s_60423\tUEVLD\t0\n-s_60438\tUFSP1\t0\n-s_60449\tUGDH\t0\n-s_60517\tUGT2A1\t0\n-s_60542\tUGT3A1\t0\n-s_60603\tUMODL1\t0\n-s_60614\tUNC119\t0\n-s_60649\tUNC5B\t0\n-s_6068\tC10orf125\t0\n-s_6071\tC10orf128\t0\n-s_60753\tUQCRC2\t0\n-s_60780\tURM1\t0\n-s_60839\tUSP15\t0\n-s_60851\tUSP19\t0\n-s_60925\tUSP4\t0\n-s_6100\tC10orf53\t0\n-s_6106\tC10orf54\t0\n-s_61149\tVAV2\t0\n-s_61173\tVCAM1\t0\n-s_61178\tVCAN\t0\n-s_61221\tVEPH1\t0\n-s_61263\tVIL1\t0\n-s_61341\tVPS13C\t0\n-s_61344\tVPS13D\t0\n-s_61367\tVPS29\t0\n-s_61529\tVWA5A\t0\n-s_61531\tVWA5A\t0\n-s_61587\tWBP1\t0\n-s_61595\tWBP2\t0\n-s_61623\tWDFY1\t0\n-s_61640\tWDHD1\t0\n-s_61662\tWDR16\t0\n-s_61695\tWDR26\t0\n-s_61739\tWDR44\t0\n-s_6200\tC11orf49\t0\n-s_62019\tWISP1\t0\n-s_62098\tWNT5B\t0\n-s_62114\tWNT8A\t0\n-s_62171\tWTAP\t0\n-s_62249\tXKR3\t0\n-s_62257\tXKR6\t0\n-s_62275\tXPC\t0\n-s_62320\tXRCC4\t0\n-s_62361\tYAE1D1\t0\n-s_62550\tZBBX\t0\n-s_62559\tZBED6\t0\n-s_62567\tZBTB1\t0\n-s_62624\tZBTB37\t0\n-s_62657\tZBTB47\t0\n-s_62759\tZC3H7A\t0\n-s_62845\tZDHHC11\t0\n-s_62862\tZDHHC16\t0\n-s_62881\tZDHHC2\t0\n-s_6292\tC12orf23\t0\n-s_62975\tZFC3H1\t0\n-s_63034\tZFP64\t0\n-s_63104\tZFYVE27\t0\n-s_63107\tZFYVE27\t0\n-s_63114\tZFYVE28\t0\n-s_63217\tZMIZ2\t0\n-s_63228\tZMYM3\t0\n-s_63234\tZMYM3\t0\n-s_6326\tC12orf49\t0\n-s_63302\tZNF132\t0\n-s_63362\tZNF167\t0\n-s_63435\tZNF200\t0\n-s_63487\tZNF223\t0\n-s_63594\tZNF276\t0\n-s_636\tACO1\t0\n-s_63746\tZNF354B\t0\n-s_63755\tZNF362\t0\n-s_6376\tC12orf74\t0\n-s_63903\tZNF436\t0\n-s_63905\tZNF438\t0\n-s_63923\tZNF442\t0\n-s_63934\tZNF445\t0\n-s_63935\tZNF446\t0\n-s_63964\tZNF469\t0\n-s_63983\tZNF480\t0\n-s_6409\tC14orf105\t0\n-s_64137\tZNF554\t0\n-s_64241\tZNF586\t0\n-s_6427\tC14orf133\t0\n-s_64356\tZNF639\t0\n-s_64393\tZNF655\t0\n-s_64396\tZNF655\t0\n-s_64419\tZNF668\t0\n-s_64424\tZNF669\t0\n-s_64459\tZNF682\t0\n-s_64479\tZNF688\t0\n-s_64581\tZNF746\t0\n-s_64627\tZNF772\t0\n-s_64638\tZNF776\t0\n-s_64652\tZNF780A\t0\n-s_64791\tZNF85\t0\n-s_64851\tZNRF3\t0\n-s_64871\tZPBP\t0\n-s_64878\tZPLD1\t0\n-s_64898\tZSCAN10\t0\n-s_64930\tZSCAN30\t0\n-s_64997\tZYG11A\t0\n-s_6525\tC15orf39\t0\n-s_6592\tC16orf13\t0\n-s_6639\tC16orf62\t0\n-s_6707\tC17orf102\t0\n-s_6710\tC17orf104\t0\n-s_6728\tC17orf112\t0\n-s_6736\tC17orf39\t0\n-s_6794\tC17orf72\t0\n-s_6814\tC17orf80\t0\n-s_6849\tC18orf21\t0\n-s_6859\tC18orf32\t0\n-s_6862\tC18orf34\t0\n-s_6906\tC19orf38\t0\n-s_7053\tC1QTNF7\t0\n-s_7128\tC1orf122\t0\n-s_7144\tC1orf130\t0\n-s_7162\tC1orf144\t0\n-s_7234\tC1orf198\t0\n-s_7341\tC1orf63\t0\n-s_747\tACSL1\t0\n-s_76\tAARS2\t0\n-s_7674\tC2orf57\t0\n-s_7681\tC2orf62\t0\n-s_7692\tC2orf63\t0\n-s_77\tAARSD1\t0\n-s_78\tAARSD1\t0\n-s_781\tACSS1\t0\n-s_786\tACSS2\t0\n-s_7940\tC4orf26\t0\n-s_7970\tC4orf37\t0\n-s_8000\tC4orf52\t0\n-s_804\tACTB\t0\n-s_8073\tC5orf51\t0\n-s_8141\tC6orf162\t0\n-s_8227\tC7orf10\t0\n-s_8281\tC7orf59\t0\n-s_8318\tC8A\t0\n-s_8403\tC9orf100\t0\n-s_8470\tC9orf24\t0\n-s_8699\tCACNA1G\t0\n-s_8705\tCACNA1I\t0\n-s_871\tACTR8\t0\n-s_874\tACTR8\t0\n-s_8757\tCACNG5\t0\n-s_8797\tCADPS\t0\n-s_8879\tCALR\t0\n-s_8910\tCAMK2B\t0\n-s_893\tACVR1B\t0\n-s_8930\tCAMKK1\t0\n-s_8954\tCAMSAP1\t0\n-s_9064\tCAPRIN1\t0\n-s_9077\tCAPSL\t0\n-s_9109\tCARD17\t0\n-s_913\tACY1\t0\n-s_9171\tCASD1\t0\n-s_9196\tCASP10\t0\n-s_9285\tCATSPER3\t0\n-s_9506\tCCDC120\t0\n-s_9507\tCCDC121\t0\n-s_952\tADAM12\t0\n-s_9584\tCCDC149\t0\n-s_964\tADAM18\t0\n-s_9646\tCCDC170\t0\n-s_9710\tCCDC40\t0\n-s_9732\tCCDC48\t0\n-s_976\tADAM21\t0\n-s_9763\tCCDC62\t0\n-s_9868\tCCDC89\t0\n-s_991\tADAM30\t0\n-s_9925\tCCL1\t0\n-s_9973\tCCL26\t0\n'
b
diff -r 9cd937788131 -r 5dfc0e462f2a test-data/output_countsummary.Rnw
--- a/test-data/output_countsummary.Rnw Wed Apr 04 11:03:44 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,237 +0,0 @@
-% This is a template file for Sweave used in MAGeCK
-% Author: Wei Li, Shirley Liu lab
-% Do not modify lines beginning with "#__".
-\documentclass{article}
-
-\usepackage{amsmath}
-\usepackage{amscd}
-\usepackage[tableposition=top]{caption}
-\usepackage{ifthen}
-\usepackage{fullpage}
-\usepackage[utf8]{inputenc}
-% \usepackage{longtable}
-
-\begin{document}
-\setkeys{Gin}{width=0.9\textwidth}
-
-\title{MAGeCK Count Report}
-\author{Wei Li}
-
-\maketitle
-
-
-\tableofcontents
-
-\section{Summary}
-
-%Function definition
-<<label=funcdef,include=FALSE,echo=FALSE>>=
-genreporttable<-function(filelist,labellist,reads,mappedreads){
-  xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads);
-  colnames(xtb)=c("Label","Reads","Mapped","Percentage");
-  return (xtb);
-}
-genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){
-  xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini);
-  colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex");
-  return (xtb);
-}
-genreporttable3<-function(filelist,labellist){
-  xtb=data.frame(File=filelist,Label=labellist);
-  colnames(xtb)=c("File","Label");
-  return (xtb);
-}
-
-
-colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",  "#A65628", "#F781BF",
-          "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", 
-          "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5",
-          "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F");
-
-
-
-genboxplot<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-
-  boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...)
-}
-
-
-genhistplot<-function(filename,isfile=T,...){
-  if(isfile){
-    slmed=read.table(filename,header=T)
-  }else{
-    slmed=filename;
-  }
-  tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1))
-  colnames(tabsmat)=colnames(slmed)[c(-1,-2)]
-  samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ]
-  if(ncol(tabsmat)>=1){
-    histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) })
-    xrange=range(unlist(lapply(histlist,function(X){X$mids})))
-    yrange=range(unlist(lapply(histlist,function(X){X$counts})))
-    hst1=histlist[[1]]
-    plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... )
-  }
-  if(ncol(tabsmat)>=2){ 
-    for(i in 2:ncol(tabsmat)){
-      hstn=histlist[[i]]
-      lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i])
-    }
-  }
-  legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol)
-}
-
-
-
-genclustering<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-
-  result=tryCatch({
-    library(gplots);
-    heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2)
-  }, error=function(e){
-    heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...)
-  });
-}
-
-ctfit_tx=0;
-
-
-panel.plot<-function(x,y,textnames=names(x),...){
-  par(new=TRUE)
-  m<-cbind(x,y)
-  plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...)
-  text(x,y,textnames,...)
-}
-
-
-genpcaplot<-function(filename,...){
-  #slmed=read.table(filename,header=T)
-  slmed=read.table(filename,header=T)
-  slmat=as.matrix(slmed[,c(-1,-2)])
-  slmat_log=log2(slmat+1)
-  ctfit_tx<<-prcomp(t(slmat_log),center=TRUE)
-  
-  # par(mfrow=c(2,1));
-  samplecol=colors[((1:ncol(slmat)) %% length(colors)) ]
-  # first 2 PCA
-  #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2]));
-  #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol);
-  # par(mfrow=c(1,1));
-  if(length(samplecol)>2){
-    pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol)
-  }else{
-    if(length(samplecol)>1){
-      pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol)
-   }
-  }
-
-
-}
-
-genpcavar<-function(){
-  # % variance 
-  varpca=ctfit_tx$sdev^2
-  varpca=varpca/sum(varpca)*100;
-  if(length(varpca)>10){
-    varpca=varpca[1:10];
-  }
-  plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained');
-}
-
-@
-
-%__FILE_SUMMARY__
-
-The statistics of comparisons are listed in Table 1 and Table 2.
-The corresponding fastq files in each row are listed in Table 3.
-
-<<label=tab1,echo=FALSE,results=tex>>=
-library(xtable)
-filelist=c("input_0.gz");
-labellist=c("test1_fastq_gz");
-reads=c(2500);
-mappedreads=c(1453);
-totalsgrnas=c(2550);
-zerocounts=c(1276);
-giniindex=c(0.5266899931488773);
-
-cptable=genreporttable(filelist,labellist,reads,mappedreads);
-print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one",
-    digits = c(0, 0, 0, 0,2),
-    align=c('c',  'c','c',  'c', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-<<label=tab2,echo=FALSE,results=tex>>=
-library(xtable)
-cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex);
-print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two",
-    digits = c(0, 0,0, 0,2),
-    align=c('c',  'c','c',  'c', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-
-
-
-
-<<label=tab3,echo=FALSE,results=tex>>=
-library(xtable)
-cptable=genreporttable3(filelist,labellist);
-print(xtable(cptable, caption = "Summary of samples", label = "tab:three",
-    digits = c(0,0, 0),
-    align=c('c', 'p{9cm}', 'c'),
-    table.placement = "tbp",
-    caption.placement = "top"))
-@
-
-
-
-
-The meanings of the columns are as follows.
-
-\begin{itemize}
-\item \textbf{Row}: The row number in the table;
-\item \textbf{File}: The filename of fastq file;
-\item \textbf{Label}: Assigned label;
-\item \textbf{Reads}: The total read count in the fastq file;
-\item \textbf{Mapped}: Reads that can be mapped to gRNA library;
-\item \textbf{Percentage}: The percentage of mapped reads;
-\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; 
-\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts;
-\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts.
-\end{itemize}
-
-
-
-\newpage\section{Normalized read count distribution of all samples}
-The following figure shows the distribution of median-normalized read counts in all samples.
-
-
-<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
-genboxplot("output.count_normalized.txt");
-@
-
-The following figure shows the histogram of median-normalized read counts in all samples.
-
-
-<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>=
-genhistplot("output.count_normalized.txt");
-@
-
-%__INDIVIDUAL_PAGE__
-
-
-
-\end{document}
-