Previous changeset 2:82a180e6b582 (2018-04-04) Next changeset 4:3b22ff50a362 (2018-06-04) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 95daf3f97e89989bae687e64cae8b129b3e2b7af |
modified:
test-data/out.countsummary.txt |
removed:
test-data/out.count.Rnw test-data/out.count.log.txt test-data/out.normcounts.txt test-data/output_countsummary.Rnw |
b |
diff -r 82a180e6b582 -r f259c29b3832 test-data/out.count.Rnw --- a/test-data/out.count.Rnw Wed Apr 04 11:03:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,237 +0,0 @@ -% This is a template file for Sweave used in MAGeCK -% Author: Wei Li, Shirley Liu lab -% Do not modify lines beginning with "#__". -\documentclass{article} - -\usepackage{amsmath} -\usepackage{amscd} -\usepackage[tableposition=top]{caption} -\usepackage{ifthen} -\usepackage{fullpage} -\usepackage[utf8]{inputenc} -% \usepackage{longtable} - -\begin{document} -\setkeys{Gin}{width=0.9\textwidth} - -\title{MAGeCK Count Report} -\author{Wei Li} - -\maketitle - - -\tableofcontents - -\section{Summary} - -%Function definition -<<label=funcdef,include=FALSE,echo=FALSE>>= -genreporttable<-function(filelist,labellist,reads,mappedreads){ - xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads); - colnames(xtb)=c("Label","Reads","Mapped","Percentage"); - return (xtb); -} -genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){ - xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini); - colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex"); - return (xtb); -} -genreporttable3<-function(filelist,labellist){ - xtb=data.frame(File=filelist,Label=labellist); - colnames(xtb)=c("File","Label"); - return (xtb); -} - - -colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", - "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", - "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", - "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"); - - - -genboxplot<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - - boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...) -} - - -genhistplot<-function(filename,isfile=T,...){ - if(isfile){ - slmed=read.table(filename,header=T) - }else{ - slmed=filename; - } - tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1)) - colnames(tabsmat)=colnames(slmed)[c(-1,-2)] - samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ] - if(ncol(tabsmat)>=1){ - histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) }) - xrange=range(unlist(lapply(histlist,function(X){X$mids}))) - yrange=range(unlist(lapply(histlist,function(X){X$counts}))) - hst1=histlist[[1]] - plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... ) - } - if(ncol(tabsmat)>=2){ - for(i in 2:ncol(tabsmat)){ - hstn=histlist[[i]] - lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i]) - } - } - legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol) -} - - - -genclustering<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - - result=tryCatch({ - library(gplots); - heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2) - }, error=function(e){ - heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...) - }); -} - -ctfit_tx=0; - - -panel.plot<-function(x,y,textnames=names(x),...){ - par(new=TRUE) - m<-cbind(x,y) - plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...) - text(x,y,textnames,...) -} - - -genpcaplot<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - ctfit_tx<<-prcomp(t(slmat_log),center=TRUE) - - # par(mfrow=c(2,1)); - samplecol=colors[((1:ncol(slmat)) %% length(colors)) ] - # first 2 PCA - #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2])); - #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol); - # par(mfrow=c(1,1)); - if(length(samplecol)>2){ - pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol) - }else{ - if(length(samplecol)>1){ - pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol) - } - } - - -} - -genpcavar<-function(){ - # % variance - varpca=ctfit_tx$sdev^2 - varpca=varpca/sum(varpca)*100; - if(length(varpca)>10){ - varpca=varpca[1:10]; - } - plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained'); -} - -@ - -%__FILE_SUMMARY__ - -The statistics of comparisons are listed in Table 1 and Table 2. -The corresponding fastq files in each row are listed in Table 3. - -<<label=tab1,echo=FALSE,results=tex>>= -library(xtable) -filelist=c("input_0.gz"); -labellist=c("test1_fastq_gz"); -reads=c(2500); -mappedreads=c(1453); -totalsgrnas=c(2550); -zerocounts=c(1276); -giniindex=c(0.5266899931488773); - -cptable=genreporttable(filelist,labellist,reads,mappedreads); -print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", - digits = c(0, 0, 0, 0,2), - align=c('c', 'c','c', 'c', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - -<<label=tab2,echo=FALSE,results=tex>>= -library(xtable) -cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex); -print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two", - digits = c(0, 0,0, 0,2), - align=c('c', 'c','c', 'c', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - - - - - -<<label=tab3,echo=FALSE,results=tex>>= -library(xtable) -cptable=genreporttable3(filelist,labellist); -print(xtable(cptable, caption = "Summary of samples", label = "tab:three", - digits = c(0,0, 0), - align=c('c', 'p{9cm}', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - - - - -The meanings of the columns are as follows. - -\begin{itemize} -\item \textbf{Row}: The row number in the table; -\item \textbf{File}: The filename of fastq file; -\item \textbf{Label}: Assigned label; -\item \textbf{Reads}: The total read count in the fastq file; -\item \textbf{Mapped}: Reads that can be mapped to gRNA library; -\item \textbf{Percentage}: The percentage of mapped reads; -\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; -\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts; -\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts. -\end{itemize} - - - -\newpage\section{Normalized read count distribution of all samples} -The following figure shows the distribution of median-normalized read counts in all samples. - - -<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= -genboxplot("output.count_normalized.txt"); -@ - -The following figure shows the histogram of median-normalized read counts in all samples. - - -<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= -genhistplot("output.count_normalized.txt"); -@ - -%__INDIVIDUAL_PAGE__ - - - -\end{document} - |
b |
diff -r 82a180e6b582 -r f259c29b3832 test-data/out.count.log.txt --- a/test-data/out.count.log.txt Wed Apr 04 11:03:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -INFO @ Sun, 25 Mar 2018 15:51:06: Parameters: /Users/doylemaria/miniconda3/envs/mulled-v1-5ed9647f14e9d3e99564d31bed2eb19cd32ee8b9da66a89bea59b64a8983b1d6/bin/mageck count -l /private/var/folders/zn/m_qvr9zd7tq0wdtsbq255f8xypj_zg/T/tmp0EKzNL/files/000/dataset_2.dat --fastq input_0.gz --sample-label test1_fastq_gz -n output --pdf-report --keep-tmp --unmapped-to-file -INFO @ Sun, 25 Mar 2018 15:51:06: Welcome to MAGeCK v0.5.7. Command: count -INFO @ Sun, 25 Mar 2018 15:51:06: Loading 2550 predefined sgRNAs. -WARNING @ Sun, 25 Mar 2018 15:51:06: There are 0 sgRNAs with duplicated sequences. -INFO @ Sun, 25 Mar 2018 15:51:06: Parsing FASTQ file input_0.gz... -INFO @ Sun, 25 Mar 2018 15:51:06: Determining the trim-5 length of FASTQ file input_0.gz... -INFO @ Sun, 25 Mar 2018 15:51:06: Possible gRNA lengths:20 -INFO @ Sun, 25 Mar 2018 15:51:06: Processing 0M reads ... -INFO @ Sun, 25 Mar 2018 15:51:06: Read length:30 -INFO @ Sun, 25 Mar 2018 15:51:06: Total tested reads: 2500, mapped: 1453(0.5812) -INFO @ Sun, 25 Mar 2018 15:51:06: --trim-5 test data: (trim_length reads fraction) -INFO @ Sun, 25 Mar 2018 15:51:06: 0 1453 1.0 -INFO @ Sun, 25 Mar 2018 15:51:06: Auto determination of trim5 results: 0 -INFO @ Sun, 25 Mar 2018 15:51:06: Possible gRNA lengths:20 -INFO @ Sun, 25 Mar 2018 15:51:06: Processing 0M reads .. -INFO @ Sun, 25 Mar 2018 15:51:06: Total: 2500. -INFO @ Sun, 25 Mar 2018 15:51:06: Mapped: 1453. -DEBUG @ Sun, 25 Mar 2018 15:51:06: Initial (total) size factor: 1.0 -DEBUG @ Sun, 25 Mar 2018 15:51:06: Median factor: 2.0 -INFO @ Sun, 25 Mar 2018 15:51:06: Final size factor: 2.0 -INFO @ Sun, 25 Mar 2018 15:51:06: Summary of file input_0.gz: -INFO @ Sun, 25 Mar 2018 15:51:06: label test1_fastq_gz -INFO @ Sun, 25 Mar 2018 15:51:06: reads 2500 -INFO @ Sun, 25 Mar 2018 15:51:06: mappedreads 1453 -INFO @ Sun, 25 Mar 2018 15:51:06: totalsgrnas 2550 -INFO @ Sun, 25 Mar 2018 15:51:06: zerosgrnas 1276 -INFO @ Sun, 25 Mar 2018 15:51:06: giniindex 0.5266899931488773 -INFO @ Sun, 25 Mar 2018 15:51:06: Loading Rnw template file: /Users/doylemaria/miniconda3/envs/mulled-v1-5ed9647f14e9d3e99564d31bed2eb19cd32ee8b9da66a89bea59b64a8983b1d6/lib/python3.6/site-packages/mageck/fastq_template.Rnw. -DEBUG @ Sun, 25 Mar 2018 15:51:06: Setting up the visualization module... -INFO @ Sun, 25 Mar 2018 15:51:06: Running command: cd ./; Rscript output_countsummary.R -INFO @ Sun, 25 Mar 2018 15:51:11: Command message: -INFO @ Sun, 25 Mar 2018 15:51:11: Writing to file output_countsummary.tex -INFO @ Sun, 25 Mar 2018 15:51:11: Processing code chunks with options ... -INFO @ Sun, 25 Mar 2018 15:51:11: 1 : keep.source term verbatim (label = funcdef, output_countsummary.Rnw:28) -INFO @ Sun, 25 Mar 2018 15:51:11: 2 : keep.source term tex (label = tab1, output_countsummary.Rnw:156) -INFO @ Sun, 25 Mar 2018 15:51:11: 3 : keep.source term tex (label = tab2, output_countsummary.Rnw:174) -INFO @ Sun, 25 Mar 2018 15:51:11: 4 : keep.source term tex (label = tab3, output_countsummary.Rnw:188) -INFO @ Sun, 25 Mar 2018 15:51:11: 5 : keep.source term verbatim pdf (output_countsummary.Rnw:221) -INFO @ Sun, 25 Mar 2018 15:51:11: 6 : keep.source term verbatim pdf (output_countsummary.Rnw:228) -INFO @ Sun, 25 Mar 2018 15:51:11: -INFO @ Sun, 25 Mar 2018 15:51:11: You can now run (pdf)latex on ‘output_countsummary.tex’ -INFO @ Sun, 25 Mar 2018 15:51:11: -INFO @ Sun, 25 Mar 2018 15:51:11: End command message. |
b |
diff -r 82a180e6b582 -r f259c29b3832 test-data/out.countsummary.txt --- a/test-data/out.countsummary.txt Wed Apr 04 11:03:05 2018 -0400 +++ b/test-data/out.countsummary.txt Thu Apr 19 05:34:15 2018 -0400 |
b |
@@ -1,2 +1,1 @@ File Label Reads Mapped Percentage TotalsgRNAs Zerocounts GiniIndex NegSelQC NegSelQCPval NegSelQCPvalPermutation NegSelQCPvalPermutationFDR NegSelQCGene -input_0.gz test1_fastq_gz 2500 1453 0.5812 2550 1276 0.5267 0 1 1 1 0.0 |
b |
diff -r 82a180e6b582 -r f259c29b3832 test-data/out.normcounts.txt --- a/test-data/out.normcounts.txt Wed Apr 04 11:03:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2551 +0,0 @@\n-sgRNA\tGene\ttest1_fastq_gz\n-s_47512\tRNF111\t2.0\n-s_24835\tHCFC1R1\t2.0\n-s_14784\tCYP4B1\t8.0\n-s_51146\tSLC18A1\t2.0\n-s_58960\tTRIM5\t2.0\n-s_48256\tRPRD2\t2.0\n-s_30297\tKRTAP5-5\t2.0\n-s_14555\tCYB5B\t2.0\n-s_39959\tPAAF1\t2.0\n-s_45293\tPUF60\t2.0\n-s_49358\tSCN8A\t2.0\n-s_64995\tZYG11A\t2.0\n-s_4029\tASTE1\t2.0\n-s_45554\tR3HDML\t2.0\n-s_34264\tMMRN1\t2.0\n-s_37459\tNOL6\t2.0\n-s_23990\tGPX7\t2.0\n-s_20268\tFANCC\t2.0\n-s_14157\tCTLA4\t2.0\n-s_36773\tNEURL4\t36.0\n-s_18804\tETFB\t2.0\n-s_782\tACSS1\t2.0\n-s_18272\tENPP2\t2.0\n-s_46620\tRCN1\t2.0\n-s_55436\tTAS2R3\t2.0\n-s_57947\tTMPRSS2\t2.0\n-s_6438\tC14orf159\t2.0\n-s_33846\tMGST2\t2.0\n-s_16328\tDNAH6\t2.0\n-s_17875\tEIF4G1\t2.0\n-s_2305\tANAPC11\t2.0\n-s_2500\tANKRD2\t2.0\n-s_82\tAARSD1\t2.0\n-s_55329\tTAL1\t2.0\n-s_57926\tTMPRSS11E\t16.0\n-s_38414\tNUP98\t8.0\n-s_50044\tSERPINF1\t2.0\n-s_9257\tCASR\t2.0\n-s_63396\tZNF182\t2.0\n-s_56478\tTHBS3\t2.0\n-s_17191\tDYRK1A\t2.0\n-s_11988\tCIR1\t2.0\n-s_43313\tPPARD\t2.0\n-s_44681\tPSMA4\t2.0\n-s_10387\tCD320\t2.0\n-s_64869\tZPBP\t2.0\n-s_54385\tSTK17B\t2.0\n-s_25423\tHIST1H4D\t2.0\n-s_54172\tST8SIA4\t18.0\n-s_1161\tADCY10\t2.0\n-s_29184\tKIAA0913\t2.0\n-s_42977\tPOLD3\t2.0\n-s_49449\tSCUBE1\t2.0\n-s_24181\tGRM4\t2.0\n-s_52507\tSMARCA5\t2.0\n-s_28674\tKCNJ10\t2.0\n-s_61074\tVAMP2\t8.0\n-s_3954\tASIC2\t2.0\n-s_2385\tANK1\t2.0\n-s_18397\tEPDR1\t2.0\n-s_18377\tEPB41L4B\t2.0\n-s_34580\tMRAP2\t2.0\n-s_48676\tRUFY3\t20.0\n-s_691\tACP1\t2.0\n-s_30460\tLAMP2\t2.0\n-s_42637\tPLRG1\t2.0\n-s_12695\tCNOT6\t2.0\n-s_33316\tMECOM\t4.0\n-s_35081\tMSRB2\t2.0\n-s_58512\tTPD52L2\t2.0\n-s_19912\tFAM22F\t2.0\n-s_45517\tQSOX2\t2.0\n-s_56705\tTINAG\t2.0\n-s_10946\tCDKL5\t2.0\n-s_57473\tTMEM211\t4.0\n-s_57657\tTMEM44\t2.0\n-s_43200\tPOT1\t2.0\n-s_19436\tFAM135A\t2.0\n-s_184\tABCB9\t2.0\n-s_30171\tKRT84\t2.0\n-s_44758\tPSMC3IP\t2.0\n-s_48313\tRPS3\t2.0\n-s_58142\tTNFSF12\t12.0\n-s_59718\tTTLL6\t14.0\n-s_9725\tCCDC43\t2.0\n-s_5135\tBCKDHA\t2.0\n-s_36539\tNDUFC2\t2.0\n-s_27251\tIL27RA\t2.0\n-s_48939\tSAMD10\t2.0\n-s_27343\tIL5RA\t2.0\n-s_28386\tKANK2\t2.0\n-s_27610\tINSRR\t2.0\n-s_2769\tAOC3\t4.0\n-s_58632\tTRA2B\t24.0\n-s_6674\tC16orf86\t2.0\n-s_22902\tGJD4\t2.0\n-s_48278\tRPS15A\t2.0\n-s_61998\tWIPF2\t2.0\n-s_4937\tBAIAP3\t4.0\n-s_54471\tSTOML1\t4.0\n-s_19157\tFABP12\t2.0\n-s_5434\tBIN1\t4.0\n-s_42042\tPIP5K1A\t2.0\n-s_7794\tC3orf18\t2.0\n-s_54846\tSVIL\t2.0\n-s_62273\tXPA\t2.0\n-s_45859\tRACGAP1\t2.0\n-s_53626\tSPOCK3\t2.0\n-s_43295\tPPAP2C\t14.0\n-s_11788\tCHRDL1\t4.0\n-s_50636\tSHQ1\t2.0\n-s_16705\tDPF1\t2.0\n-s_39741\tOTOF\t2.0\n-s_27505\tINHBE\t2.0\n-s_707\tACPL2\t2.0\n-s_15418\tDDX3Y\t12.0\n-s_56018\tTEAD4\t2.0\n-s_44367\tPRR12\t2.0\n-s_25875\tHOXB5\t2.0\n-s_49360\tSCN9A\t2.0\n-s_16244\tDMPK\t2.0\n-s_3909\tASCC2\t2.0\n-s_55088\tSYT6\t2.0\n-s_54311\tSTAU1\t2.0\n-s_53890\tSRP72\t2.0\n-s_11035\tCDX1\t2.0\n-s_18178\tEMR3\t4.0\n-s_16084\tDLD\t2.0\n-s_47207\tRHOBTB1\t2.0\n-s_40267\tPARK2\t12.0\n-s_43104\tPOLR3B\t2.0\n-s_2200\tAMDHD2\t2.0\n-s_12738\tCNRIP1\t2.0\n-s_17842\tEIF4A3\t2.0\n-s_57950\tTMPRSS3\t2.0\n-s_62146\tWRN\t2.0\n-s_11055\tCEACAM1\t2.0\n-s_54580\tSTX2\t2.0\n-s_29277\tKIAA1407\t2.0\n-s_33428\tMEF2A\t2.0\n-s_59797\tTUBB\t2.0\n-s_18113\tEME1\t2.0\n-s_29839\tKLHL8\t2.0\n-s_18058\tELP2\t2.0\n-s_49497\tSDCBP2\t6.0\n-s_16874\tDRP2\t2.0\n-s_13572\tCREBL2\t2.0\n-s_20540\tFBXO30\t2.0\n-s_64380\tZNF646\t2.0\n-s_50366\tSH2B1\t2.0\n-s_2548\tANKRD33B\t2.0\n-s_41183\tPDXP\t2.0\n-s_16315\tDNAH12\t2.0\n-s_19996\tFAM49B\t2.0\n-s_30751\tLDLRAD3\t2.0\n-s_36960\tNGEF\t2.0\n-s_39015\tOR2A2\t2.0\n-s_26302\tHSPB2\t2.0\n-s_64297\tZNF611\t10.0\n-s_730\tACSBG1\t2.0\n-s_50271\tSFXN4\t2.0\n-s_8592\tCA6\t4.0\n-s_13683\tCRMP1\t2.0\n-s_51103\tSLC16A7\t2.0\n-s_63785\tZNF384\t2.0\n-s_16339\tDNAH9\t2.0\n-s_55936\tTCTEX1D1\t2.0\n-s_14497\tCXorf40A\t2.0\n-s_1123\tADAT1\t2.0\n-s_41304\tPERP\t2.0\n-s_18719\tESAM\t2.0\n-s_35118\tMSX2\t2.0\n-s_30128\tKRT6A\t2.0\n-s_402\tABTB1\t2.0\n-s_32578\tMAP1LC3A\t2.0\n-s_45063\tPTMA\t2.0\n-s_43551\tPPP1R14D\t2.0\n-s_2538\tANKRD32\t2.0\n-s_40384\tPAX1\t2.0\n-s_29076\tKIAA0101\t2.0\n-s_40482\tPCDH10\t2.0\n-s_2348\tANGPT2\t2.0\n-s_59756\tTTYH3\t2.0\n-s_34330\tMOB4\t2.0\n-s_49331\tSCN2B\t2.0\n-s_54905\tSYDE1\t2.0\n-s_39101\tOR2T1\t2.0\n-s_36623\tNEDD4L\t2.0\n-s_40500\tPCDH15\t4.0\n-s_10660\tCDC42SE2\t2.0\n-s_30867\tLGALS13\t2.0\n-s_24322\tGSTK1\t4.0\n-s_59167\tTRPC1\t2.0\n-s_57440\tTMEM201\t2.0\n-s_50539\tSHC1\t2.0\n-s_37087\tNIT1\t2.0\n-s_56345\tTGFB2\t2.0\n-s_55388\tTARM1\t2.0\n-s_1224\tADD2\t2.0\n-s_5256\tBCOR\t4.0\n-s_51731\tSLC35B3\t2.0\n-s_12987\tCOL6A6\t2.0\n-s_56745\tTJP3\t2.0\n-s_19340\tFAM120AOS\t2.0\n-s_53904'..b'A\t0\n-s_57422\tTMEM198\t0\n-s_57429\tTMEM2\t0\n-s_57475\tTMEM212\t0\n-s_57531\tTMEM231\t0\n-s_57568\tTMEM245\t0\n-s_57700\tTMEM54\t0\n-s_57873\tTMF1\t0\n-s_57992\tTMUB1\t0\n-s_58180\tTNIP1\t0\n-s_58211\tTNKS2\t0\n-s_58237\tTNNT1\t0\n-s_58256\tTNPO2\t0\n-s_58259\tTNPO3\t0\n-s_58309\tTOM1\t0\n-s_58485\tTP73\t0\n-s_58503\tTPD52\t0\n-s_58533\tTPI1\t0\n-s_5857\tBSPRY\t0\n-s_58612\tTPSG1\t0\n-s_58633\tTRA2B\t0\n-s_58655\tTRAF3\t0\n-s_58668\tTRAF3IP2\t0\n-s_58690\tTRAK1\t0\n-s_58809\tTRIB2\t0\n-s_58962\tTRIM50\t0\n-s_58968\tTRIM52\t0\n-s_59050\tTRIO\t0\n-s_59107\tTRMT1L\t0\n-s_59133\tTRMT61B\t0\n-s_59160\tTROVE2\t0\n-s_59173\tTRPC4\t0\n-s_59196\tTRPM1\t0\n-s_59204\tTRPM3\t0\n-s_59311\tTSEN54\t0\n-s_59332\tTSHB\t0\n-s_59340\tTSHZ2\t0\n-s_59360\tTSNARE1\t0\n-s_5952\tBTG4\t0\n-s_59539\tTTC21A\t0\n-s_59602\tTTC39A\t0\n-s_59654\tTTC9C\t0\n-s_59717\tTTLL6\t0\n-s_5974\tBTN3A1\t0\n-s_59748\tTTYH1\t0\n-s_59807\tTUBB2B\t0\n-s_59859\tTULP1\t0\n-s_59870\tTULP3\t0\n-s_59955\tTXNDC8\t0\n-s_59983\tTXNRD2\t0\n-s_600\tACE\t0\n-s_60169\tUBE2H\t0\n-s_60209\tUBE2Q2\t0\n-s_60237\tUBE2V2\t0\n-s_60248\tUBE3A\t0\n-s_60250\tUBE3B\t0\n-s_60373\tUBXN6\t0\n-s_60396\tUCKL1\t0\n-s_60423\tUEVLD\t0\n-s_60438\tUFSP1\t0\n-s_60449\tUGDH\t0\n-s_60517\tUGT2A1\t0\n-s_60542\tUGT3A1\t0\n-s_60603\tUMODL1\t0\n-s_60614\tUNC119\t0\n-s_60649\tUNC5B\t0\n-s_6068\tC10orf125\t0\n-s_6071\tC10orf128\t0\n-s_60753\tUQCRC2\t0\n-s_60780\tURM1\t0\n-s_60839\tUSP15\t0\n-s_60851\tUSP19\t0\n-s_60925\tUSP4\t0\n-s_6100\tC10orf53\t0\n-s_6106\tC10orf54\t0\n-s_61149\tVAV2\t0\n-s_61173\tVCAM1\t0\n-s_61178\tVCAN\t0\n-s_61221\tVEPH1\t0\n-s_61263\tVIL1\t0\n-s_61341\tVPS13C\t0\n-s_61344\tVPS13D\t0\n-s_61367\tVPS29\t0\n-s_61529\tVWA5A\t0\n-s_61531\tVWA5A\t0\n-s_61587\tWBP1\t0\n-s_61595\tWBP2\t0\n-s_61623\tWDFY1\t0\n-s_61640\tWDHD1\t0\n-s_61662\tWDR16\t0\n-s_61695\tWDR26\t0\n-s_61739\tWDR44\t0\n-s_6200\tC11orf49\t0\n-s_62019\tWISP1\t0\n-s_62098\tWNT5B\t0\n-s_62114\tWNT8A\t0\n-s_62171\tWTAP\t0\n-s_62249\tXKR3\t0\n-s_62257\tXKR6\t0\n-s_62275\tXPC\t0\n-s_62320\tXRCC4\t0\n-s_62361\tYAE1D1\t0\n-s_62550\tZBBX\t0\n-s_62559\tZBED6\t0\n-s_62567\tZBTB1\t0\n-s_62624\tZBTB37\t0\n-s_62657\tZBTB47\t0\n-s_62759\tZC3H7A\t0\n-s_62845\tZDHHC11\t0\n-s_62862\tZDHHC16\t0\n-s_62881\tZDHHC2\t0\n-s_6292\tC12orf23\t0\n-s_62975\tZFC3H1\t0\n-s_63034\tZFP64\t0\n-s_63104\tZFYVE27\t0\n-s_63107\tZFYVE27\t0\n-s_63114\tZFYVE28\t0\n-s_63217\tZMIZ2\t0\n-s_63228\tZMYM3\t0\n-s_63234\tZMYM3\t0\n-s_6326\tC12orf49\t0\n-s_63302\tZNF132\t0\n-s_63362\tZNF167\t0\n-s_63435\tZNF200\t0\n-s_63487\tZNF223\t0\n-s_63594\tZNF276\t0\n-s_636\tACO1\t0\n-s_63746\tZNF354B\t0\n-s_63755\tZNF362\t0\n-s_6376\tC12orf74\t0\n-s_63903\tZNF436\t0\n-s_63905\tZNF438\t0\n-s_63923\tZNF442\t0\n-s_63934\tZNF445\t0\n-s_63935\tZNF446\t0\n-s_63964\tZNF469\t0\n-s_63983\tZNF480\t0\n-s_6409\tC14orf105\t0\n-s_64137\tZNF554\t0\n-s_64241\tZNF586\t0\n-s_6427\tC14orf133\t0\n-s_64356\tZNF639\t0\n-s_64393\tZNF655\t0\n-s_64396\tZNF655\t0\n-s_64419\tZNF668\t0\n-s_64424\tZNF669\t0\n-s_64459\tZNF682\t0\n-s_64479\tZNF688\t0\n-s_64581\tZNF746\t0\n-s_64627\tZNF772\t0\n-s_64638\tZNF776\t0\n-s_64652\tZNF780A\t0\n-s_64791\tZNF85\t0\n-s_64851\tZNRF3\t0\n-s_64871\tZPBP\t0\n-s_64878\tZPLD1\t0\n-s_64898\tZSCAN10\t0\n-s_64930\tZSCAN30\t0\n-s_64997\tZYG11A\t0\n-s_6525\tC15orf39\t0\n-s_6592\tC16orf13\t0\n-s_6639\tC16orf62\t0\n-s_6707\tC17orf102\t0\n-s_6710\tC17orf104\t0\n-s_6728\tC17orf112\t0\n-s_6736\tC17orf39\t0\n-s_6794\tC17orf72\t0\n-s_6814\tC17orf80\t0\n-s_6849\tC18orf21\t0\n-s_6859\tC18orf32\t0\n-s_6862\tC18orf34\t0\n-s_6906\tC19orf38\t0\n-s_7053\tC1QTNF7\t0\n-s_7128\tC1orf122\t0\n-s_7144\tC1orf130\t0\n-s_7162\tC1orf144\t0\n-s_7234\tC1orf198\t0\n-s_7341\tC1orf63\t0\n-s_747\tACSL1\t0\n-s_76\tAARS2\t0\n-s_7674\tC2orf57\t0\n-s_7681\tC2orf62\t0\n-s_7692\tC2orf63\t0\n-s_77\tAARSD1\t0\n-s_78\tAARSD1\t0\n-s_781\tACSS1\t0\n-s_786\tACSS2\t0\n-s_7940\tC4orf26\t0\n-s_7970\tC4orf37\t0\n-s_8000\tC4orf52\t0\n-s_804\tACTB\t0\n-s_8073\tC5orf51\t0\n-s_8141\tC6orf162\t0\n-s_8227\tC7orf10\t0\n-s_8281\tC7orf59\t0\n-s_8318\tC8A\t0\n-s_8403\tC9orf100\t0\n-s_8470\tC9orf24\t0\n-s_8699\tCACNA1G\t0\n-s_8705\tCACNA1I\t0\n-s_871\tACTR8\t0\n-s_874\tACTR8\t0\n-s_8757\tCACNG5\t0\n-s_8797\tCADPS\t0\n-s_8879\tCALR\t0\n-s_8910\tCAMK2B\t0\n-s_893\tACVR1B\t0\n-s_8930\tCAMKK1\t0\n-s_8954\tCAMSAP1\t0\n-s_9064\tCAPRIN1\t0\n-s_9077\tCAPSL\t0\n-s_9109\tCARD17\t0\n-s_913\tACY1\t0\n-s_9171\tCASD1\t0\n-s_9196\tCASP10\t0\n-s_9285\tCATSPER3\t0\n-s_9506\tCCDC120\t0\n-s_9507\tCCDC121\t0\n-s_952\tADAM12\t0\n-s_9584\tCCDC149\t0\n-s_964\tADAM18\t0\n-s_9646\tCCDC170\t0\n-s_9710\tCCDC40\t0\n-s_9732\tCCDC48\t0\n-s_976\tADAM21\t0\n-s_9763\tCCDC62\t0\n-s_9868\tCCDC89\t0\n-s_991\tADAM30\t0\n-s_9925\tCCL1\t0\n-s_9973\tCCL26\t0\n' |
b |
diff -r 82a180e6b582 -r f259c29b3832 test-data/output_countsummary.Rnw --- a/test-data/output_countsummary.Rnw Wed Apr 04 11:03:05 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,237 +0,0 @@ -% This is a template file for Sweave used in MAGeCK -% Author: Wei Li, Shirley Liu lab -% Do not modify lines beginning with "#__". -\documentclass{article} - -\usepackage{amsmath} -\usepackage{amscd} -\usepackage[tableposition=top]{caption} -\usepackage{ifthen} -\usepackage{fullpage} -\usepackage[utf8]{inputenc} -% \usepackage{longtable} - -\begin{document} -\setkeys{Gin}{width=0.9\textwidth} - -\title{MAGeCK Count Report} -\author{Wei Li} - -\maketitle - - -\tableofcontents - -\section{Summary} - -%Function definition -<<label=funcdef,include=FALSE,echo=FALSE>>= -genreporttable<-function(filelist,labellist,reads,mappedreads){ - xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads); - colnames(xtb)=c("Label","Reads","Mapped","Percentage"); - return (xtb); -} -genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){ - xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini); - colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex"); - return (xtb); -} -genreporttable3<-function(filelist,labellist){ - xtb=data.frame(File=filelist,Label=labellist); - colnames(xtb)=c("File","Label"); - return (xtb); -} - - -colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", - "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", - "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", - "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"); - - - -genboxplot<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - - boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...) -} - - -genhistplot<-function(filename,isfile=T,...){ - if(isfile){ - slmed=read.table(filename,header=T) - }else{ - slmed=filename; - } - tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1)) - colnames(tabsmat)=colnames(slmed)[c(-1,-2)] - samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ] - if(ncol(tabsmat)>=1){ - histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) }) - xrange=range(unlist(lapply(histlist,function(X){X$mids}))) - yrange=range(unlist(lapply(histlist,function(X){X$counts}))) - hst1=histlist[[1]] - plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... ) - } - if(ncol(tabsmat)>=2){ - for(i in 2:ncol(tabsmat)){ - hstn=histlist[[i]] - lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i]) - } - } - legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol) -} - - - -genclustering<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - - result=tryCatch({ - library(gplots); - heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2) - }, error=function(e){ - heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...) - }); -} - -ctfit_tx=0; - - -panel.plot<-function(x,y,textnames=names(x),...){ - par(new=TRUE) - m<-cbind(x,y) - plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...) - text(x,y,textnames,...) -} - - -genpcaplot<-function(filename,...){ - #slmed=read.table(filename,header=T) - slmed=read.table(filename,header=T) - slmat=as.matrix(slmed[,c(-1,-2)]) - slmat_log=log2(slmat+1) - ctfit_tx<<-prcomp(t(slmat_log),center=TRUE) - - # par(mfrow=c(2,1)); - samplecol=colors[((1:ncol(slmat)) %% length(colors)) ] - # first 2 PCA - #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2])); - #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol); - # par(mfrow=c(1,1)); - if(length(samplecol)>2){ - pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol) - }else{ - if(length(samplecol)>1){ - pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol) - } - } - - -} - -genpcavar<-function(){ - # % variance - varpca=ctfit_tx$sdev^2 - varpca=varpca/sum(varpca)*100; - if(length(varpca)>10){ - varpca=varpca[1:10]; - } - plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained'); -} - -@ - -%__FILE_SUMMARY__ - -The statistics of comparisons are listed in Table 1 and Table 2. -The corresponding fastq files in each row are listed in Table 3. - -<<label=tab1,echo=FALSE,results=tex>>= -library(xtable) -filelist=c("input_0.gz"); -labellist=c("test1_fastq_gz"); -reads=c(2500); -mappedreads=c(1453); -totalsgrnas=c(2550); -zerocounts=c(1276); -giniindex=c(0.5266899931488773); - -cptable=genreporttable(filelist,labellist,reads,mappedreads); -print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", - digits = c(0, 0, 0, 0,2), - align=c('c', 'c','c', 'c', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - -<<label=tab2,echo=FALSE,results=tex>>= -library(xtable) -cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex); -print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two", - digits = c(0, 0,0, 0,2), - align=c('c', 'c','c', 'c', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - - - - - -<<label=tab3,echo=FALSE,results=tex>>= -library(xtable) -cptable=genreporttable3(filelist,labellist); -print(xtable(cptable, caption = "Summary of samples", label = "tab:three", - digits = c(0,0, 0), - align=c('c', 'p{9cm}', 'c'), - table.placement = "tbp", - caption.placement = "top")) -@ - - - - -The meanings of the columns are as follows. - -\begin{itemize} -\item \textbf{Row}: The row number in the table; -\item \textbf{File}: The filename of fastq file; -\item \textbf{Label}: Assigned label; -\item \textbf{Reads}: The total read count in the fastq file; -\item \textbf{Mapped}: Reads that can be mapped to gRNA library; -\item \textbf{Percentage}: The percentage of mapped reads; -\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; -\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts; -\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts. -\end{itemize} - - - -\newpage\section{Normalized read count distribution of all samples} -The following figure shows the distribution of median-normalized read counts in all samples. - - -<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= -genboxplot("output.count_normalized.txt"); -@ - -The following figure shows the histogram of median-normalized read counts in all samples. - - -<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= -genhistplot("output.count_normalized.txt"); -@ - -%__INDIVIDUAL_PAGE__ - - - -\end{document} - |