# HG changeset patch # User mingchen0919 # Date 1510080632 18000 # Node ID 2f8ddef8d545e5fd20d52a92f178a026e1b444ef # Parent fd35142675067a08f3ddb9d8f72c640fdbdffb33 update deseq2 diff -r fd3514267506 -r 2f8ddef8d545 DESeq.Rmd --- a/DESeq.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq.Rmd Tue Nov 07 13:50:32 2017 -0500 @@ -10,24 +10,22 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error = TRUE ) - -library(stringi) -library(DESeq2) -library(pheatmap) -# library(PoiClaClu) -library(RColorBrewer) ``` # `DESeqDataSet` object -```{r} -count_files = strsplit(opt$count_files, ',')[[1]] +```{r 'DESeqDataSet object'} +count_file_paths = strsplit(opt$count_file_paths, ',')[[1]] +count_file_names = strsplit(opt$count_file_names, ',')[[1]] sample_table = read.table(opt$sample_table, header = TRUE) +row.names(sample_table) = sample_table[,2] +sample_table = sample_table[count_file_names, ] ## copy count files into working directory -file_copy = file.copy(count_files, sample_table$fileName, overwrite = TRUE) +file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE) ## DESeqDataSet object dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, @@ -55,14 +53,14 @@ ## Count Data -```{r} +```{r 'count data'} datatable(head(counts(dds), 100), style="bootstrap", class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ``` ## Sample Table -```{r} +```{r 'sample table'} datatable(sample_table, style="bootstrap", class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ``` diff -r fd3514267506 -r 2f8ddef8d545 DESeq.xml --- a/DESeq.xml Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq.xml Tue Nov 07 13:50:32 2017 -0500 @@ -1,10 +1,10 @@ pandoc - bioconductor-deseq2 r-getopt r-rmarkdown r-plyr + bioconductor-deseq2 r-stringr r-highcharter r-dt @@ -18,46 +18,52 @@ An R Markdown tool to perform DESeq analysis. - - - + + - + + - - + + + @@ -94,7 +100,8 @@ @article{allaire2016rmarkdown, title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, journal={R package version 0.9}, volume={6}, year={2016} @@ -110,21 +117,15 @@ } - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - - - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} } diff -r fd3514267506 -r 2f8ddef8d545 DESeq_render.R --- a/DESeq_render.R Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_render.R Tue Nov 07 13:50:32 2017 -0500 @@ -1,90 +1,70 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, -error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) -}) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) +library(getopt) +library(rmarkdown) +library(htmltools) +library(dplyr) +library(stringi) +library(DESeq2) +library(pheatmap) +library(RColorBrewer) +library(DT) -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$COUNT_FILES = c('count_files', 'c', '1', 'character') -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_dir', 'd', '1', 'character') -spec_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_RMD = c('deseq_rmd', 'D', '1', 'character') +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') + ##---------below is the code for rendering .Rmd templates----- + + ##=============STEP 1: handle command line arguments========== + ## + ##============================================================ + # column 1: the long flag name + # column 2: the short flag alias. A SINGLE character string + # column 3: argument mask + # 0: no argument + # 1: argument required + # 2: argument is optional + # column 4: date type to which the flag's argument shall be cast. + # possible values: logical, integer, double, complex, character. + #------------------------------------------------------------- + #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ + # 1. short flag alias should match the flag in the command section in the XML file. + # 2. long flag name can be any legal R variable names + # 3. two names in args_list can have common string but one name should not be a part of another name. + # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. + #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + args_list=list() + ##------- 1. input data --------------------- + args_list$ECHO = c('echo', 'e', '1', 'character') + args_list$COUNT_FILE_PATHS = c('count_file_paths', 'P', '1', 'character') + args_list$COUNT_FILE_NAMES = c('count_file_names', 'N', '1', 'character') + args_list$SAMPLE_TABLE = c('sample_table', 'S', '1', 'character') + args_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character') + ##--------2. output report and outputs -------------- + args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') + args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') + args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') + args_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character') + ##--------3. .Rmd templates in the tool directory ---------- + args_list$deseq_RMD = c('deseq_rmd', 't', '1', 'character') + ##----------------------------------------------------------- + opt = getopt(t(as.data.frame(args_list))) - -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== - -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. - - -#----- 01 DESeq.Rmd ----------------------- -readLines(opt$deseq_rmd) %>% + + ##=======STEP 2: create report directory (optional)========== + ## + ##=========================================================== + dir.create(opt$report_dir) + + ##=STEP 3: replace placeholders in .Rmd with argument values= + ## + ##=========================================================== + #++ need to replace placeholders with args values one by one+ + readLines(opt$deseq_rmd) %>% (function(x) { - gsub('ECHO', opt$echo, x) + gsub('ECHO', opt$echo, x) }) %>% (function(x) { gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) @@ -93,21 +73,21 @@ gsub('DESIGN_FORMULA', opt$design_formula, x) }) %>% (function(x) { - gsub('OUTPUT_DIR', opt$deseq_dir, x) + gsub('REPORT_DIR', opt$report_dir, x) }) %>% (function(x) { - fileConn = file('DESeq.Rmd') - writeLines(x, con=fileConn) - close(fileConn) + fileConn = file('deseq.Rmd') + writeLines(x, con=fileConn) + close(fileConn) }) + + + ##=============STEP 4: render .Rmd templates================= + ## + ##=========================================================== + render('deseq.Rmd', output_file = opt$report_html) -#------ 3. render all Rmd files -------- -render('DESeq.Rmd', output_file = opt$deseq_html) - - -#-------4. manipulate outputs ----------------------------- -# document file -# file.copy('DESeq.html', opt$deseq_html, recursive = TRUE) - - + ##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file diff -r fd3514267506 -r 2f8ddef8d545 DESeq_results.Rmd --- a/DESeq_results.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results.Rmd Tue Nov 07 13:50:32 2017 -0500 @@ -10,17 +10,14 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error = TRUE ) - -library(DESeq2) -library(pheatmap) -library(genefilter) ``` -# Import workspace ```{r eval=TRUE} +# Import workspace fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") load("deseq.RData") ``` @@ -30,9 +27,11 @@ ## Result table ```{r} -group = colnames(sample_table)[CONTRAST_GROUP] -res <- results(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) -datatable(as.data.frame(res), style="bootstrap", filter = 'top', +cat('--- View the top 100 rows of the result table ---') +res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) +write.csv(as.data.frame(res), file = 'deseq_results.csv') +res_df = as.data.frame(res)[1:100, ] +datatable(res_df, style="bootstrap", filter = 'top', class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ``` @@ -45,16 +44,10 @@ # MA-plot {.tabset} -## Shrinked with `lfcShrink()` function -```{r eval=FALSE} -shrink_res = DESeq2::lfcShrink(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'), res=res) -plotMA(shrink_res) -``` - -## Shrinked with Bayesian procedure ```{r} +cat('--- Shrinked with Bayesian procedure ---') plotMA(res) ``` @@ -68,11 +61,11 @@ ``` -# Gene clustering +# Visualization {.tabset} +## Gene clustering ```{r} -group_index = as.numeric(strsplit("CLUSTERING_GROUPS", ',')[[1]]) -clustering_groups = colnames(sample_table)[group_index] +clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]] topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) mat <- assay(rld)[ topVarGenes, ] @@ -83,3 +76,34 @@ pheatmap(mat, annotation_col = annotation_col) ``` +## Sample-to-sample distance + +```{r} +sampleDistMatrix <- as.matrix( sampleDists ) +colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) +pheatmap(sampleDistMatrix, + clustering_distance_cols = sampleDists, + col = colors) +``` + +## PCA plot + +```{r} +plotPCA(rld, intgroup = clustering_groups) +``` + +## MDS plot {.tabset} + +### Data table +```{r} +mds <- as.data.frame(colData(rld)) %>% + cbind(cmdscale(sampleDistMatrix)) +knitr::kable(mds) +``` + +### Plot +```{r} +ggplot(mds, aes(x = `1`, y = `2`, col = time)) + + geom_point(size = 3) + coord_fixed() +``` + diff -r fd3514267506 -r 2f8ddef8d545 DESeq_results.xml --- a/DESeq_results.xml Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results.xml Tue Nov 07 13:50:32 2017 -0500 @@ -1,10 +1,10 @@ - + pandoc - bioconductor-deseq2 r-getopt r-rmarkdown r-plyr + bioconductor-deseq2 r-stringr r-highcharter r-dt @@ -18,18 +18,11 @@ An R Markdown tool to display DESeq analysis. - - - + + - - - - - - - - - - - + + + + + + - + + + @@ -102,7 +87,8 @@ @article{allaire2016rmarkdown, title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, journal={R package version 0.9}, volume={6}, year={2016} @@ -118,21 +104,15 @@ } - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - - - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} } diff -r fd3514267506 -r 2f8ddef8d545 DESeq_results_render.R --- a/DESeq_results_render.R Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results_render.R Tue Nov 07 13:50:32 2017 -0500 @@ -1,120 +1,108 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, - error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) - }) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) +library(getopt) +library(rmarkdown) +library(htmltools) +library(dplyr) +library(DESeq2) +library(pheatmap) +library(genefilter) +library(DT) +library(stringi) +library(RColorBrewer) +library(ggplot2) -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$CONTRAST_GROUP = c('contrast_group', 'c', '1', 'character') -spec_list$TREATMENT_LEVEL = c('treatment_level', 't', '1', 'character') -spec_list$CONDITION_LEVEL = c('condition_level', 'k', '1', 'character') -spec_list$CLUSTERING_GROUPS = c('clustering_groups', 'm', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_results_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_results_dir', 'd', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_results_rmd', 'D', '1', 'character') - +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') + ##---------below is the code for rendering .Rmd templates----- + + ##=============STEP 1: handle command line arguments========== + ## + ##============================================================ + # column 1: the long flag name + # column 2: the short flag alias. A SINGLE character string + # column 3: argument mask + # 0: no argument + # 1: argument required + # 2: argument is optional + # column 4: date type to which the flag's argument shall be cast. + # possible values: logical, integer, double, complex, character. + #------------------------------------------------------------- + #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ + # 1. short flag alias should match the flag in the command section in the XML file. + # 2. long flag name can be any legal R variable names + # 3. two names in args_list can have common string but one name should not be a part of another name. + # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. + #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + args_list=list() + ##------- 1. input data --------------------- + args_list$ECHO = c('echo', 'e', '1', 'character') + args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character') + args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character') + args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character') + args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character') + args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character') + ##--------2. output report and outputs -------------- + args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') + args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') + args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') + args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character') + ##--------3. .Rmd templates in the tool directory ---------- + args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character') + ##----------------------------------------------------------- + opt = getopt(t(as.data.frame(args_list))) -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== + + ##=======STEP 2: create report directory (optional)========== + ## + ##=========================================================== + dir.create(opt$report_dir) + + ##=STEP 3: replace placeholders in .Rmd with argument values= + ## + ##=========================================================== + #++ need to replace placeholders with args values one by one+ + readLines(opt$deseq_results_rmd) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) + }) %>% + (function(x) { + gsub('CONTRAST_FACTOR', opt$contrast_factor, x) + }) %>% + (function(x) { + gsub('TREATMENT_LEVEL', opt$treatment_level, x) + }) %>% + (function(x) { + gsub('CONDITION_LEVEL', opt$condition_level, x) + }) %>% + (function(x) { + gsub('CLUSTERING_FACTORS', opt$clustering_factors, x) + }) %>% + (function(x) { + gsub('REPORT_DIR', opt$report_dir, x) + }) %>% + (function(x) { + gsub('DESEQ_RESULTS', opt$deseq_results, x) + }) %>% + (function(x) { + fileConn = file('deseq_results.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_results_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. + ##=============STEP 4: render .Rmd templates================= + ## + ##=========================================================== + render('deseq_results.Rmd', output_file = opt$report_html) -#----- 01 DESeq_results.Rmd ----------------------- -readLines(opt$deseq_results_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('CONTRAST_GROUP', opt$contrast_group, x) - }) %>% - (function(x) { - gsub('TREATMENT_LEVEL', opt$treatment_level, x) - }) %>% - (function(x) { - gsub('CONDITION_LEVEL', opt$condition_level, x) - }) %>% - (function(x) { - gsub('CLUSTERING_GROUPS', opt$clustering_groups, x) - }) %>% - (function(x) { - gsub('OUTPUT_DIR', opt$deseq_results_dir, x) - }) %>% - (function(x) { - fileConn = file('DESeq_results.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - -#------ 3. render all Rmd files -------- -render('DESeq_results.Rmd', output_file = opt$deseq_results_html) - - -#-------4. manipulate outputs ----------------------------- + ##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file diff -r fd3514267506 -r 2f8ddef8d545 DESeq_visualization.Rmd --- a/DESeq_visualization.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,114 +0,0 @@ ---- -title: 'DESeq2: Visualization' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO -) - -library(stringi) -library(DESeq2) -library(pheatmap) -# library(PoiClaClu) -library(RColorBrewer) -``` - -# Import workspace - -```{r eval=TRUE} -fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") -load("deseq.RData") -``` - -# Visualization - -## Heatmaps of sample-to-sample distances {.tabset} - -### rlog-transformed values - -```{r} -sampleDistMatrix <- as.matrix( sampleDists ) -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - # clustering_distance_rows = sampleDists, - clustering_distance_cols = sampleDists, - col = colors) -``` - -### Poisson Distance - -```{r eval=FALSE} -count_t = t(counts(dds)) -rownames(count_t) = colnames(counts(dds)) -poisd <- PoissonDistance(count_t) -samplePoisDistMatrix <- as.matrix( poisd$dd ) -rownames(samplePoisDistMatrix) = rownames(count_t) -colnames(samplePoisDistMatrix) = rownames(count_t) -pheatmap(samplePoisDistMatrix, - # clustering_distance_rows = poisd$dd, - clustering_distance_cols = poisd$dd, - col = colors) -``` - - -## PCA plots {.tabset} - -### Using `plotPCA()` function - -```{r} -# interest groups -col_index = as.numeric(strsplit("INTGROUPS_PCA", ',')[[1]]) -intgroup_pca = colnames(sample_table)[col_index] -``` - -```{r} -plotPCA(rld, intgroup = intgroup_pca) -``` - - -### Using *ggplot2* - -```{r} -pcaData <- plotPCA(rld, intgroup = intgroup_pca, returnData = TRUE) -percentVar <- round(100 * attr(pcaData, "percentVar")) -ggplot(pcaData, aes(x = PC1, y = PC2, color = time)) + - geom_point(size =3) + - xlab(paste0("PC1: ", percentVar[1], "% variance")) + - ylab(paste0("PC2: ", percentVar[2], "% variance")) + - coord_fixed() -``` - -### PCA data table - -```{r} -knitr::kable(pcaData) -``` - - -## MDS plots {.tabset} - -### Using rlog-transformed values - -```{r} -mds <- as.data.frame(colData(rld)) %>% - cbind(cmdscale(sampleDistMatrix)) -mds -ggplot(mds, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -``` - -### Using the *Poisson Distance* - -```{r eval=FALSE} -mdsPois <- as.data.frame(colData(dds)) %>% - cbind(cmdscale(samplePoisDistMatrix)) -ggplot(mdsPois, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -``` diff -r fd3514267506 -r 2f8ddef8d545 DESeq_visualization.xml --- a/DESeq_visualization.xml Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ - - - pandoc - bioconductor-deseq2 - r-getopt - r-rmarkdown - r-plyr - r-stringr - r-highcharter - r-dt - r-reshape2 - r-plotly - r-formattable - r-htmltools - r-pheatmap - - - An R Markdown tool to visualize DESeq analysis results. - - - - - - - - - - - - - - - - - - - - - - - - - - @article{love2014moderated, - title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, - author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, - journal={Genome biology}, - volume={15}, - number={12}, - pages={550}, - year={2014}, - publisher={BioMed Central} - } - - - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - - - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - - - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - - - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, - } - - - \ No newline at end of file diff -r fd3514267506 -r 2f8ddef8d545 DESeq_visualization_render.R --- a/DESeq_visualization_render.R Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, - error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) - }) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) - -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$INTGROUPS_PCA = c('intgroups_pca', 'p', '1', 'character') -spec_list$INTGROUPS_MDS = c('intgroups_mds', 'm', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_visualization_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_visualization_dir', 'd', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_visualization_rmd', 'D', '1', 'character') - - - -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== - -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_visualization_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. - - -#----- 01 DESeq_visualization.Rmd ----------------------- -readLines(opt$deseq_visualization_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('INTGROUPS_PCA', opt$intgroups_pca, x) - }) %>% - (function(x) { - gsub('INTGROUPS_MDS', opt$intgroups_mds, x) - }) %>% - (function(x) { - gsub('OUTPUT_DIR', opt$deseq_visualization_dir, x) - }) %>% - (function(x) { - fileConn = file('DESeq_visualization.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - -#------ 3. render all Rmd files -------- -render('DESeq_visualization.Rmd', output_file = opt$deseq_visualization_html) - - -#-------4. manipulate outputs -----------------------------