Mercurial > repos > mingchen0919 > rmarkdown_deseq2
changeset 6:2f8ddef8d545 draft
update deseq2
author | mingchen0919 |
---|---|
date | Tue, 07 Nov 2017 13:50:32 -0500 |
parents | fd3514267506 |
children | 466053167103 |
files | DESeq.Rmd DESeq.xml DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_render.R DESeq_visualization.Rmd DESeq_visualization.xml DESeq_visualization_render.R |
diffstat | 9 files changed, 318 insertions(+), 718 deletions(-) [+] |
line wrap: on
line diff
--- a/DESeq.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq.Rmd Tue Nov 07 13:50:32 2017 -0500 @@ -10,24 +10,22 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error = TRUE ) - -library(stringi) -library(DESeq2) -library(pheatmap) -# library(PoiClaClu) -library(RColorBrewer) ``` # `DESeqDataSet` object -```{r} -count_files = strsplit(opt$count_files, ',')[[1]] +```{r 'DESeqDataSet object'} +count_file_paths = strsplit(opt$count_file_paths, ',')[[1]] +count_file_names = strsplit(opt$count_file_names, ',')[[1]] sample_table = read.table(opt$sample_table, header = TRUE) +row.names(sample_table) = sample_table[,2] +sample_table = sample_table[count_file_names, ] ## copy count files into working directory -file_copy = file.copy(count_files, sample_table$fileName, overwrite = TRUE) +file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE) ## DESeqDataSet object dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, @@ -55,14 +53,14 @@ ## Count Data -```{r} +```{r 'count data'} datatable(head(counts(dds), 100), style="bootstrap", class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ``` ## Sample Table -```{r} +```{r 'sample table'} datatable(sample_table, style="bootstrap", class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ```
--- a/DESeq.xml Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq.xml Tue Nov 07 13:50:32 2017 -0500 @@ -1,10 +1,10 @@ <tool id="DESeq" name="DESeq2: DESeq" version="1.0.0"> <requirements> <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> <requirement type="package" version="1.20.0">r-getopt</requirement> <requirement type="package" version="1.2">r-rmarkdown</requirement> <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> <requirement type="package" version="1.1.0">r-stringr</requirement> <requirement type="package" version="0.4.0">r-highcharter</requirement> <requirement type="package" version="0.2">r-dt</requirement> @@ -18,46 +18,52 @@ An R Markdown tool to perform DESeq analysis. </description> <stdio> - <regex match="Execution halted" - source="both" - level="fatal" - description="Execution halted." /> - <regex match="Error in" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> - <regex match="Fatal error" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing--> + <regex match="XXX" + source="stderr" + level="warning" + description="Check the warnings_and_errors.txt file for more details."/> </stdio> <command> <![CDATA[ Rscript '${__tool_directory__}/DESeq_render.R' + ## 1. input data -e $echo - -c $count_files - -s $sample_table - -p "$design_formula" + ##----- code chunk to get file paths and raw file names for a multiple inputs data field ---- + #set $sep = '' + #set $count_file_paths = '' + #set $count_file_names = '' + #for $count_file in $count_files: + #set $count_file_paths += $sep + str($count_file) + #set $count_file_names += $sep + str($count_file.name) + #set $sep = ',' + #end for + ##----------------- end for getting file names and file paths ------------------------------ + -P '$count_file_paths' + -N '$count_file_names' + -S $sample_table + -p '$design_formula' ## 2. output report and report site directory - -o $DESeq - -d $DESeq.files_path + -r $report + -d $report.files_path + -s $sink_message -w $deseq_workspace - ## 3. Rmd templates sitting in the tool directory - - ## _site.yml and index.Rmd template files - -D '${__tool_directory__}/DESeq.Rmd' + ## 3. Rmd templates from the tool directory + -t '${__tool_directory__}/DESeq.Rmd' ]]> </command> <inputs> - <param type="data" name="count_files" format="tabular" multiple="true" label="Count files from htseq-count" /> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> + <param type="data" name="count_files" format="tabular" multiple="true" label="Count files from htseq-count"/> <param type="data" name="sample_table" format="tabular" multiple="false" label="sample table file" help="The sample table file contains a table. The first column is the sample name, the second column is the count file name and the rest of columns are treatment columns. The file names in this table have @@ -72,11 +78,11 @@ </valid> </sanitizer> </param> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> </inputs> <outputs> - <data name="DESeq" format="html" label="DESeq Analysis" /> - <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis" /> + <data name="report" format="html" label="DESeq Analysis on ${on_string}"/> + <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/> + <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/> </outputs> <citations> <citation> @@ -94,7 +100,8 @@ <citation type="bibtex"> @article{allaire2016rmarkdown, title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, journal={R package version 0.9}, volume={6}, year={2016} @@ -110,21 +117,15 @@ } </citation> <citation> - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - </citation> - <citation> - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} } </citation> </citations>
--- a/DESeq_render.R Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_render.R Tue Nov 07 13:50:32 2017 -0500 @@ -1,90 +1,70 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, -error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) -}) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) +library(getopt) +library(rmarkdown) +library(htmltools) +library(dplyr) +library(stringi) +library(DESeq2) +library(pheatmap) +library(RColorBrewer) +library(DT) -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$COUNT_FILES = c('count_files', 'c', '1', 'character') -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_dir', 'd', '1', 'character') -spec_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_RMD = c('deseq_rmd', 'D', '1', 'character') +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') + ##---------below is the code for rendering .Rmd templates----- + + ##=============STEP 1: handle command line arguments========== + ## + ##============================================================ + # column 1: the long flag name + # column 2: the short flag alias. A SINGLE character string + # column 3: argument mask + # 0: no argument + # 1: argument required + # 2: argument is optional + # column 4: date type to which the flag's argument shall be cast. + # possible values: logical, integer, double, complex, character. + #------------------------------------------------------------- + #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ + # 1. short flag alias should match the flag in the command section in the XML file. + # 2. long flag name can be any legal R variable names + # 3. two names in args_list can have common string but one name should not be a part of another name. + # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. + #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + args_list=list() + ##------- 1. input data --------------------- + args_list$ECHO = c('echo', 'e', '1', 'character') + args_list$COUNT_FILE_PATHS = c('count_file_paths', 'P', '1', 'character') + args_list$COUNT_FILE_NAMES = c('count_file_names', 'N', '1', 'character') + args_list$SAMPLE_TABLE = c('sample_table', 'S', '1', 'character') + args_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character') + ##--------2. output report and outputs -------------- + args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') + args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') + args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') + args_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character') + ##--------3. .Rmd templates in the tool directory ---------- + args_list$deseq_RMD = c('deseq_rmd', 't', '1', 'character') + ##----------------------------------------------------------- + opt = getopt(t(as.data.frame(args_list))) - -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== - -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. - - -#----- 01 DESeq.Rmd ----------------------- -readLines(opt$deseq_rmd) %>% + + ##=======STEP 2: create report directory (optional)========== + ## + ##=========================================================== + dir.create(opt$report_dir) + + ##=STEP 3: replace placeholders in .Rmd with argument values= + ## + ##=========================================================== + #++ need to replace placeholders with args values one by one+ + readLines(opt$deseq_rmd) %>% (function(x) { - gsub('ECHO', opt$echo, x) + gsub('ECHO', opt$echo, x) }) %>% (function(x) { gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) @@ -93,21 +73,21 @@ gsub('DESIGN_FORMULA', opt$design_formula, x) }) %>% (function(x) { - gsub('OUTPUT_DIR', opt$deseq_dir, x) + gsub('REPORT_DIR', opt$report_dir, x) }) %>% (function(x) { - fileConn = file('DESeq.Rmd') - writeLines(x, con=fileConn) - close(fileConn) + fileConn = file('deseq.Rmd') + writeLines(x, con=fileConn) + close(fileConn) }) + + + ##=============STEP 4: render .Rmd templates================= + ## + ##=========================================================== + render('deseq.Rmd', output_file = opt$report_html) -#------ 3. render all Rmd files -------- -render('DESeq.Rmd', output_file = opt$deseq_html) - - -#-------4. manipulate outputs ----------------------------- -# document file -# file.copy('DESeq.html', opt$deseq_html, recursive = TRUE) - - + ##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file
--- a/DESeq_results.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results.Rmd Tue Nov 07 13:50:32 2017 -0500 @@ -10,17 +10,14 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error = TRUE ) - -library(DESeq2) -library(pheatmap) -library(genefilter) ``` -# Import workspace ```{r eval=TRUE} +# Import workspace fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") load("deseq.RData") ``` @@ -30,9 +27,11 @@ ## Result table ```{r} -group = colnames(sample_table)[CONTRAST_GROUP] -res <- results(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) -datatable(as.data.frame(res), style="bootstrap", filter = 'top', +cat('--- View the top 100 rows of the result table ---') +res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) +write.csv(as.data.frame(res), file = 'deseq_results.csv') +res_df = as.data.frame(res)[1:100, ] +datatable(res_df, style="bootstrap", filter = 'top', class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) ``` @@ -45,16 +44,10 @@ # MA-plot {.tabset} -## Shrinked with `lfcShrink()` function -```{r eval=FALSE} -shrink_res = DESeq2::lfcShrink(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'), res=res) -plotMA(shrink_res) -``` - -## Shrinked with Bayesian procedure ```{r} +cat('--- Shrinked with Bayesian procedure ---') plotMA(res) ``` @@ -68,11 +61,11 @@ ``` -# Gene clustering +# Visualization {.tabset} +## Gene clustering ```{r} -group_index = as.numeric(strsplit("CLUSTERING_GROUPS", ',')[[1]]) -clustering_groups = colnames(sample_table)[group_index] +clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]] topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) mat <- assay(rld)[ topVarGenes, ] @@ -83,3 +76,34 @@ pheatmap(mat, annotation_col = annotation_col) ``` +## Sample-to-sample distance + +```{r} +sampleDistMatrix <- as.matrix( sampleDists ) +colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) +pheatmap(sampleDistMatrix, + clustering_distance_cols = sampleDists, + col = colors) +``` + +## PCA plot + +```{r} +plotPCA(rld, intgroup = clustering_groups) +``` + +## MDS plot {.tabset} + +### Data table +```{r} +mds <- as.data.frame(colData(rld)) %>% + cbind(cmdscale(sampleDistMatrix)) +knitr::kable(mds) +``` + +### Plot +```{r} +ggplot(mds, aes(x = `1`, y = `2`, col = time)) + + geom_point(size = 3) + coord_fixed() +``` +
--- a/DESeq_results.xml Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results.xml Tue Nov 07 13:50:32 2017 -0500 @@ -1,10 +1,10 @@ -<tool id="DESeq_results" name="DESeq2: Results" version="1.0.0"> +<tool id="DESeq_results" name="DESeq2: Results" version="2.0.0"> <requirements> <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> <requirement type="package" version="1.20.0">r-getopt</requirement> <requirement type="package" version="1.2">r-rmarkdown</requirement> <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> <requirement type="package" version="1.1.0">r-stringr</requirement> <requirement type="package" version="0.4.0">r-highcharter</requirement> <requirement type="package" version="0.2">r-dt</requirement> @@ -18,18 +18,11 @@ An R Markdown tool to display DESeq analysis. </description> <stdio> - <regex match="Execution halted" - source="both" - level="fatal" - description="Execution halted." /> - <regex match="Error in" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> - <regex match="Fatal error" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing--> + <regex match="XXX" + source="stderr" + level="warning" + description="Check the warnings_and_errors.txt file for more details."/> </stdio> <command> <![CDATA[ @@ -38,53 +31,45 @@ ## 1. input data -e $echo - -w $deseq_workspace - -c "$contrast_group" - -t $treatment - -k $condition + -W $deseq_workspace + -C '$contrast_factor' + -T '$treatment' + -K '$condition' - #set $groups = [] - #for $c_group in $clustering_groups - #if str($c_group.group) - #set $groups = $groups + [str($c_group.group)] - #end if - #end for - #set $groups = ','.join($groups) - -m "$groups" + -M '$clustering_factors' ## 2. output report and report site directory - -o $deseq_results - -d $deseq_results.files_path + -r $report + -d $report.files_path + -s $sink_message + -R $deseq_results ## 3. Rmd templates sitting in the tool directory - - ## _site.yml and index.Rmd template files - -D '${__tool_directory__}/DESeq_results.Rmd' + -t '${__tool_directory__}/DESeq_results.Rmd' ]]> </command> <inputs> - <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" /> - <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" /> - <param type="data_column" name="contrast_group" data_ref="sample_table" use_header_names="true" - optional="false" - label="Group for result contrast" - help=""/> - <param type="text" name="treatment" label="Treatment level" /> - <param type="text" name="condition" label="Condition level" /> - - <repeat name="clustering_groups" title="Gene clustering groups" min="1"> - <param type="data_column" name="group" data_ref="sample_table" use_header_names="true" - optional="false" - label="A phenotype column from the sample table" /> - </repeat> - - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> + <param type="data" name="deseq_workspace" format="rdata" multiple="false" optional="false" + label="Workspace from tool DESeq2: DESeq"/> + <param type="text" name="contrast_factor" label="Factor" optional="false" + help="the name of a factor in the design formula"/> + <param type="text" name="treatment" label="Treatment level" optional="false" + help=" the name of the numerator level for the fold change"/> + <param type="text" name="condition" label="Condition level" optional="false" + help=" the name of the denominator level for the fold change"/> + <param type="text" name="clustering_factors" title="Gene clustering factors" optional="false" + label="factors of interest for clustering samples and PCA plot" + help="A single factor or multiple factors from the design formula. Multiple factors are separated by comma (,)."/> </inputs> <outputs> - <data name="deseq_results" format="html" label="DESeq Results" /> + <data format="html" name="report" label="DESeq results report on ${on_string}" /> + <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/> + <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" /> </outputs> <citations> <citation> @@ -102,7 +87,8 @@ <citation type="bibtex"> @article{allaire2016rmarkdown, title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, journal={R package version 0.9}, volume={6}, year={2016} @@ -118,21 +104,15 @@ } </citation> <citation> - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - </citation> - <citation> - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} } </citation> </citations>
--- a/DESeq_results_render.R Tue Aug 08 15:06:40 2017 -0400 +++ b/DESeq_results_render.R Tue Nov 07 13:50:32 2017 -0500 @@ -1,120 +1,108 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, - error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) - }) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) +library(getopt) +library(rmarkdown) +library(htmltools) +library(dplyr) +library(DESeq2) +library(pheatmap) +library(genefilter) +library(DT) +library(stringi) +library(RColorBrewer) +library(ggplot2) -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$CONTRAST_GROUP = c('contrast_group', 'c', '1', 'character') -spec_list$TREATMENT_LEVEL = c('treatment_level', 't', '1', 'character') -spec_list$CONDITION_LEVEL = c('condition_level', 'k', '1', 'character') -spec_list$CLUSTERING_GROUPS = c('clustering_groups', 'm', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_results_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_results_dir', 'd', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_results_rmd', 'D', '1', 'character') - +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') + ##---------below is the code for rendering .Rmd templates----- + + ##=============STEP 1: handle command line arguments========== + ## + ##============================================================ + # column 1: the long flag name + # column 2: the short flag alias. A SINGLE character string + # column 3: argument mask + # 0: no argument + # 1: argument required + # 2: argument is optional + # column 4: date type to which the flag's argument shall be cast. + # possible values: logical, integer, double, complex, character. + #------------------------------------------------------------- + #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ + # 1. short flag alias should match the flag in the command section in the XML file. + # 2. long flag name can be any legal R variable names + # 3. two names in args_list can have common string but one name should not be a part of another name. + # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. + #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + args_list=list() + ##------- 1. input data --------------------- + args_list$ECHO = c('echo', 'e', '1', 'character') + args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character') + args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character') + args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character') + args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character') + args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character') + ##--------2. output report and outputs -------------- + args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') + args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') + args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') + args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character') + ##--------3. .Rmd templates in the tool directory ---------- + args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character') + ##----------------------------------------------------------- + opt = getopt(t(as.data.frame(args_list))) -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== + + ##=======STEP 2: create report directory (optional)========== + ## + ##=========================================================== + dir.create(opt$report_dir) + + ##=STEP 3: replace placeholders in .Rmd with argument values= + ## + ##=========================================================== + #++ need to replace placeholders with args values one by one+ + readLines(opt$deseq_results_rmd) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) + }) %>% + (function(x) { + gsub('CONTRAST_FACTOR', opt$contrast_factor, x) + }) %>% + (function(x) { + gsub('TREATMENT_LEVEL', opt$treatment_level, x) + }) %>% + (function(x) { + gsub('CONDITION_LEVEL', opt$condition_level, x) + }) %>% + (function(x) { + gsub('CLUSTERING_FACTORS', opt$clustering_factors, x) + }) %>% + (function(x) { + gsub('REPORT_DIR', opt$report_dir, x) + }) %>% + (function(x) { + gsub('DESEQ_RESULTS', opt$deseq_results, x) + }) %>% + (function(x) { + fileConn = file('deseq_results.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_results_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. + ##=============STEP 4: render .Rmd templates================= + ## + ##=========================================================== + render('deseq_results.Rmd', output_file = opt$report_html) -#----- 01 DESeq_results.Rmd ----------------------- -readLines(opt$deseq_results_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('CONTRAST_GROUP', opt$contrast_group, x) - }) %>% - (function(x) { - gsub('TREATMENT_LEVEL', opt$treatment_level, x) - }) %>% - (function(x) { - gsub('CONDITION_LEVEL', opt$condition_level, x) - }) %>% - (function(x) { - gsub('CLUSTERING_GROUPS', opt$clustering_groups, x) - }) %>% - (function(x) { - gsub('OUTPUT_DIR', opt$deseq_results_dir, x) - }) %>% - (function(x) { - fileConn = file('DESeq_results.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - -#------ 3. render all Rmd files -------- -render('DESeq_results.Rmd', output_file = opt$deseq_results_html) - - -#-------4. manipulate outputs ----------------------------- + ##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file
--- a/DESeq_visualization.Rmd Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,114 +0,0 @@ ---- -title: 'DESeq2: Visualization' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO -) - -library(stringi) -library(DESeq2) -library(pheatmap) -# library(PoiClaClu) -library(RColorBrewer) -``` - -# Import workspace - -```{r eval=TRUE} -fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") -load("deseq.RData") -``` - -# Visualization - -## Heatmaps of sample-to-sample distances {.tabset} - -### rlog-transformed values - -```{r} -sampleDistMatrix <- as.matrix( sampleDists ) -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - # clustering_distance_rows = sampleDists, - clustering_distance_cols = sampleDists, - col = colors) -``` - -### Poisson Distance - -```{r eval=FALSE} -count_t = t(counts(dds)) -rownames(count_t) = colnames(counts(dds)) -poisd <- PoissonDistance(count_t) -samplePoisDistMatrix <- as.matrix( poisd$dd ) -rownames(samplePoisDistMatrix) = rownames(count_t) -colnames(samplePoisDistMatrix) = rownames(count_t) -pheatmap(samplePoisDistMatrix, - # clustering_distance_rows = poisd$dd, - clustering_distance_cols = poisd$dd, - col = colors) -``` - - -## PCA plots {.tabset} - -### Using `plotPCA()` function - -```{r} -# interest groups -col_index = as.numeric(strsplit("INTGROUPS_PCA", ',')[[1]]) -intgroup_pca = colnames(sample_table)[col_index] -``` - -```{r} -plotPCA(rld, intgroup = intgroup_pca) -``` - - -### Using *ggplot2* - -```{r} -pcaData <- plotPCA(rld, intgroup = intgroup_pca, returnData = TRUE) -percentVar <- round(100 * attr(pcaData, "percentVar")) -ggplot(pcaData, aes(x = PC1, y = PC2, color = time)) + - geom_point(size =3) + - xlab(paste0("PC1: ", percentVar[1], "% variance")) + - ylab(paste0("PC2: ", percentVar[2], "% variance")) + - coord_fixed() -``` - -### PCA data table - -```{r} -knitr::kable(pcaData) -``` - - -## MDS plots {.tabset} - -### Using rlog-transformed values - -```{r} -mds <- as.data.frame(colData(rld)) %>% - cbind(cmdscale(sampleDistMatrix)) -mds -ggplot(mds, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -``` - -### Using the *Poisson Distance* - -```{r eval=FALSE} -mdsPois <- as.data.frame(colData(dds)) %>% - cbind(cmdscale(samplePoisDistMatrix)) -ggplot(mdsPois, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -```
--- a/DESeq_visualization.xml Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -<tool id="DESeq_visualization" name="DESeq2: Visualization" version="1.0.0"> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.2">r-rmarkdown</requirement> - <requirement type="package" version="1.8.4">r-plyr</requirement> - <requirement type="package" version="1.1.0">r-stringr</requirement> - <requirement type="package" version="0.4.0">r-highcharter</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - <requirement type="package" version="1.4.2">r-reshape2</requirement> - <requirement type="package" version="4.5.6">r-plotly</requirement> - <requirement type="package" version="0.2.0.1">r-formattable</requirement> - <requirement type="package" version="0.3.5">r-htmltools</requirement> - <requirement type="package" version="1.0.8">r-pheatmap</requirement> - </requirements> - <description> - An R Markdown tool to visualize DESeq analysis results. - </description> - <stdio> - <regex match="Execution halted" - source="both" - level="fatal" - description="Execution halted." /> - <regex match="Error in" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> - <regex match="Fatal error" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> - </stdio> - <command> - <![CDATA[ - - Rscript '${__tool_directory__}/DESeq_visualization_render.R' - - ## 1. input data - -e $echo - -w $deseq_workspace - - #set $pca_groups = [] - #for $group in $intgroups_pca - #if str($group.intgroup) - #set $pca_groups = $pca_groups + [str($group.intgroup)] - #end if - #end for - #set $pca_groups = ','.join($pca_groups) - -p "$pca_groups" - - #set $mds_groups = [] - #for $group in $intgroups_mds - #if str($group.intgroup) - #set $mds_groups = $mds_groups + [str($group.intgroup)] - #end if - #end for - #set $mds_groups = ','.join($mds_groups) - -m "$mds_groups" - - ## 2. output report and report site directory - -o $deseq_visualization - -d $deseq_visualization.files_path - - - ## 3. Rmd templates sitting in the tool directory - - ## _site.yml and index.Rmd template files - -D '${__tool_directory__}/DESeq_visualization.Rmd' - - - - ]]> - </command> - <inputs> - <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" /> - <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" /> - <repeat name="intgroups_pca" title="Interest groups for PCA plot" min="1"> - <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true" - optional="false" - label="Interest group for PCA plot" - help=""/> - </repeat> - <repeat name="intgroups_mds" title="Interest groups for MDS plot" min="1"> - <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true" - optional="false" - label="Interest group for MDS plot" - help=""/> - </repeat> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> - </inputs> - <outputs> - <data name="deseq_visualization" format="html" label="DESeq Visualization" /> - </outputs> - <citations> - <citation> - @article{love2014moderated, - title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, - author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, - journal={Genome biology}, - volume={15}, - number={12}, - pages={550}, - year={2014}, - publisher={BioMed Central} - } - </citation> - <citation type="bibtex"> - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - </citation> - <citation type="bibtex"> - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - </citation> - <citation> - @misc{pheatmap2015, - title = {pheatmap: Pretty Heatmaps}, - author = {Raivo Kolde}, - year = {2015}, - note = {R package version 1.0.8}, - url = {https://CRAN.R-project.org/package=pheatmap}, - } - </citation> - <citation> - @misc{dt2016, - title = {DT: A Wrapper of the JavaScript Library 'DataTables'}, - author = {Yihui Xie}, - year = {2016}, - note = {R package version 0.2}, - url = {https://CRAN.R-project.org/package=DT}, - } - </citation> - </citations> -</tool> \ No newline at end of file
--- a/DESeq_visualization_render.R Tue Aug 08 15:06:40 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -##======= Handle arguments from command line ======== -# setup R error handline to go to stderr -options(show.error.messages=FALSE, - error=function(){ - cat(geterrmessage(), file=stderr()) - quit("no", 1, F) - }) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. -loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -# suppress warning -options(warn = -1) - -options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) -args = commandArgs(trailingOnly=TRUE) - -suppressPackageStartupMessages({ - library(getopt) - library(tools) -}) - -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -spec_list=list() - -##------- 1. input data --------------------- -spec_list$ECHO = c('echo', 'e', '1', 'character') -spec_list$DESEQ_WORKSPACE = c('deseq_workspace', 'w', '1', 'character') -spec_list$SAMPLE_TABLE = c('sample_table', 's', '1', 'character') -spec_list$INTGROUPS_PCA = c('intgroups_pca', 'p', '1', 'character') -spec_list$INTGROUPS_MDS = c('intgroups_mds', 'm', '1', 'character') - -##--------2. output report and report site directory -------------- -spec_list$OUTPUT_HTML = c('deseq_visualization_html', 'o', '1', 'character') -spec_list$OUTPUT_DIR = c('deseq_visualization_dir', 'd', '1', 'character') - -##--------3. Rmd templates sitting in the tool directory ---------- - -spec_list$DESEQ_VISUALIZATION_RMD = c('deseq_visualization_rmd', 'D', '1', 'character') - - - -##------------------------------------------------------------------ - -spec = t(as.data.frame(spec_list)) -opt = getopt(spec) -# arguments are accessed by long flag name (the first column in the spec matrix) -# NOT by element name in the spec_list -# example: opt$help, opt$expression_file -##====== End of arguments handling ========== - -#------ Load libraries --------- -library(rmarkdown) -library(plyr) -library(stringr) -library(dplyr) -library(highcharter) -library(DT) -library(reshape2) -# library(Kmisc) -library(plotly) -library(formattable) -library(htmltools) - - -#----- 1. create the report directory ------------------------ -system(paste0('mkdir -p ', opt$deseq_visualization_dir)) - - -#----- 2. generate Rmd files with Rmd templates -------------- -# a. templates without placeholder variables: -# copy templates from tool directory to the working directory. -# b. templates with placeholder variables: -# substitute variables with user input values and place them in the working directory. - - -#----- 01 DESeq_visualization.Rmd ----------------------- -readLines(opt$deseq_visualization_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('INTGROUPS_PCA', opt$intgroups_pca, x) - }) %>% - (function(x) { - gsub('INTGROUPS_MDS', opt$intgroups_mds, x) - }) %>% - (function(x) { - gsub('OUTPUT_DIR', opt$deseq_visualization_dir, x) - }) %>% - (function(x) { - fileConn = file('DESeq_visualization.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - -#------ 3. render all Rmd files -------- -render('DESeq_visualization.Rmd', output_file = opt$deseq_visualization_html) - - -#-------4. manipulate outputs -----------------------------