Mercurial > repos > mingchen0919 > rmarkdown_deseq2
changeset 0:7231d7e8d3ed draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_deseq2 commit 9285c2b8ad41a486dde2a87600a6b8267841c8b5-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 10:43:18 -0400 |
parents | |
children | 312e9bcc02f1 |
files | DESeq.Rmd DESeq.xml DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_render.R DESeq_visualization.Rmd DESeq_visualization.xml DESeq_visualization_render.R |
diffstat | 9 files changed, 904 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq.Rmd Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,111 @@
+---
+title: 'DESeq2: Perform DESeq analysis'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# The render script fills in the upper-case placeholder tokens of this
+# template (such as the value of `echo` below) before knitting it.
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+
+# Only packages that this report actually calls are attached.
+library(DESeq2)
+# datatable() comes from DT; attach it here instead of relying on the caller.
+library(DT)
+```
+
+# `DESeqDataSet` object
+
+```{r}
+count_files <- strsplit(opt$count_files, ',')[[1]]
+sample_table <- read.table(opt$sample_table, header = TRUE)
+
+## DESeqDataSetFromHTSeqCount() looks the count files up by the names in the
+## second column of the sample table, so copy the uploads to those names.
+target_files <- as.character(sample_table[[2]])
+if (length(count_files) != length(target_files)) {
+  stop("Got ", length(count_files), " count file(s) but the sample table has ",
+       length(target_files), " row(s).")
+}
+
+## copy count files into working directory
+file_copy <- file.copy(count_files, target_files, overwrite = TRUE)
+if (!all(file_copy)) {
+  stop("Could not copy count file(s): ",
+       paste(count_files[!file_copy], collapse = ", "))
+}
+
+## DESeqDataSet object
+dds <- DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
+                                  directory = './',
+                                  design = DESIGN_FORMULA)
+dds
+```
+
+# Pre-filtering the dataset.
+
+We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.
+
+* Number of rows before pre-filtering
+```{r}
+nrow(dds)
+```
+
+* Number of rows after pre-filtering
+```{r}
+dds <- dds[rowSums(counts(dds)) > 1, ]
+nrow(dds)
+```
+
+# Peek at data {.tabset}
+
+## Count Data
+
+```{r}
+datatable(head(counts(dds), 100), style = "bootstrap",
+          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample Table
+
+```{r}
+datatable(sample_table, style = "bootstrap",
+          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+# Sample distance on variance stabilized data {.tabset}
+
+## `rlog` Stabilizing transformation
+
+```{r}
+rld <- rlog(dds, blind = FALSE)
+datatable(head(assay(rld), 100), style = "bootstrap",
+          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample distance
+
+```{r}
+sampleDists <- dist(t(assay(rld)))
+sampleDists
+```
+
+# Differential expression analysis
+
+```{r}
+dds <- DESeq(dds)
+```
+
+```{r}
+## Drop the command-line options before saving so they cannot overwrite the
+## options of whichever tool loads this workspace later.
+rm("opt")
+save(list = ls(all.names = TRUE), file = 'DESEQ_WORKSPACE')
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq.xml Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,82 @@
+<tool id="DESeq" name="DESeq2: DESeq" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+    </requirements>
+    <description>
+        An R Markdown tool to perform DESeq analysis.
+    </description>
+    <stdio>
+        <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/DESeq_render.R'
+
+            ## Every value is single-quoted so that paths or text containing
+            ## spaces or shell metacharacters reach the script as one argument.
+
+            ## 1. input data
+            -e '$echo'
+            -c '$count_files'
+            -s '$sample_table'
+            -p '$design_formula'
+
+            ## 2. output report and report site directory
+            -o '$DESeq'
+            -d '$DESeq.files_path'
+            -w '$deseq_workspace'
+
+            ## 3. Rmd template sitting in the tool directory
+            -D '${__tool_directory__}/DESeq.Rmd'
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="count_files" format="tabular" multiple="true" label="Count files from htseq-count" />
+        <param type="data" name="sample_table" format="tabular" multiple="false" label="sample table file"
+               help="The sample table file contains a table. The first column is the sample name, the second column is
+                     the count file name and the rest of columns are treatment columns. The file names in this table have
+                     to be in the same order as the count files uploaded in the previous step. "/>
+        <param type="text" name="design_formula" value="~ condition_1 + condition_2" label="Design formula"
+               help="The simplest design formula for differential expression would be ~ condition, where condition
+                     is a column in colData(dds) that specifies which of two (or more groups) the samples belong to">
+            <sanitizer>
+                <valid initial="default">
+                    <add preset="string.printable"/>
+                    <add value="~"/>
+                    <!-- The formula is passed single-quoted on the command line, so a literal single quote must never reach it. -->
+                    <remove value="'"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="DESeq" format="html" label="DESeq Analysis" />
+        <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis" />
+    </outputs>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_render.R Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,126 @@
+##======= Handle arguments from command line ========
+# Route R error messages to stderr and exit with status 1, so that a failure
+# is visible to the process that launched this script.
+options(show.error.messages = FALSE,
+        error = function() {
+          cat(geterrmessage(), file = stderr())
+          quit("no", 1, FALSE)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list <- list()
+
+##------- 1. input data ---------------------
+spec_list$COUNT_FILES <- c('count_files', 'c', '1', 'character')
+spec_list$ECHO <- c('echo', 'e', '1', 'character')
+spec_list$SAMPLE_TABLE <- c('sample_table', 's', '1', 'character')
+spec_list$DESIGN_FORMULA <- c('design_formula', 'p', '1', 'character')
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML <- c('deseq_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR <- c('deseq_dir', 'd', '1', 'character')
+spec_list$WORKSPACE <- c('deseq_workspace', 'w', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+spec_list$DESEQ_RMD <- c('deseq_rmd', 'D', '1', 'character')
+
+##------------------------------------------------------------------
+
+# One row per flag; the row names are taken from the list names.
+spec <- do.call(rbind, spec_list)
+opt <- getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+
+# getopt() simply omits flags that were not given; fail early with a clear
+# message instead of an obscure error later on.
+missing_opts <- setdiff(spec[, 1], names(opt))
+if (length(missing_opts) > 0) {
+  stop("Missing required option(s): ", paste(missing_opts, collapse = ", "))
+}
+
+# The design formula is pasted into the report template as R code, so accept
+# it only if it parses as exactly one formula expression.
+# NOTE(review): terms inside the formula are still evaluated by the report.
+formula_expr <- tryCatch(parse(text = opt$design_formula),
+                         error = function(e) NULL)
+if (length(formula_expr) != 1 || !is.call(formula_expr[[1]]) ||
+    !identical(formula_expr[[1]][[1]], as.name("~"))) {
+  stop("The design formula must be a single R formula, e.g. '~ condition'.")
+}
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+library(stringr)
+library(dplyr)
+library(highcharter)
+library(DT)
+library(reshape2)
+# library(Kmisc)
+library(plotly)
+library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() avoids building a shell command from the path.
+dir.create(opt$deseq_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+# Replace each placeholder (the names of `values`) in `lines` with its value,
+# in the order given. Matching and replacement are literal (fixed = TRUE), so
+# backslashes or regex characters in a value are inserted unchanged.
+fill_template <- function(lines, values) {
+  for (placeholder in names(values)) {
+    lines <- gsub(placeholder, values[[placeholder]], lines, fixed = TRUE)
+  }
+  lines
+}
+
+#----- 01 DESeq.Rmd -----------------------
+filled_rmd <- fill_template(
+  readLines(opt$deseq_rmd),
+  list(ECHO = opt$echo,
+       DESEQ_WORKSPACE = opt$deseq_workspace,
+       DESIGN_FORMULA = opt$design_formula,
+       OUTPUT_DIR = opt$deseq_dir)
+)
+writeLines(filled_rmd, con = 'DESeq.Rmd')
+
+
+#------ 3. render all Rmd files --------
+render('DESeq.Rmd', output_file = opt$deseq_html)
+
+
+#-------4. manipulate outputs -----------------------------
+# document file
+# file.copy('DESeq.html', opt$deseq_html, recursive = TRUE)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results.Rmd Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,92 @@
+---
+title: 'DESeq2: Results'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# The render script fills in the upper-case placeholder tokens of this
+# template before knitting it.
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+
+library(DESeq2)
+library(pheatmap)
+library(genefilter)
+# datatable() comes from DT; attach it here instead of relying on the caller.
+library(DT)
+```
+
+# Import workspace
+
+```{r eval=TRUE}
+# Work on a local copy of the workspace; stop if the copy could not be made
+# rather than loading a stale or missing file.
+fcp <- file.copy("DESEQ_WORKSPACE", "deseq.RData", overwrite = TRUE)
+stopifnot(fcp)
+load("deseq.RData")
+```
+
+# Results {.tabset}
+
+## Result table
+
+```{r}
+group <- colnames(sample_table)[CONTRAST_GROUP]
+res <- results(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'))
+datatable(as.data.frame(res), style = "bootstrap", filter = 'top',
+          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Result summary
+
+```{r}
+summary(res)
+```
+
+
+# MA-plot {.tabset}
+
+## Shrunken with `lfcShrink()` function
+
+```{r eval=FALSE}
+shrink_res <- DESeq2::lfcShrink(dds, contrast = c(group, 'TREATMENT_LEVEL', 'CONDITION_LEVEL'), res = res)
+plotMA(shrink_res)
+```
+
+## Shrunken with Bayesian procedure
+
+```{r}
+plotMA(res)
+```
+
+
+# Histogram of p values
+
+```{r}
+hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
+     col = "grey50", border = "white", main = "",
+     xlab = "Mean normalized count larger than 1")
+```
+
+
+# Gene clustering
+
+```{r}
+group_index <- as.numeric(strsplit("CLUSTERING_GROUPS", ',')[[1]])
+clustering_groups <- colnames(sample_table)[group_index]
+
+topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
+mat <- assay(rld)[topVarGenes, ]
+mat <- mat - rowMeans(mat)
+# drop = FALSE keeps a one-column table when a single group is selected.
+annotation_col <- as.data.frame(colData(rld)[, clustering_groups, drop = FALSE])
+colnames(annotation_col) <- clustering_groups
+rownames(annotation_col) <- colnames(mat)
+pheatmap(mat, annotation_col = annotation_col)
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results.xml Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,88 @@
+<tool id="DESeq_results" name="DESeq2: Results" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+    </requirements>
+    <description>
+        An R Markdown tool to display DESeq analysis.
+    </description>
+    <stdio>
+        <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/DESeq_results_render.R'
+
+            ## Every value is single-quoted so that paths or text containing
+            ## spaces or shell metacharacters reach the script as one argument.
+
+            ## 1. input data
+            -e '$echo'
+            -w '$deseq_workspace'
+            -c '$contrast_group'
+            -t '$treatment'
+            -k '$condition'
+
+            #set $groups = []
+            #for $c_group in $clustering_groups
+                #if str($c_group.group)
+                    #set $groups = $groups + [str($c_group.group)]
+                #end if
+            #end for
+            #set $groups = ','.join($groups)
+            -m '$groups'
+
+            ## 2. output report and report site directory
+            -o '$deseq_results'
+            -d '$deseq_results.files_path'
+
+            ## 3. Rmd template sitting in the tool directory
+            -D '${__tool_directory__}/DESeq_results.Rmd'
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" />
+        <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" />
+        <param type="data_column" name="contrast_group" data_ref="sample_table" use_header_names="true"
+               optional="false"
+               label="Group for result contrast"
+               help=""/>
+        <param type="text" name="treatment" label="Treatment level" />
+        <param type="text" name="condition" label="Condition level" />
+
+        <repeat name="clustering_groups" title="Gene clustering groups" min="1">
+            <param type="data_column" name="group" data_ref="sample_table" use_header_names="true"
+                   optional="false"
+                   label="A phenotype column from the sample table" />
+        </repeat>
+
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="deseq_results" format="html" label="DESeq Results" />
+    </outputs>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results_render.R Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,120 @@
+##======= Handle arguments from command line ========
+# Route R error messages to stderr and exit with status 1, so that a failure
+# is visible to the process that launched this script.
+options(show.error.messages = FALSE,
+        error = function() {
+          cat(geterrmessage(), file = stderr())
+          quit("no", 1, FALSE)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list <- list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO <- c('echo', 'e', '1', 'character')
+spec_list$DESEQ_WORKSPACE <- c('deseq_workspace', 'w', '1', 'character')
+spec_list$SAMPLE_TABLE <- c('sample_table', 's', '1', 'character')
+spec_list$CONTRAST_GROUP <- c('contrast_group', 'c', '1', 'character')
+spec_list$TREATMENT_LEVEL <- c('treatment_level', 't', '1', 'character')
+spec_list$CONDITION_LEVEL <- c('condition_level', 'k', '1', 'character')
+spec_list$CLUSTERING_GROUPS <- c('clustering_groups', 'm', '1', 'character')
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML <- c('deseq_results_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR <- c('deseq_results_dir', 'd', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+spec_list$DESEQ_RESULTS_RMD <- c('deseq_results_rmd', 'D', '1', 'character')
+
+##------------------------------------------------------------------
+
+# One row per flag; the row names are taken from the list names.
+spec <- do.call(rbind, spec_list)
+opt <- getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+
+# getopt() simply omits flags that were not given; fail early with a clear
+# message instead of an obscure error later. `sample_table` is declared in
+# the spec but is not used by this script, so it stays optional.
+missing_opts <- setdiff(spec[, 1], c(names(opt), 'sample_table'))
+if (length(missing_opts) > 0) {
+  stop("Missing required option(s): ", paste(missing_opts, collapse = ", "))
+}
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+library(stringr)
+library(dplyr)
+library(highcharter)
+library(DT)
+library(reshape2)
+# library(Kmisc)
+library(plotly)
+library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() avoids building a shell command from the path.
+dir.create(opt$deseq_results_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+# Replace each placeholder (the names of `values`) in `lines` with its value,
+# in the order given. Matching and replacement are literal (fixed = TRUE), so
+# backslashes or regex characters in a value are inserted unchanged.
+fill_template <- function(lines, values) {
+  for (placeholder in names(values)) {
+    lines <- gsub(placeholder, values[[placeholder]], lines, fixed = TRUE)
+  }
+  lines
+}
+
+#----- 01 DESeq_results.Rmd -----------------------
+filled_rmd <- fill_template(
+  readLines(opt$deseq_results_rmd),
+  list(ECHO = opt$echo,
+       DESEQ_WORKSPACE = opt$deseq_workspace,
+       CONTRAST_GROUP = opt$contrast_group,
+       TREATMENT_LEVEL = opt$treatment_level,
+       CONDITION_LEVEL = opt$condition_level,
+       CLUSTERING_GROUPS = opt$clustering_groups,
+       OUTPUT_DIR = opt$deseq_results_dir)
+)
+writeLines(filled_rmd, con = 'DESeq_results.Rmd')
+
+
+#------ 3. render all Rmd files --------
+render('DESeq_results.Rmd', output_file = opt$deseq_results_html)
+
+
+#-------4. manipulate outputs -----------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_visualization.Rmd Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,128 @@
+---
+title: 'DESeq2: Visualization'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# The render script fills in the upper-case placeholder tokens of this
+# template before knitting it.
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+
+library(stringi)
+library(DESeq2)
+library(pheatmap)
+library(RColorBrewer)
+# ggplot() is called below; attach ggplot2 instead of relying on the caller.
+library(ggplot2)
+
+# The Poisson-distance sections need the optional PoiClaClu package; they are
+# skipped as a whole when it is not installed.
+has_poiclaclu <- requireNamespace("PoiClaClu", quietly = TRUE)
+```
+
+# Import workspace
+
+```{r eval=TRUE}
+fcp <- file.copy("DESEQ_WORKSPACE", "deseq.RData", overwrite = TRUE)
+stopifnot(fcp)
+load("deseq.RData")
+```
+
+# Visualization
+
+## Heatmaps of sample-to-sample distances {.tabset}
+
+### rlog-transformed values
+
+```{r}
+sampleDistMatrix <- as.matrix(sampleDists)
+colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
+pheatmap(sampleDistMatrix,
+         # clustering_distance_rows = sampleDists,
+         clustering_distance_cols = sampleDists,
+         col = colors)
+```
+
+### Poisson Distance
+
+```{r eval=has_poiclaclu}
+count_t <- t(counts(dds))
+rownames(count_t) <- colnames(counts(dds))
+poisd <- PoiClaClu::PoissonDistance(count_t)
+samplePoisDistMatrix <- as.matrix(poisd$dd)
+rownames(samplePoisDistMatrix) <- rownames(count_t)
+colnames(samplePoisDistMatrix) <- rownames(count_t)
+pheatmap(samplePoisDistMatrix,
+         # clustering_distance_rows = poisd$dd,
+         clustering_distance_cols = poisd$dd,
+         col = colors)
+```
+
+
+## PCA plots {.tabset}
+
+### Using `plotPCA()` function
+
+```{r}
+# interest groups
+col_index <- as.numeric(strsplit("INTGROUPS_PCA", ',')[[1]])
+intgroup_pca <- colnames(sample_table)[col_index]
+```
+
+```{r}
+plotPCA(rld, intgroup = intgroup_pca)
+```
+
+
+### Using *ggplot2*
+
+```{r}
+pcaData <- plotPCA(rld, intgroup = intgroup_pca, returnData = TRUE)
+percentVar <- round(100 * attr(pcaData, "percentVar"))
+# Colour by the `group` column that plotPCA() builds from the selected
+# interest groups, instead of assuming a column called `time` exists.
+ggplot(pcaData, aes(x = PC1, y = PC2, color = group)) +
+  geom_point(size = 3) +
+  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
+  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
+  coord_fixed()
+```
+
+### PCA data table
+
+```{r}
+knitr::kable(pcaData)
+```
+
+
+## MDS plots {.tabset}
+
+### Using rlog-transformed values
+
+```{r}
+# Colour by the combination of the interest groups selected for the MDS plot.
+mds_index <- as.numeric(strsplit("INTGROUPS_MDS", ',')[[1]])
+intgroup_mds <- colnames(sample_table)[mds_index]
+
+mds <- cbind(as.data.frame(colData(rld)), cmdscale(sampleDistMatrix))
+mds$mds_group <- interaction(mds[, intgroup_mds, drop = FALSE], drop = TRUE)
+mds
+ggplot(mds, aes(x = `1`, y = `2`, col = mds_group)) +
+  geom_point(size = 3) + coord_fixed()
+```
+
+### Using the *Poisson Distance*
+
+```{r eval=has_poiclaclu}
+mdsPois <- cbind(as.data.frame(colData(dds)), cmdscale(samplePoisDistMatrix))
+mdsPois$mds_group <- interaction(mdsPois[, intgroup_mds, drop = FALSE], drop = TRUE)
+ggplot(mdsPois, aes(x = `1`, y = `2`, col = mds_group)) +
+  geom_point(size = 3) + coord_fixed()
+```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_visualization.xml Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,93 @@
+<tool id="DESeq_visualization" name="DESeq2: Visualization" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.4.0">r-highcharter</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.2.0.1">r-formattable</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+    </requirements>
+    <description>
+        An R Markdown tool to visualize DESeq analysis results.
+    </description>
+    <stdio>
+        <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command>
+        <![CDATA[
+
+        Rscript '${__tool_directory__}/DESeq_visualization_render.R'
+
+            ## Every value is single-quoted so that paths or text containing
+            ## spaces or shell metacharacters reach the script as one argument.
+
+            ## 1. input data
+            -e '$echo'
+            -w '$deseq_workspace'
+
+            #set $pca_groups = []
+            #for $group in $intgroups_pca
+                #if str($group.intgroup)
+                    #set $pca_groups = $pca_groups + [str($group.intgroup)]
+                #end if
+            #end for
+            #set $pca_groups = ','.join($pca_groups)
+            -p '$pca_groups'
+
+            #set $mds_groups = []
+            #for $group in $intgroups_mds
+                #if str($group.intgroup)
+                    #set $mds_groups = $mds_groups + [str($group.intgroup)]
+                #end if
+            #end for
+            #set $mds_groups = ','.join($mds_groups)
+            -m '$mds_groups'
+
+            ## 2. output report and report site directory
+            -o '$deseq_visualization'
+            -d '$deseq_visualization.files_path'
+
+            ## 3. Rmd template sitting in the tool directory
+            -D '${__tool_directory__}/DESeq_visualization.Rmd'
+
+        ]]>
+    </command>
+    <inputs>
+        <param type="data" name="deseq_workspace" format="rdata" multiple="false" label="Workspace from tool DESeq2: DESeq" />
+        <param type="data" name="sample_table" format="tabular" multiple="false" label="Sample table file" />
+        <repeat name="intgroups_pca" title="Interest groups for PCA plot" min="1">
+            <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true"
+                   optional="false"
+                   label="Interest group for PCA plot"
+                   help=""/>
+        </repeat>
+        <repeat name="intgroups_mds" title="Interest groups for MDS plot" min="1">
+            <param type="data_column" name="intgroup" data_ref="sample_table" use_header_names="true"
+                   optional="false"
+                   label="Interest group for MDS plot"
+                   help=""/>
+        </repeat>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data name="deseq_visualization" format="html" label="DESeq Visualization" />
+    </outputs>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_visualization_render.R Tue Aug 08 10:43:18 2017 -0400
@@ -0,0 +1,116 @@
+##======= Handle arguments from command line ========
+# Route R error messages to stderr and exit with status 1, so that a failure
+# is visible to the process that launched this script.
+options(show.error.messages = FALSE,
+        error = function() {
+          cat(geterrmessage(), file = stderr())
+          quit("no", 1, FALSE)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list <- list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO <- c('echo', 'e', '1', 'character')
+spec_list$DESEQ_WORKSPACE <- c('deseq_workspace', 'w', '1', 'character')
+spec_list$SAMPLE_TABLE <- c('sample_table', 's', '1', 'character')
+spec_list$INTGROUPS_PCA <- c('intgroups_pca', 'p', '1', 'character')
+spec_list$INTGROUPS_MDS <- c('intgroups_mds', 'm', '1', 'character')
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML <- c('deseq_visualization_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR <- c('deseq_visualization_dir', 'd', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+spec_list$DESEQ_VISUALIZATION_RMD <- c('deseq_visualization_rmd', 'D', '1', 'character')
+
+##------------------------------------------------------------------
+
+# One row per flag; the row names are taken from the list names.
+spec <- do.call(rbind, spec_list)
+opt <- getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+
+# getopt() simply omits flags that were not given; fail early with a clear
+# message instead of an obscure error later. `sample_table` is declared in
+# the spec but is not used by this script, so it stays optional.
+missing_opts <- setdiff(spec[, 1], c(names(opt), 'sample_table'))
+if (length(missing_opts) > 0) {
+  stop("Missing required option(s): ", paste(missing_opts, collapse = ", "))
+}
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+library(stringr)
+library(dplyr)
+library(highcharter)
+library(DT)
+library(reshape2)
+# library(Kmisc)
+library(plotly)
+library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() avoids building a shell command from the path.
+dir.create(opt$deseq_visualization_dir, recursive = TRUE, showWarnings = FALSE)
+
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+# Replace each placeholder (the names of `values`) in `lines` with its value,
+# in the order given. Matching and replacement are literal (fixed = TRUE), so
+# backslashes or regex characters in a value are inserted unchanged.
+fill_template <- function(lines, values) {
+  for (placeholder in names(values)) {
+    lines <- gsub(placeholder, values[[placeholder]], lines, fixed = TRUE)
+  }
+  lines
+}
+
+#----- 01 DESeq_visualization.Rmd -----------------------
+filled_rmd <- fill_template(
+  readLines(opt$deseq_visualization_rmd),
+  list(ECHO = opt$echo,
+       DESEQ_WORKSPACE = opt$deseq_workspace,
+       INTGROUPS_PCA = opt$intgroups_pca,
+       INTGROUPS_MDS = opt$intgroups_mds,
+       OUTPUT_DIR = opt$deseq_visualization_dir)
+)
+writeLines(filled_rmd, con = 'DESeq_visualization.Rmd')
+
+
+#------ 3. render all Rmd files --------
+render('DESeq_visualization.Rmd', output_file = opt$deseq_visualization_html)
+
+
+#-------4. manipulate outputs -----------------------------