Next changeset 1:32210899a3dd (2018-12-30) |
Commit message:
planemo upload |
added:
DESeq.Rmd DESeq.xml DESeq_01.Rmd DESeq_02.Rmd DESeq_03.Rmd DESeq_04.Rmd DESeq_05.Rmd DESeq_index.Rmd DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_01.Rmd DESeq_results_02.Rmd DESeq_results_03.Rmd DESeq_results_04.Rmd DESeq_results_index.Rmd DESeq_results_render.R DESeq_results_site.yml DESeq_site.yml |
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
title: 'DESeq2: Perform DESeq analysis'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```

# `DESeqDataSet` object

```{r 'DESeqDataSet object'}
# -P and -N carry comma-separated count file paths and their display names.
count_file_paths <- strsplit(opt$X_P, ',')[[1]]
count_file_names <- strsplit(opt$X_N, ',')[[1]]
sample_table <- read.table(opt$X_S, header = TRUE)

# The second column of the sample table holds the count file names. Fail with
# a readable message when a count file is not described there; otherwise the
# row lookup below silently yields NA rows.
missing_files <- setdiff(count_file_names, sample_table[, 2])
if (length(missing_files) > 0) {
  stop("Count files missing from the sample table: ",
       paste(missing_files, collapse = ", "), call. = FALSE)
}
row.names(sample_table) <- sample_table[, 2]
sample_table <- sample_table[count_file_names, ]

## copy count files into OUTPUT_DIR/counts
counts_dir <- paste0(OUTPUT_DIR, '/counts')
dir.create(counts_dir, recursive = TRUE)
file_copy <- file.copy(count_file_paths,
                       paste0(counts_dir, '/', count_file_names),
                       overwrite = TRUE)
if (!all(file_copy)) {
  warning("Could not copy count files: ",
          paste(count_file_names[!file_copy], collapse = ", "), call. = FALSE)
}

## DESeqDataSet object
dds <- DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
                                  directory = counts_dir,
                                  design = formula(opt$X_p))
dds
```

# Pre-filtering the dataset.

We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.

* Number of rows before pre-filtering
```{r}
nrow(dds)
```

* Number of rows after pre-filtering
```{r}
dds <- dds[rowSums(counts(dds)) > 1, ]
nrow(dds)
```

# Peek at data {.tabset}

## Count Data

```{r 'count data'}
datatable(head(counts(dds), 100), style = "bootstrap",
          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
```

## Sample Table

```{r 'sample table'}
datatable(sample_table, style = "bootstrap",
          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
```

# Sample distance on variance stabilized data {.tabset}

## `rlog` Stabilizing transformation

```{r}
rld <- rlog(dds, blind = FALSE)
datatable(head(assay(rld), 100), style = "bootstrap",
          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
```

## Sample distance

```{r}
sampleDists <- dist(t(assay(rld)))
sampleDists
```

# Differential expression analysis

```{r}
dds <- DESeq(dds)
```

```{r echo=FALSE}
# Save only the analysis objects. Dumping the whole environment would also
# store paths and connections of this run, which would overwrite the
# variables of whatever session later loads the workspace.
save(dds, rld, sample_table, sampleDists, file = opt$X_w)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq.xml Tue Feb 27 23:57:53 2018 -0500 |
[ |
<tool name="DESeq2: Analysis" id='deseq2' version="2.0.1">
    <description>
        perform differential expression analysis
    </description>
    <requirements>
        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.6">r-rmarkdown</requirement>
        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
        <requirement type="package" version="0.2">r-dt</requirement>
        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
    </requirements>
    <stdio>
        <!-- "XXX" is a placeholder pattern; the render script diverts its
             output and messages to warnings_and_errors.txt instead. -->
        <regex match="XXX" source="stderr" level="warning"
               description="Check the warnings_and_errors.txt file for more details."/>
    </stdio>
    <command><![CDATA[

        Rscript '${__tool_directory__}/DESeq_render.R'

            -e $echo

            ## Paths are quoted so that directories containing spaces survive the shell.
            -o '$report'
            -d '$report.files_path'
            -s '$sink_message'
            -t '${__tool_directory__}'


            ##----- code chunk to get file paths and raw file names for a multiple inputs data field ----
            #set $sep = ''
            #set $count_file_paths = ''
            #set $count_file_names = ''
            #for $count_file in $count_files:
                #set $count_file_paths += $sep + str($count_file)
                #set $count_file_names += $sep + str($count_file.name)
                #set $sep = ','
            #end for
            ##----------------- end for getting file names and file paths ------------------------------
            -P '$count_file_paths'
            -N '$count_file_names'
            -S '$sample_table'
            -p '$design_formula'
            -w '$deseq_workspace'


    ]]></command>
    <inputs>
        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
               label="Display analysis code in report?"/>
        <param type="data" name="count_files" format="txt" multiple="true" label="Count files from htseq-count"/>
        <param type="data" name="sample_table" format="txt" multiple="false" label="sample table file"
               help="The sample table file contains a table. The first column is the sample name, the second column is
               the count file name and the rest of columns are treatment columns. The file names in this table have
               to be in the same order as the count files uploaded in the previous step. "/>
        <param type="text" name="design_formula" value="~ condition_1 + condition_2" label="Design formula"
               help="The simplest design formula for differential expression would be ~ condition, where condition
               is a column in colData(dds) that specifies which of two (or more groups) the samples belong to">
            <sanitizer>
                <valid initial="default">
                    <add preset="string.printable"/>
                    <add value="~"/>
                </valid>
            </sanitizer>
        </param>
    </inputs>
    <outputs>
        <data name="report" format="html" label="DESeq Analysis on ${on_string}"/>
        <!-- The render script writes this file into the job working directory. -->
        <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}"
              from_work_dir="warnings_and_errors.txt"/>
        <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/>
    </outputs>
    <citations>
        <citation type="bibtex">
            @article{love2014moderated,
            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
            journal={Genome biology},
            volume={15},
            number={12},
            pages={550},
            year={2014},
            publisher={BioMed Central}
            }
        </citation>
        <citation type="bibtex"><![CDATA[
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        ]]></citation>
        <citation type="bibtex"><![CDATA[
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        ]]></citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_01.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_01.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```

# `DESeqDataSet` object

```{r 'DESeqDataSet object'}
# -P and -N carry comma-separated count file paths and their display names.
count_file_paths <- strsplit(opt$X_P, ',')[[1]]
count_file_names <- strsplit(opt$X_N, ',')[[1]]
sample_table <- read.table(opt$X_S, header = TRUE)

# The second column of the sample table holds the count file names. Fail with
# a readable message when a count file is not described there; otherwise the
# row lookup below silently yields NA rows.
missing_files <- setdiff(count_file_names, sample_table[, 2])
if (length(missing_files) > 0) {
  stop("Count files missing from the sample table: ",
       paste(missing_files, collapse = ", "), call. = FALSE)
}
row.names(sample_table) <- sample_table[, 2]
sample_table <- sample_table[count_file_names, ]

## copy count files into OUTPUT_DIR/counts
counts_dir <- paste0(OUTPUT_DIR, '/counts')
dir.create(counts_dir, recursive = TRUE)
file_copy <- file.copy(count_file_paths,
                       paste0(counts_dir, '/', count_file_names),
                       overwrite = TRUE)
if (!all(file_copy)) {
  warning("Could not copy count files: ",
          paste(count_file_names[!file_copy], collapse = ", "), call. = FALSE)
}

## DESeqDataSet object
dds <- DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
                                  directory = counts_dir,
                                  design = formula(opt$X_p))
dds
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_02.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_02.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults: code visibility follows the -e flag; errors do not abort rendering.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE)
```



# Pre-filtering the dataset.

We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.

* Number of rows before pre-filtering
```{r}
nrow(dds)
```

* Number of rows after pre-filtering
```{r}
# Keep only the rows whose total count across all samples exceeds 1.
row_totals <- rowSums(counts(dds))
dds <- dds[row_totals > 1, ]
nrow(dds)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_03.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_03.Rmd Tue Feb 27 23:57:53 2018 -0500 |
b |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults: code visibility follows the -e flag; errors do not abort rendering.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE)
```


# Peek at data {.tabset}

## Count Data

```{r 'count data'}
# Interactive preview of the first 100 rows of the count matrix.
count_preview <- head(counts(dds), 100)
datatable(
  count_preview,
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = 'tp', scrollX = TRUE)
)
```

## Sample Table

```{r 'sample table'}
# The sample table as read from the user-supplied file.
datatable(
  sample_table,
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = 'tp', scrollX = TRUE)
)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_04.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_04.Rmd Tue Feb 27 23:57:53 2018 -0500 |
b |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults: code visibility follows the -e flag; errors do not abort rendering.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE)
```



# Sample distance on variance stabilized data {.tabset}

## `rlog` Stabilizing transformation

```{r}
# rlog-transform the counts, then preview the first 100 transformed rows.
rld <- rlog(dds, blind = FALSE)
rlog_preview <- head(assay(rld), 100)
datatable(
  rlog_preview,
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = 'tp', scrollX = TRUE)
)
```

## Sample distance

```{r}
# dist() works on rows, so transpose to put one sample per row.
rlog_by_sample <- t(assay(rld))
sampleDists <- dist(rlog_by_sample)
sampleDists
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_05.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_05.Rmd Tue Feb 27 23:57:53 2018 -0500 |
b |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```


# Differential expression analysis

```{r}
dds <- DESeq(dds)
```

```{r echo=FALSE}
# Save the analysis objects needed downstream. The sink connection `zz` is
# deliberately left out: a connection is only meaningful in the session that
# opened it, and loading it elsewhere would overwrite that session's own `zz`.
save(dds, rld, sample_table, sampleDists, file = opt$X_w)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_render.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_render.R Tue Feb 27 23:57:53 2018 -0500 |
[ |
##============ Sink warnings and errors to a file ==============
## Everything printed or signalled below is diverted to
## 'warnings_and_errors.txt' until the sinks are removed at the end.
##==============================================================
# Open the connection explicitly: the message sink needs an already open
# connection, and a connection opened here is not closed implicitly when the
# output sink is removed.
zz <- file('warnings_and_errors.txt', open = 'wt')
sink(zz)
sink(zz, type = 'message')

#------------import libraries--------------------
options(stringsAsFactors = FALSE)

library(getopt)
library(rmarkdown)
library(DESeq2)
library(pheatmap)
library(DT)
library(ggplot2)
library(genefilter)
library(RColorBrewer)
#------------------------------------------------


#------------get arguments into R--------------------
# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)


# One row per command line flag: long name, short flag, argument mask
# (1 = argument required) and data type.
spec_matrix <- as.matrix(
  data.frame(stringsAsFactors = FALSE,
             long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N",
                            "X_S", "X_p", "X_w"),
             short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"),
             argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
             data_type_flags = c("character", "character", "character", "character",
                                 "character", "character", "character",
                                 "character", "character", "character")
  )
)
opt <- getopt(spec_matrix)
#----------------------------------------------------


#-----------using passed arguments in R
#           to define system environment variables---
# The first element of `opt` is skipped; the rest are exported by flag name.
do.call(Sys.setenv, opt[-1])
#----------------------------------------------------

#---------- often used variables ----------------
# OUTPUT_REPORT: path to galaxy output report
# OUTPUT_DIR: path to the output associated directory, which stores all outputs
# TOOL_DIR: path to the tool installation directory
OUTPUT_DIR <- opt$X_d
TOOL_DIR <- opt$X_t
OUTPUT_REPORT <- opt$X_o

# Run a shell command and record a warning (which ends up in the sink file)
# when it exits with a non-zero status instead of failing silently.
run_shell <- function(command) {
  status <- system(command)
  if (status != 0) {
    warning("Command failed with exit status ", status, ": ", command,
            call. = FALSE)
  }
  invisible(status)
}

# create the output associated directory to store all outputs
dir.create(OUTPUT_DIR, recursive = TRUE)

#-----------------render site--------------
# copy site generating materials into OUTPUT_DIR
# Directory names are shell-quoted so paths with spaces or shell
# metacharacters work; the glob itself has to stay unquoted.
site_dir <- paste0(OUTPUT_DIR, '/site_generator')
dir.create(site_dir, recursive = TRUE)
command_cp <- paste0('cp -r ', shQuote(TOOL_DIR), '/DESeq_0*.Rmd ', shQuote(site_dir))
run_shell(command_cp)
run_shell(paste0('cp -r ', shQuote(paste0(TOOL_DIR, '/DESeq_site.yml')), ' ',
                 shQuote(paste0(site_dir, '/_site.yml'))))
run_shell(paste0('cp -r ', shQuote(paste0(TOOL_DIR, '/DESeq_index.Rmd')), ' ',
                 shQuote(paste0(site_dir, '/index.Rmd'))))
# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
dir.create(paste0(OUTPUT_DIR, '/_site'))
render_site(input = site_dir)
# remove site generating materials from output associated directory
print(unlink(site_dir, recursive = TRUE))
# move _site/* into output associated directory
move_cmd <- paste0('mv ', shQuote(paste0(OUTPUT_DIR, '/_site')), '/* ', shQuote(OUTPUT_DIR))
run_shell(move_cmd)
#------------------------------------------

#-----link index.html to output-----
cp_index <- paste0('cp ', shQuote(paste0(OUTPUT_DIR, '/index.html')), ' ',
                   shQuote(OUTPUT_REPORT))
run_shell(cp_index)
#-----------------------------------

#==============the end==============


##--------end of code rendering .Rmd templates----------------
# Restore the message stream first, then the output stream, and only then
# close the file so nothing is written to a closed connection.
sink(type = 'message')
sink()
close(zz)
##=========== End of sinking output=============================
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
title: 'DESeq2: Results'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```


```{r eval=TRUE}
# Import workspace
# fcp = file.copy(opt$X_W, "deseq.RData")
load(opt$X_W)
```

# Results {.tabset}

## Result table

```{r}
cat('--- View the top 100 rows of the result table ---')
# Contrast: factor (-C), numerator level (-T), denominator level (-K).
res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K))
res_all <- as.data.frame(res)
write.csv(res_all, file = opt$X_R)
# head() returns at most 100 rows; indexing with 1:100 would pad the table
# with NA rows whenever fewer than 100 genes are present.
res_df <- head(res_all, 100)
datatable(res_df, style = "bootstrap", filter = 'top',
          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
```

## Result summary

```{r}
summary(res)
```


# MA-plot {.tabset}



```{r}
cat('--- Shrinked with Bayesian procedure ---')
plotMA(res)
```


# Histogram of p values

```{r}
hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
     col = "grey50", border = "white", main = "",
     xlab = "Mean normalized count larger than 1")
```


# Visualization {.tabset}
## Gene clustering

```{r}
# -M carries the comma-separated clustering factors; surrounding blanks are
# trimmed so that "a, b" selects the columns "a" and "b".
clustering_groups <- trimws(strsplit(opt$X_M, ',')[[1]])

# Heatmap of the 20 most variable genes, centred per gene.
topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
mat <- assay(rld)[topVarGenes, ]
mat <- mat - rowMeans(mat)
annotation_col <- as.data.frame(colData(rld)[, clustering_groups, drop = FALSE])
colnames(annotation_col) <- clustering_groups
rownames(annotation_col) <- colnames(mat)
pheatmap(mat, annotation_col = annotation_col)
```

## Sample-to-sample distance

```{r}
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
pheatmap(sampleDistMatrix,
         clustering_distance_cols = sampleDists,
         col = colors)
```

## PCA plot

```{r}
plotPCA(rld, intgroup = clustering_groups)
```

## MDS plot {.tabset}

### Data table
```{r}
mds <- as.data.frame(colData(rld)) %>%
  cbind(cmdscale(sampleDistMatrix))
knitr::kable(mds)
```

### Plot
```{r}
# Colour the samples by the user-selected clustering factors instead of a
# hard-coded `time` column, which is not present in every sample table.
mds_plot_data <- mds
mds_plot_data$group <- interaction(mds[, clustering_groups, drop = FALSE],
                                   sep = " : ", drop = TRUE)
ggplot(mds_plot_data, aes(x = `1`, y = `2`, col = group)) +
  geom_point(size = 3) + coord_fixed()
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results.xml Tue Feb 27 23:57:53 2018 -0500 |
[ |
<tool id="deseq_results" name="DESeq2: Results" version="2.0.1">
    <description>
        display DESeq2 analysis results.
    </description>
    <requirements>
        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.6">r-rmarkdown</requirement>
        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
        <requirement type="package" version="0.2">r-dt</requirement>
        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
    </requirements>
    <stdio>
        <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing-->
        <regex match="XXX"
               source="stderr"
               level="warning"
               description="Check the warnings_and_errors.txt file for more details."/>
    </stdio>
    <command>
        <![CDATA[

        Rscript '${__tool_directory__}/DESeq_results_render.R'

            ## Paths are quoted so that directories containing spaces survive the shell.
            -e $echo
            -o '$report'
            -d '$report.files_path'
            -s '$sink_message'
            -t '${__tool_directory__}'

            ## 1. input data

            -W '$deseq_workspace'
            -C '$contrast_factor'
            -T '$treatment'
            -K '$condition'

            -M '$clustering_factors'

            ## 2. output report and report site directory
            -R '$deseq_results'


        ]]>
    </command>
    <inputs>
        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
               label="Display analysis code in report?"/>
        <param type="data" name="deseq_workspace" format="rdata" multiple="false" optional="false"
               label="Workspace from tool DESeq2: DESeq"/>
        <param type="text" name="contrast_factor" label="Factor" optional="false"
               help="the name of a factor in the design formula"/>
        <param type="text" name="treatment" label="Treatment level" optional="false"
               help=" the name of the numerator level for the fold change"/>
        <param type="text" name="condition" label="Condition level" optional="false"
               help=" the name of the denominator level for the fold change"/>
        <param type="text" name="clustering_factors" optional="false"
               label="factors of interest for clustering samples and PCA plot"
               help="A single factor or multiple factors from the design formula. Multiple factors are separated by comma (,)."/>
    </inputs>
    <outputs>
        <data format="html" name="report" label="DESeq results report on ${on_string}" />
        <!-- The render script writes this file into the job working directory. -->
        <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/>
        <!-- No from_work_dir here: the report writes the CSV straight to the path passed with -R. -->
        <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" />
    </outputs>
    <citations>
        <citation type="bibtex">
            @article{love2014moderated,
            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
            journal={Genome biology},
            volume={15},
            number={12},
            pages={550},
            year={2014},
            publisher={BioMed Central}
            }
        </citation>
        <citation type="bibtex">
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        </citation>
        <citation type="bibtex">
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        </citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_01.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_01.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```


```{r eval=TRUE}
# Import workspace
# fcp = file.copy(opt$X_W, "deseq.RData")
load(opt$X_W)
```

# Results {.tabset}

## Result table

```{r}
cat('--- View the top 100 rows of the result table ---')
# Contrast: factor (-C), numerator level (-T), denominator level (-K).
res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K))
res_all <- as.data.frame(res)
write.csv(res_all, file = opt$X_R)
# head() returns at most 100 rows; indexing with 1:100 would pad the table
# with NA rows whenever fewer than 100 genes are present.
res_df <- head(res_all, 100)
datatable(res_df, style = "bootstrap", filter = 'top',
          class = "table-condensed", options = list(dom = 'tp', scrollX = TRUE))
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_02.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_02.Rmd Tue Feb 27 23:57:53 2018 -0500 |
b |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults: code visibility follows the -e flag; errors do not abort rendering.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE)
```

# MA-plot {.tabset}

```{r}
# Print the caption, then draw the MA-plot of the results table `res`.
ma_caption <- '--- Shrinked with Bayesian procedure ---'
cat(ma_caption)
plotMA(res)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_03.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_03.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults: code visibility follows the -e flag; errors do not abort rendering.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE)
```


# Histogram of p values

```{r}
# Histogram of the p values of genes whose mean normalized count exceeds 1,
# in 20 equal-width bins over [0, 1].
expressed_pvalues <- res$pvalue[res$baseMean > 1]
hist(
  expressed_pvalues,
  breaks = 0:20/20,
  col = "grey50",
  border = "white",
  main = "",
  xlab = "Mean normalized count larger than 1"
)
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_04.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_04.Rmd Tue Feb 27 23:57:53 2018 -0500 |
[ |
---
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Show the analysis code only when requested through the -e flag and keep
# rendering when a chunk fails, so the error is visible in the report.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE
)
```


# Visualization {.tabset}
## Gene clustering

```{r}
# -M carries the comma-separated clustering factors; surrounding blanks are
# trimmed so that "a, b" selects the columns "a" and "b".
clustering_groups <- trimws(strsplit(opt$X_M, ',')[[1]])

# Heatmap of the 20 most variable genes, centred per gene.
topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
mat <- assay(rld)[topVarGenes, ]
mat <- mat - rowMeans(mat)
annotation_col <- as.data.frame(colData(rld)[, clustering_groups, drop = FALSE])
colnames(annotation_col) <- clustering_groups
rownames(annotation_col) <- colnames(mat)
pheatmap(mat, annotation_col = annotation_col)
```

## Sample-to-sample distance

```{r}
sampleDistMatrix <- as.matrix(sampleDists)
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
pheatmap(sampleDistMatrix,
         clustering_distance_cols = sampleDists,
         col = colors)
```

## PCA plot

```{r}
plotPCA(rld, intgroup = clustering_groups)
```

## MDS plot {.tabset}

### Data table
```{r}
mds <- as.data.frame(colData(rld)) %>%
  cbind(cmdscale(sampleDistMatrix))
knitr::kable(mds)
```

### Plot
```{r}
# Colour the samples by the user-selected clustering factors instead of a
# hard-coded `time` column, which is not present in every sample table.
mds_plot_data <- mds
mds_plot_data$group <- interaction(mds[, clustering_groups, drop = FALSE],
                                   sep = " : ", drop = TRUE)
ggplot(mds_plot_data, aes(x = `1`, y = `2`, col = group)) +
  geom_point(size = 3) + coord_fixed()
```
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_render.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_render.R Tue Feb 27 23:57:53 2018 -0500 |
[ |
##============ Sink warnings and errors to a file ==============
## Everything printed or signalled below is diverted to
## 'warnings_and_errors.txt' until the sinks are removed at the end.
##==============================================================
# Open the connection explicitly: the message sink needs an already open
# connection, and a connection opened here is not closed implicitly when the
# output sink is removed.
zz <- file('warnings_and_errors.txt', open = 'wt')
# Private handle for the final close(): the workspace loaded by the report
# may itself contain an object called `zz` and overwrite the variable above.
sink_con <- zz
sink(sink_con)
sink(sink_con, type = 'message')

#------------import libraries--------------------
options(stringsAsFactors = FALSE)

library(getopt)
library(rmarkdown)
library(DESeq2)
library(pheatmap)
library(DT)
library(ggplot2)
library(genefilter)
library(RColorBrewer)
#------------------------------------------------


#------------get arguments into R--------------------
# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)


# One row per command line flag: long name, short flag, argument mask
# (1 = argument required) and data type.
spec_matrix <- as.matrix(
  data.frame(stringsAsFactors = FALSE,
             long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o",
                            "X_d", "X_s", "X_R", "X_t"),
             short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R",
                             "t"),
             argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
             data_type_flags = c("character", "character", "character", "character",
                                 "character", "character", "character",
                                 "character", "character", "character", "character")
  )
)
opt <- getopt(spec_matrix)
# Echo the parsed options into the sink file to help with troubleshooting.
opt
#----------------------------------------------------


#-----------using passed arguments in R
#           to define system environment variables---
# The first element of `opt` is skipped; the rest are exported by flag name.
do.call(Sys.setenv, opt[-1])
#----------------------------------------------------

#---------- often used variables ----------------
# OUTPUT_REPORT: path to galaxy output report
# OUTPUT_DIR: path to the output associated directory, which stores all outputs
# TOOL_DIR: path to the tool installation directory
OUTPUT_DIR <- opt$X_d
TOOL_DIR <- opt$X_t
OUTPUT_REPORT <- opt$X_o

# Run a shell command and record a warning (which ends up in the sink file)
# when it exits with a non-zero status instead of failing silently.
run_shell <- function(command) {
  status <- system(command)
  if (status != 0) {
    warning("Command failed with exit status ", status, ": ", command,
            call. = FALSE)
  }
  invisible(status)
}

# create the output associated directory to store all outputs
dir.create(OUTPUT_DIR, recursive = TRUE)

#-----------------render site--------------
# copy site generating materials into OUTPUT_DIR
# Directory names are shell-quoted so paths with spaces or shell
# metacharacters work; the glob itself has to stay unquoted.
site_dir <- paste0(OUTPUT_DIR, '/site_generator')
dir.create(site_dir, recursive = TRUE)
command_cp <- paste0('cp -r ', shQuote(TOOL_DIR), '/DESeq_results_*.Rmd ', shQuote(site_dir))
run_shell(command_cp)
run_shell(paste0('cp -r ', shQuote(paste0(TOOL_DIR, '/DESeq_results_site.yml')), ' ',
                 shQuote(paste0(site_dir, '/_site.yml'))))
run_shell(paste0('cp -r ', shQuote(paste0(TOOL_DIR, '/DESeq_results_index.Rmd')), ' ',
                 shQuote(paste0(site_dir, '/index.Rmd'))))
# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
dir.create(paste0(OUTPUT_DIR, '/_site'))
render_site(input = site_dir)
# remove site generating materials from output associated directory
print(unlink(site_dir, recursive = TRUE))
# move _site/* into output associated directory
move_cmd <- paste0('mv ', shQuote(paste0(OUTPUT_DIR, '/_site')), '/* ', shQuote(OUTPUT_DIR))
run_shell(move_cmd)
#------------------------------------------

#-----link index.html to output-----
cp_index <- paste0('cp ', shQuote(paste0(OUTPUT_DIR, '/index.html')), ' ',
                   shQuote(OUTPUT_REPORT))
run_shell(cp_index)
#-----------------------------------

#==============the end==============


##--------end of code rendering .Rmd templates----------------
# Restore the message stream first, then the output stream, and only then
# close the file so nothing is written to a closed connection.
sink(type = 'message')
sink()
close(sink_con)
##=========== End of sinking output=============================
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_results_site.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_results_site.yml Tue Feb 27 23:57:53 2018 -0500 |
b |
# Site configuration for the DESeq2 results report.
name: 'Analysis Report'
# Relative to the site source directory.
output_dir: '../_site'
navbar:
  title: ''
  type: inverse
  left:
    - text: 'Home'
      icon: fa-home
      href: index.html
    - text: 'Results'
      href: DESeq_results_01.html
    - text: 'MA-plot'
      href: DESeq_results_02.html
    - text: 'Histogram of p values'
      href: DESeq_results_03.html
    - text: 'Visualization'
      href: DESeq_results_04.html
output:
  html_document:
    theme: cosmo
    highlight: textmate
b |
diff -r 000000000000 -r 6f94b4b9de44 DESeq_site.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESeq_site.yml Tue Feb 27 23:57:53 2018 -0500 |
b |
# Site configuration for the DESeq2 analysis report.
name: "Analysis Report"
# Relative to the site source directory.
output_dir: "../_site"
navbar:
  title: ""
  type: inverse
  left:
    - text: "Home"
      icon: fa-home
      href: index.html
    - text: "DESeqDataSet"
      href: DESeq_01.html
    - text: "Pre-filtering"
      href: DESeq_02.html
    - text: "Data"
      href: DESeq_03.html
    - text: "Sample distance"
      href: DESeq_04.html
    # DESeq_05.Rmd is copied and rendered with the other pages; link it so
    # the page is reachable from the navigation bar.
    - text: "Differential expression"
      href: DESeq_05.html
output:
  html_document:
    theme: cosmo
    highlight: textmate