Galaxy |

Changeset 0:d0cbe6cc1f04 (2022-01-12)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genomic_super_signature commit 1aadd5dce3b254e7714c2fdd39413029fd4b9b7a"

added:
genomic_super_signature.xml
gss.R
gss.Rmd
test-data/bcellViperExpr_10C.tsv.gz
test-data/genomic_super_signature_ravmodels.loc
test-data/microRAVmodel.rds
tool-data/genomic_super_signature_ravmodels.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test

diff -r 000000000000 -r d0cbe6cc1f04 genomic_super_signature.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genomic_super_signature.xml Wed Jan 12 19:07:45 2022 +0000

[

b'@@ -0,0 +1,213 @@\n+<tool id="genomic_super_signature" name="GenomicSuperSignature" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@" profile="20.01">\n+ <description>interpretation of RNAseq experiments</description>\n+ <macros>\n+ <token name="@TOOL_VERSION@">1.2.0</token>\n+ <token name="@GALAXY_VERSION@">0</token>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">bioconductor-genomicsupersignature</requirement>\n+ <requirement type="package" version="1.7.1">r-optparse</requirement>\n+ <requirement type="package" version="2.6">r-wordcloud</requirement>\n+ <requirement type="package" version="2.22.0">bioconductor-biocstyle</requirement>\n+ <requirement type="package" version="2.7.3">r-magick</requirement>\n+ <requirement type="package" version="2021d">tzdata</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+ #set $model = $model.fields.path\n+ mkdir out &&\n+ Rscript \'$__tool_directory__/gss.R\' --input \'$input\' --model \'$model\' --method \'$method\' --maxFrom \'$maxFrom\' --level \'$level\' --scale \'$scale\' --numOut $numOut --outDir out --toolDir \'$__tool_directory__\' --validate \'$validate\' --html \'$html\'\n+ ]]></command>\n+ <inputs>\n+ <param argument="--input" type="data" format="tabular,tsv" label="Tabular count matrix"/>\n+ <param argument="--model" type="select" label="Using RAVmodel" help="Select model from the list">\n+ <options from_data_table="genomic_super_signature_ravmodels">\n+ <filter type="data_meta" ref="input" key="dbkey" column="dbkey" />\n+ </options>\n+ <validator type="no_options" message="A built-in RAVmodel is not available for the build associated with the selected input file"/>\n+ </param>\n+ <param argument="--method" type="select" label="Select a correlation coefficient">\n+ <option value="pearson">Pearson</option>\n+ <option value="kendall">Kendall</option>\n+ <option value="spearman">Spearman</option>\n+ </param>\n+ <param argument="--maxFrom" 
type="select" label="Select whether to display the maximum value from dataset\'s PCs or avgLoadings" help="With Principal Component (PC), the maximum correlation coefficient from top 8 PCs for each avgLoading will be selected as an output. If you choose Average Loading, the Average Loading with the maximum correlation coefficient with each Principal Component will be in the output.">\n+ <option value="pc">Principal Components</option>\n+ <option value="avgLoading">Average Loading</option>\n+ </param>\n+ <param argument="--level" type="select" label="Output format of validated result" help="max will output the matrix containing only the maximum coefficient. To get the coefficient of all 8 PCs, set this argument to all.">\n+ <option value="max">Max</option>\n+ <option value="all">All</option>\n+ </param>\n+ <param argument="--scale" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Normalize rows of datasets?"/>\n+ <param argument="--numOut" type="integer" min="1" value="3" label="The number of top validated RAVs to check"/>\n+ </inputs>\n+ <outputs>\n+ <collection name="genesets" type="list" label="GenomicSuperSignature Genesets">\n+ <discover_datasets pattern=".*_genesets_(?P<name>.+)\\.csv" format="csv" directory="out" />\n+ </collection>\n+ <collection name="literatures" type="list" label="GenomicSuperSignature Literatures">\n+ <discover_datasets pattern=".*_literatures_(?P<name>.+)\\.csv" format="csv" directory="out" />\n+ </collection>\n+ <data name="validate" format="csv" label="GenomicSuperSignature validate.csv">\n+ </data>\n+ <data name="html" format="html" label="GenomicSuperSignature report.html">\n+ </data>\n+ </outputs>\n+ '..b'3 9.582061 9.628792 8.820422\n+===== ======== ========= ======== =========\n+\n+RAVmodel\n+~~~~~~~~\n+\n+*R*\\ eplicable *A*\\ xes of *V*\\ ariation (RAV) consists of principal\n+components repeatedly observed in an independent analysis of multiple\n+published datasets. 
RAVs connect different databases that are both\n+linked to the originated study or associated with the RAV itself through\n+the gene rankings of it. RAVmodel contains the collection of RAVs\n+(RAVindex), metadata from model building process and the additional\n+annotations. Currently, two RAVmodels are available based on the gene\n+sets used for annotation.\n+\n+1) C2 : RAVmodel annotated with Molecular Signatures Database (MSigDB)\n+ curated gene sets (version 7.1)\n+2) PLIERpriors : RAVmodel annotated with the three gene sets provided in\n+ the `PLIER package <https://github.com/wgmao/PLIER>`__ -\n+ bloodCellMarkersIRISDMAP, svmMarkers, and canonicalPathways\n+\n+Outputs\n+-------\n+\n+There are four categories of outputs from this tool, which is one html\n+file and three csv tabular files. The actual number of csv files will\n+vary depending on the parameter, *\xe2\x80\x93numOut*, and the validated RAVs.\n+\n+validate.csv\n+~~~~~~~~~~~~\n+\n++--------------------------+--------------------------------------------+\n+| Column | Description |\n++==========================+============================================+\n+| score | the maximum pearson correlation |\n+| | coefficient between the top 8 PCs of the |\n+| | input and RAVs |\n++--------------------------+--------------------------------------------+\n+| PC | one of the top 8 PCs of the input, which |\n+| | gives the highest *score* |\n++--------------------------+--------------------------------------------+\n+| sw | the average silhouette width of the RAV |\n++--------------------------+--------------------------------------------+\n+| cl_size | the size of each RAV |\n++--------------------------+--------------------------------------------+\n+| cl_num | the RAV number |\n++--------------------------+--------------------------------------------+\n+\n+Genesets\n+~~~~~~~~\n+\n+This is the enriched gene sets for the target RAV, calculated from the\n+ranked gene list. 
Gene sets with the adjusted p-value < 0.05 are\n+included.\n+\n+=========== ================================\n+Column Description\n+=========== ================================\n+Description name of the gene sets\n+NES normalized enrichment score (ES)\n+pvalue statistical significance\n+qvalues p-value adjusted for the FDR\n+=========== ================================\n+\n+Literatures\n+~~~~~~~~~~~\n+\n+========= ======================\n+Column Description\n+========= ======================\n+studyName study accession\n+title the title of the study\n+========= ======================\n+\n+report.html\n+~~~~~~~~~~~\n+\n+A html file with the summary of the main analyses by\n+GenomicSuperSignature. It includes MeSH terms in word cloud and an\n+interactive plot overviewing the validated RAVs, in addition to the\n+previews of the tabular output files.\n+\n+Citations\n+---------\n+\n+Oh, S., Geistlinger, L., Ramos, M., Taroni, J.N., Carey, V.J., Greene,\n+C.S., Waldron, L., & Davis, S.R. (2021). GenomicSuperSignature:\n+interpretation of RNA-seq experiments through robust, efficient\n+comparison to public databases. bioRxiv.\n+\n+References\n+----------\n+\n+| GenomicSuperSignature package:\n+ `webpage <https://shbrief.github.io/GenomicSuperSignature/>`__\n+| GenomicSuperSignature usecases:\n+ `webpage <https://shbrief.github.io/GenomicSuperSignaturePaper/>`__\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1101/2021.05.26.445900</citation>\n+ </citations>\n+</tool>\n'

diff -r 000000000000 -r d0cbe6cc1f04 gss.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gss.R Wed Jan 12 19:07:45 2022 +0000

[

@@ -0,0 +1,114 @@
+# Command-line driver for the GenomicSuperSignature Galaxy tool: validates an
+# expression matrix against a RAVmodel, writes CSV tables, and renders gss.Rmd.
+suppressPackageStartupMessages(library(optparse))
+
+### Parsing command line -------------------------------------------------------
+option_list <- list(
+    make_option(c("--input"), type = "character",
+                default = NULL, help = "Count matrix in tsv format"),
+    make_option(c("--model"), type = "character",
+                default = NULL, help = "RAVmodel to apply.
+                Currently 'C2' and 'PLIERpriors' are available"),
+    make_option(c("--method"), type = "character",
+                default = formals(GenomicSuperSignature::validate)$method),
+    make_option(c("--maxFrom"), type = "character",
+                default = formals(GenomicSuperSignature::validate)$maxFrom),
+    make_option(c("--level"), type = "character",
+                default = formals(GenomicSuperSignature::validate)$level),
+    make_option(c("--scale"), type = "character",
+                default = formals(GenomicSuperSignature::validate)$scale),
+    make_option(c("--outDir"), type = "character",
+                default = NULL, help = "Output directory"),
+    make_option(c("--validate"), type = "character",
+                default = NULL, help = "Path to save validate.csv"),
+    make_option(c("--html"), type = "character",
+                default = NULL, help = "Path to save HTML report"),
+    make_option(c("--numOut"), type = "integer",
+                default = 3, help = "The number of top validated RAVs to check"),
+    make_option(c("--toolDir"), type = "character",
+                default = ".", help = "Directory containing the tool scripts (e.g. gss.Rmd)")
+)
+
+opt <- parse_args(OptionParser(option_list = option_list),
+                  args = commandArgs(trailingOnly = TRUE))
+input <- opt$input
+model <- opt$model
+out_dir <- opt$outDir
+num_out <- opt$numOut
+
+if (is.null(input)) stop("Need --input.")
+if (is.null(model)) stop("Need --model.")
+if (is.null(out_dir)) stop("Need --outDir.")
+
+input_name <- basename(tools::file_path_sans_ext(input))
+# Ensure the output directory exists before anything is written into it.
+dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+out_dir <- normalizePath(out_dir)
+
+suppressPackageStartupMessages(library(GenomicSuperSignature))
+dat <- as.matrix(read.table(file = input, header = TRUE, sep = "\t",
+                            row.names = 1))
+# Known model names are loaded via getModel(); anything else is an RDS path.
+if (model %in% c("C2", "PLIERpriors")) {
+    rav_model <- getModel(model)
+} else {
+    rav_model <- readRDS(model)
+}
+
+### validate -------------------------------------------------------------------
+# NOTE(review): --method/--maxFrom/--level/--scale are parsed above but never
+# forwarded, so validate() uses its own defaults -- confirm this is intended.
+val_all <- validate(dat, rav_model)
+validated_ind <- validatedSignatures(val_all, num.out = num_out,
+                                     swCutoff = 0, indexOnly = TRUE)
+# Fewer RAVs than requested may be returned, so cap the loop count.
+n <- min(num_out, length(validated_ind), na.rm = TRUE)
+
+### Save tables in csv ---------------------------------------------------------
+# Validation
+if (is.null(opt$validate)) {
+    output_fname <- file.path(out_dir, paste0(input_name, "_validate.csv"))
+} else {
+    output_fname <- opt$validate
+}
+write.csv(val_all, file = output_fname, row.names = TRUE)
+
+# GSEA
+gsea_all <- gsea(rav_model)  # loop-invariant, so look it up once
+for (i in seq_len(n)) {
+    rav_num <- validated_ind[i]
+    res <- gsea_all[[paste0("RAV", rav_num)]]
+
+    output_fname <- paste0(input_name, "_genesets_RAV", rav_num, ".csv")
+    write.csv(res, file = file.path(out_dir, output_fname), row.names = TRUE)
+}
+
+# Related prior studies
+for (i in seq_len(n)) {
+    rav_num <- validated_ind[i]
+    res <- findStudiesInCluster(rav_model, rav_num)
+
+    output_fname <- paste0(input_name, "_literatures_RAV", rav_num, ".csv")
+    write.csv(res, file = file.path(out_dir, output_fname), row.names = TRUE)
+}
+
+### Create a report ------------------------------------------------------------
+if (is.null(opt$html)) {
+    output_fname <- file.path(out_dir, paste0("GSS-", input_name, "-",
+                              format(Sys.Date(), format = "%Y%m%d"), ".html"))
+} else {
+    output_fname <- opt$html
+}
+# Render gss.Rmd from the tool directory, passing the in-memory results to it.
+rmarkdown::render(
+    file.path(opt$toolDir, "gss.Rmd"), params = list(
+        val_all = val_all,
+        dat = dat,
+        RAVmodel = rav_model,
+        inputName = input_name,
+        numOut = num_out
+    ),
+    output_file = output_fname,
+    intermediates_dir = ".",
+    knit_root_dir = "."
+)

diff -r 000000000000 -r d0cbe6cc1f04 gss.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gss.Rmd Wed Jan 12 19:07:45 2022 +0000

[

@@ -0,0 +1,122 @@
+---
+title: "Analysis by GenomicSuperSignature"
+date: "`r Sys.Date()`"
+output:
+  BiocStyle::html_document:
+    toc: true
+    toc_float: false
+    toc_depth: 3
+params:
+    val_all: val_all
+    dat: dat
+    RAVmodel: RAVmodel
+    inputName: inputName
+    numOut: numOut
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = FALSE)  # default for all chunks: do not echo source code
+```
+
+# RAVs that best represent your dataset
+The *validation* provides a quantitative representation of the relevance
+between your dataset and RAVs. The table below shows the top 6 validated RAVs,
+and the complete result is saved as `{input_name}_validate.csv`.
+
+```{r}
+head(params$val_all)  # preview only; the full table is written to the CSV output
+```
+
+## Heatmap Table
+`heatmapTable` takes validation results as its input and displays them in
+a two-panel table: the top panel shows the average silhouette width (avg.sw)
+and the bottom panel displays the validation score.
+
+`heatmapTable` can display different subsets of the validation output. For
+example, if you specify `scoreCutoff`, any validation result above that score
+will be shown. If you specify the number (n) of top validation results through
+`num.out`, the output will be an n-column heatmap table. You can also filter by
+the average silhouette width (`swCutoff`), the cluster size (`clsizeCutoff`),
+or one of the top 8 PCs from the dataset (`whichPC`).
+
+Here, we print out the top `r params$numOut` validated RAVs with an average
+silhouette width above 0.
+
+```{r out.height="45%", out.width="45%", message=FALSE, warning=FALSE}
+heatmapTable(params$val_all, num.out = params$numOut, swCutoff = 0)  # top numOut RAVs, sw above 0
+```
+
+## Interactive Graph
+Under the default condition, `plotValidate` plots validation results of all
+non-single-element RAVs in one graph, where the x-axis represents the average
+silhouette width of the RAVs (a quality control measure of RAVs) and the y-axis
+represents the validation score. We recommend that users focus on RAVs with a
+higher validation score and use average silhouette width as a secondary criterion.
+
+```{r out.height="80%", out.width="80%"}
+plotValidate(params$val_all, interactive = TRUE)  # interactive plot of the validation results
+```
+
+Note that `interactive = TRUE` will result in a zoomable, interactive plot that
+includes tooltips, which is saved as a `{input_name}_validate_plot.html` file.
+
+You can hover each data point for more information:
+
+- **sw** : the average silhouette width of the cluster
+- **score** : the top validation score between 8 PCs of the dataset and RAVs
+- **cl_size** : the size of RAVs, represented by the dot size
+- **cl_num** : the RAV number. You need this index to find more information
+about the RAV.
+- **PC** : test dataset's PC number that validates the given RAV. Because we
+used top 8 PCs of the test dataset for validation, there are 8 categories.
+
+If you double-click the PC legend on the right, you will enter an
+individual display mode where you can add additional groups of data
+points with a single click.
+
+
+# Prior information associated with your dataset
+```{r echo=FALSE}
+validated_ind <- validatedSignatures(params$val_all, num.out = params$numOut,
+                                     swCutoff = 0, indexOnly = TRUE)
+
+# Cap at the number of RAVs actually returned, which may be fewer than numOut.
+n <- min(params$numOut, length(validated_ind), na.rm = TRUE)
+```
+
+## MeSH terms in wordcloud
+```{r out.height="60%", out.width="60%", fig.width=8, fig.height=8}
+for (i in seq_len(n)) {  # one word cloud per validated RAV
+    set.seed(1)  # reseed before each cloud -- presumably for a reproducible layout
+    print(paste0("MeSH terms related to RAV", validated_ind[i]))
+    drawWordcloud(params$RAVmodel, validated_ind[i])
+}
+```
+
+## GSEA
+The complete result is saved as `{input_name}_genesets_RAV*.csv`.
+```{r}
+gsea_all <- gsea(params$RAVmodel)  # loop-invariant, so look it up once
+res_all <- vector(mode = "list", length = n)
+for (i in seq_len(n)) {
+    RAVname <- paste0("RAV", validated_ind[i])
+    # `[<-` with list(): a NULL entry keeps its slot, so names stay aligned.
+    res_all[i] <- list(head(gsea_all[[RAVname]]))
+    names(res_all)[i] <- paste0("Enriched gene sets for ", RAVname)
+}
+res_all
+```
+
+## Publication
+The complete result is saved as `{input_name}_literatures_RAV*.csv`.
+```{r}
+# Preview (head) of the studies linked to each validated RAV, keyed by a label.
+res_all <- vector(mode = "list", length = n)
+for (k in seq_len(n)) {
+    rav_id <- validated_ind[k]
+    res_all[[k]] <- head(findStudiesInCluster(params$RAVmodel, rav_id, studyTitle = TRUE))
+    names(res_all)[k] <- paste0("Studies related to RAV", rav_id)
+}
+res_all
+```
+

diff -r 000000000000 -r d0cbe6cc1f04 test-data/bcellViperExpr_10C.tsv.gz

Binary file test-data/bcellViperExpr_10C.tsv.gz has changed

diff -r 000000000000 -r d0cbe6cc1f04 test-data/genomic_super_signature_ravmodels.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genomic_super_signature_ravmodels.loc Wed Jan 12 19:07:45 2022 +0000

@@ -0,0 +1,2 @@
+#<value/id> <dbkey> <RAV format version> <display_name> <path>
+microRAVmodel hg38 0 microRAVmodel (test data only) ${__HERE__}/microRAVmodel.rds

diff -r 000000000000 -r d0cbe6cc1f04 test-data/microRAVmodel.rds

Binary file test-data/microRAVmodel.rds has changed

diff -r 000000000000 -r d0cbe6cc1f04 tool-data/genomic_super_signature_ravmodels.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/genomic_super_signature_ravmodels.loc.sample Wed Jan 12 19:07:45 2022 +0000

@@ -0,0 +1,1 @@
+#<value/id> <dbkey> <RAV format version> <display_name> <path>

diff -r 000000000000 -r d0cbe6cc1f04 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jan 12 19:07:45 2022 +0000

@@ -0,0 +1,7 @@
+<tables>
+    <!-- RAVmodel data table; rows are read from the .loc file below ('#' starts a comment line) -->
+    <table name="genomic_super_signature_ravmodels" comment_char="#">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="tool-data/genomic_super_signature_ravmodels.loc" />
+    </table>
+</tables>

diff -r 000000000000 -r d0cbe6cc1f04 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jan 12 19:07:45 2022 +0000

@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test variant of the RAVmodel data table; rows are read from the .loc file under test-data -->
+    <table name="genomic_super_signature_ravmodels" comment_char="#">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="${__HERE__}/test-data/genomic_super_signature_ravmodels.loc" />
+    </table>
+</tables>