changeset 5:bd810b6d8a3b draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_signature_score commit ae36c63e1e5c0c285593d427f0ffd348a3d89e3f
author artbio
date Thu, 07 Nov 2024 22:43:58 +0000
parents 31da579595c5
children
files signature_score.R signature_score.xml
diffstat 2 files changed, 149 insertions(+), 133 deletions(-) [+]
line wrap: on
line diff
--- a/signature_score.R	Sun Dec 10 04:11:16 2023 +0000
+++ b/signature_score.R	Thu Nov 07 22:43:58 2024 +0000
@@ -2,11 +2,12 @@
 # Compute the signature score based on the geometric mean of the target gene expression
 # and split cells  in 2 groups (high/low) using this signature score.
 
-options(show.error.messages = FALSE,
-  error = function() {
-    cat(geterrmessage(), file = stderr())
-    q("no", 1, FALSE)
-  }
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
 )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 warnings()
@@ -18,93 +19,95 @@
 
 # Arguments
 option_list <- list(
-  make_option(
-    "--input",
-    default = NA,
-    type = "character",
-    help = "Input file that contains log2(CPM +1) values"
-  ),
-  make_option(
-    "--sep",
-    default = "\t",
-    type = "character",
-    help = "File separator [default : '%default' ]"
-  ),
-  make_option(
-    "--colnames",
-    default = TRUE,
-    type = "logical",
-    help = "Consider first line as header ? [default : '%default' ]"
-  ),
-  make_option(
-    "--genes",
-    default = NA,
-    type = "character",
-    help = "List of genes comma separated"
-  ),
-  make_option(
-    "--percentile_threshold",
-    default = 20,
-    type = "integer",
-    help = "detection threshold to keep a gene in signature set [default : '%default' ]"
-  ),
-  make_option(
-    "--output",
-    default = "./output.tab",
-    type = "character",
-    help = "Output path [default : '%default' ]"
-  ),
-  make_option(
-    "--stats",
-    default = "./statistics.tab",
-    type = "character",
-    help = "statistics path [default : '%default' ]"
-  ),
-  make_option(
-    "--correlations",
-    default = "./correlations.tab",
-    type = "character",
-    help = "Correlations between signature genes  [default : '%default' ]"
-  ),
-  make_option(
-    "--covariances",
-    default = "./statistics.tab",
-    type = "character",
-    help = "Covariances between signature genes [default : '%default' ]"
-  ),
-  make_option(
-    "--pdf",
-    default = "~/output.pdf",
-    type = "character",
-    help = "pdf path [default : '%default' ]"
-  )
+    make_option(
+        "--input",
+        default = NA,
+        type = "character",
+        help = "Input file that contains log2(CPM +1) values"
+    ),
+    make_option(
+        "--sep",
+        default = "\t",
+        type = "character",
+        help = "File separator [default : '%default' ]"
+    ),
+    make_option(
+        "--colnames",
+        default = TRUE,
+        type = "logical",
+        help = "Consider first line as header ? [default : '%default' ]"
+    ),
+    make_option(
+        "--genes",
+        default = NA,
+        type = "character",
+        help = "List of genes comma separated"
+    ),
+    make_option(
+        "--percentile_threshold",
+        default = 20,
+        type = "integer",
+        help = "detection threshold to keep a gene in signature set [default : '%default' ]"
+    ),
+    make_option(
+        "--output",
+        default = "./output.tab",
+        type = "character",
+        help = "Output path [default : '%default' ]"
+    ),
+    make_option(
+        "--stats",
+        default = "./statistics.tab",
+        type = "character",
+        help = "statistics path [default : '%default' ]"
+    ),
+    make_option(
+        "--correlations",
+        default = "./correlations.tab",
+        type = "character",
+        help = "Correlations between signature genes  [default : '%default' ]"
+    ),
+    make_option(
+        "--covariances",
+        default = "./statistics.tab",
+        type = "character",
+        help = "Covariances between signature genes [default : '%default' ]"
+    ),
+    make_option(
+        "--pdf",
+        default = "~/output.pdf",
+        type = "character",
+        help = "pdf path [default : '%default' ]"
+    )
 )
 
 opt <- parse_args(OptionParser(option_list = option_list),
-                  args = commandArgs(trailingOnly = TRUE))
+    args = commandArgs(trailingOnly = TRUE)
+)
 
 if (opt$sep == "tab") {
-  opt$sep <- "\t"
+    opt$sep <- "\t"
 }
 if (opt$sep == "comma") {
-  opt$sep <- ","
+    opt$sep <- ","
 }
 
 # Take input data
 data.counts <- read.table(
-  opt$input,
-  h = opt$colnames,
-  row.names = 1,
-  sep = opt$sep,
-  check.names = FALSE
+    opt$input,
+    h = opt$colnames,
+    row.names = 1,
+    sep = opt$sep,
+    check.names = FALSE
 )
 
 # Get vector of target genes
 genes <- unlist(strsplit(opt$genes, ","))
 
 if (length(unique(genes %in% rownames(data.counts))) == 1) {
-  if (unique(genes %in% rownames(data.counts)) == FALSE)
-    stop("None of these genes are in your dataset: ", opt$genes)
+    if (unique(genes %in% rownames(data.counts)) == FALSE) {
+        stop("None of these genes are in your dataset: ", opt$genes)
+    }
 }
 
 logical_genes <- rownames(data.counts) %in% genes
@@ -116,10 +119,11 @@
 signature.covariances <- as.data.frame(cov(t(signature.counts)))
 signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances)
 write.table(signature.covariances,
-            file = opt$covariances,
-            quote = FALSE,
-            row.names = FALSE,
-            sep = "\t")
+    file = opt$covariances,
+    quote = FALSE,
+    row.names = FALSE,
+    sep = "\t"
+)
 
 # compute signature.correlations
 signature.correlations <- as.data.frame(cor(t(signature.counts)))
@@ -128,15 +132,15 @@
 
 ## Descriptive Statistics Function
 descriptive_stats <- function(InputData) {
-  SummaryData <- data.frame(
-    mean = rowMeans(InputData),
-    SD = apply(InputData, 1, sd),
-    Variance = apply(InputData, 1, var),
-    Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
-      (sum(x != 0) / ncol(y)) * 100
-    })
-  )
-  return(SummaryData)
+    SummaryData <- data.frame(
+        mean = rowMeans(InputData),
+        SD = apply(InputData, 1, sd),
+        Variance = apply(InputData, 1, var),
+        Percentage_Detection = apply(InputData, 1, function(x, y = InputData) {
+            (sum(x != 0) / ncol(y)) * 100
+        })
+    )
+    return(SummaryData)
 }
 
 signature_stats <- descriptive_stats(signature.counts)
@@ -146,21 +150,21 @@
 
 # Add warnings
 if (length(unique(kept_genes)) > 1) {
-  cat(
-    "WARNINGS ! Following genes were removed from further analysis due to low gene expression :",
-    paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "),
-    "\n"
-  )
+    cat(
+        "WARNINGS ! Following genes were removed from further analysis due to low gene expression :",
+        paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "),
+        "\n"
+    )
 } else {
-  if (unique(kept_genes) == FALSE) {
-    stop(
-      "None of these genes are detected in ",
-      opt$percent,
-      "% of your cells: ",
-      paste(rownames(signature_stats), collapse = ", "),
-      ". You can be less stringent thanks to --percent parameter."
-    )
-  }
+    if (unique(kept_genes) == FALSE) {
+        stop(
+            "None of these genes are detected in ",
+            opt$percent,
+            "% of your cells: ",
+            paste(rownames(signature_stats), collapse = ", "),
+            ". You can be less stringent thanks to --percent parameter."
+        )
+    }
 }
 
 # Remove genes poorly detected in the dataset
@@ -173,54 +177,63 @@
 score <- apply(signature.counts, 2, geometric.mean) # geometric.mean requires psych
 
 # Add results in signature_output
-signature_output <- data.frame(cell = names(score),
-                               score = score,
-                               rate = ifelse(score > mean(score), "HIGH", "LOW"),
-                               nGenes = colSums(data.counts != 0),
-                               total_counts = colSums(data.counts))
+signature_output <- data.frame(
+    cell = names(score),
+    score = score,
+    rate = ifelse(score > mean(score), "HIGH", "LOW"),
+    nGenes = colSums(data.counts != 0),
+    total_counts = colSums(data.counts)
+)
 
 # statistics of input genes, signature genes first lines
-statistics.counts <- rbind(subset(data.counts, logical_genes),
-                           subset(data.counts, !logical_genes))
+statistics.counts <- rbind(
+    subset(data.counts, logical_genes),
+    subset(data.counts, !logical_genes)
+)
 statistics <- descriptive_stats(statistics.counts)
 statistics <- cbind(gene = rownames(statistics), statistics)
 
 
 
 # Re-arrange score matrix for plots
-score <- data.frame(score = score,
-                    order = rank(score, ties.method = "first"),
-                    signature = signature_output$rate,
-                    stringsAsFactors = FALSE)
+score <- data.frame(
+    score = score,
+    order = rank(score, ties.method = "first"),
+    signature = signature_output$rate,
+    stringsAsFactors = FALSE
+)
 
 pdf(file = opt$pdf)
 myplot <- ggplot(signature_output, aes(x = rate, y = score)) +
-  geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
-  geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
-  scale_fill_manual(values = c("#ff0000", "#08661e")) +
-  geom_jitter(size = 0.2) + labs(y = "Score", x = "Rate") +
-  annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
-           color = "black", label = mean(score$score), parse = TRUE) +
-  labs(title = "Violin plots of Cell signature scores")
+    geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
+    geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
+    scale_fill_manual(values = c("#ff0000", "#08661e")) +
+    geom_jitter(size = 0.2) +
+    labs(y = "Score", x = "Rate") +
+    annotate("text",
+        x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
+        color = "black", label = mean(score$score), parse = TRUE
+    ) +
+    labs(title = "Violin plots of Cell signature scores")
 
 print(myplot)
 dev.off()
 
 # Save file
 write.table(
-  signature_output,
-  opt$output,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    signature_output,
+    opt$output,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
 
 write.table(
-  statistics,
-  opt$stats,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    statistics,
+    opt$stats,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
--- a/signature_score.xml	Sun Dec 10 04:11:16 2023 +0000
+++ b/signature_score.xml	Thu Nov 07 22:43:58 2024 +0000
@@ -1,5 +1,8 @@
-<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy0">
+<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy1">
     <description>in single cell RNAseq</description>
+    <xrefs>
+        <xref type="bio.tools">galaxy_single_cell_suite</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="1.7.3">r-optparse</requirement>
         <requirement type="package" version="3.4.4">r-ggplot2</requirement>