changeset 3:4791f06fc7a4 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations commit 1573c4dc0cf4ac72be7acffe72e63c29224ef71d
author artbio
date Thu, 07 Nov 2024 21:01:56 +0000
parents b49295546f29
children
files correlation_with_signature.R correlation_with_signature.xml
diffstat 2 files changed, 90 insertions(+), 83 deletions(-) [+]
line wrap: on
line diff
--- a/correlation_with_signature.R	Wed Oct 18 10:00:34 2023 +0000
+++ b/correlation_with_signature.R	Thu Nov 07 21:01:56 2024 +0000
@@ -10,11 +10,12 @@
 #                                       --gene_corr_pval <gene-gene corr pvalues file>
 #                                       --sig_corr <genes correlation to signature file>
 
-options(show.error.messages = FALSE,
-  error = function() {
-    cat(geterrmessage(), file = stderr())
-    q("no", 1, FALSE)
-  }
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
 )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
@@ -23,75 +24,76 @@
 
 # Arguments
 option_list <- list(
-  make_option(
-    "--sep",
-    default = "\t",
-    type = "character",
-    help = "File separator, must be the same for all input files [default : '%default' ]"
-  ),
-  make_option(
-    "--colnames",
-    default = TRUE,
-    type = "logical",
-    help = "Consider first lines as header (must stand for all input files) [default : '%default' ]"
-  ),
-  make_option(
-    "--expression_file",
-    default = NA,
-    type = "character",
-    help = "Input file that contains log2(CPM +1) expression values"
-  ),
-  make_option(
-    "--signatures_file",
-    default = NA,
-    type = "character",
-    help = "Input file that contains cell signature"
-  ),
-  make_option(
-    "--sig_corr",
-    default = "sig_corr.tsv",
-    type = "character",
-    help = "signature correlations output [default : '%default' ]"
-  ),
-  make_option(
-    "--gene_corr",
-    default = "gene_corr.tsv",
-    type = "character",
-    help = "genes-genes correlations output [default : '%default' ]"
-  ),
-  make_option(
-    "--gene_corr_pval",
-    default = "gene_corr_pval.tsv",
-    type = "character",
-    help = "genes-genes correlations pvalues output [default : '%default' ]"
-  )
+    make_option(
+        "--sep",
+        default = "\t",
+        type = "character",
+        help = "File separator, must be the same for all input files [default : '%default' ]"
+    ),
+    make_option(
+        "--colnames",
+        default = TRUE,
+        type = "logical",
+        help = "Consider first lines as header (must stand for all input files) [default : '%default' ]"
+    ),
+    make_option(
+        "--expression_file",
+        default = NA,
+        type = "character",
+        help = "Input file that contains log2(CPM +1) expression values"
+    ),
+    make_option(
+        "--signatures_file",
+        default = NA,
+        type = "character",
+        help = "Input file that contains cell signature"
+    ),
+    make_option(
+        "--sig_corr",
+        default = "sig_corr.tsv",
+        type = "character",
+        help = "signature correlations output [default : '%default' ]"
+    ),
+    make_option(
+        "--gene_corr",
+        default = "gene_corr.tsv",
+        type = "character",
+        help = "genes-genes correlations output [default : '%default' ]"
+    ),
+    make_option(
+        "--gene_corr_pval",
+        default = "gene_corr_pval.tsv",
+        type = "character",
+        help = "genes-genes correlations pvalues output [default : '%default' ]"
+    )
 )
 
 opt <- parse_args(OptionParser(option_list = option_list),
-                  args = commandArgs(trailingOnly = TRUE))
+    args = commandArgs(trailingOnly = TRUE)
+)
 
 if (opt$sep == "tab") {
-  opt$sep <- "\t"
+    opt$sep <- "\t"
 }
 if (opt$sep == "comma") {
-  opt$sep <- ","
+    opt$sep <- ","
 }
 
 # Open files
 data <- read.delim(
-  opt$expression_file,
-  header = opt$colnames,
-  row.names = 1,
-  sep = opt$sep,
-  check.names = FALSE
+    opt$expression_file,
+    header = opt$colnames,
+    row.names = 1,
+    sep = opt$sep,
+    check.names = FALSE
 )
 signature <- read.delim(
-  opt$signatures_file,
-  header = TRUE,
-  stringsAsFactors = FALSE,
-  row.names = 1,
-  sep = opt$sep,
-  check.names = FALSE
+    opt$signatures_file,
+    header = TRUE,
+    stringsAsFactors = FALSE,
+    row.names = 1,
+    sep = opt$sep,
+    check.names = FALSE
 )
 
 
@@ -105,39 +107,41 @@
 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells
 
 # Gene correlation with signature score
-gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r),
-                                        Pearson_correlation = gene_corr$r[, 1],
-                                        p_value = gene_corr$P[, 1])
+gene_signature_corr <- cbind.data.frame(
+    gene = colnames(gene_corr$r),
+    Pearson_correlation = gene_corr$r[, 1],
+    p_value = gene_corr$P[, 1]
+)
 gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ]
 
 
 ###  Save files  ###
 
 write.table(
-  format(gene_signature_corr, digits = 2),
-  file = opt$sig_corr,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    format(gene_signature_corr, digits = 2),
+    file = opt$sig_corr,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
 
 r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output
 write.table(
-  format(r_genes[-1, -2], digits = 2),
-  file = opt$gene_corr,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    format(r_genes[-1, -2], digits = 2),
+    file = opt$gene_corr,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
 
 p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output
 write.table(
-  format(p_genes[-1, -2], digits = 2),
-  file = opt$gene_corr_pval,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    format(p_genes[-1, -2], digits = 2),
+    file = opt$gene_corr_pval,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
--- a/correlation_with_signature.xml	Wed Oct 18 10:00:34 2023 +0000
+++ b/correlation_with_signature.xml	Thu Nov 07 21:01:56 2024 +0000
@@ -1,5 +1,8 @@
-<tool id="single_cell_gene_expression_correlations" name="single-cell gene expression correlations" version="4.3.1+galaxy0" profile="21.01">
+<tool id="single_cell_gene_expression_correlations" name="single-cell gene expression correlations" version="4.3.1+galaxy1" profile="21.01">
     <description>between genes or with a signature of selected genes</description>
+    <xrefs>
+        <xref type="bio.tools">galaxy_single_cell_suite</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="1.7.3">r-optparse</requirement>
         <requirement type="package" version="5.1_1">r-hmisc</requirement>