Mercurial > repos > artbio > gsc_gene_expression_correlations
view correlation_with_signature.R @ 2:b49295546f29 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations commit 91d59a3a90a9bdb64ec70000b69a864285411d9a
author | artbio |
---|---|
date | Wed, 18 Oct 2023 10:00:34 +0000 |
parents | 8ad272e0b640 |
children |
line wrap: on
line source
# Performs multi-correlation analysis between the vectors of gene expressions # in single cell RNAseq libraries and the vectors of signature scores in these # same single cell RNAseq libraries. # Example of command # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> # --signatures_file <signature_scores.tsv> # --sep "\t" # --colnames "T" # --gene_corr <gene-gene corr file> # --gene_corr_pval <gene-gene corr pvalues file> # --sig_corr <genes correlation to signature file> options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()) q("no", 1, FALSE) } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") library(optparse) library(Hmisc) # Arguments option_list <- list( make_option( "--sep", default = "\t", type = "character", help = "File separator, must be the same for all input files [default : '%default' ]" ), make_option( "--colnames", default = TRUE, type = "logical", help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" ), make_option( "--expression_file", default = NA, type = "character", help = "Input file that contains log2(CPM +1) expression values" ), make_option( "--signatures_file", default = NA, type = "character", help = "Input file that contains cell signature" ), make_option( "--sig_corr", default = "sig_corr.tsv", type = "character", help = "signature correlations output [default : '%default' ]" ), make_option( "--gene_corr", default = "gene_corr.tsv", type = "character", help = "genes-genes correlations output [default : '%default' ]" ), make_option( "--gene_corr_pval", default = "gene_corr_pval.tsv", type = "character", help = "genes-genes correlations pvalues output [default : '%default' ]" ) ) opt <- parse_args(OptionParser(option_list = option_list), args = commandArgs(trailingOnly = TRUE)) if (opt$sep == "tab") { opt$sep <- "\t" } if (opt$sep == "comma") { opt$sep <- "," } # Open files data <- read.delim( opt$expression_file, header = opt$colnames, row.names = 1, sep = opt$sep, check.names = FALSE ) signature <- read.delim( opt$signatures_file, header = TRUE, stringsAsFactors = FALSE, row.names = 1, sep = opt$sep, check.names = FALSE ) # keep only signatures that are in the expression dataframe signature <- subset(signature, rownames(signature) %in% colnames(data)) # Add signature score to expression matrix data <- rbind(t(signature), data) # Gene correlation gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells # Gene correlation with signature score gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), Pearson_correlation = gene_corr$r[, 1], p_value = gene_corr$P[, 1]) gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ] ### Save files ### write.table( format(gene_signature_corr, digits = 2), file = opt$sig_corr, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE ) r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output write.table( format(r_genes[-1, -2], digits = 2), file = opt$gene_corr, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE ) p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output write.table( format(p_genes[-1, -2], digits = 2), file = opt$gene_corr_pval, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE )