Mercurial > repos > artbio > gsc_gene_expression_correlations
diff correlation_with_signature.R @ 2:b49295546f29 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations commit 91d59a3a90a9bdb64ec70000b69a864285411d9a
author | artbio |
---|---|
date | Wed, 18 Oct 2023 10:00:34 +0000 |
parents | 8ad272e0b640 |
children |
line wrap: on
line diff
--- a/correlation_with_signature.R Mon Sep 02 04:38:59 2019 -0400 +++ b/correlation_with_signature.R Wed Oct 18 10:00:34 2023 +0000 @@ -1,7 +1,6 @@ # Performs multi-correlation analysis between the vectors of gene expressions # in single cell RNAseq libraries and the vectors of signature scores in these # same single cell RNAseq libraries. - # Example of command # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> # --signatures_file <signature_scores.tsv> @@ -11,82 +10,88 @@ # --gene_corr_pval <gene-gene corr pvalues file> # --sig_corr <genes correlation to signature file> -# load packages that are provided in the conda env -options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") -warnings() library(optparse) library(Hmisc) # Arguments -option_list = list( +option_list <- list( make_option( "--sep", - default = '\t', - type = 'character', + default = "\t", + type = "character", help = "File separator, must be the same for all input files [default : '%default' ]" ), make_option( "--colnames", default = TRUE, - type = 'logical', + type = "logical", help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" - ), + ), make_option( "--expression_file", default = NA, - type = 'character', + type = "character", help = "Input file that contains log2(CPM +1) expression values" ), make_option( "--signatures_file", default = NA, - type = 'character', + type = "character", help = "Input file that contains cell signature" ), make_option( "--sig_corr", default = "sig_corr.tsv", - type = 'character', + type = "character", help = "signature correlations output [default : '%default' ]" ), make_option( "--gene_corr", default = "gene_corr.tsv", - type = 'character', + type = "character", help = "genes-genes correlations output [default : '%default' ]" ), make_option( "--gene_corr_pval", default = "gene_corr_pval.tsv", - type = 'character', + type = "character", help = "genes-genes correlations pvalues output [default : '%default' ]" ) ) -opt = parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) +opt <- parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) -if (opt$sep == "tab") {opt$sep = "\t"} -if (opt$sep == "comma") {opt$sep = ","} +if (opt$sep == "tab") { + opt$sep <- "\t" +} +if (opt$sep == "comma") { + opt$sep <- "," +} # Open files -data <- read.table( +data <- read.delim( opt$expression_file, header = opt$colnames, row.names = 1, sep = opt$sep, - check.names = F + check.names = FALSE ) signature <- read.delim( opt$signatures_file, - header = T, - stringsAsFactors = F, + header = TRUE, + stringsAsFactors = FALSE, row.names = 1, sep = opt$sep, - check.names = F + check.names = FALSE ) @@ -101,36 +106,38 @@ # Gene correlation with signature score gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), - Pearson_correlation = gene_corr$r[, 1], + Pearson_correlation = gene_corr$r[, 1], p_value = gene_corr$P[, 1]) -gene_signature_corr <- gene_signature_corr[ order(gene_signature_corr[,2], decreasing = T), ] +gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ] -# Save files +### Save files ### + write.table( - gene_signature_corr, + format(gene_signature_corr, digits = 2), file = opt$sig_corr, sep = "\t", - quote = F, - col.names = T, - row.names = F + quote = FALSE, + col.names = TRUE, + row.names = FALSE ) -r_genes <- data.frame(gene=rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output -p_genes <- data.frame(gene=rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output +r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output write.table( - r_genes[-1,-2], + format(r_genes[-1, -2], digits = 2), file = opt$gene_corr, sep = "\t", - quote = F, - col.names = T, - row.names = F + quote = FALSE, + col.names = TRUE, + row.names = FALSE ) + +p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output write.table( - p_genes[-1,-2], + format(p_genes[-1, -2], digits = 2), file = opt$gene_corr_pval, sep = "\t", - quote = F, - col.names = T, - row.names = F + quote = FALSE, + col.names = TRUE, + row.names = FALSE )