comparison correlation_with_signature.R @ 2:b49295546f29 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations commit 91d59a3a90a9bdb64ec70000b69a864285411d9a
author artbio
date Wed, 18 Oct 2023 10:00:34 +0000
parents 8ad272e0b640
children
comparison
equal deleted inserted replaced
1:8ad272e0b640 2:b49295546f29
1 # Performs multi-correlation analysis between the vectors of gene expressions 1 # Performs multi-correlation analysis between the vectors of gene expressions
2 # in single cell RNAseq libraries and the vectors of signature scores in these 2 # in single cell RNAseq libraries and the vectors of signature scores in these
3 # same single cell RNAseq libraries. 3 # same single cell RNAseq libraries.
4
5 # Example of command 4 # Example of command
6 # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> 5 # Rscript correlations_with_signature.R --expression_file <expression_data.tsv>
7 # --signatures_file <signature_scores.tsv> 6 # --signatures_file <signature_scores.tsv>
8 # --sep "\t" 7 # --sep "\t"
9 # --colnames "T" 8 # --colnames "T"
10 # --gene_corr <gene-gene corr file> 9 # --gene_corr <gene-gene corr file>
11 # --gene_corr_pval <gene-gene corr pvalues file> 10 # --gene_corr_pval <gene-gene corr pvalues file>
12 # --sig_corr <genes correlation to signature file> 11 # --sig_corr <genes correlation to signature file>
13 12
14 # load packages that are provided in the conda env 13 options(show.error.messages = FALSE,
15 options( show.error.messages=F, 14 error = function() {
16 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 15 cat(geterrmessage(), file = stderr())
16 q("no", 1, FALSE)
17 }
18 )
17 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
18 warnings()
19 20
20 library(optparse) 21 library(optparse)
21 library(Hmisc) 22 library(Hmisc)
22 23
23 # Arguments 24 # Arguments
24 option_list = list( 25 option_list <- list(
25 make_option( 26 make_option(
26 "--sep", 27 "--sep",
27 default = '\t', 28 default = "\t",
28 type = 'character', 29 type = "character",
29 help = "File separator, must be the same for all input files [default : '%default' ]" 30 help = "File separator, must be the same for all input files [default : '%default' ]"
30 ), 31 ),
31 make_option( 32 make_option(
32 "--colnames", 33 "--colnames",
33 default = TRUE, 34 default = TRUE,
34 type = 'logical', 35 type = "logical",
35 help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" 36 help = "Consider first lines as header (must stand for all input files) [default : '%default' ]"
36 ), 37 ),
37 make_option( 38 make_option(
38 "--expression_file", 39 "--expression_file",
39 default = NA, 40 default = NA,
40 type = 'character', 41 type = "character",
41 help = "Input file that contains log2(CPM +1) expression values" 42 help = "Input file that contains log2(CPM +1) expression values"
42 ), 43 ),
43 make_option( 44 make_option(
44 "--signatures_file", 45 "--signatures_file",
45 default = NA, 46 default = NA,
46 type = 'character', 47 type = "character",
47 help = "Input file that contains cell signature" 48 help = "Input file that contains cell signature"
48 ), 49 ),
49 make_option( 50 make_option(
50 "--sig_corr", 51 "--sig_corr",
51 default = "sig_corr.tsv", 52 default = "sig_corr.tsv",
52 type = 'character', 53 type = "character",
53 help = "signature correlations output [default : '%default' ]" 54 help = "signature correlations output [default : '%default' ]"
54 ), 55 ),
55 make_option( 56 make_option(
56 "--gene_corr", 57 "--gene_corr",
57 default = "gene_corr.tsv", 58 default = "gene_corr.tsv",
58 type = 'character', 59 type = "character",
59 help = "genes-genes correlations output [default : '%default' ]" 60 help = "genes-genes correlations output [default : '%default' ]"
60 ), 61 ),
61 make_option( 62 make_option(
62 "--gene_corr_pval", 63 "--gene_corr_pval",
63 default = "gene_corr_pval.tsv", 64 default = "gene_corr_pval.tsv",
64 type = 'character', 65 type = "character",
65 help = "genes-genes correlations pvalues output [default : '%default' ]" 66 help = "genes-genes correlations pvalues output [default : '%default' ]"
66 ) 67 )
67 ) 68 )
68 69
69 opt = parse_args(OptionParser(option_list = option_list), 70 opt <- parse_args(OptionParser(option_list = option_list),
70 args = commandArgs(trailingOnly = TRUE)) 71 args = commandArgs(trailingOnly = TRUE))
71 72
72 if (opt$sep == "tab") {opt$sep = "\t"} 73 if (opt$sep == "tab") {
73 if (opt$sep == "comma") {opt$sep = ","} 74 opt$sep <- "\t"
75 }
76 if (opt$sep == "comma") {
77 opt$sep <- ","
78 }
74 79
75 # Open files 80 # Open files
76 data <- read.table( 81 data <- read.delim(
77 opt$expression_file, 82 opt$expression_file,
78 header = opt$colnames, 83 header = opt$colnames,
79 row.names = 1, 84 row.names = 1,
80 sep = opt$sep, 85 sep = opt$sep,
81 check.names = F 86 check.names = FALSE
82 ) 87 )
83 signature <- read.delim( 88 signature <- read.delim(
84 opt$signatures_file, 89 opt$signatures_file,
85 header = T, 90 header = TRUE,
86 stringsAsFactors = F, 91 stringsAsFactors = FALSE,
87 row.names = 1, 92 row.names = 1,
88 sep = opt$sep, 93 sep = opt$sep,
89 check.names = F 94 check.names = FALSE
90 ) 95 )
91 96
92 97
93 # keep only signatures that are in the expression dataframe 98 # keep only signatures that are in the expression dataframe
94 signature <- subset(signature, rownames(signature) %in% colnames(data)) 99 signature <- subset(signature, rownames(signature) %in% colnames(data))
99 # Gene correlation 104 # Gene correlation
100 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells 105 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells
101 106
102 # Gene correlation with signature score 107 # Gene correlation with signature score
103 gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), 108 gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r),
104 Pearson_correlation = gene_corr$r[, 1], 109 Pearson_correlation = gene_corr$r[, 1],
105 p_value = gene_corr$P[, 1]) 110 p_value = gene_corr$P[, 1])
106 gene_signature_corr <- gene_signature_corr[ order(gene_signature_corr[,2], decreasing = T), ] 111 gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ]
107 112
108 113
109 # Save files 114 ### Save files ###
115
110 write.table( 116 write.table(
111 gene_signature_corr, 117 format(gene_signature_corr, digits = 2),
112 file = opt$sig_corr, 118 file = opt$sig_corr,
113 sep = "\t", 119 sep = "\t",
114 quote = F, 120 quote = FALSE,
115 col.names = T, 121 col.names = TRUE,
116 row.names = F 122 row.names = FALSE
117 ) 123 )
118 124
119 r_genes <- data.frame(gene=rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output 125 r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output
120 p_genes <- data.frame(gene=rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output
121 write.table( 126 write.table(
122 r_genes[-1,-2], 127 format(r_genes[-1, -2], digits = 2),
123 file = opt$gene_corr, 128 file = opt$gene_corr,
124 sep = "\t", 129 sep = "\t",
125 quote = F, 130 quote = FALSE,
126 col.names = T, 131 col.names = TRUE,
127 row.names = F 132 row.names = FALSE
128 ) 133 )
134
135 p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output
129 write.table( 136 write.table(
130 p_genes[-1,-2], 137 format(p_genes[-1, -2], digits = 2),
131 file = opt$gene_corr_pval, 138 file = opt$gene_corr_pval,
132 sep = "\t", 139 sep = "\t",
133 quote = F, 140 quote = FALSE,
134 col.names = T, 141 col.names = TRUE,
135 row.names = F 142 row.names = FALSE
136 ) 143 )