Mercurial > repos > artbio > gsc_gene_expression_correlations
comparison correlation_with_signature.R @ 2:b49295546f29 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations commit 91d59a3a90a9bdb64ec70000b69a864285411d9a
author | artbio |
---|---|
date | Wed, 18 Oct 2023 10:00:34 +0000 |
parents | 8ad272e0b640 |
children |
comparison
equal
deleted
inserted
replaced
1:8ad272e0b640 | 2:b49295546f29 |
---|---|
1 # Performs multi-correlation analysis between the vectors of gene expressions | 1 # Performs multi-correlation analysis between the vectors of gene expressions |
2 # in single cell RNAseq libraries and the vectors of signature scores in these | 2 # in single cell RNAseq libraries and the vectors of signature scores in these |
3 # same single cell RNAseq libraries. | 3 # same single cell RNAseq libraries. |
4 | |
5 # Example of command | 4 # Example of command |
6 # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> | 5 # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> |
7 # --signatures_file <signature_scores.tsv> | 6 # --signatures_file <signature_scores.tsv> |
8 # --sep "\t" | 7 # --sep "\t" |
9 # --colnames "T" | 8 # --colnames "T" |
10 # --gene_corr <gene-gene corr file> | 9 # --gene_corr <gene-gene corr file> |
11 # --gene_corr_pval <gene-gene corr pvalues file> | 10 # --gene_corr_pval <gene-gene corr pvalues file> |
12 # --sig_corr <genes correlation to signature file> | 11 # --sig_corr <genes correlation to signature file> |
13 | 12 |
14 # load packages that are provided in the conda env | 13 options(show.error.messages = FALSE, |
15 options( show.error.messages=F, | 14 error = function() { |
16 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 15 cat(geterrmessage(), file = stderr()) |
16 q("no", 1, FALSE) | |
17 } | |
18 ) | |
17 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 19 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
18 warnings() | |
19 | 20 |
20 library(optparse) | 21 library(optparse) |
21 library(Hmisc) | 22 library(Hmisc) |
22 | 23 |
23 # Arguments | 24 # Arguments |
24 option_list = list( | 25 option_list <- list( |
25 make_option( | 26 make_option( |
26 "--sep", | 27 "--sep", |
27 default = '\t', | 28 default = "\t", |
28 type = 'character', | 29 type = "character", |
29 help = "File separator, must be the same for all input files [default : '%default' ]" | 30 help = "File separator, must be the same for all input files [default : '%default' ]" |
30 ), | 31 ), |
31 make_option( | 32 make_option( |
32 "--colnames", | 33 "--colnames", |
33 default = TRUE, | 34 default = TRUE, |
34 type = 'logical', | 35 type = "logical", |
35 help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" | 36 help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" |
36 ), | 37 ), |
37 make_option( | 38 make_option( |
38 "--expression_file", | 39 "--expression_file", |
39 default = NA, | 40 default = NA, |
40 type = 'character', | 41 type = "character", |
41 help = "Input file that contains log2(CPM +1) expression values" | 42 help = "Input file that contains log2(CPM +1) expression values" |
42 ), | 43 ), |
43 make_option( | 44 make_option( |
44 "--signatures_file", | 45 "--signatures_file", |
45 default = NA, | 46 default = NA, |
46 type = 'character', | 47 type = "character", |
47 help = "Input file that contains cell signature" | 48 help = "Input file that contains cell signature" |
48 ), | 49 ), |
49 make_option( | 50 make_option( |
50 "--sig_corr", | 51 "--sig_corr", |
51 default = "sig_corr.tsv", | 52 default = "sig_corr.tsv", |
52 type = 'character', | 53 type = "character", |
53 help = "signature correlations output [default : '%default' ]" | 54 help = "signature correlations output [default : '%default' ]" |
54 ), | 55 ), |
55 make_option( | 56 make_option( |
56 "--gene_corr", | 57 "--gene_corr", |
57 default = "gene_corr.tsv", | 58 default = "gene_corr.tsv", |
58 type = 'character', | 59 type = "character", |
59 help = "genes-genes correlations output [default : '%default' ]" | 60 help = "genes-genes correlations output [default : '%default' ]" |
60 ), | 61 ), |
61 make_option( | 62 make_option( |
62 "--gene_corr_pval", | 63 "--gene_corr_pval", |
63 default = "gene_corr_pval.tsv", | 64 default = "gene_corr_pval.tsv", |
64 type = 'character', | 65 type = "character", |
65 help = "genes-genes correlations pvalues output [default : '%default' ]" | 66 help = "genes-genes correlations pvalues output [default : '%default' ]" |
66 ) | 67 ) |
67 ) | 68 ) |
68 | 69 |
69 opt = parse_args(OptionParser(option_list = option_list), | 70 opt <- parse_args(OptionParser(option_list = option_list), |
70 args = commandArgs(trailingOnly = TRUE)) | 71 args = commandArgs(trailingOnly = TRUE)) |
71 | 72 |
72 if (opt$sep == "tab") {opt$sep = "\t"} | 73 if (opt$sep == "tab") { |
73 if (opt$sep == "comma") {opt$sep = ","} | 74 opt$sep <- "\t" |
75 } | |
76 if (opt$sep == "comma") { | |
77 opt$sep <- "," | |
78 } | |
74 | 79 |
75 # Open files | 80 # Open files |
76 data <- read.table( | 81 data <- read.delim( |
77 opt$expression_file, | 82 opt$expression_file, |
78 header = opt$colnames, | 83 header = opt$colnames, |
79 row.names = 1, | 84 row.names = 1, |
80 sep = opt$sep, | 85 sep = opt$sep, |
81 check.names = F | 86 check.names = FALSE |
82 ) | 87 ) |
83 signature <- read.delim( | 88 signature <- read.delim( |
84 opt$signatures_file, | 89 opt$signatures_file, |
85 header = T, | 90 header = TRUE, |
86 stringsAsFactors = F, | 91 stringsAsFactors = FALSE, |
87 row.names = 1, | 92 row.names = 1, |
88 sep = opt$sep, | 93 sep = opt$sep, |
89 check.names = F | 94 check.names = FALSE |
90 ) | 95 ) |
91 | 96 |
92 | 97 |
93 # keep only signatures that are in the expression dataframe | 98 # keep only signatures that are in the expression dataframe |
94 signature <- subset(signature, rownames(signature) %in% colnames(data)) | 99 signature <- subset(signature, rownames(signature) %in% colnames(data)) |
99 # Gene correlation | 104 # Gene correlation |
100 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells | 105 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells |
101 | 106 |
102 # Gene correlation with signature score | 107 # Gene correlation with signature score |
103 gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), | 108 gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), |
104 Pearson_correlation = gene_corr$r[, 1], | 109 Pearson_correlation = gene_corr$r[, 1], |
105 p_value = gene_corr$P[, 1]) | 110 p_value = gene_corr$P[, 1]) |
106 gene_signature_corr <- gene_signature_corr[ order(gene_signature_corr[,2], decreasing = T), ] | 111 gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ] |
107 | 112 |
108 | 113 |
109 # Save files | 114 ### Save files ### |
115 | |
110 write.table( | 116 write.table( |
111 gene_signature_corr, | 117 format(gene_signature_corr, digits = 2), |
112 file = opt$sig_corr, | 118 file = opt$sig_corr, |
113 sep = "\t", | 119 sep = "\t", |
114 quote = F, | 120 quote = FALSE, |
115 col.names = T, | 121 col.names = TRUE, |
116 row.names = F | 122 row.names = FALSE |
117 ) | 123 ) |
118 | 124 |
119 r_genes <- data.frame(gene=rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output | 125 r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output |
120 p_genes <- data.frame(gene=rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output | |
121 write.table( | 126 write.table( |
122 r_genes[-1,-2], | 127 format(r_genes[-1, -2], digits = 2), |
123 file = opt$gene_corr, | 128 file = opt$gene_corr, |
124 sep = "\t", | 129 sep = "\t", |
125 quote = F, | 130 quote = FALSE, |
126 col.names = T, | 131 col.names = TRUE, |
127 row.names = F | 132 row.names = FALSE |
128 ) | 133 ) |
134 | |
135 p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output | |
129 write.table( | 136 write.table( |
130 p_genes[-1,-2], | 137 format(p_genes[-1, -2], digits = 2), |
131 file = opt$gene_corr_pval, | 138 file = opt$gene_corr_pval, |
132 sep = "\t", | 139 sep = "\t", |
133 quote = F, | 140 quote = FALSE, |
134 col.names = T, | 141 col.names = TRUE, |
135 row.names = F | 142 row.names = FALSE |
136 ) | 143 ) |