comparison signature_score.R @ 2:e08419b8ec24 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_signature_score commit 987e0ceb55e8de1d2f09d0f2ae48ff7cd3e82051
author artbio
date Tue, 09 Jul 2019 10:52:29 -0400
parents baf7a079bce0
children 3351ca630a01
comparison
equal deleted inserted replaced
1:01b2c4fcada8 2:e08419b8ec24
61 make_option( 61 make_option(
62 "--stats", 62 "--stats",
63 default = "./statistics.tab", 63 default = "./statistics.tab",
64 type = 'character', 64 type = 'character',
65 help = "statistics path [default : '%default' ]" 65 help = "statistics path [default : '%default' ]"
66 ),
67 make_option(
68 "--correlations",
69 default = "./correlations.tab",
70 type = 'character',
71 help = "Correlations between signature genes [default : '%default' ]"
72 ),
73 make_option(
74 "--covariances",
75 default = "./statistics.tab",
76 type = 'character',
77 help = "Covariances between signature genes [default : '%default' ]"
66 ), 78 ),
67 make_option( 79 make_option(
68 "--pdf", 80 "--pdf",
69 default = "~/output.pdf", 81 default = "~/output.pdf",
70 type = 'character', 82 type = 'character',
98 logical_genes <- rownames(data.counts) %in% genes 110 logical_genes <- rownames(data.counts) %in% genes
99 111
100 # Retrieve target genes in counts data 112 # Retrieve target genes in counts data
101 signature.counts <- subset(data.counts, logical_genes) 113 signature.counts <- subset(data.counts, logical_genes)
102 114
115 # compute covariance
116 signature.covariances <- as.data.frame(cov(t(signature.counts)))
117 signature.covariances <- cbind(gene=rownames(signature.covariances), signature.covariances)
118 write.table(signature.covariances, file=opt$covariances, quote=F, row.names=F, sep="\t")
119
120 # compute signature.correlations
121 signature.correlations <- as.data.frame(cov(t(signature.counts)))
122 signature.correlations <- cbind(gene=rownames(signature.correlations), signature.correlations)
123 write.table(signature.correlations, file=opt$correlations, quote=F, row.names=F, sep="\t")
103 124
104 ## Descriptive Statistics Function 125 ## Descriptive Statistics Function
105 descriptive_stats = function(InputData) { 126 descriptive_stats = function(InputData) {
106 SummaryData = data.frame( 127 SummaryData = data.frame(
107 mean = rowMeans(InputData), 128 mean = rowMeans(InputData),
169 order = rank(score, ties.method = "first"), 190 order = rank(score, ties.method = "first"),
170 signature = signature_output$rate, 191 signature = signature_output$rate,
171 stringsAsFactors = F) 192 stringsAsFactors = F)
172 193
173 pdf(file = opt$pdf) 194 pdf(file = opt$pdf)
174 195 myplot <- ggplot(signature_output, aes(x=rate, y=score)) +
175 ggplot(score, aes(x = order, y = score)) + 196 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F, cex=0.5) +
176 geom_line() + 197 geom_abline(slope=0, intercept=mean(score$score), lwd=.5, color="red") +
177 geom_segment(x = 0, xend = max(score$order[score$signature == "LOW"]), y = mean(score$score), yend = mean(score$score)) + 198 scale_fill_manual(values=c("#ff0000", "#08661e")) +
178 geom_area(aes(fill = signature), alpha = .7) + 199 geom_jitter(size=0.2) + labs(y = "Score", x = "Rate") +
179 scale_fill_manual(values=c("#ff0000", "#08661e")) + 200 annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust=1.5,
180 geom_text(aes(x = 1, y = mean(score)), label = "Mean", vjust = -0.3, colour = "black") + 201 color="black", label = mean(score$score), parse = TRUE) +
181 labs(title = "Ordered cell signature scores", x = "Cell index", y = "Score") 202 labs(title = "Violin plots of Cell signature scores")
182 203
183 density_score <- density(score$score) 204 print(myplot)
184 ggplot(data.frame(density_score[1:2]), aes(x, y, fill = ifelse(x < mean(score$score), "LOW", "HIGH"))) +
185 geom_line() +
186 geom_vline(xintercept = mean(score$score)) +
187 geom_text(x = mean(score$score), y = max(density_score$y), label = "Mean", hjust = -0.3, colour = "black") +
188 geom_area(alpha = .7) +
189 scale_fill_manual(values=c("#ff0000", "#08661e")) +
190 ylim(0, max(density_score$y)) +
191 labs(
192 title = "Distribution of Cell signature scores",
193 x = paste("N =", density_score$n, "Bandwidth =", density_score$bw),
194 y = "Density",
195 fill = "Signature"
196 )
197
198 # Check score independant of low expression
199 p_gene <- ggplot(signature_output, aes(rate, nGenes)) +
200 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) +
201 scale_fill_manual(values=c("#ff0000", "#08661e")) +
202 geom_jitter() + labs(y = "Number of detected genes", x = "Signature")
203
204 p_counts <- ggplot(signature_output, aes(rate, total_counts)) +
205 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) +
206 scale_fill_manual(values=c("#ff0000", "#08661e")) +
207 geom_jitter() + labs(y = "Total counts", x = "Signature")
208
209 grid.arrange(p_gene, p_counts, ncol = 2, top = "Influence of library sequencing depth on cell signature scores")
210
211 dev.off() 205 dev.off()
212 206
213 # Save file 207 # Save file
214 write.table( 208 write.table(
215 signature_output, 209 signature_output,