Mercurial > repos > idot > coverage_correlation
comparison corr.R @ 2:1eb46757d8bb
Uploaded
| author | idot |
|---|---|
| date | Sun, 18 Aug 2013 09:42:48 -0400 |
| parents | |
| children | 7bdd29cdfed8 |
comparison
equal
deleted
inserted
replaced
| 1:f106b52086b0 | 2:1eb46757d8bb |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 # | |
| 3 #for SGE | |
| 4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript | |
| 5 #$ -q solexa.q | |
| 6 #$ -P pipeline | |
| 7 #$ -cwd | |
| 8 #$ -l vf=30G | |
| 9 | |
| 10 #args[1] <- comma separated bigwig input files | |
| 11 #args[2] <- comma separated names of files | |
| 12 #args[3] <- comma separated file formats | |
| 13 #args[4] <- output pdf | |
| 14 #args[5] <- output matrix | |
| 15 #args[6] <- title | |
| 16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong | |
| 17 | |
| 18 options("useFancyQuotes" = FALSE) | |
| 19 | |
| 20 library(rtracklayer) | |
| 21 library(lattice) | |
| 22 library(latticeExtra) | |
| 23 library(hexbin) | |
| 24 | |
| 25 ## creates the union of coverages i.e. mappable regions | |
| 26 ## todo: add mappability argument | |
| 27 createMappable <- function(coverages){ | |
| 28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings | |
| 29 } | |
| 30 | |
| 31 ## adds mappable regions as 0 coverage to track | |
| 32 covWith0 <- function(cov, mappable){ | |
| 33 c0 <- setdiff(mappable, cov) | |
| 34 cus <- if(length(c0) > 0){ | |
| 35 elementMetadata(c0)$score <- 0 | |
| 36 sort(c(c0, cov)) | |
| 37 }else{ | |
| 38 sort(cov) | |
| 39 } | |
| 40 rc <- Rle(score(cus), ranges(cus)@width) | |
| 41 rc | |
| 42 } | |
| 43 | |
| 44 ## unlisted rlelist is too long. Taking weighted mean as approximation. | |
| 45 correlate <- function(rlA, rlB){ | |
| 46 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length))) | |
| 47 } | |
| 48 | |
| 49 | |
| 50 ## correlation dist | |
| 51 corrDist <- function(covRles, outnames){ | |
| 52 vl <- length(covRles) | |
| 53 v <- 1:vl | |
| 54 o <- matrix(NA,vl,vl) | |
| 55 colnames(o) <- outnames | |
| 56 rownames(o) <- outnames | |
| 57 tri <- lower.tri(o, diag=FALSE) | |
| 58 indic <- expand.grid(v,v) | |
| 59 pairs <- indic[tri,] | |
| 60 corrs <- apply(pairs,1, function(ins){ | |
| 61 index1 <- ins[1] | |
| 62 index2 <- ins[2] | |
| 63 cor(covRles[[index1]], covRles[[index2]]) | |
| 64 # correlate(covRles[[index1]], covRles[[index2]]) | |
| 65 }) | |
| 66 | |
| 67 o[lower.tri(o, diag=FALSE)] <- corrs | |
| 68 o[upper.tri(o)] <- t(o)[upper.tri(o)] | |
| 69 diag(o) <- 1 | |
| 70 o | |
| 71 } | |
| 72 | |
| 73 | |
| 74 | |
| 75 plotCorr <- function(corrMat, title){ | |
| 76 cor.d <- dist(corrMat) | |
| 77 cor.row <- as.dendrogram( hclust( cor.d )) | |
| 78 ord <- order.dendrogram(cor.row) | |
| 79 levelplot( | |
| 80 corrMat[ord,ord], | |
| 81 scales=list(x=list(rot=90)), | |
| 82 colorkey = list(space="left"), | |
| 83 col.regions=BTY(100), | |
| 84 xlab = "", | |
| 85 ylab = "pearson correlation coefficient", | |
| 86 legend = list(top=list( fun = dendrogramGrob, args= list( | |
| 87 x=cor.row, | |
| 88 ord = ord, | |
| 89 side = "top" | |
| 90 ))), | |
| 91 main=title | |
| 92 ) | |
| 93 | |
| 94 } | |
| 95 | |
| 96 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){ | |
| 97 | |
| 98 ca <- lapply(coverages, covWith0, mappable) | |
| 99 | |
| 100 cd <- corrDist(ca, outnames) | |
| 101 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE) | |
| 102 pdf(outnamePDF) | |
| 103 print(plotCorr(cd, title)) | |
| 104 dev.off() | |
| 105 | |
| 106 } | |
| 107 | |
| 108 getCoverage <- function(infile, format){ | |
| 109 print(paste("reading", infile, format)) | |
| 110 values <- import(infile, format=format, asRangedData = FALSE) | |
| 111 if(tolower(format) %in% c("bigwig","wig")){ | |
| 112 values | |
| 113 }else{ | |
| 114 cov <- coverage(values) | |
| 115 gcov <- as(cov, "GRanges") | |
| 116 gcov[score(gcov) > 0] | |
| 117 } | |
| 118 } | |
| 119 | |
| 120 | |
| 121 getCoverages <- function(infiles, formats){ | |
| 122 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) }) | |
| 123 } | |
| 124 | |
| 125 getMappable <- function(mappable){ | |
| 126 if(is.na(mappable)){ | |
| 127 createMappable(coverages) | |
| 128 }else{ | |
| 129 import(mappable, format="bed", asRangedData = FALSE) | |
| 130 } | |
| 131 } | |
| 132 | |
| 133 | |
| 134 args <- commandArgs(TRUE) | |
| 135 | |
| 136 infiles <- unlist(strsplit(args[1], ",")) | |
| 137 outnames <- unlist(strsplit(args[2], ",")) | |
| 138 formats <- unlist(strsplit(args[3], ",")) | |
| 139 outnamePDF <- args[4] | |
| 140 outnameMat <- args[5] | |
| 141 title <- args[6] | |
| 142 mappable <- args[7] | |
| 143 | |
| 144 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable) | |
| 145 lpp <- sapply(lp, paste, sep=" ") | |
| 146 print("parsed input parameters") | |
| 147 print(lpp) | |
| 148 | |
| 149 | |
| 150 coverages <- getCoverages(infiles, formats) | |
| 151 mappability <- getMappable(mappable) | |
| 152 | |
| 153 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title) | |
| 154 | |
| 155 | |
| 156 | |
| 157 |
