Mercurial > repos > iuc > raceid_clustering
comparison scripts/cluster.R @ 0:4ea021bd7513 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
| author | iuc |
|---|---|
| date | Thu, 22 Nov 2018 04:43:57 -0500 |
| parents | |
| children | 89ee61bcc310 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4ea021bd7513 |
|---|---|
#!/usr/bin/env Rscript
## Command-line driver: takes exactly one argument, the path to an R config
## file, which is source()d below. The functions and top-level code that follow
## read their settings from variables that file is expected to define.
VERSION <- "0.2"

args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 1) {
  message(paste("VERSION:", VERSION))
  stop("Please provide the config file")
}

## library() stops right here when a package is missing; require() would only
## return FALSE and let the script fail later on an undefined function.
suppressWarnings(suppressPackageStartupMessages(library(RaceID)))
suppressWarnings(suppressPackageStartupMessages(library(scran)))
source(args[1])
| 14 | |
| 15 | |
## Filter the data set, plot before/after histograms and optionally apply
## CCcorrect.
##
## Settings are read from variables in the calling environment (presumably
## set by the sourced config file): filt.lbatch.regexes, filt,
## filt.use.ccorrect and filt.ccc.
##
## sc      object handed to filterdata() as its first argument; its @expdata
##         slot is read for the cell names and the raw histograms.
## returns sc after filterdata() and, if filt.use.ccorrect is set, CCcorrect().
do.filter <- function(sc) {
  ## length() is 0 for NULL as well as for an empty list, so neither case
  ## reaches the lapply (the former 1:length() form indexed element 1 of an
  ## empty list).
  if (length(filt.lbatch.regexes) > 0) {
    cell.names <- colnames(sc@expdata)
    ## One vector of matching cell names per regex, as an unnamed list.
    ## This assigns into a function-local copy of filt.
    filt$LBatch <- unname(lapply(filt.lbatch.regexes, function(rx) {
      grep(rx, cell.names, value = TRUE)
    }))
  }

  ## c() puts sc in front of the configured options, so sc becomes the first
  ## positional argument of filterdata().
  sc <- do.call(filterdata, c(sc, filt))

  ## Get histogram metrics for library size and number of features.
  ## The variable names are kept as they are: hist() uses the name of its
  ## first argument for the default x-axis label.
  ## NOTE(review): the *.feat vectors are rowSums (one total per row), while
  ## the plot titles say "NumFeat" -- confirm this is the intended metric.
  raw.lib <- log(colSums(as.matrix(sc@expdata)))
  raw.feat <- log(rowSums(as.matrix(sc@expdata)))
  filt.lib <- log(colSums(getfdata(sc)))
  filt.feat <- log(rowSums(getfdata(sc)))

  br <- 50
  ## Shared axis limits derived from the unfiltered data were tried and
  ## dropped: R rejects the limits, and the norm data is too different to
  ## compare to the exp data, so each histogram keeps its own range.

  par(mfrow = c(2, 2))
  print(hist(raw.lib, breaks = br, main = "ExpData Log(LibSize)"))
  print(hist(raw.feat, breaks = br, main = "ExpData Log(NumFeat)"))
  print(hist(filt.lib, breaks = br, main = "FiltData Log(LibSize)"))
  print(hist(filt.feat, breaks = br, main = "FiltData Log(NumFeat)"))

  if (filt.use.ccorrect) {
    par(mfrow = c(2, 2))
    sc <- do.call(CCcorrect, c(sc, filt.ccc))
    print(plotdimsat(sc, change = TRUE))
    print(plotdimsat(sc, change = FALSE))
  }
  sc
}
| 63 | |
## Run compdist() and clustexp() on sc, then draw the cluster diagnostics.
##
## Options come from the lists clust.compdist and clust.clustexp in the
## calling environment (presumably set by the sourced config file).
##
## sc      object passed as first argument to compdist() and clustexp().
## returns sc as returned by clustexp().
do.cluster <- function(sc) {
  sc <- do.call(compdist, c(sc, clust.compdist))
  sc <- do.call(clustexp, c(sc, clust.clustexp))

  ## Saturation plots are drawn only when the clustexp options set sat.
  if (clust.clustexp$sat) {
    print(plotsaturation(sc, disp = FALSE))
    print(plotsaturation(sc, disp = TRUE))
  }
  print(plotjaccard(sc))
  sc
}
| 74 | |
## Run findoutliers() (and rfcorrect() when outlier.use.randomforest is set),
## then draw the outlier diagnostics and the initial/final cluster heatmaps.
##
## Options come from outlier.findoutliers, outlier.use.randomforest and
## outlier.rfcorrect in the calling environment.
##
## sc      object passed as first argument to findoutliers() / rfcorrect().
## returns the updated sc.
do.outlier <- function(sc) {
  sc <- do.call(findoutliers, c(sc, outlier.findoutliers))
  if (outlier.use.randomforest) {
    sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect))
  }

  print(plotbackground(sc))
  print(plotsensitivity(sc))
  print(plotoutlierprobs(sc))

  ## Heatmaps: draw one, then caption it in the top margin (side 3, line 0).
  captioned.heatmap <- function(use.final, caption) {
    clustheatmap(sc, final = use.final)
    print(mtext(caption, side = 3, line = 0)) ## spacing is a hack
  }
  captioned.heatmap(FALSE, "(Initial)")
  captioned.heatmap(TRUE, "(Final)")

  sc
}
| 94 | |
## Apply comptsne() and then compfr() to sc, with the option lists
## cluster.comptsne and cluster.compfr taken from the calling environment.
##
## sc      object passed as first argument to both functions.
## returns sc as returned by compfr().
do.clustmap <- function(sc) {
  with.tsne <- do.call(comptsne, c(sc, cluster.comptsne))
  do.call(compfr, c(with.tsne, cluster.compfr))
}
| 100 | |
| 101 | |
## For every cluster in sc@cpart: pick the genes whose fold change exceeds
## genelist.foldchange, plot the top ones with plotmarkergenes(), and write
## the combined per-cluster gene tables to out.genelist (tab separated).
##
## Reads genelist.pvalue, genelist.foldchange, genelist.tablelim,
## genelist.plotlim and out.genelist from the calling environment.
##
## sc      object with a @cpart slot, passed to clustdiffgenes() and
##         plotmarkergenes().
## returns the value of write.table().
mkgenelist <- function(sc) {
  ## Caption in the top margin; only the text and the line offset vary.
  caption <- function(text, line) {
    print(mtext(text, side = 3, line = line, cex = 0.8)) ## spacing is a hack
  }

  ## NOTE(review): this sets an R option called "cex", which is separate from
  ## the graphics parameter par(cex) -- confirm which one was intended.
  options(cex = 1)

  tables <- lapply(unique(sc@cpart), function(n) {
    dg <- clustdiffgenes(sc, cl = n, pvalue = genelist.pvalue)

    dg.goi <- dg[dg$fc > genelist.foldchange, ]
    dg.goi.table <- head(dg.goi, genelist.tablelim)

    ## cbind() of a cluster id with a zero-row table is an error, and there is
    ## nothing to plot either, so a cluster without qualifying genes is skipped.
    if (nrow(dg.goi.table) == 0) {
      message(paste("Cluster", n, "has no genes with fc >",
                    genelist.foldchange, "- skipped"))
      return(NULL)
    }

    goi <- head(rownames(dg.goi.table), genelist.plotlim)
    print(plotmarkergenes(sc, goi))
    caption(paste(" Cluster ", n), 0)
    caption(paste(" Sig. Genes"), -1)
    caption(paste(" (fc > ", genelist.foldchange, ")"), -2)

    ## The cluster id goes in front as column "n".
    cbind(n = n, dg.goi.table)
  })

  ## Bind once instead of growing a data frame through `<<-`.
  df <- do.call(rbind, Filter(Negate(is.null), tables))
  write.table(df, file = out.genelist, sep = "\t", quote = FALSE)
}
| 129 | |
## Main driver. Every plot produced between pdf() and dev.off() goes into
## out.pdf; the final object is then saved to out.rdat. The flags, the output
## paths and sc itself are expected to exist already at this point.
pdf(out.pdf)

if (use.filtnormconf) {
  sc <- do.filter(sc)

  ## Report how much survives filtering: @expdata is compared with @ndata.
  message(paste(" - Source:: genes:", nrow(sc@expdata),
                ", cells:", ncol(sc@expdata)))
  message(paste(" - Filter:: genes:", nrow(sc@ndata),
                ", cells:", ncol(sc@ndata)))
  message(paste(
    " :: ",
    sprintf("%.1f", 100 * nrow(sc@ndata)/nrow(sc@expdata)),
    "% of genes remain,",
    sprintf("%.1f", 100 * ncol(sc@ndata)/ncol(sc@expdata)),
    "% of cells remain"
  ))
}

if (use.cluster) {
  ## Each stage starts on a fresh 2x2 page layout.
  par(mfrow = c(2, 2))
  sc <- do.cluster(sc)

  par(mfrow = c(2, 2))
  sc <- do.outlier(sc)

  ## Margins of 1 line, except 6 on top (third entry of mar).
  par(mfrow = c(2, 2), mar = c(1, 1, 6, 1))
  sc <- do.clustmap(sc)

  mkgenelist(sc)
}

dev.off()

saveRDS(sc, out.rdat)
