Mercurial > repos > kmace > mtls_analysis
comparison mtls_analyze/annotate_mtls.R @ 4:b465306d00ba draft default tip
Uploaded
| author | kmace |
|---|---|
| date | Mon, 23 Jul 2012 13:00:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:a0306edbf2f8 | 4:b465306d00ba |
|---|---|
| 1 debug = F | |
| 2 # Read in Data | |
| 3 if (debug == T) { | |
| 4 mtls <- read.delim("~/code/scorchR-annotate_mtls/mtls.xls", quote="", stringsAsFactors=F) | |
| 5 annotation <- read.delim("~/code/scorchR-annotate_mtls/gene_annotation.txt", header=T, stringsAsFactors=F) | |
| 6 threshold <- 100000 | |
| 7 } else { | |
| 8 cmd <- commandArgs(trailingOnly = T) | |
| 9 mtls <- read.delim(cmd[1], quote="", stringsAsFactors=F) | |
| 10 annotation <- read.delim(cmd[2], header=T, stringsAsFactors=F) | |
| 11 threshold <- as.numeric(cmd[3]) | |
| 12 } | |
| 13 | |
| 14 | |
| 15 | |
| 16 # Get the median summit | |
| 17 summits <- sapply(strsplit(mtls[ ,"summit"], split="_"), function(x) {median(as.numeric(x))}) | |
| 18 # Extract every tss | |
| 19 tss <- apply(annotation,1,function(x) { | |
| 20 if (x["strand"]=="+"){ | |
| 21 return(as.numeric(x["txStart"])) | |
| 22 }else{ | |
| 23 return(as.numeric(x["txEnd"])) | |
| 24 } | |
| 25 }) | |
| 26 | |
| 27 | |
| 28 # Get each Chromosome | |
| 29 chromosomes <- unique( mtls[,"chr"]) | |
| 30 | |
| 31 # Set up output: | |
| 32 trgt <- character(length = length(summits)) | |
| 33 | |
| 34 # Loop through Chromosomes | |
| 35 for (chromosome in chromosomes){ | |
| 36 cat(chromosome, "\n") | |
| 37 chr.ix.tss <- which(annotation[ ,"chrom"]==chromosome) | |
| 38 chr.ix.summits <- which(mtls[ ,"chr"]==chromosome) | |
| 39 | |
| 40 | |
| 41 cat(length(chr.ix.summits), " -summits\n", | |
| 42 length(chr.ix.tss), " -tss\n") | |
| 43 if (length(chr.ix.summits)>0 && length(chr.ix.tss)>0) { | |
| 44 # Loop through Summits | |
| 45 for(summit.ix in chr.ix.summits) { | |
| 46 #cat("Summit = ", summit.ix, class(summit.ix),"\n") | |
| 47 distances <- abs(tss[chr.ix.tss] - summits[summit.ix]) | |
| 48 closest.ix.tss <- which(distances == min(distances))[1] | |
| 49 if(!is.na(closest.ix.tss) && !is.na(distances[closest.ix.tss])) { | |
| 50 #print(closest.ix.tss) | |
| 51 if (distances[closest.ix.tss] < threshold) { | |
| 52 | |
| 53 trgt[summit.ix] <- annotation[chr.ix.tss, "name2"][closest.ix.tss] | |
| 54 | |
| 55 # cat("A distance of ", | |
| 56 # distances[closest.ix.tss], | |
| 57 # " was found for the mtl summit located at: ", | |
| 58 # chromosome, | |
| 59 # ": ", | |
| 60 # summits[summit.ix], | |
| 61 # " and TSS located at: ", | |
| 62 # chromosome, | |
| 63 # ": ", | |
| 64 # tss[chr.ix.tss][closest.ix.tss], | |
| 65 # "\n") | |
| 66 } | |
| 67 } | |
| 68 } | |
| 69 } else { | |
| 70 cat("chromosome", chromosome, " either has no transcripts or no peaks in this experiment. \n") | |
| 71 } | |
| 72 output <- cbind(mtls, trgt) | |
| 73 } | |
| 74 | |
| 75 write.table(output, file="annotated_mtls.xls", sep="\t",row.names=F, quote=F) | |
| 76 | |
| 77 | |
| 78 | |
| 79 |
