annotate mtls_analyze/annotate_mtls.R @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
b465306d00ba Uploaded
kmace
parents:
diff changeset
1 debug = F
b465306d00ba Uploaded
kmace
parents:
diff changeset
2 # Read in Data
b465306d00ba Uploaded
kmace
parents:
diff changeset
3 if (debug == T) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
4 mtls <- read.delim("~/code/scorchR-annotate_mtls/mtls.xls", quote="", stringsAsFactors=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
5 annotation <- read.delim("~/code/scorchR-annotate_mtls/gene_annotation.txt", header=T, stringsAsFactors=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
6 threshold <- 100000
b465306d00ba Uploaded
kmace
parents:
diff changeset
7 } else {
b465306d00ba Uploaded
kmace
parents:
diff changeset
8 cmd <- commandArgs(trailingOnly = T)
b465306d00ba Uploaded
kmace
parents:
diff changeset
9 mtls <- read.delim(cmd[1], quote="", stringsAsFactors=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
10 annotation <- read.delim(cmd[2], header=T, stringsAsFactors=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
11 threshold <- as.numeric(cmd[3])
b465306d00ba Uploaded
kmace
parents:
diff changeset
12 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
13
b465306d00ba Uploaded
kmace
parents:
diff changeset
14
b465306d00ba Uploaded
kmace
parents:
diff changeset
15
b465306d00ba Uploaded
kmace
parents:
diff changeset
16 # Get the median summit
b465306d00ba Uploaded
kmace
parents:
diff changeset
17 summits <- sapply(strsplit(mtls[ ,"summit"], split="_"), function(x) {median(as.numeric(x))})
b465306d00ba Uploaded
kmace
parents:
diff changeset
18 # Extract every tss
b465306d00ba Uploaded
kmace
parents:
diff changeset
19 tss <- apply(annotation,1,function(x) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
20 if (x["strand"]=="+"){
b465306d00ba Uploaded
kmace
parents:
diff changeset
21 return(as.numeric(x["txStart"]))
b465306d00ba Uploaded
kmace
parents:
diff changeset
22 }else{
b465306d00ba Uploaded
kmace
parents:
diff changeset
23 return(as.numeric(x["txEnd"]))
b465306d00ba Uploaded
kmace
parents:
diff changeset
24 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
25 })
b465306d00ba Uploaded
kmace
parents:
diff changeset
26
b465306d00ba Uploaded
kmace
parents:
diff changeset
27
b465306d00ba Uploaded
kmace
parents:
diff changeset
28 # Get each Chromosome
b465306d00ba Uploaded
kmace
parents:
diff changeset
29 chromosomes <- unique( mtls[,"chr"])
b465306d00ba Uploaded
kmace
parents:
diff changeset
30
b465306d00ba Uploaded
kmace
parents:
diff changeset
31 # Set up output:
b465306d00ba Uploaded
kmace
parents:
diff changeset
32 trgt <- character(length = length(summits))
b465306d00ba Uploaded
kmace
parents:
diff changeset
33
b465306d00ba Uploaded
kmace
parents:
diff changeset
34 # Loop through Chromosomes
b465306d00ba Uploaded
kmace
parents:
diff changeset
35 for (chromosome in chromosomes){
b465306d00ba Uploaded
kmace
parents:
diff changeset
36 cat(chromosome, "\n")
b465306d00ba Uploaded
kmace
parents:
diff changeset
37 chr.ix.tss <- which(annotation[ ,"chrom"]==chromosome)
b465306d00ba Uploaded
kmace
parents:
diff changeset
38 chr.ix.summits <- which(mtls[ ,"chr"]==chromosome)
b465306d00ba Uploaded
kmace
parents:
diff changeset
39
b465306d00ba Uploaded
kmace
parents:
diff changeset
40
b465306d00ba Uploaded
kmace
parents:
diff changeset
41 cat(length(chr.ix.summits), " -summits\n",
b465306d00ba Uploaded
kmace
parents:
diff changeset
42 length(chr.ix.tss), " -tss\n")
b465306d00ba Uploaded
kmace
parents:
diff changeset
43 if (length(chr.ix.summits)>0 && length(chr.ix.tss)>0) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
44 # Loop through Summits
b465306d00ba Uploaded
kmace
parents:
diff changeset
45 for(summit.ix in chr.ix.summits) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
46 #cat("Summit = ", summit.ix, class(summit.ix),"\n")
b465306d00ba Uploaded
kmace
parents:
diff changeset
47 distances <- abs(tss[chr.ix.tss] - summits[summit.ix])
b465306d00ba Uploaded
kmace
parents:
diff changeset
48 closest.ix.tss <- which(distances == min(distances))[1]
b465306d00ba Uploaded
kmace
parents:
diff changeset
49 if(!is.na(closest.ix.tss) && !is.na(distances[closest.ix.tss])) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
50 #print(closest.ix.tss)
b465306d00ba Uploaded
kmace
parents:
diff changeset
51 if (distances[closest.ix.tss] < threshold) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
52
b465306d00ba Uploaded
kmace
parents:
diff changeset
53 trgt[summit.ix] <- annotation[chr.ix.tss, "name2"][closest.ix.tss]
b465306d00ba Uploaded
kmace
parents:
diff changeset
54
b465306d00ba Uploaded
kmace
parents:
diff changeset
55 # cat("A distance of ",
b465306d00ba Uploaded
kmace
parents:
diff changeset
56 # distances[closest.ix.tss],
b465306d00ba Uploaded
kmace
parents:
diff changeset
57 # " was found for the mtl summit located at: ",
b465306d00ba Uploaded
kmace
parents:
diff changeset
58 # chromosome,
b465306d00ba Uploaded
kmace
parents:
diff changeset
59 # ": ",
b465306d00ba Uploaded
kmace
parents:
diff changeset
60 # summits[summit.ix],
b465306d00ba Uploaded
kmace
parents:
diff changeset
61 # " and TSS located at: ",
b465306d00ba Uploaded
kmace
parents:
diff changeset
62 # chromosome,
b465306d00ba Uploaded
kmace
parents:
diff changeset
63 # ": ",
b465306d00ba Uploaded
kmace
parents:
diff changeset
64 # tss[chr.ix.tss][closest.ix.tss],
b465306d00ba Uploaded
kmace
parents:
diff changeset
65 # "\n")
b465306d00ba Uploaded
kmace
parents:
diff changeset
66 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
67 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
68 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
69 } else {
b465306d00ba Uploaded
kmace
parents:
diff changeset
70 cat("chromosome", chromosome, " either has no transcripts or no peaks in this experiment. \n")
b465306d00ba Uploaded
kmace
parents:
diff changeset
71 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
72 output <- cbind(mtls, trgt)
b465306d00ba Uploaded
kmace
parents:
diff changeset
73 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
74
b465306d00ba Uploaded
kmace
parents:
diff changeset
75 write.table(output, file="annotated_mtls.xls", sep="\t",row.names=F, quote=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
76
b465306d00ba Uploaded
kmace
parents:
diff changeset
77
b465306d00ba Uploaded
kmace
parents:
diff changeset
78
b465306d00ba Uploaded
kmace
parents:
diff changeset
79