4
|
1 debug = F
|
|
2 # Read in Data
|
|
3 if (debug == T) {
|
|
4 mtls <- read.delim("~/code/scorchR-annotate_mtls/mtls.xls", quote="", stringsAsFactors=F)
|
|
5 annotation <- read.delim("~/code/scorchR-annotate_mtls/gene_annotation.txt", header=T, stringsAsFactors=F)
|
|
6 threshold <- 100000
|
|
7 } else {
|
|
8 cmd <- commandArgs(trailingOnly = T)
|
|
9 mtls <- read.delim(cmd[1], quote="", stringsAsFactors=F)
|
|
10 annotation <- read.delim(cmd[2], header=T, stringsAsFactors=F)
|
|
11 threshold <- as.numeric(cmd[3])
|
|
12 }
|
|
13
|
|
14
|
|
15
|
|
16 # Get the median summit
|
|
17 summits <- sapply(strsplit(mtls[ ,"summit"], split="_"), function(x) {median(as.numeric(x))})
|
|
18 # Extract every tss
|
|
19 tss <- apply(annotation,1,function(x) {
|
|
20 if (x["strand"]=="+"){
|
|
21 return(as.numeric(x["txStart"]))
|
|
22 }else{
|
|
23 return(as.numeric(x["txEnd"]))
|
|
24 }
|
|
25 })
|
|
26
|
|
27
|
|
28 # Get each Chromosome
|
|
29 chromosomes <- unique( mtls[,"chr"])
|
|
30
|
|
31 # Set up output:
|
|
32 trgt <- character(length = length(summits))
|
|
33
|
|
34 # Loop through Chromosomes
|
|
35 for (chromosome in chromosomes){
|
|
36 cat(chromosome, "\n")
|
|
37 chr.ix.tss <- which(annotation[ ,"chrom"]==chromosome)
|
|
38 chr.ix.summits <- which(mtls[ ,"chr"]==chromosome)
|
|
39
|
|
40
|
|
41 cat(length(chr.ix.summits), " -summits\n",
|
|
42 length(chr.ix.tss), " -tss\n")
|
|
43 if (length(chr.ix.summits)>0 && length(chr.ix.tss)>0) {
|
|
44 # Loop through Summits
|
|
45 for(summit.ix in chr.ix.summits) {
|
|
46 #cat("Summit = ", summit.ix, class(summit.ix),"\n")
|
|
47 distances <- abs(tss[chr.ix.tss] - summits[summit.ix])
|
|
48 closest.ix.tss <- which(distances == min(distances))[1]
|
|
49 if(!is.na(closest.ix.tss) && !is.na(distances[closest.ix.tss])) {
|
|
50 #print(closest.ix.tss)
|
|
51 if (distances[closest.ix.tss] < threshold) {
|
|
52
|
|
53 trgt[summit.ix] <- annotation[chr.ix.tss, "name2"][closest.ix.tss]
|
|
54
|
|
55 # cat("A distance of ",
|
|
56 # distances[closest.ix.tss],
|
|
57 # " was found for the mtl summit located at: ",
|
|
58 # chromosome,
|
|
59 # ": ",
|
|
60 # summits[summit.ix],
|
|
61 # " and TSS located at: ",
|
|
62 # chromosome,
|
|
63 # ": ",
|
|
64 # tss[chr.ix.tss][closest.ix.tss],
|
|
65 # "\n")
|
|
66 }
|
|
67 }
|
|
68 }
|
|
69 } else {
|
|
70 cat("chromosome", chromosome, " either has no transcripts or no peaks in this experiment. \n")
|
|
71 }
|
|
72 output <- cbind(mtls, trgt)
|
|
73 }
|
|
74
|
|
75 write.table(output, file="annotated_mtls.xls", sep="\t",row.names=F, quote=F)
|
|
76
|
|
77
|
|
78
|
|
79
|