| 
4
 | 
     1 debug = F
 | 
| 
 | 
     2 # Read in Data
 | 
| 
 | 
     3 if (debug == T) {
 | 
| 
 | 
     4   mtls <- read.delim("~/code/scorchR-annotate_mtls/mtls.xls", quote="", stringsAsFactors=F)
 | 
| 
 | 
     5   annotation <- read.delim("~/code/scorchR-annotate_mtls/gene_annotation.txt", header=T, stringsAsFactors=F)
 | 
| 
 | 
     6   threshold <- 100000  
 | 
| 
 | 
     7 } else {
 | 
| 
 | 
     8   cmd <- commandArgs(trailingOnly = T)
 | 
| 
 | 
     9   mtls <- read.delim(cmd[1], quote="", stringsAsFactors=F)
 | 
| 
 | 
    10   annotation <- read.delim(cmd[2], header=T, stringsAsFactors=F)
 | 
| 
 | 
    11   threshold <- as.numeric(cmd[3])
 | 
| 
 | 
    12 }
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 # Get the median summit
 | 
| 
 | 
    17 summits <- sapply(strsplit(mtls[ ,"summit"], split="_"), function(x) {median(as.numeric(x))})
 | 
| 
 | 
    18 # Extract every tss
 | 
| 
 | 
    19 tss <- apply(annotation,1,function(x) {
 | 
| 
 | 
    20   if (x["strand"]=="+"){
 | 
| 
 | 
    21     return(as.numeric(x["txStart"]))
 | 
| 
 | 
    22     }else{
 | 
| 
 | 
    23       return(as.numeric(x["txEnd"]))
 | 
| 
 | 
    24       }
 | 
| 
 | 
    25   })
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 # Get each Chromosome
 | 
| 
 | 
    29 chromosomes <- unique( mtls[,"chr"])
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 # Set up output:
 | 
| 
 | 
    32 trgt <- character(length = length(summits))
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 # Loop through Chromosomes
 | 
| 
 | 
    35 for (chromosome in chromosomes){
 | 
| 
 | 
    36   cat(chromosome, "\n")
 | 
| 
 | 
    37   chr.ix.tss <- which(annotation[ ,"chrom"]==chromosome)
 | 
| 
 | 
    38   chr.ix.summits <- which(mtls[ ,"chr"]==chromosome)
 | 
| 
 | 
    39   
 | 
| 
 | 
    40 
 | 
| 
 | 
    41   cat(length(chr.ix.summits), " -summits\n",
 | 
| 
 | 
    42       length(chr.ix.tss), " -tss\n")
 | 
| 
 | 
    43   if (length(chr.ix.summits)>0 && length(chr.ix.tss)>0) {
 | 
| 
 | 
    44   # Loop through Summits
 | 
| 
 | 
    45   for(summit.ix in chr.ix.summits) {
 | 
| 
 | 
    46     #cat("Summit = ", summit.ix, class(summit.ix),"\n")
 | 
| 
 | 
    47     distances <- abs(tss[chr.ix.tss] - summits[summit.ix])
 | 
| 
 | 
    48     closest.ix.tss <- which(distances == min(distances))[1]
 | 
| 
 | 
    49     if(!is.na(closest.ix.tss) && !is.na(distances[closest.ix.tss])) {
 | 
| 
 | 
    50     #print(closest.ix.tss)
 | 
| 
 | 
    51     if (distances[closest.ix.tss] < threshold) {
 | 
| 
 | 
    52       
 | 
| 
 | 
    53       trgt[summit.ix] <- annotation[chr.ix.tss, "name2"][closest.ix.tss]
 | 
| 
 | 
    54    
 | 
| 
 | 
    55 #       cat("A distance of ",
 | 
| 
 | 
    56 #           distances[closest.ix.tss],
 | 
| 
 | 
    57 #           " was found for the mtl summit located at: ",
 | 
| 
 | 
    58 #           chromosome,
 | 
| 
 | 
    59 #           ": ",
 | 
| 
 | 
    60 #           summits[summit.ix],
 | 
| 
 | 
    61 #           " and TSS located at: ",
 | 
| 
 | 
    62 #           chromosome,
 | 
| 
 | 
    63 #           ": ",
 | 
| 
 | 
    64 #           tss[chr.ix.tss][closest.ix.tss],
 | 
| 
 | 
    65 #           "\n")
 | 
| 
 | 
    66     }
 | 
| 
 | 
    67     }
 | 
| 
 | 
    68   }
 | 
| 
 | 
    69   } else {
 | 
| 
 | 
    70     cat("chromosome", chromosome, " either has no transcripts or no peaks in this experiment. \n")
 | 
| 
 | 
    71   }
 | 
| 
 | 
    72   output <- cbind(mtls, trgt)
 | 
| 
 | 
    73 }
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 write.table(output, file="annotated_mtls.xls", sep="\t",row.names=F, quote=F)
 | 
| 
 | 
    76 
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 
 | 
| 
 | 
    79 
 |