diff mtls_analyze/annotate_mtls.R @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mtls_analyze/annotate_mtls.R	Mon Jul 23 13:00:15 2012 -0400
@@ -0,0 +1,79 @@
+debug = F
+# Read in Data
+if (debug == T) {
+  mtls <- read.delim("~/code/scorchR-annotate_mtls/mtls.xls", quote="", stringsAsFactors=F)
+  annotation <- read.delim("~/code/scorchR-annotate_mtls/gene_annotation.txt", header=T, stringsAsFactors=F)
+  threshold <- 100000  
+} else {
+  cmd <- commandArgs(trailingOnly = T)
+  mtls <- read.delim(cmd[1], quote="", stringsAsFactors=F)
+  annotation <- read.delim(cmd[2], header=T, stringsAsFactors=F)
+  threshold <- as.numeric(cmd[3])
+}
+
+
+
+# Get the median summit
+summits <- sapply(strsplit(mtls[ ,"summit"], split="_"), function(x) {median(as.numeric(x))})
+# Extract every tss
+tss <- apply(annotation,1,function(x) {
+  if (x["strand"]=="+"){
+    return(as.numeric(x["txStart"]))
+    }else{
+      return(as.numeric(x["txEnd"]))
+      }
+  })
+
+
+# Get each Chromosome
+chromosomes <- unique( mtls[,"chr"])
+
+# Set up output:
+trgt <- character(length = length(summits))
+
+# Loop through Chromosomes
+for (chromosome in chromosomes){
+  cat(chromosome, "\n")
+  chr.ix.tss <- which(annotation[ ,"chrom"]==chromosome)
+  chr.ix.summits <- which(mtls[ ,"chr"]==chromosome)
+  
+
+  cat(length(chr.ix.summits), " -summits\n",
+      length(chr.ix.tss), " -tss\n")
+  if (length(chr.ix.summits)>0 && length(chr.ix.tss)>0) {
+  # Loop through Summits
+  for(summit.ix in chr.ix.summits) {
+    #cat("Summit = ", summit.ix, class(summit.ix),"\n")
+    distances <- abs(tss[chr.ix.tss] - summits[summit.ix])
+    closest.ix.tss <- which(distances == min(distances))[1]
+    if(!is.na(closest.ix.tss) && !is.na(distances[closest.ix.tss])) {
+    #print(closest.ix.tss)
+    if (distances[closest.ix.tss] < threshold) {
+      
+      trgt[summit.ix] <- annotation[chr.ix.tss, "name2"][closest.ix.tss]
+   
+#       cat("A distance of ",
+#           distances[closest.ix.tss],
+#           " was found for the mtl summit located at: ",
+#           chromosome,
+#           ": ",
+#           summits[summit.ix],
+#           " and TSS located at: ",
+#           chromosome,
+#           ": ",
+#           tss[chr.ix.tss][closest.ix.tss],
+#           "\n")
+    }
+    }
+  }
+  } else {
+    cat("chromosome", chromosome, " either has no transcripts or no peaks in this experiment. \n")
+  }
+  output <- cbind(mtls, trgt)
+}
+
+write.table(output, file="annotated_mtls.xls", sep="\t",row.names=F, quote=F)
+
+
+
+