view mtls_analyze/gtfToMapFriendlyAnnotation.R @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
line wrap: on
line source

rm (list = ls())
args <- commandArgs()
print(args)
gtf <- read.table(args[6],as.is=T)
colnames(gtf) <- c("chrom", 
                   "source", 
                   "type", 
                   "five_prime", 
                   "three_prime", 
                   "score", 
                   "strand", 
                   "frame", 
                   "nothinggene", 
                   "gene", 
                   "nothingSemi1",
                   "nothingtranscript",
                   "transcript",
                   "nothingSemi2")
gtf.exon <- gtf[which(gtf[,"type"]=="exon"),]
gtf.exon.slim <-gtf.exon[,c("chrom", "five_prime", "three_prime", "strand", "transcript")]
gtf.exon.slim.sort <- gtf.exon.slim[sort.list(gtf.exon.slim[,"transcript"]),]



transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))
#transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))



output <- matrix(0,nr=length(transcripts),nc=ncol(gtf.exon.slim.sort))

all.chrom <- gtf.exon.slim.sort[,"chrom"]
all.five_prime <- gtf.exon.slim.sort[,"five_prime"]
all.three_prime <- gtf.exon.slim.sort[,"three_prime"]
all.strand <- gtf.exon.slim.sort[,"strand"]
all.transcript <- gtf.exon.slim.sort[,"transcript"]

colnames(output) <- colnames(gtf.exon.slim.sort)

j <- 0
i <- 1

previous.strand <- gtf.exon.slim.sort[i,"strand"]
previous.chrom <- gtf.exon.slim.sort[i,"chrom"]
previous.transcript <- gtf.exon.slim.sort[i,"transcript"]
previous.five_prime.min <- gtf.exon.slim.sort[i,"five_prime"]
previous.three_prime.max <- gtf.exon.slim.sort[i,"three_prime"]

# Progress bar:
total <- dim(gtf.exon.slim.sort)[1]
# create progress bar
pb <- txtProgressBar(min = 0, max = total, style = 3)




for ( i in 1:length(all.transcript)) { 
  current.transcript <- all.transcript[i]
  setTxtProgressBar(pb, i)
  if (previous.transcript != current.transcript) {
    j <- j + 1
    # Write out the current transcript info
    output[j,"chrom"] <- previous.chrom
    output[j,"five_prime"] <- previous.five_prime.min
    output[j,"three_prime"] <- previous.three_prime.max
    output[j,"strand"] <- previous.strand
    output[j,"transcript"] <- previous.transcript
    
    # Save the new transcript info (convert the current to previous)
    
    previous.strand <- all.strand[i]
    previous.chrom <- all.chrom[i]
    previous.transcript <- all.transcript[i] # current.transcript
    previous.five_prime.min <- all.five_prime[i]
    previous.three_prime.max <- all.three_prime[i]
    }
  else {
    previous.five_prime.min <- min(previous.five_prime.min, all.five_prime[i])
    previous.three_prime.max <- max(previous.three_prime.max, all.three_prime[i])
    previous.transcript <- all.transcript[i] # current.transcript
  }
}
# Write the last item
j <- j + 1
# Write out the current transcript info
output[j,"chrom"] <- previous.chrom
output[j,"five_prime"] <- previous.five_prime.min
output[j,"three_prime"] <- previous.three_prime.max
output[j,"strand"] <- previous.strand
output[j,"transcript"] <- previous.transcript

close(pb)

colnames(output) <- c("chrom", "txStart", "txEnd", "strand", "name2")
write.table(output, file="gene_annotation.txt", sep="\t", row.names=F)

  #ix <- which(gtf.exon.slim[,"transcript"] == transcripts[i])
  #output[i,"transcript"] <- transcripts[i]
  #output[i,"five_prime"] <- min(gtf.exon.slim[ix,"five_prime"])
  #output[i,"three_prime"] <- max(gtf.exon.slim[ix,"three_prime"])
  #output[i,"strand"] <- gtf.exon.slim[ix,"strand"][1]
  #output[i,"chrom"] <- gtf.exon.slim[ix,"chrom"][1]