Mercurial > repos > kmace > mtls_analysis
view mtls_analyze/gtfToMapFriendlyAnnotation.R @ 4:b465306d00ba draft default tip
Uploaded
author | kmace |
---|---|
date | Mon, 23 Jul 2012 13:00:15 -0400 |
parents | |
children |
line wrap: on
line source
rm (list = ls()) args <- commandArgs() print(args) gtf <- read.table(args[6],as.is=T) colnames(gtf) <- c("chrom", "source", "type", "five_prime", "three_prime", "score", "strand", "frame", "nothinggene", "gene", "nothingSemi1", "nothingtranscript", "transcript", "nothingSemi2") gtf.exon <- gtf[which(gtf[,"type"]=="exon"),] gtf.exon.slim <-gtf.exon[,c("chrom", "five_prime", "three_prime", "strand", "transcript")] gtf.exon.slim.sort <- gtf.exon.slim[sort.list(gtf.exon.slim[,"transcript"]),] transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"])) #transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"])) output <- matrix(0,nr=length(transcripts),nc=ncol(gtf.exon.slim.sort)) all.chrom <- gtf.exon.slim.sort[,"chrom"] all.five_prime <- gtf.exon.slim.sort[,"five_prime"] all.three_prime <- gtf.exon.slim.sort[,"three_prime"] all.strand <- gtf.exon.slim.sort[,"strand"] all.transcript <- gtf.exon.slim.sort[,"transcript"] colnames(output) <- colnames(gtf.exon.slim.sort) j <- 0 i <- 1 previous.strand <- gtf.exon.slim.sort[i,"strand"] previous.chrom <- gtf.exon.slim.sort[i,"chrom"] previous.transcript <- gtf.exon.slim.sort[i,"transcript"] previous.five_prime.min <- gtf.exon.slim.sort[i,"five_prime"] previous.three_prime.max <- gtf.exon.slim.sort[i,"three_prime"] # Progress bar: total <- dim(gtf.exon.slim.sort)[1] # create progress bar pb <- txtProgressBar(min = 0, max = total, style = 3) for ( i in 1:length(all.transcript)) { current.transcript <- all.transcript[i] setTxtProgressBar(pb, i) if (previous.transcript != current.transcript) { j <- j + 1 # Write out the current transcript info output[j,"chrom"] <- previous.chrom output[j,"five_prime"] <- previous.five_prime.min output[j,"three_prime"] <- previous.three_prime.max output[j,"strand"] <- previous.strand output[j,"transcript"] <- previous.transcript # Save the new transcript info (convert the current to previous) previous.strand <- all.strand[i] previous.chrom <- all.chrom[i] previous.transcript <- all.transcript[i] # current.transcript previous.five_prime.min <- all.five_prime[i] previous.three_prime.max <- all.three_prime[i] } else { previous.five_prime.min <- min(previous.five_prime.min, all.five_prime[i]) previous.three_prime.max <- max(previous.three_prime.max, all.three_prime[i]) previous.transcript <- all.transcript[i] # current.transcript } } # Write the last item j <- j + 1 # Write out the current transcript info output[j,"chrom"] <- previous.chrom output[j,"five_prime"] <- previous.five_prime.min output[j,"three_prime"] <- previous.three_prime.max output[j,"strand"] <- previous.strand output[j,"transcript"] <- previous.transcript close(pb) colnames(output) <- c("chrom", "txStart", "txEnd", "strand", "name2") write.table(output, file="gene_annotation.txt", sep="\t", row.names=F) #ix <- which(gtf.exon.slim[,"transcript"] == transcripts[i]) #output[i,"transcript"] <- transcripts[i] #output[i,"five_prime"] <- min(gtf.exon.slim[ix,"five_prime"]) #output[i,"three_prime"] <- max(gtf.exon.slim[ix,"three_prime"]) #output[i,"strand"] <- gtf.exon.slim[ix,"strand"][1] #output[i,"chrom"] <- gtf.exon.slim[ix,"chrom"][1]