Mercurial > repos > kmace > mtls_analysis
comparison mtls_analyze/gtfToMapFriendlyAnnotation.R @ 4:b465306d00ba draft default tip
Uploaded
| author | kmace |
|---|---|
| date | Mon, 23 Jul 2012 13:00:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:a0306edbf2f8 | 4:b465306d00ba |
|---|---|
| 1 rm (list = ls()) | |
| 2 args <- commandArgs() | |
| 3 print(args) | |
| 4 gtf <- read.table(args[6],as.is=T) | |
| 5 colnames(gtf) <- c("chrom", | |
| 6 "source", | |
| 7 "type", | |
| 8 "five_prime", | |
| 9 "three_prime", | |
| 10 "score", | |
| 11 "strand", | |
| 12 "frame", | |
| 13 "nothinggene", | |
| 14 "gene", | |
| 15 "nothingSemi1", | |
| 16 "nothingtranscript", | |
| 17 "transcript", | |
| 18 "nothingSemi2") | |
| 19 gtf.exon <- gtf[which(gtf[,"type"]=="exon"),] | |
| 20 gtf.exon.slim <-gtf.exon[,c("chrom", "five_prime", "three_prime", "strand", "transcript")] | |
| 21 gtf.exon.slim.sort <- gtf.exon.slim[sort.list(gtf.exon.slim[,"transcript"]),] | |
| 22 | |
| 23 | |
| 24 | |
| 25 transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"])) | |
| 26 #transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"])) | |
| 27 | |
| 28 | |
| 29 | |
| 30 output <- matrix(0,nr=length(transcripts),nc=ncol(gtf.exon.slim.sort)) | |
| 31 | |
| 32 all.chrom <- gtf.exon.slim.sort[,"chrom"] | |
| 33 all.five_prime <- gtf.exon.slim.sort[,"five_prime"] | |
| 34 all.three_prime <- gtf.exon.slim.sort[,"three_prime"] | |
| 35 all.strand <- gtf.exon.slim.sort[,"strand"] | |
| 36 all.transcript <- gtf.exon.slim.sort[,"transcript"] | |
| 37 | |
| 38 colnames(output) <- colnames(gtf.exon.slim.sort) | |
| 39 | |
| 40 j <- 0 | |
| 41 i <- 1 | |
| 42 | |
| 43 previous.strand <- gtf.exon.slim.sort[i,"strand"] | |
| 44 previous.chrom <- gtf.exon.slim.sort[i,"chrom"] | |
| 45 previous.transcript <- gtf.exon.slim.sort[i,"transcript"] | |
| 46 previous.five_prime.min <- gtf.exon.slim.sort[i,"five_prime"] | |
| 47 previous.three_prime.max <- gtf.exon.slim.sort[i,"three_prime"] | |
| 48 | |
| 49 # Progress bar: | |
| 50 total <- dim(gtf.exon.slim.sort)[1] | |
| 51 # create progress bar | |
| 52 pb <- txtProgressBar(min = 0, max = total, style = 3) | |
| 53 | |
| 54 | |
| 55 | |
| 56 | |
| 57 for ( i in 1:length(all.transcript)) { | |
| 58 current.transcript <- all.transcript[i] | |
| 59 setTxtProgressBar(pb, i) | |
| 60 if (previous.transcript != current.transcript) { | |
| 61 j <- j + 1 | |
| 62 # Write out the current transcript info | |
| 63 output[j,"chrom"] <- previous.chrom | |
| 64 output[j,"five_prime"] <- previous.five_prime.min | |
| 65 output[j,"three_prime"] <- previous.three_prime.max | |
| 66 output[j,"strand"] <- previous.strand | |
| 67 output[j,"transcript"] <- previous.transcript | |
| 68 | |
| 69 # Save the new transcript info (convert the current to previous) | |
| 70 | |
| 71 previous.strand <- all.strand[i] | |
| 72 previous.chrom <- all.chrom[i] | |
| 73 previous.transcript <- all.transcript[i] # current.transcript | |
| 74 previous.five_prime.min <- all.five_prime[i] | |
| 75 previous.three_prime.max <- all.three_prime[i] | |
| 76 } | |
| 77 else { | |
| 78 previous.five_prime.min <- min(previous.five_prime.min, all.five_prime[i]) | |
| 79 previous.three_prime.max <- max(previous.three_prime.max, all.three_prime[i]) | |
| 80 previous.transcript <- all.transcript[i] # current.transcript | |
| 81 } | |
| 82 } | |
| 83 # Write the last item | |
| 84 j <- j + 1 | |
| 85 # Write out the current transcript info | |
| 86 output[j,"chrom"] <- previous.chrom | |
| 87 output[j,"five_prime"] <- previous.five_prime.min | |
| 88 output[j,"three_prime"] <- previous.three_prime.max | |
| 89 output[j,"strand"] <- previous.strand | |
| 90 output[j,"transcript"] <- previous.transcript | |
| 91 | |
| 92 close(pb) | |
| 93 | |
| 94 colnames(output) <- c("chrom", "txStart", "txEnd", "strand", "name2") | |
| 95 write.table(output, file="gene_annotation.txt", sep="\t", row.names=F) | |
| 96 | |
| 97 #ix <- which(gtf.exon.slim[,"transcript"] == transcripts[i]) | |
| 98 #output[i,"transcript"] <- transcripts[i] | |
| 99 #output[i,"five_prime"] <- min(gtf.exon.slim[ix,"five_prime"]) | |
| 100 #output[i,"three_prime"] <- max(gtf.exon.slim[ix,"three_prime"]) | |
| 101 #output[i,"strand"] <- gtf.exon.slim[ix,"strand"][1] | |
| 102 #output[i,"chrom"] <- gtf.exon.slim[ix,"chrom"][1] |
