annotate mtls_analyze/gtfToMapFriendlyAnnotation.R @ 4:b465306d00ba draft default tip

Uploaded
author kmace
date Mon, 23 Jul 2012 13:00:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
b465306d00ba Uploaded
kmace
parents:
diff changeset
1 rm (list = ls())
b465306d00ba Uploaded
kmace
parents:
diff changeset
2 args <- commandArgs()
b465306d00ba Uploaded
kmace
parents:
diff changeset
3 print(args)
b465306d00ba Uploaded
kmace
parents:
diff changeset
4 gtf <- read.table(args[6],as.is=T)
b465306d00ba Uploaded
kmace
parents:
diff changeset
5 colnames(gtf) <- c("chrom",
b465306d00ba Uploaded
kmace
parents:
diff changeset
6 "source",
b465306d00ba Uploaded
kmace
parents:
diff changeset
7 "type",
b465306d00ba Uploaded
kmace
parents:
diff changeset
8 "five_prime",
b465306d00ba Uploaded
kmace
parents:
diff changeset
9 "three_prime",
b465306d00ba Uploaded
kmace
parents:
diff changeset
10 "score",
b465306d00ba Uploaded
kmace
parents:
diff changeset
11 "strand",
b465306d00ba Uploaded
kmace
parents:
diff changeset
12 "frame",
b465306d00ba Uploaded
kmace
parents:
diff changeset
13 "nothinggene",
b465306d00ba Uploaded
kmace
parents:
diff changeset
14 "gene",
b465306d00ba Uploaded
kmace
parents:
diff changeset
15 "nothingSemi1",
b465306d00ba Uploaded
kmace
parents:
diff changeset
16 "nothingtranscript",
b465306d00ba Uploaded
kmace
parents:
diff changeset
17 "transcript",
b465306d00ba Uploaded
kmace
parents:
diff changeset
18 "nothingSemi2")
b465306d00ba Uploaded
kmace
parents:
diff changeset
19 gtf.exon <- gtf[which(gtf[,"type"]=="exon"),]
b465306d00ba Uploaded
kmace
parents:
diff changeset
20 gtf.exon.slim <-gtf.exon[,c("chrom", "five_prime", "three_prime", "strand", "transcript")]
b465306d00ba Uploaded
kmace
parents:
diff changeset
21 gtf.exon.slim.sort <- gtf.exon.slim[sort.list(gtf.exon.slim[,"transcript"]),]
b465306d00ba Uploaded
kmace
parents:
diff changeset
22
b465306d00ba Uploaded
kmace
parents:
diff changeset
23
b465306d00ba Uploaded
kmace
parents:
diff changeset
24
b465306d00ba Uploaded
kmace
parents:
diff changeset
25 transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))
b465306d00ba Uploaded
kmace
parents:
diff changeset
26 #transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))
b465306d00ba Uploaded
kmace
parents:
diff changeset
27
b465306d00ba Uploaded
kmace
parents:
diff changeset
28
b465306d00ba Uploaded
kmace
parents:
diff changeset
29
b465306d00ba Uploaded
kmace
parents:
diff changeset
30 output <- matrix(0,nr=length(transcripts),nc=ncol(gtf.exon.slim.sort))
b465306d00ba Uploaded
kmace
parents:
diff changeset
31
b465306d00ba Uploaded
kmace
parents:
diff changeset
32 all.chrom <- gtf.exon.slim.sort[,"chrom"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
33 all.five_prime <- gtf.exon.slim.sort[,"five_prime"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
34 all.three_prime <- gtf.exon.slim.sort[,"three_prime"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
35 all.strand <- gtf.exon.slim.sort[,"strand"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
36 all.transcript <- gtf.exon.slim.sort[,"transcript"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
37
b465306d00ba Uploaded
kmace
parents:
diff changeset
38 colnames(output) <- colnames(gtf.exon.slim.sort)
b465306d00ba Uploaded
kmace
parents:
diff changeset
39
b465306d00ba Uploaded
kmace
parents:
diff changeset
40 j <- 0
b465306d00ba Uploaded
kmace
parents:
diff changeset
41 i <- 1
b465306d00ba Uploaded
kmace
parents:
diff changeset
42
b465306d00ba Uploaded
kmace
parents:
diff changeset
43 previous.strand <- gtf.exon.slim.sort[i,"strand"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
44 previous.chrom <- gtf.exon.slim.sort[i,"chrom"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
45 previous.transcript <- gtf.exon.slim.sort[i,"transcript"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
46 previous.five_prime.min <- gtf.exon.slim.sort[i,"five_prime"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
47 previous.three_prime.max <- gtf.exon.slim.sort[i,"three_prime"]
b465306d00ba Uploaded
kmace
parents:
diff changeset
48
b465306d00ba Uploaded
kmace
parents:
diff changeset
49 # Progress bar:
b465306d00ba Uploaded
kmace
parents:
diff changeset
50 total <- dim(gtf.exon.slim.sort)[1]
b465306d00ba Uploaded
kmace
parents:
diff changeset
51 # create progress bar
b465306d00ba Uploaded
kmace
parents:
diff changeset
52 pb <- txtProgressBar(min = 0, max = total, style = 3)
b465306d00ba Uploaded
kmace
parents:
diff changeset
53
b465306d00ba Uploaded
kmace
parents:
diff changeset
54
b465306d00ba Uploaded
kmace
parents:
diff changeset
55
b465306d00ba Uploaded
kmace
parents:
diff changeset
56
b465306d00ba Uploaded
kmace
parents:
diff changeset
57 for ( i in 1:length(all.transcript)) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
58 current.transcript <- all.transcript[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
59 setTxtProgressBar(pb, i)
b465306d00ba Uploaded
kmace
parents:
diff changeset
60 if (previous.transcript != current.transcript) {
b465306d00ba Uploaded
kmace
parents:
diff changeset
61 j <- j + 1
b465306d00ba Uploaded
kmace
parents:
diff changeset
62 # Write out the current transcript info
b465306d00ba Uploaded
kmace
parents:
diff changeset
63 output[j,"chrom"] <- previous.chrom
b465306d00ba Uploaded
kmace
parents:
diff changeset
64 output[j,"five_prime"] <- previous.five_prime.min
b465306d00ba Uploaded
kmace
parents:
diff changeset
65 output[j,"three_prime"] <- previous.three_prime.max
b465306d00ba Uploaded
kmace
parents:
diff changeset
66 output[j,"strand"] <- previous.strand
b465306d00ba Uploaded
kmace
parents:
diff changeset
67 output[j,"transcript"] <- previous.transcript
b465306d00ba Uploaded
kmace
parents:
diff changeset
68
b465306d00ba Uploaded
kmace
parents:
diff changeset
69 # Save the new transcript info (convert the current to previous)
b465306d00ba Uploaded
kmace
parents:
diff changeset
70
b465306d00ba Uploaded
kmace
parents:
diff changeset
71 previous.strand <- all.strand[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
72 previous.chrom <- all.chrom[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
73 previous.transcript <- all.transcript[i] # current.transcript
b465306d00ba Uploaded
kmace
parents:
diff changeset
74 previous.five_prime.min <- all.five_prime[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
75 previous.three_prime.max <- all.three_prime[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
76 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
77 else {
b465306d00ba Uploaded
kmace
parents:
diff changeset
78 previous.five_prime.min <- min(previous.five_prime.min, all.five_prime[i])
b465306d00ba Uploaded
kmace
parents:
diff changeset
79 previous.three_prime.max <- max(previous.three_prime.max, all.three_prime[i])
b465306d00ba Uploaded
kmace
parents:
diff changeset
80 previous.transcript <- all.transcript[i] # current.transcript
b465306d00ba Uploaded
kmace
parents:
diff changeset
81 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
82 }
b465306d00ba Uploaded
kmace
parents:
diff changeset
83 # Write the last item
b465306d00ba Uploaded
kmace
parents:
diff changeset
84 j <- j + 1
b465306d00ba Uploaded
kmace
parents:
diff changeset
85 # Write out the current transcript info
b465306d00ba Uploaded
kmace
parents:
diff changeset
86 output[j,"chrom"] <- previous.chrom
b465306d00ba Uploaded
kmace
parents:
diff changeset
87 output[j,"five_prime"] <- previous.five_prime.min
b465306d00ba Uploaded
kmace
parents:
diff changeset
88 output[j,"three_prime"] <- previous.three_prime.max
b465306d00ba Uploaded
kmace
parents:
diff changeset
89 output[j,"strand"] <- previous.strand
b465306d00ba Uploaded
kmace
parents:
diff changeset
90 output[j,"transcript"] <- previous.transcript
b465306d00ba Uploaded
kmace
parents:
diff changeset
91
b465306d00ba Uploaded
kmace
parents:
diff changeset
92 close(pb)
b465306d00ba Uploaded
kmace
parents:
diff changeset
93
b465306d00ba Uploaded
kmace
parents:
diff changeset
94 colnames(output) <- c("chrom", "txStart", "txEnd", "strand", "name2")
b465306d00ba Uploaded
kmace
parents:
diff changeset
95 write.table(output, file="gene_annotation.txt", sep="\t", row.names=F)
b465306d00ba Uploaded
kmace
parents:
diff changeset
96
b465306d00ba Uploaded
kmace
parents:
diff changeset
97 #ix <- which(gtf.exon.slim[,"transcript"] == transcripts[i])
b465306d00ba Uploaded
kmace
parents:
diff changeset
98 #output[i,"transcript"] <- transcripts[i]
b465306d00ba Uploaded
kmace
parents:
diff changeset
99 #output[i,"five_prime"] <- min(gtf.exon.slim[ix,"five_prime"])
b465306d00ba Uploaded
kmace
parents:
diff changeset
100 #output[i,"three_prime"] <- max(gtf.exon.slim[ix,"three_prime"])
b465306d00ba Uploaded
kmace
parents:
diff changeset
101 #output[i,"strand"] <- gtf.exon.slim[ix,"strand"][1]
b465306d00ba Uploaded
kmace
parents:
diff changeset
102 #output[i,"chrom"] <- gtf.exon.slim[ix,"chrom"][1]