4
|
1 rm (list = ls())
|
|
2 args <- commandArgs()
|
|
3 print(args)
|
|
4 gtf <- read.table(args[6],as.is=T)
|
|
5 colnames(gtf) <- c("chrom",
|
|
6 "source",
|
|
7 "type",
|
|
8 "five_prime",
|
|
9 "three_prime",
|
|
10 "score",
|
|
11 "strand",
|
|
12 "frame",
|
|
13 "nothinggene",
|
|
14 "gene",
|
|
15 "nothingSemi1",
|
|
16 "nothingtranscript",
|
|
17 "transcript",
|
|
18 "nothingSemi2")
|
|
19 gtf.exon <- gtf[which(gtf[,"type"]=="exon"),]
|
|
20 gtf.exon.slim <-gtf.exon[,c("chrom", "five_prime", "three_prime", "strand", "transcript")]
|
|
21 gtf.exon.slim.sort <- gtf.exon.slim[sort.list(gtf.exon.slim[,"transcript"]),]
|
|
22
|
|
23
|
|
24
|
|
25 transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))
|
|
26 #transcripts <- as.vector(unique(gtf.exon.slim[,"transcript"]))
|
|
27
|
|
28
|
|
29
|
|
30 output <- matrix(0,nr=length(transcripts),nc=ncol(gtf.exon.slim.sort))
|
|
31
|
|
32 all.chrom <- gtf.exon.slim.sort[,"chrom"]
|
|
33 all.five_prime <- gtf.exon.slim.sort[,"five_prime"]
|
|
34 all.three_prime <- gtf.exon.slim.sort[,"three_prime"]
|
|
35 all.strand <- gtf.exon.slim.sort[,"strand"]
|
|
36 all.transcript <- gtf.exon.slim.sort[,"transcript"]
|
|
37
|
|
38 colnames(output) <- colnames(gtf.exon.slim.sort)
|
|
39
|
|
40 j <- 0
|
|
41 i <- 1
|
|
42
|
|
43 previous.strand <- gtf.exon.slim.sort[i,"strand"]
|
|
44 previous.chrom <- gtf.exon.slim.sort[i,"chrom"]
|
|
45 previous.transcript <- gtf.exon.slim.sort[i,"transcript"]
|
|
46 previous.five_prime.min <- gtf.exon.slim.sort[i,"five_prime"]
|
|
47 previous.three_prime.max <- gtf.exon.slim.sort[i,"three_prime"]
|
|
48
|
|
49 # Progress bar:
|
|
50 total <- dim(gtf.exon.slim.sort)[1]
|
|
51 # create progress bar
|
|
52 pb <- txtProgressBar(min = 0, max = total, style = 3)
|
|
53
|
|
54
|
|
55
|
|
56
|
|
57 for ( i in 1:length(all.transcript)) {
|
|
58 current.transcript <- all.transcript[i]
|
|
59 setTxtProgressBar(pb, i)
|
|
60 if (previous.transcript != current.transcript) {
|
|
61 j <- j + 1
|
|
62 # Write out the current transcript info
|
|
63 output[j,"chrom"] <- previous.chrom
|
|
64 output[j,"five_prime"] <- previous.five_prime.min
|
|
65 output[j,"three_prime"] <- previous.three_prime.max
|
|
66 output[j,"strand"] <- previous.strand
|
|
67 output[j,"transcript"] <- previous.transcript
|
|
68
|
|
69 # Save the new transcript info (convert the current to previous)
|
|
70
|
|
71 previous.strand <- all.strand[i]
|
|
72 previous.chrom <- all.chrom[i]
|
|
73 previous.transcript <- all.transcript[i] # current.transcript
|
|
74 previous.five_prime.min <- all.five_prime[i]
|
|
75 previous.three_prime.max <- all.three_prime[i]
|
|
76 }
|
|
77 else {
|
|
78 previous.five_prime.min <- min(previous.five_prime.min, all.five_prime[i])
|
|
79 previous.three_prime.max <- max(previous.three_prime.max, all.three_prime[i])
|
|
80 previous.transcript <- all.transcript[i] # current.transcript
|
|
81 }
|
|
82 }
|
|
83 # Write the last item
|
|
84 j <- j + 1
|
|
85 # Write out the current transcript info
|
|
86 output[j,"chrom"] <- previous.chrom
|
|
87 output[j,"five_prime"] <- previous.five_prime.min
|
|
88 output[j,"three_prime"] <- previous.three_prime.max
|
|
89 output[j,"strand"] <- previous.strand
|
|
90 output[j,"transcript"] <- previous.transcript
|
|
91
|
|
92 close(pb)
|
|
93
|
|
94 colnames(output) <- c("chrom", "txStart", "txEnd", "strand", "name2")
|
|
95 write.table(output, file="gene_annotation.txt", sep="\t", row.names=F)
|
|
96
|
|
97 #ix <- which(gtf.exon.slim[,"transcript"] == transcripts[i])
|
|
98 #output[i,"transcript"] <- transcripts[i]
|
|
99 #output[i,"five_prime"] <- min(gtf.exon.slim[ix,"five_prime"])
|
|
100 #output[i,"three_prime"] <- max(gtf.exon.slim[ix,"three_prime"])
|
|
101 #output[i,"strand"] <- gtf.exon.slim[ix,"strand"][1]
|
|
102 #output[i,"chrom"] <- gtf.exon.slim[ix,"chrom"][1]
|