Mercurial > repos > bioitcore > splicetrap
comparison bin/gtf2bed.pl @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d4ca551ca300 | 1:adc0f7765d85 |
---|---|
# rewrite on Sep 7th,2022

#part of package SpliceTrap

#Jie Wu

# Converts the "exon" records of a GTF file into 12-column BED lines,
# one line per transcript_id, printed to STDOUT. Diagnostics go to STDERR.
#
# Usage: gtf2bed.pl <input.gtf>
use strict;
use warnings;

my $inputfilename = $ARGV[0];
die "Usage: $0 <input.gtf>\n" unless defined $inputfilename;

# input file is a gtf file,
# "transcript_id" is required for each line and should not be ambiguous.
# only the "exon" lines are used

my %chr_hash;       # $chr_hash{$txid}    = sequence name (GTF column 1)
my %strand_hash;    # $strand_hash{$txid} = strand (GTF column 7)
my %tx_exons;       # $tx_exons{$txid}{$start} = $size;

my $linenum = 0;

# Three-argument open on a lexical handle; fail loudly instead of silently
# producing an empty BED file when the input cannot be read.
open(my $input, '<', $inputfilename)
    or die "Cannot open $inputfilename: $!\n";

while (my $line = <$input>)
{
    $linenum++;
    my @fields = split("\t", $line);

    # Comment/header lines and short lines have no feature column; skip them.
    next unless defined $fields[2] and $fields[2] eq "exon";

    my $txid;
    if (defined $fields[8] and $fields[8] =~ /transcript_id "(\S*?)"/)
    {
        $txid = $1;
    }
    else
    {
        die ("$inputfilename format error! No transcript_id in line $linenum \n");
    }

    # A transcript_id seen on a different chromosome is ambiguous: keep the
    # first occurrence and drop this exon.
    if (exists $chr_hash{$txid} and $chr_hash{$txid} ne $fields[0])
    {
        warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped \n");
        next;
    }

    # Same for a conflicting strand. The exon must really be skipped here,
    # otherwise it would be recorded and the stored strand overwritten even
    # though the message says "Skipped".
    if (exists $strand_hash{$txid} and $strand_hash{$txid} ne $fields[6])
    {
        warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped\n");
        next;
    }

    $chr_hash{$txid}    = $fields[0];
    $strand_hash{$txid} = $fields[6];

    # Exon size from 1-based inclusive GTF start/end coordinates.
    $tx_exons{$txid}{$fields[3]} = $fields[4] - $fields[3] + 1;
}

close($input);

foreach my $txid (keys %chr_hash)
{
    # Exons ordered by genomic start; sizes kept in the same order.
    my @starts = sort { $a <=> $b } keys %{ $tx_exons{$txid} };
    my @sizes  = map { $tx_exons{$txid}{$_} } @starts;
    my $exon_num = scalar(@sizes);

    # Report suspiciously large distances between consecutive exon starts.
    for my $i (1 .. $exon_num - 1)
    {
        my $gap = $starts[$i] - $starts[$i-1];
        warn "$txid, intron size..".$gap."\n" if ($gap > 1000000);
    }

    # Block starts are relative to the first exon; the trailing comma of the
    # original output format is kept.
    my $starts_str = join(",", map { $_ - $starts[0] } @starts) . ",";
    my $sizes_str  = join(",", @sizes);

    # End coordinate of the last exon (1-based inclusive == BED exclusive end).
    my $end = $starts[$exon_num-1] + $sizes[$exon_num-1] - 1;

    # BED start is 0-based, hence $starts[0]-1.
    print join("\t", $chr_hash{$txid}, $starts[0]-1, $end, $txid, "0",
               $strand_hash{$txid}, $starts[0]-1, $end, "255,0,0",
               $exon_num, $sizes_str, $starts_str);
    print "\n";
}