0
|
1 #!/bin/perl
|
|
2
|
|
3 use strict ;
|
|
4 use warnings ;
|
|
5 use List::Util qw[min max];
|
|
6
|
|
7 die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ;
|
|
8
|
|
9 my %spliceSupport ;
|
|
10 my %spliceSampleSupport ;
|
|
11 my %spliceUniqSupport ;
|
|
12 my %spliceSecSupport ;
|
|
13 my %uniqSpliceSites ;
|
|
14 my %spliceSiteSupport ;
|
|
15
|
|
16 my $sampleCnt = 0 ;
|
|
17 open FP1, $ARGV[0] ;
|
|
18 while ( <FP1> )
|
|
19 {
|
|
20 ++$sampleCnt ;
|
|
21 }
|
|
22 close FP1 ;
|
|
23
|
|
24 open FP1, $ARGV[0] ;
|
|
25 while ( <FP1> )
|
|
26 {
|
|
27 chomp ;
|
|
28 open FP2, $_ ;
|
|
29 while ( <FP2> )
|
|
30 {
|
|
31 chomp ;
|
|
32 my $line = $_ ;
|
|
33 my @cols = split /\s+/, $line ;
|
|
34 my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ;
|
|
35 if ( $cols[3] <= 0 )
|
|
36 {
|
|
37 $cols[3] = 0.1 ;
|
|
38 }
|
|
39 elsif ( $cols[3] == 1 && $sampleCnt > 5 )
|
|
40 {
|
|
41 $cols[3] = 0.75 ;
|
|
42 }
|
|
43
|
|
44 if ( ! defined $spliceSupport{$key} )
|
|
45 {
|
|
46 $spliceSupport{ $key } = $cols[3] ;
|
|
47 $spliceSampleSupport{ $key } = 1 ;
|
|
48 $spliceUniqSupport{ $key } = $cols[5] ;
|
|
49 $spliceSecSupport{ $key } = $cols[6] ;
|
|
50 }
|
|
51 else
|
|
52 {
|
|
53 $spliceSupport{ $key } += $cols[3] ;
|
|
54 $spliceSampleSupport{ $key } += 1 ;
|
|
55 $spliceUniqSupport{ $key } += $cols[5] ;
|
|
56 $spliceSecSupport{ $key } += $cols[6] ;
|
|
57 }
|
|
58
|
|
59 for ( my $i = 1 ; $i <=2 ; ++$i )
|
|
60 {
|
|
61 $key = $cols[0]." ".$cols[$i] ;
|
|
62 if ( defined $spliceSiteSupport{ $key } )
|
|
63 {
|
|
64 $spliceSiteSupport{ $key } += $cols[3] ;
|
|
65 }
|
|
66 else
|
|
67 {
|
|
68 $spliceSiteSupport{ $key } = $cols[3] ;
|
|
69 }
|
|
70
|
|
71 if ( defined $uniqSpliceSites{ $key } && $uniqSpliceSites{ $key } != $cols[2 - $i + 1] )
|
|
72 {
|
|
73 $uniqSpliceSites{ $key } = -1 ;
|
|
74 }
|
|
75 else
|
|
76 {
|
|
77 $uniqSpliceSites{ $key } = $cols[2 - $i + 1] ;
|
|
78 }
|
|
79 }
|
|
80 }
|
|
81 close FP2 ;
|
|
82 }
|
|
83 close FP1 ;
|
|
84
|
|
85 foreach my $key (keys %spliceSupport)
|
|
86 {
|
|
87 next if ( $spliceSupport{ $key } / $sampleCnt < 0.5 ) ;
|
|
88 #next if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.01 ) ;
|
|
89 next if ( $spliceUniqSupport{$key} <= 2 && ( $spliceSupport{ $key } / $sampleCnt < 1 || $spliceSampleSupport{$key} < min( 2, $sampleCnt ) ) ) ;
|
|
90
|
|
91 my @cols = split /\s+/, $key ;
|
|
92 my $flag = 0 ;
|
|
93 #if ( $cols[2] - $cols[1] + 1 >= 10000 )
|
|
94 #{
|
|
95 # $flag = 1 if ( $spliceSupport{ $key } / $sampleCnt < 1 ) ;
|
|
96 #}
|
|
97 my $siteSupport = max( $spliceSiteSupport{ $cols[0]." ".$cols[1] }, $spliceSiteSupport{ $cols[0]." ".$cols[2] } ) ;
|
|
98
|
|
99 if ( $spliceSupport{ $key } < $siteSupport / 10.0 )
|
|
100 {
|
|
101 #print $spliceSupport{ $key } / $siteSupport, " ", -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ), "\n" ;
|
|
102 #if ( $cols[1] == 73518141 && $cols[2] == 73518206 )
|
|
103 #{
|
|
104 # print "test: ", $spliceSupport{$key}, " $siteSupport ", -log( $spliceSupport{ $key } / $siteSupport ), "\n";
|
|
105 #}
|
|
106 my $needSample = min( -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ;
|
|
107 next if ( $spliceSampleSupport{ $key } < $needSample ) ;
|
|
108 }
|
|
109
|
|
110 if ( $cols[2] - $cols[1] + 1 >= 100000 )
|
|
111 {
|
|
112 my $needSample = int( ( $cols[2] - $cols[1] + 1 ) / 100000 ) + 1 ;
|
|
113 $needSample = $sampleCnt if ( $needSample > $sampleCnt ) ;
|
|
114 $flag = 1 if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.1
|
|
115 || ( $spliceUniqSupport{ $key } / $sampleCnt < 1 )
|
|
116 || $spliceSampleSupport{ $key } < $needSample ) ;
|
|
117 next if ( $flag == 1 && $cols[2] - $cols[1] + 1 >= 300000 ) ;
|
|
118 }
|
|
119 if ( $flag == 1 && ( ( $uniqSpliceSites{ $cols[0]." ".$cols[1] } == -1 || $uniqSpliceSites{ $cols[0]." ".$cols[2] } == -1 )
|
|
120 || $spliceSampleSupport{ $key } <= 1 ) )
|
|
121 {
|
|
122 next ;
|
|
123 }
|
|
124 print $cols[0], " ", $cols[1], " ", $cols[2], " 10 ", $cols[3], " 10 0 0 0\n" ;
|
|
125 }
|