annotate PsiCLASS-1.0.2/GetTrustedSplice.pl @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
#!/bin/perl

# Merge the per-sample splice files named in a list file and print the
# junctions that pass a series of cross-sample support filters.
#
# Usage: a.pl path_to_list_of_splice_file > trusted.splice
#
# The list file holds one splice-file path per line; every non-empty line
# counts as one sample.  Each splice-file line is split on whitespace and its
# fields are used as follows:
#   fields 0, 1, 2, 4 - together identify a junction (presumably chromosome,
#                       start, end and strand -- confirm against the producer
#                       of the splice files)
#   field 3           - support of the junction in this sample
#   fields 5 and 6    - accumulated as "uniq" and "sec" support (presumably
#                       unique and secondary alignment counts -- confirm)
# Every junction that is kept is printed to STDOUT as
#   "<field0> <field1> <field2> 10 <field4> 10 0 0 0"
# in hash iteration order (i.e. not sorted).

use strict ;
use warnings ;
use List::Util qw[min max];

die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ;

my %spliceSupport ;       # junction key -> summed (adjusted) field 3 over all lines
my %spliceSampleSupport ; # junction key -> number of splice-file lines with this key
my %spliceUniqSupport ;   # junction key -> summed field 5
my %spliceSecSupport ;    # junction key -> summed field 6
my %uniqSpliceSites ;     # "field0 site" -> the only partner site seen, or -1 if several
my %spliceSiteSupport ;   # "field0 site" -> summed (adjusted) support of junctions using the site

# Read the list of splice files once.  Blank lines are not samples, so they are
# neither counted nor opened.  Surrounding whitespace is trimmed because the
# three-argument open below takes the path literally.
my $listFile = $ARGV[0] ;
open( my $listFh, '<', $listFile ) or die "Cannot open list file '$listFile': $!\n" ;
my @spliceFiles ;
while ( my $path = <$listFh> )
{
	chomp $path ;
	$path =~ s/^\s+// ;
	$path =~ s/\s+$// ;
	next if ( $path eq "" ) ;
	push @spliceFiles, $path ;
}
close $listFh ;

my $sampleCnt = scalar( @spliceFiles ) ;

foreach my $spliceFile ( @spliceFiles )
{
	# Fail loudly: a file that cannot be read would otherwise still be counted
	# in $sampleCnt and silently lower every per-sample ratio below.
	open( my $spliceFh, '<', $spliceFile ) or die "Cannot open splice file '$spliceFile': $!\n" ;
	while ( my $line = <$spliceFh> )
	{
		chomp $line ;
		# split ' ' ignores leading whitespace, so fields never shift by one.
		my @cols = split ' ', $line ;
		# Fields 0-4 are needed to build the key; skip blank or truncated lines.
		next if ( @cols < 5 ) ;

		my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ;
		# Missing trailing fields count as 0.
		my $uniq = defined( $cols[5] ) ? $cols[5] : 0 ;
		my $sec = defined( $cols[6] ) ? $cols[6] : 0 ;

		# Non-positive support is replaced by 0.1; a support of exactly 1 is
		# down-weighted to 0.75 when there are more than 5 samples.
		my $support = $cols[3] ;
		if ( $support <= 0 )
		{
			$support = 0.1 ;
		}
		elsif ( $support == 1 && $sampleCnt > 5 )
		{
			$support = 0.75 ;
		}

		# += on a missing hash entry starts from 0, so no "first time" branch is needed.
		$spliceSupport{ $key } += $support ;
		$spliceSampleSupport{ $key } += 1 ;
		$spliceUniqSupport{ $key } += $uniq ;
		$spliceSecSupport{ $key } += $sec ;

		# Per-site bookkeeping for both ends of the junction (fields 1 and 2).
		foreach my $i ( 1, 2 )
		{
			my $siteKey = $cols[0]." ".$cols[$i] ;
			my $partner = $cols[3 - $i] ; # the other end: field 2 for i=1, field 1 for i=2

			$spliceSiteSupport{ $siteKey } += $support ;

			if ( defined $uniqSpliceSites{ $siteKey } && $uniqSpliceSites{ $siteKey } != $partner )
			{
				# This site has been paired with more than one partner.
				$uniqSpliceSites{ $siteKey } = -1 ;
			}
			else
			{
				$uniqSpliceSites{ $siteKey } = $partner ;
			}
		}
	}
	close $spliceFh ;
}

foreach my $key ( keys %spliceSupport )
{
	my $support = $spliceSupport{ $key } ;
	my $sampleSupport = $spliceSampleSupport{ $key } ;
	my $uniqSupport = $spliceUniqSupport{ $key } ;
	my $secSupport = $spliceSecSupport{ $key } ;

	# Require an average (adjusted) support of at least 0.5 per sample.
	next if ( $support / $sampleCnt < 0.5 ) ;
	# With a summed field 5 of at most 2, additionally require an average
	# support of at least 1 and at least min(2, #samples) supporting lines.
	next if ( $uniqSupport <= 2 && ( $support / $sampleCnt < 1 || $sampleSupport < min( 2, $sampleCnt ) ) ) ;

	my ( $chr, $start, $end, $strand ) = split ' ', $key ;
	my $siteSupport = max( $spliceSiteSupport{ $chr." ".$start }, $spliceSiteSupport{ $chr." ".$end } ) ;

	# A junction with less than a tenth of its best site's support needs more
	# supporting lines: one per order of magnitude of the gap, plus one, capped
	# at the number of samples.
	if ( $support < $siteSupport / 10.0 )
	{
		my $needSample = min( -log( $support / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ;
		next if ( $sampleSupport < $needSample ) ;
	}

	my $flag = 0 ; # set to 1 when a long junction looks weakly supported
	my $span = $end - $start + 1 ;
	if ( $span >= 100000 )
	{
		my $needSample = int( $span / 100000 ) + 1 ;
		$needSample = $sampleCnt if ( $needSample > $sampleCnt ) ;

		# Guard the ratio: when both summed fields are 0 the division would abort
		# the script.  In that case the junction is flagged, which the
		# "uniq per sample < 1" test below would do anyway.
		my $totalSupport = $secSupport + $uniqSupport ;
		$flag = 1 if ( $totalSupport == 0
			|| $uniqSupport / $totalSupport < 0.1
			|| ( $uniqSupport / $sampleCnt < 1 )
			|| $sampleSupport < $needSample ) ;
		next if ( $flag == 1 && $span >= 300000 ) ;
	}

	# A flagged junction is dropped when either end is shared with another
	# junction or when at most one line supports it.
	if ( $flag == 1 && ( ( $uniqSpliceSites{ $chr." ".$start } == -1 || $uniqSpliceSites{ $chr." ".$end } == -1 )
			|| $sampleSupport <= 1 ) )
	{
		next ;
	}

	print $chr, " ", $start, " ", $end, " 10 ", $strand, " 10 0 0 0\n" ;
}