comparison PsiCLASS-1.0.2/GetTrustedSplice.pl @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 #!/bin/perl
2
3 use strict ;
4 use warnings ;
5 use List::Util qw[min max];
6
7 die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ;
8
9 my %spliceSupport ;
10 my %spliceSampleSupport ;
11 my %spliceUniqSupport ;
12 my %spliceSecSupport ;
13 my %uniqSpliceSites ;
14 my %spliceSiteSupport ;
15
16 my $sampleCnt = 0 ;
17 open FP1, $ARGV[0] ;
18 while ( <FP1> )
19 {
20 ++$sampleCnt ;
21 }
22 close FP1 ;
23
24 open FP1, $ARGV[0] ;
25 while ( <FP1> )
26 {
27 chomp ;
28 open FP2, $_ ;
29 while ( <FP2> )
30 {
31 chomp ;
32 my $line = $_ ;
33 my @cols = split /\s+/, $line ;
34 my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ;
35 if ( $cols[3] <= 0 )
36 {
37 $cols[3] = 0.1 ;
38 }
39 elsif ( $cols[3] == 1 && $sampleCnt > 5 )
40 {
41 $cols[3] = 0.75 ;
42 }
43
44 if ( ! defined $spliceSupport{$key} )
45 {
46 $spliceSupport{ $key } = $cols[3] ;
47 $spliceSampleSupport{ $key } = 1 ;
48 $spliceUniqSupport{ $key } = $cols[5] ;
49 $spliceSecSupport{ $key } = $cols[6] ;
50 }
51 else
52 {
53 $spliceSupport{ $key } += $cols[3] ;
54 $spliceSampleSupport{ $key } += 1 ;
55 $spliceUniqSupport{ $key } += $cols[5] ;
56 $spliceSecSupport{ $key } += $cols[6] ;
57 }
58
59 for ( my $i = 1 ; $i <=2 ; ++$i )
60 {
61 $key = $cols[0]." ".$cols[$i] ;
62 if ( defined $spliceSiteSupport{ $key } )
63 {
64 $spliceSiteSupport{ $key } += $cols[3] ;
65 }
66 else
67 {
68 $spliceSiteSupport{ $key } = $cols[3] ;
69 }
70
71 if ( defined $uniqSpliceSites{ $key } && $uniqSpliceSites{ $key } != $cols[2 - $i + 1] )
72 {
73 $uniqSpliceSites{ $key } = -1 ;
74 }
75 else
76 {
77 $uniqSpliceSites{ $key } = $cols[2 - $i + 1] ;
78 }
79 }
80 }
81 close FP2 ;
82 }
83 close FP1 ;
84
85 foreach my $key (keys %spliceSupport)
86 {
87 next if ( $spliceSupport{ $key } / $sampleCnt < 0.5 ) ;
88 #next if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.01 ) ;
89 next if ( $spliceUniqSupport{$key} <= 2 && ( $spliceSupport{ $key } / $sampleCnt < 1 || $spliceSampleSupport{$key} < min( 2, $sampleCnt ) ) ) ;
90
91 my @cols = split /\s+/, $key ;
92 my $flag = 0 ;
93 #if ( $cols[2] - $cols[1] + 1 >= 10000 )
94 #{
95 # $flag = 1 if ( $spliceSupport{ $key } / $sampleCnt < 1 ) ;
96 #}
97 my $siteSupport = max( $spliceSiteSupport{ $cols[0]." ".$cols[1] }, $spliceSiteSupport{ $cols[0]." ".$cols[2] } ) ;
98
99 if ( $spliceSupport{ $key } < $siteSupport / 10.0 )
100 {
101 #print $spliceSupport{ $key } / $siteSupport, " ", -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ), "\n" ;
102 #if ( $cols[1] == 73518141 && $cols[2] == 73518206 )
103 #{
104 # print "test: ", $spliceSupport{$key}, " $siteSupport ", -log( $spliceSupport{ $key } / $siteSupport ), "\n";
105 #}
106 my $needSample = min( -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ;
107 next if ( $spliceSampleSupport{ $key } < $needSample ) ;
108 }
109
110 if ( $cols[2] - $cols[1] + 1 >= 100000 )
111 {
112 my $needSample = int( ( $cols[2] - $cols[1] + 1 ) / 100000 ) + 1 ;
113 $needSample = $sampleCnt if ( $needSample > $sampleCnt ) ;
114 $flag = 1 if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.1
115 || ( $spliceUniqSupport{ $key } / $sampleCnt < 1 )
116 || $spliceSampleSupport{ $key } < $needSample ) ;
117 next if ( $flag == 1 && $cols[2] - $cols[1] + 1 >= 300000 ) ;
118 }
119 if ( $flag == 1 && ( ( $uniqSpliceSites{ $cols[0]." ".$cols[1] } == -1 || $uniqSpliceSites{ $cols[0]." ".$cols[2] } == -1 )
120 || $spliceSampleSupport{ $key } <= 1 ) )
121 {
122 next ;
123 }
124 print $cols[0], " ", $cols[1], " ", $cols[2], " 10 ", $cols[3], " 10 0 0 0\n" ;
125 }