Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/GetTrustedSplice.pl @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #!/bin/perl | |
2 | |
3 use strict ; | |
4 use warnings ; | |
5 use List::Util qw[min max]; | |
6 | |
7 die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ; | |
8 | |
9 my %spliceSupport ; | |
10 my %spliceSampleSupport ; | |
11 my %spliceUniqSupport ; | |
12 my %spliceSecSupport ; | |
13 my %uniqSpliceSites ; | |
14 my %spliceSiteSupport ; | |
15 | |
16 my $sampleCnt = 0 ; | |
17 open FP1, $ARGV[0] ; | |
18 while ( <FP1> ) | |
19 { | |
20 ++$sampleCnt ; | |
21 } | |
22 close FP1 ; | |
23 | |
24 open FP1, $ARGV[0] ; | |
25 while ( <FP1> ) | |
26 { | |
27 chomp ; | |
28 open FP2, $_ ; | |
29 while ( <FP2> ) | |
30 { | |
31 chomp ; | |
32 my $line = $_ ; | |
33 my @cols = split /\s+/, $line ; | |
34 my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ; | |
35 if ( $cols[3] <= 0 ) | |
36 { | |
37 $cols[3] = 0.1 ; | |
38 } | |
39 elsif ( $cols[3] == 1 && $sampleCnt > 5 ) | |
40 { | |
41 $cols[3] = 0.75 ; | |
42 } | |
43 | |
44 if ( ! defined $spliceSupport{$key} ) | |
45 { | |
46 $spliceSupport{ $key } = $cols[3] ; | |
47 $spliceSampleSupport{ $key } = 1 ; | |
48 $spliceUniqSupport{ $key } = $cols[5] ; | |
49 $spliceSecSupport{ $key } = $cols[6] ; | |
50 } | |
51 else | |
52 { | |
53 $spliceSupport{ $key } += $cols[3] ; | |
54 $spliceSampleSupport{ $key } += 1 ; | |
55 $spliceUniqSupport{ $key } += $cols[5] ; | |
56 $spliceSecSupport{ $key } += $cols[6] ; | |
57 } | |
58 | |
59 for ( my $i = 1 ; $i <=2 ; ++$i ) | |
60 { | |
61 $key = $cols[0]." ".$cols[$i] ; | |
62 if ( defined $spliceSiteSupport{ $key } ) | |
63 { | |
64 $spliceSiteSupport{ $key } += $cols[3] ; | |
65 } | |
66 else | |
67 { | |
68 $spliceSiteSupport{ $key } = $cols[3] ; | |
69 } | |
70 | |
71 if ( defined $uniqSpliceSites{ $key } && $uniqSpliceSites{ $key } != $cols[2 - $i + 1] ) | |
72 { | |
73 $uniqSpliceSites{ $key } = -1 ; | |
74 } | |
75 else | |
76 { | |
77 $uniqSpliceSites{ $key } = $cols[2 - $i + 1] ; | |
78 } | |
79 } | |
80 } | |
81 close FP2 ; | |
82 } | |
83 close FP1 ; | |
84 | |
85 foreach my $key (keys %spliceSupport) | |
86 { | |
87 next if ( $spliceSupport{ $key } / $sampleCnt < 0.5 ) ; | |
88 #next if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.01 ) ; | |
89 next if ( $spliceUniqSupport{$key} <= 2 && ( $spliceSupport{ $key } / $sampleCnt < 1 || $spliceSampleSupport{$key} < min( 2, $sampleCnt ) ) ) ; | |
90 | |
91 my @cols = split /\s+/, $key ; | |
92 my $flag = 0 ; | |
93 #if ( $cols[2] - $cols[1] + 1 >= 10000 ) | |
94 #{ | |
95 # $flag = 1 if ( $spliceSupport{ $key } / $sampleCnt < 1 ) ; | |
96 #} | |
97 my $siteSupport = max( $spliceSiteSupport{ $cols[0]." ".$cols[1] }, $spliceSiteSupport{ $cols[0]." ".$cols[2] } ) ; | |
98 | |
99 if ( $spliceSupport{ $key } < $siteSupport / 10.0 ) | |
100 { | |
101 #print $spliceSupport{ $key } / $siteSupport, " ", -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ), "\n" ; | |
102 #if ( $cols[1] == 73518141 && $cols[2] == 73518206 ) | |
103 #{ | |
104 # print "test: ", $spliceSupport{$key}, " $siteSupport ", -log( $spliceSupport{ $key } / $siteSupport ), "\n"; | |
105 #} | |
106 my $needSample = min( -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ; | |
107 next if ( $spliceSampleSupport{ $key } < $needSample ) ; | |
108 } | |
109 | |
110 if ( $cols[2] - $cols[1] + 1 >= 100000 ) | |
111 { | |
112 my $needSample = int( ( $cols[2] - $cols[1] + 1 ) / 100000 ) + 1 ; | |
113 $needSample = $sampleCnt if ( $needSample > $sampleCnt ) ; | |
114 $flag = 1 if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.1 | |
115 || ( $spliceUniqSupport{ $key } / $sampleCnt < 1 ) | |
116 || $spliceSampleSupport{ $key } < $needSample ) ; | |
117 next if ( $flag == 1 && $cols[2] - $cols[1] + 1 >= 300000 ) ; | |
118 } | |
119 if ( $flag == 1 && ( ( $uniqSpliceSites{ $cols[0]." ".$cols[1] } == -1 || $uniqSpliceSites{ $cols[0]." ".$cols[2] } == -1 ) | |
120 || $spliceSampleSupport{ $key } <= 1 ) ) | |
121 { | |
122 next ; | |
123 } | |
124 print $cols[0], " ", $cols[1], " ", $cols[2], " 10 ", $cols[3], " 10 0 0 0\n" ; | |
125 } |