Mercurial > repos > lsong10 > psiclass
diff PsiCLASS-1.0.2/GetTrustedSplice.pl @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PsiCLASS-1.0.2/GetTrustedSplice.pl Fri Mar 26 16:52:45 2021 +0000 @@ -0,0 +1,125 @@ +#!/bin/perl + +use strict ; +use warnings ; +use List::Util qw[min max]; + +die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ; + +my %spliceSupport ; +my %spliceSampleSupport ; +my %spliceUniqSupport ; +my %spliceSecSupport ; +my %uniqSpliceSites ; +my %spliceSiteSupport ; + +my $sampleCnt = 0 ; +open FP1, $ARGV[0] ; +while ( <FP1> ) +{ + ++$sampleCnt ; +} +close FP1 ; + +open FP1, $ARGV[0] ; +while ( <FP1> ) +{ + chomp ; + open FP2, $_ ; + while ( <FP2> ) + { + chomp ; + my $line = $_ ; + my @cols = split /\s+/, $line ; + my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ; + if ( $cols[3] <= 0 ) + { + $cols[3] = 0.1 ; + } + elsif ( $cols[3] == 1 && $sampleCnt > 5 ) + { + $cols[3] = 0.75 ; + } + + if ( ! defined $spliceSupport{$key} ) + { + $spliceSupport{ $key } = $cols[3] ; + $spliceSampleSupport{ $key } = 1 ; + $spliceUniqSupport{ $key } = $cols[5] ; + $spliceSecSupport{ $key } = $cols[6] ; + } + else + { + $spliceSupport{ $key } += $cols[3] ; + $spliceSampleSupport{ $key } += 1 ; + $spliceUniqSupport{ $key } += $cols[5] ; + $spliceSecSupport{ $key } += $cols[6] ; + } + + for ( my $i = 1 ; $i <=2 ; ++$i ) + { + $key = $cols[0]." ".$cols[$i] ; + if ( defined $spliceSiteSupport{ $key } ) + { + $spliceSiteSupport{ $key } += $cols[3] ; + } + else + { + $spliceSiteSupport{ $key } = $cols[3] ; + } + + if ( defined $uniqSpliceSites{ $key } && $uniqSpliceSites{ $key } != $cols[2 - $i + 1] ) + { + $uniqSpliceSites{ $key } = -1 ; + } + else + { + $uniqSpliceSites{ $key } = $cols[2 - $i + 1] ; + } + } + } + close FP2 ; +} +close FP1 ; + +foreach my $key (keys %spliceSupport) +{ + next if ( $spliceSupport{ $key } / $sampleCnt < 0.5 ) ; + #next if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.01 ) ; + next if ( $spliceUniqSupport{$key} <= 2 && ( $spliceSupport{ $key } / $sampleCnt < 1 || $spliceSampleSupport{$key} < min( 2, $sampleCnt ) ) ) ; + + my @cols = split /\s+/, $key ; + my $flag = 0 ; + #if ( $cols[2] - $cols[1] + 1 >= 10000 ) + #{ + # $flag = 1 if ( $spliceSupport{ $key } / $sampleCnt < 1 ) ; + #} + my $siteSupport = max( $spliceSiteSupport{ $cols[0]." ".$cols[1] }, $spliceSiteSupport{ $cols[0]." ".$cols[2] } ) ; + + if ( $spliceSupport{ $key } < $siteSupport / 10.0 ) + { + #print $spliceSupport{ $key } / $siteSupport, " ", -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ), "\n" ; + #if ( $cols[1] == 73518141 && $cols[2] == 73518206 ) + #{ + # print "test: ", $spliceSupport{$key}, " $siteSupport ", -log( $spliceSupport{ $key } / $siteSupport ), "\n"; + #} + my $needSample = min( -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ; + next if ( $spliceSampleSupport{ $key } < $needSample ) ; + } + + if ( $cols[2] - $cols[1] + 1 >= 100000 ) + { + my $needSample = int( ( $cols[2] - $cols[1] + 1 ) / 100000 ) + 1 ; + $needSample = $sampleCnt if ( $needSample > $sampleCnt ) ; + $flag = 1 if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.1 + || ( $spliceUniqSupport{ $key } / $sampleCnt < 1 ) + || $spliceSampleSupport{ $key } < $needSample ) ; + next if ( $flag == 1 && $cols[2] - $cols[1] + 1 >= 300000 ) ; + } + if ( $flag == 1 && ( ( $uniqSpliceSites{ $cols[0]." ".$cols[1] } == -1 || $uniqSpliceSites{ $cols[0]." ".$cols[2] } == -1 ) + || $spliceSampleSupport{ $key } <= 1 ) ) + { + next ; + } + print $cols[0], " ", $cols[1], " ", $cols[2], " 10 ", $cols[3], " 10 0 0 0\n" ; +}