Mercurial > repos > lsong10 > psiclass
view PsiCLASS-1.0.2/GetTrustedSplice.pl @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
line wrap: on
line source
#!/bin/perl use strict ; use warnings ; use List::Util qw[min max]; die "usage: a.pl path_to_list_of_splice_file > trusted.splice\n" if ( @ARGV == 0 ) ; my %spliceSupport ; my %spliceSampleSupport ; my %spliceUniqSupport ; my %spliceSecSupport ; my %uniqSpliceSites ; my %spliceSiteSupport ; my $sampleCnt = 0 ; open FP1, $ARGV[0] ; while ( <FP1> ) { ++$sampleCnt ; } close FP1 ; open FP1, $ARGV[0] ; while ( <FP1> ) { chomp ; open FP2, $_ ; while ( <FP2> ) { chomp ; my $line = $_ ; my @cols = split /\s+/, $line ; my $key = $cols[0]." ".$cols[1]." ".$cols[2]." ".$cols[4] ; if ( $cols[3] <= 0 ) { $cols[3] = 0.1 ; } elsif ( $cols[3] == 1 && $sampleCnt > 5 ) { $cols[3] = 0.75 ; } if ( ! defined $spliceSupport{$key} ) { $spliceSupport{ $key } = $cols[3] ; $spliceSampleSupport{ $key } = 1 ; $spliceUniqSupport{ $key } = $cols[5] ; $spliceSecSupport{ $key } = $cols[6] ; } else { $spliceSupport{ $key } += $cols[3] ; $spliceSampleSupport{ $key } += 1 ; $spliceUniqSupport{ $key } += $cols[5] ; $spliceSecSupport{ $key } += $cols[6] ; } for ( my $i = 1 ; $i <=2 ; ++$i ) { $key = $cols[0]." ".$cols[$i] ; if ( defined $spliceSiteSupport{ $key } ) { $spliceSiteSupport{ $key } += $cols[3] ; } else { $spliceSiteSupport{ $key } = $cols[3] ; } if ( defined $uniqSpliceSites{ $key } && $uniqSpliceSites{ $key } != $cols[2 - $i + 1] ) { $uniqSpliceSites{ $key } = -1 ; } else { $uniqSpliceSites{ $key } = $cols[2 - $i + 1] ; } } } close FP2 ; } close FP1 ; foreach my $key (keys %spliceSupport) { next if ( $spliceSupport{ $key } / $sampleCnt < 0.5 ) ; #next if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.01 ) ; next if ( $spliceUniqSupport{$key} <= 2 && ( $spliceSupport{ $key } / $sampleCnt < 1 || $spliceSampleSupport{$key} < min( 2, $sampleCnt ) ) ) ; my @cols = split /\s+/, $key ; my $flag = 0 ; #if ( $cols[2] - $cols[1] + 1 >= 10000 ) #{ # $flag = 1 if ( $spliceSupport{ $key } / $sampleCnt < 1 ) ; #} my $siteSupport = max( $spliceSiteSupport{ $cols[0]." ".$cols[1] }, $spliceSiteSupport{ $cols[0]." ".$cols[2] } ) ; if ( $spliceSupport{ $key } < $siteSupport / 10.0 ) { #print $spliceSupport{ $key } / $siteSupport, " ", -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ), "\n" ; #if ( $cols[1] == 73518141 && $cols[2] == 73518206 ) #{ # print "test: ", $spliceSupport{$key}, " $siteSupport ", -log( $spliceSupport{ $key } / $siteSupport ), "\n"; #} my $needSample = min( -log( $spliceSupport{ $key } / $siteSupport ) / log( 10.0 ) + 1, $sampleCnt ) ; next if ( $spliceSampleSupport{ $key } < $needSample ) ; } if ( $cols[2] - $cols[1] + 1 >= 100000 ) { my $needSample = int( ( $cols[2] - $cols[1] + 1 ) / 100000 ) + 1 ; $needSample = $sampleCnt if ( $needSample > $sampleCnt ) ; $flag = 1 if ( $spliceUniqSupport{$key} / ( $spliceSecSupport{$key} + $spliceUniqSupport{$key} ) < 0.1 || ( $spliceUniqSupport{ $key } / $sampleCnt < 1 ) || $spliceSampleSupport{ $key } < $needSample ) ; next if ( $flag == 1 && $cols[2] - $cols[1] + 1 >= 300000 ) ; } if ( $flag == 1 && ( ( $uniqSpliceSites{ $cols[0]." ".$cols[1] } == -1 || $uniqSpliceSites{ $cols[0]." ".$cols[2] } == -1 ) || $spliceSampleSupport{ $key } <= 1 ) ) { next ; } print $cols[0], " ", $cols[1], " ", $cols[2], " 10 ", $cols[3], " 10 0 0 0\n" ; }