Mercurial > repos > bioitcore > splicetrap
diff SpliceChange @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SpliceChange Thu Sep 07 15:06:58 2017 -0400 @@ -0,0 +1,176 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + +