Mercurial > repos > bioitcore > splicetrap
view SpliceChange @ 4:cd336e593a92 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 16:53:12 -0400 |
parents | adc0f7765d85 |
children |
line wrap: on
line source
#!/usr/bin/perl my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; # compare two outputs from SpliceTrap use strict; # the information needed # inclusion ratio input file # filtered out or not input file # minimal inclusion ratio at least 0.1 for one condition # minimal splicing changes parameter # orignial pipeline written by Martin Akerman # re-organized and re-written by Jie Wu use FileHandle; use Getopt::Long; my @programs = ('grep','mkdir','R','paste','awk','sort'); foreach my $program (@programs) { die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); } my $InputFileName1 = ""; my $InputFileName2 = ""; my $OutputFileName = ""; my $minchange = 0.3; my $mininc = 0.1; my $noIRM = 0; GetOptions ( "1:s"=>\$InputFileName1, "2:s"=>\$InputFileName2, "o:s"=>\$OutputFileName, "noIRM|noirm"=>\$noIRM, "m:f"=>\$mininc, "c:f"=>\$minchange ); my $InputParaDes=" Usage of the script: -1 input file 1, output from SpliceTrap, *.raw file in the output folder -2 input file 2. see above. -o output file prefix. -c minimal change required, [default:0.3] -m minimal inclusion ratio for at least one condition. [defualt:0.1] --noIRM Use the unadjusted inclusion ratios (before IRM correction) "; if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") { print $InputParaDes; exit; } if(-d "$OutputFileName.cache" ) { print "Aborted! output cache folder exists: $OutputFileName.cache \n"; exit; } else { system("mkdir $OutputFileName.cache"); } # my %ir1; # records ir from file1 my %ir2; # records ir from file2 # only records trios above the cutoffs open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; while(my $line=<input1>) { chomp($line); my @a = split("\t", $line); if($a[21] ne "na") { if($noIRM) { $ir1{$a[0]} = $a[1]; } else { $ir1{$a[0]} = $a[2]; } } } print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; close(input1); open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; while(my $line=<input2>) { chomp($line); my @a = split("\t", $line); if($a[21] ne "na") { if($noIRM) { $ir2{$a[0]} = $a[1]; } else { $ir2{$a[0]} = $a[2]; } } } print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; close(input2); ## my %mean; my %sd; my %num; my %filehandles; my @types = ("CA", "IR", "AD","AA"); foreach my $type (@types) { my $fh = new FileHandle; open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; $filehandles{$type} = $fh; } foreach my $key (keys %ir1) { if(exists $ir2{$key}) { if(($ir1{$key} + $ir2{$key}) > 0) { #find the type my $type = substr($key, 0, 2); $type = "CA" if $type eq "CS"; $num{$type}++; my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); $mean{$type} = $mean{$type} + $change; $sd{$type} = $change*$change + $sd{$type}; $change = sprintf("%.4f",$change); my $fout = $filehandles{$type}; print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; } } } foreach my $type (keys %filehandles) { close($filehandles{$type}); if($num{$type} == 0) { warn "no AS events passed filters for both files\n"; next; } $mean{$type} = $mean{$type}/$num{$type}; $sd{$type} = sqrt($sd{$type}/$num{$type}); system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); print "$num{$type} $type events processed...\n"; #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; } system("rm $OutputFileName.cache -rf");