Mercurial > repos > bioitcore > splicetrap
comparison SpliceChange @ 1:adc0f7765d85 draft
planemo upload
| author | bioitcore |
|---|---|
| date | Thu, 07 Sep 2017 15:06:58 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d4ca551ca300 | 1:adc0f7765d85 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; | |
| 3 # compare two outputs from SpliceTrap | |
| 4 use strict; | |
| 5 | |
| 6 # the information needed | |
| 7 # inclusion ratio input file | |
| 8 # filtered out or not input file | |
| 9 # minimal inclusion ratio at least 0.1 for one condition | |
| 10 # minimal splicing changes parameter | |
| 11 # orignial pipeline written by Martin Akerman | |
| 12 # re-organized and re-written by Jie Wu | |
| 13 | |
| 14 use FileHandle; | |
| 15 | |
| 16 use Getopt::Long; | |
| 17 | |
| 18 my @programs = ('grep','mkdir','R','paste','awk','sort'); | |
| 19 foreach my $program (@programs) | |
| 20 { | |
| 21 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); | |
| 22 } | |
| 23 | |
| 24 | |
| 25 my $InputFileName1 = ""; | |
| 26 my $InputFileName2 = ""; | |
| 27 my $OutputFileName = ""; | |
| 28 my $minchange = 0.3; | |
| 29 my $mininc = 0.1; | |
| 30 my $noIRM = 0; | |
| 31 | |
| 32 | |
| 33 GetOptions ( | |
| 34 "1:s"=>\$InputFileName1, | |
| 35 "2:s"=>\$InputFileName2, | |
| 36 "o:s"=>\$OutputFileName, | |
| 37 "noIRM|noirm"=>\$noIRM, | |
| 38 "m:f"=>\$mininc, | |
| 39 "c:f"=>\$minchange | |
| 40 ); | |
| 41 | |
| 42 my $InputParaDes=" Usage of the script: | |
| 43 -1 input file 1, output from SpliceTrap, *.raw file in the output folder | |
| 44 -2 input file 2. see above. | |
| 45 -o output file prefix. | |
| 46 -c minimal change required, [default:0.3] | |
| 47 -m minimal inclusion ratio for at least one condition. [defualt:0.1] | |
| 48 --noIRM Use the unadjusted inclusion ratios (before IRM correction) | |
| 49 "; | |
| 50 | |
| 51 if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") | |
| 52 { | |
| 53 print $InputParaDes; | |
| 54 exit; | |
| 55 } | |
| 56 | |
| 57 | |
| 58 | |
| 59 if(-d "$OutputFileName.cache" ) | |
| 60 { | |
| 61 print "Aborted! output cache folder exists: $OutputFileName.cache \n"; | |
| 62 exit; | |
| 63 } | |
| 64 else | |
| 65 { | |
| 66 system("mkdir $OutputFileName.cache"); | |
| 67 } | |
| 68 | |
| 69 # | |
| 70 my %ir1; # records ir from file1 | |
| 71 my %ir2; # records ir from file2 | |
| 72 # only records trios above the cutoffs | |
| 73 | |
| 74 open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; | |
| 75 while(my $line=<input1>) | |
| 76 { | |
| 77 chomp($line); | |
| 78 my @a = split("\t", $line); | |
| 79 if($a[21] ne "na") | |
| 80 { | |
| 81 if($noIRM) | |
| 82 { | |
| 83 $ir1{$a[0]} = $a[1]; | |
| 84 } | |
| 85 else | |
| 86 { | |
| 87 $ir1{$a[0]} = $a[2]; | |
| 88 } | |
| 89 } | |
| 90 } | |
| 91 print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; | |
| 92 close(input1); | |
| 93 | |
| 94 open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; | |
| 95 while(my $line=<input2>) | |
| 96 { | |
| 97 chomp($line); | |
| 98 my @a = split("\t", $line); | |
| 99 if($a[21] ne "na") | |
| 100 { | |
| 101 if($noIRM) | |
| 102 { | |
| 103 $ir2{$a[0]} = $a[1]; | |
| 104 } | |
| 105 else | |
| 106 { | |
| 107 $ir2{$a[0]} = $a[2]; | |
| 108 } | |
| 109 } | |
| 110 } | |
| 111 print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; | |
| 112 | |
| 113 | |
| 114 close(input2); | |
| 115 | |
| 116 | |
| 117 ## | |
| 118 my %mean; | |
| 119 my %sd; | |
| 120 | |
| 121 my %num; | |
| 122 | |
| 123 my %filehandles; | |
| 124 | |
| 125 my @types = ("CA", "IR", "AD","AA"); | |
| 126 | |
| 127 foreach my $type (@types) | |
| 128 { | |
| 129 my $fh = new FileHandle; | |
| 130 open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; | |
| 131 $filehandles{$type} = $fh; | |
| 132 } | |
| 133 | |
| 134 | |
| 135 foreach my $key (keys %ir1) | |
| 136 { | |
| 137 if(exists $ir2{$key}) | |
| 138 { | |
| 139 if(($ir1{$key} + $ir2{$key}) > 0) | |
| 140 { | |
| 141 #find the type | |
| 142 my $type = substr($key, 0, 2); | |
| 143 $type = "CA" if $type eq "CS"; | |
| 144 $num{$type}++; | |
| 145 | |
| 146 my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); | |
| 147 $mean{$type} = $mean{$type} + $change; | |
| 148 $sd{$type} = $change*$change + $sd{$type}; | |
| 149 | |
| 150 $change = sprintf("%.4f",$change); | |
| 151 | |
| 152 my $fout = $filehandles{$type}; | |
| 153 print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; | |
| 154 } | |
| 155 } | |
| 156 } | |
| 157 | |
| 158 foreach my $type (keys %filehandles) | |
| 159 { | |
| 160 close($filehandles{$type}); | |
| 161 if($num{$type} == 0) | |
| 162 { | |
| 163 warn "no AS events passed filters for both files\n"; | |
| 164 next; | |
| 165 } | |
| 166 $mean{$type} = $mean{$type}/$num{$type}; | |
| 167 $sd{$type} = sqrt($sd{$type}/$num{$type}); | |
| 168 system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); | |
| 169 system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); | |
| 170 print "$num{$type} $type events processed...\n"; | |
| 171 #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; | |
| 172 | |
| 173 } | |
| 174 system("rm $OutputFileName.cache -rf"); | |
| 175 | |
| 176 |
