1
|
1 #!/usr/bin/perl
|
|
2 my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
|
|
3 # compare two outputs from SpliceTrap
|
|
4 use strict;
|
|
5
|
|
6 # the information needed
|
|
7 # inclusion ratio input file
|
|
8 # filtered out or not input file
|
|
9 # minimal inclusion ratio at least 0.1 for one condition
|
|
10 # minimal splicing changes parameter
|
|
11 # orignial pipeline written by Martin Akerman
|
|
12 # re-organized and re-written by Jie Wu
|
|
13
|
|
14 use FileHandle;
|
|
15
|
|
16 use Getopt::Long;
|
|
17
|
|
18 my @programs = ('grep','mkdir','R','paste','awk','sort');
|
|
19 foreach my $program (@programs)
|
|
20 {
|
|
21 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
|
|
22 }
|
|
23
|
|
24
|
|
25 my $InputFileName1 = "";
|
|
26 my $InputFileName2 = "";
|
|
27 my $OutputFileName = "";
|
|
28 my $minchange = 0.3;
|
|
29 my $mininc = 0.1;
|
|
30 my $noIRM = 0;
|
|
31
|
|
32
|
|
33 GetOptions (
|
|
34 "1:s"=>\$InputFileName1,
|
|
35 "2:s"=>\$InputFileName2,
|
|
36 "o:s"=>\$OutputFileName,
|
|
37 "noIRM|noirm"=>\$noIRM,
|
|
38 "m:f"=>\$mininc,
|
|
39 "c:f"=>\$minchange
|
|
40 );
|
|
41
|
|
42 my $InputParaDes=" Usage of the script:
|
|
43 -1 input file 1, output from SpliceTrap, *.raw file in the output folder
|
|
44 -2 input file 2. see above.
|
|
45 -o output file prefix.
|
|
46 -c minimal change required, [default:0.3]
|
|
47 -m minimal inclusion ratio for at least one condition. [defualt:0.1]
|
|
48 --noIRM Use the unadjusted inclusion ratios (before IRM correction)
|
|
49 ";
|
|
50
|
|
51 if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
|
|
52 {
|
|
53 print $InputParaDes;
|
|
54 exit;
|
|
55 }
|
|
56
|
|
57
|
|
58
|
|
59 if(-d "$OutputFileName.cache" )
|
|
60 {
|
|
61 print "Aborted! output cache folder exists: $OutputFileName.cache \n";
|
|
62 exit;
|
|
63 }
|
|
64 else
|
|
65 {
|
|
66 system("mkdir $OutputFileName.cache");
|
|
67 }
|
|
68
|
|
69 #
|
|
70 my %ir1; # records ir from file1
|
|
71 my %ir2; # records ir from file2
|
|
72 # only records trios above the cutoffs
|
|
73
|
|
74 open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
|
|
75 while(my $line=<input1>)
|
|
76 {
|
|
77 chomp($line);
|
|
78 my @a = split("\t", $line);
|
|
79 if($a[21] ne "na")
|
|
80 {
|
|
81 if($noIRM)
|
|
82 {
|
|
83 $ir1{$a[0]} = $a[1];
|
|
84 }
|
|
85 else
|
|
86 {
|
|
87 $ir1{$a[0]} = $a[2];
|
|
88 }
|
|
89 }
|
|
90 }
|
|
91 print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
|
|
92 close(input1);
|
|
93
|
|
94 open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
|
|
95 while(my $line=<input2>)
|
|
96 {
|
|
97 chomp($line);
|
|
98 my @a = split("\t", $line);
|
|
99 if($a[21] ne "na")
|
|
100 {
|
|
101 if($noIRM)
|
|
102 {
|
|
103 $ir2{$a[0]} = $a[1];
|
|
104 }
|
|
105 else
|
|
106 {
|
|
107 $ir2{$a[0]} = $a[2];
|
|
108 }
|
|
109 }
|
|
110 }
|
|
111 print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
|
|
112
|
|
113
|
|
114 close(input2);
|
|
115
|
|
116
|
|
117 ##
|
|
118 my %mean;
|
|
119 my %sd;
|
|
120
|
|
121 my %num;
|
|
122
|
|
123 my %filehandles;
|
|
124
|
|
125 my @types = ("CA", "IR", "AD","AA");
|
|
126
|
|
127 foreach my $type (@types)
|
|
128 {
|
|
129 my $fh = new FileHandle;
|
|
130 open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
|
|
131 $filehandles{$type} = $fh;
|
|
132 }
|
|
133
|
|
134
|
|
135 foreach my $key (keys %ir1)
|
|
136 {
|
|
137 if(exists $ir2{$key})
|
|
138 {
|
|
139 if(($ir1{$key} + $ir2{$key}) > 0)
|
|
140 {
|
|
141 #find the type
|
|
142 my $type = substr($key, 0, 2);
|
|
143 $type = "CA" if $type eq "CS";
|
|
144 $num{$type}++;
|
|
145
|
|
146 my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
|
|
147 $mean{$type} = $mean{$type} + $change;
|
|
148 $sd{$type} = $change*$change + $sd{$type};
|
|
149
|
|
150 $change = sprintf("%.4f",$change);
|
|
151
|
|
152 my $fout = $filehandles{$type};
|
|
153 print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
|
|
154 }
|
|
155 }
|
|
156 }
|
|
157
|
|
158 foreach my $type (keys %filehandles)
|
|
159 {
|
|
160 close($filehandles{$type});
|
|
161 if($num{$type} == 0)
|
|
162 {
|
|
163 warn "no AS events passed filters for both files\n";
|
|
164 next;
|
|
165 }
|
|
166 $mean{$type} = $mean{$type}/$num{$type};
|
|
167 $sd{$type} = sqrt($sd{$type}/$num{$type});
|
|
168 system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
|
|
169 system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
|
|
170 print "$num{$type} $type events processed...\n";
|
|
171 #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n";
|
|
172
|
|
173 }
|
|
174 system("rm $OutputFileName.cache -rf");
|
|
175
|
|
176
|