comparison bin/SpliceChange.pl @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
comparison
equal deleted inserted replaced
0:d4ca551ca300 1:adc0f7765d85
1 # compare two outputs from SpliceTrap
2 use strict;
3
4 # the information needed
5 # inclusion ratio input file
6 # filtered out or not input file
7 # minimal inclusion ratio at least 0.1 for one condition
8 # minimal splicing changes parameter
9 # orignial pipeline written by Martin Akerman
10 # re-organized and re-written by Jie Wu
11
12 use FileHandle;
13
14 use Getopt::Long;
15
16 my @programs = ('grep','mkdir','R','paste','awk','sort');
17 foreach my $program (@programs)
18 {
19 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
20 }
21
22
23 my $InputFileName1 = "";
24 my $InputFileName2 = "";
25 my $OutputFileName = "";
26 my $minchange = 0.3;
27 my $mininc = 0.1;
28 my $noIRM = 0;
29
30
31 GetOptions (
32 "1:s"=>\$InputFileName1,
33 "2:s"=>\$InputFileName2,
34 "o:s"=>\$OutputFileName,
35 "noIRM|noirm"=>\$noIRM,
36 "m:f"=>\$mininc,
37 "c:f"=>\$minchange
38 );
39
40 my $InputParaDes=" Usage of the script:
41 -1 input file 1, output from SpliceTrap, *.raw file in the output folder
42 -2 input file 2. see above.
43 -o output file prefix.
44 -c minimal change required, [default:0.3]
45 -m minimal inclusion ratio for at least one condition. [defualt:0.1]
46 --noIRM Use the unadjusted inclusion ratios (before IRM correction)
47 ";
48
49 if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
50 {
51 print $InputParaDes;
52 exit;
53 }
54
55
56
57 if(-d "$OutputFileName.cache" )
58 {
59 print "Aborted! output cache folder exists: $OutputFileName.cache \n";
60 exit;
61 }
62 else
63 {
64 system("mkdir $OutputFileName.cache");
65 }
66
67 #
68 my %ir1; # records ir from file1
69 my %ir2; # records ir from file2
70 # only records trios above the cutoffs
71
72 open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
73 while(my $line=<input1>)
74 {
75 chomp($line);
76 my @a = split("\t", $line);
77 if($a[21] ne "na")
78 {
79 if($noIRM)
80 {
81 $ir1{$a[0]} = $a[1];
82 }
83 else
84 {
85 $ir1{$a[0]} = $a[2];
86 }
87 }
88 }
89 print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
90 close(input1);
91
92 open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
93 while(my $line=<input2>)
94 {
95 chomp($line);
96 my @a = split("\t", $line);
97 if($a[21] ne "na")
98 {
99 if($noIRM)
100 {
101 $ir2{$a[0]} = $a[1];
102 }
103 else
104 {
105 $ir2{$a[0]} = $a[2];
106 }
107 }
108 }
109 print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
110
111
112 close(input2);
113
114
115 ##
116 my %mean;
117 my %sd;
118
119 my %num;
120
121 my %filehandles;
122
123 my @types = ("CA", "IR", "AD","AA");
124
125 foreach my $type (@types)
126 {
127 my $fh = new FileHandle;
128 open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
129 $filehandles{$type} = $fh;
130 }
131
132
133 foreach my $key (keys %ir1)
134 {
135 if(exists $ir2{$key})
136 {
137 if(($ir1{$key} + $ir2{$key}) > 0)
138 {
139 #find the type
140 my $type = substr($key, 0, 2);
141 $type = "CA" if $type eq "CS";
142 $num{$type}++;
143
144 my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
145 $mean{$type} = $mean{$type} + $change;
146 $sd{$type} = $change*$change + $sd{$type};
147
148 $change = sprintf("%.4f",$change);
149
150 my $fout = $filehandles{$type};
151 print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
152 }
153 }
154 }
155
156 foreach my $type (keys %filehandles)
157 {
158 close($filehandles{$type});
159 if($num{$type} == 0)
160 {
161 warn "no AS events passed filters for both files\n";
162 next;
163 }
164 $mean{$type} = $mean{$type}/$num{$type};
165 $sd{$type} = sqrt($sd{$type}/$num{$type});
166 system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
167 system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
168 print "$num{$type} $type events processed...\n";
169 #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n";
170
171 }
172 system("rm $OutputFileName.cache -rf");
173
174