Mercurial > repos > bioitcore > splicetrap
comparison bin/SpliceChange.pl @ 1:adc0f7765d85 draft
planemo upload
author | bioitcore |
---|---|
date | Thu, 07 Sep 2017 15:06:58 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d4ca551ca300 | 1:adc0f7765d85 |
---|---|
1 # compare two outputs from SpliceTrap | |
2 use strict; | |
3 | |
4 # the information needed | |
5 # inclusion ratio input file | |
6 # filtered out or not input file | |
7 # minimal inclusion ratio at least 0.1 for one condition | |
8 # minimal splicing changes parameter | |
9 # orignial pipeline written by Martin Akerman | |
10 # re-organized and re-written by Jie Wu | |
11 | |
12 use FileHandle; | |
13 | |
14 use Getopt::Long; | |
15 | |
16 my @programs = ('grep','mkdir','R','paste','awk','sort'); | |
17 foreach my $program (@programs) | |
18 { | |
19 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); | |
20 } | |
21 | |
22 | |
23 my $InputFileName1 = ""; | |
24 my $InputFileName2 = ""; | |
25 my $OutputFileName = ""; | |
26 my $minchange = 0.3; | |
27 my $mininc = 0.1; | |
28 my $noIRM = 0; | |
29 | |
30 | |
31 GetOptions ( | |
32 "1:s"=>\$InputFileName1, | |
33 "2:s"=>\$InputFileName2, | |
34 "o:s"=>\$OutputFileName, | |
35 "noIRM|noirm"=>\$noIRM, | |
36 "m:f"=>\$mininc, | |
37 "c:f"=>\$minchange | |
38 ); | |
39 | |
40 my $InputParaDes=" Usage of the script: | |
41 -1 input file 1, output from SpliceTrap, *.raw file in the output folder | |
42 -2 input file 2. see above. | |
43 -o output file prefix. | |
44 -c minimal change required, [default:0.3] | |
45 -m minimal inclusion ratio for at least one condition. [defualt:0.1] | |
46 --noIRM Use the unadjusted inclusion ratios (before IRM correction) | |
47 "; | |
48 | |
49 if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") | |
50 { | |
51 print $InputParaDes; | |
52 exit; | |
53 } | |
54 | |
55 | |
56 | |
57 if(-d "$OutputFileName.cache" ) | |
58 { | |
59 print "Aborted! output cache folder exists: $OutputFileName.cache \n"; | |
60 exit; | |
61 } | |
62 else | |
63 { | |
64 system("mkdir $OutputFileName.cache"); | |
65 } | |
66 | |
67 # | |
68 my %ir1; # records ir from file1 | |
69 my %ir2; # records ir from file2 | |
70 # only records trios above the cutoffs | |
71 | |
72 open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; | |
73 while(my $line=<input1>) | |
74 { | |
75 chomp($line); | |
76 my @a = split("\t", $line); | |
77 if($a[21] ne "na") | |
78 { | |
79 if($noIRM) | |
80 { | |
81 $ir1{$a[0]} = $a[1]; | |
82 } | |
83 else | |
84 { | |
85 $ir1{$a[0]} = $a[2]; | |
86 } | |
87 } | |
88 } | |
89 print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; | |
90 close(input1); | |
91 | |
92 open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; | |
93 while(my $line=<input2>) | |
94 { | |
95 chomp($line); | |
96 my @a = split("\t", $line); | |
97 if($a[21] ne "na") | |
98 { | |
99 if($noIRM) | |
100 { | |
101 $ir2{$a[0]} = $a[1]; | |
102 } | |
103 else | |
104 { | |
105 $ir2{$a[0]} = $a[2]; | |
106 } | |
107 } | |
108 } | |
109 print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; | |
110 | |
111 | |
112 close(input2); | |
113 | |
114 | |
115 ## | |
116 my %mean; | |
117 my %sd; | |
118 | |
119 my %num; | |
120 | |
121 my %filehandles; | |
122 | |
123 my @types = ("CA", "IR", "AD","AA"); | |
124 | |
125 foreach my $type (@types) | |
126 { | |
127 my $fh = new FileHandle; | |
128 open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; | |
129 $filehandles{$type} = $fh; | |
130 } | |
131 | |
132 | |
133 foreach my $key (keys %ir1) | |
134 { | |
135 if(exists $ir2{$key}) | |
136 { | |
137 if(($ir1{$key} + $ir2{$key}) > 0) | |
138 { | |
139 #find the type | |
140 my $type = substr($key, 0, 2); | |
141 $type = "CA" if $type eq "CS"; | |
142 $num{$type}++; | |
143 | |
144 my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); | |
145 $mean{$type} = $mean{$type} + $change; | |
146 $sd{$type} = $change*$change + $sd{$type}; | |
147 | |
148 $change = sprintf("%.4f",$change); | |
149 | |
150 my $fout = $filehandles{$type}; | |
151 print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; | |
152 } | |
153 } | |
154 } | |
155 | |
156 foreach my $type (keys %filehandles) | |
157 { | |
158 close($filehandles{$type}); | |
159 if($num{$type} == 0) | |
160 { | |
161 warn "no AS events passed filters for both files\n"; | |
162 next; | |
163 } | |
164 $mean{$type} = $mean{$type}/$num{$type}; | |
165 $sd{$type} = sqrt($sd{$type}/$num{$type}); | |
166 system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); | |
167 system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); | |
168 print "$num{$type} $type events processed...\n"; | |
169 #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; | |
170 | |
171 } | |
172 system("rm $OutputFileName.cache -rf"); | |
173 | |
174 |