annotate SpliceChange @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
1 #!/usr/bin/perl
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
2 my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
3 # compare two outputs from SpliceTrap
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
4 use strict;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
5
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
6 # the information needed
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
7 # inclusion ratio input file
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
8 # filtered out or not input file
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
9 # minimal inclusion ratio at least 0.1 for one condition
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
10 # minimal splicing changes parameter
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
11 # orignial pipeline written by Martin Akerman
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
12 # re-organized and re-written by Jie Wu
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
13
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
14 use FileHandle;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
15
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
16 use Getopt::Long;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
17
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
18 my @programs = ('grep','mkdir','R','paste','awk','sort');
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
19 foreach my $program (@programs)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
20 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
21 die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
22 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
23
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
24
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
25 my $InputFileName1 = "";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
26 my $InputFileName2 = "";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
27 my $OutputFileName = "";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
28 my $minchange = 0.3;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
29 my $mininc = 0.1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
30 my $noIRM = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
31
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
32
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
33 GetOptions (
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
34 "1:s"=>\$InputFileName1,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
35 "2:s"=>\$InputFileName2,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
36 "o:s"=>\$OutputFileName,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
37 "noIRM|noirm"=>\$noIRM,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
38 "m:f"=>\$mininc,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
39 "c:f"=>\$minchange
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
40 );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
41
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
42 my $InputParaDes=" Usage of the script:
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
43 -1 input file 1, output from SpliceTrap, *.raw file in the output folder
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
44 -2 input file 2. see above.
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
45 -o output file prefix.
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
46 -c minimal change required, [default:0.3]
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
47 -m minimal inclusion ratio for at least one condition. [defualt:0.1]
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
48 --noIRM Use the unadjusted inclusion ratios (before IRM correction)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
49 ";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
50
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
51 if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "")
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
52 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
53 print $InputParaDes;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
54 exit;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
55 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
56
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
57
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
58
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
59 if(-d "$OutputFileName.cache" )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
60 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
61 print "Aborted! output cache folder exists: $OutputFileName.cache \n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
62 exit;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
63 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
64 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
65 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
66 system("mkdir $OutputFileName.cache");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
67 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
68
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
69 #
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
70 my %ir1; # records ir from file1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
71 my %ir2; # records ir from file2
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
72 # only records trios above the cutoffs
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
73
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
74 open(input1, $InputFileName1) or die "$InputFileName1 open error!\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
75 while(my $line=<input1>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
76 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
77 chomp($line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
78 my @a = split("\t", $line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
79 if($a[21] ne "na")
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
80 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
81 if($noIRM)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
82 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
83 $ir1{$a[0]} = $a[1];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
84 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
85 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
86 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
87 $ir1{$a[0]} = $a[2];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
88 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
89 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
90 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
91 print scalar(keys (%ir1) )," records loaded from $InputFileName1\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
92 close(input1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
93
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
94 open(input2, $InputFileName2) or die "$InputFileName2 open error!\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
95 while(my $line=<input2>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
96 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
97 chomp($line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
98 my @a = split("\t", $line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
99 if($a[21] ne "na")
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
100 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
101 if($noIRM)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
102 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
103 $ir2{$a[0]} = $a[1];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
104 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
105 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
106 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
107 $ir2{$a[0]} = $a[2];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
108 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
109 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
110 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
111 print scalar(keys (%ir2) )," records loaded from $InputFileName2\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
112
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
113
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
114 close(input2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
115
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
116
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
117 ##
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
118 my %mean;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
119 my %sd;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
120
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
121 my %num;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
122
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
123 my %filehandles;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
124
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
125 my @types = ("CA", "IR", "AD","AA");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
126
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
127 foreach my $type (@types)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
128 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
129 my $fh = new FileHandle;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
130 open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
131 $filehandles{$type} = $fh;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
132 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
133
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
134
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
135 foreach my $key (keys %ir1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
136 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
137 if(exists $ir2{$key})
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
138 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
139 if(($ir1{$key} + $ir2{$key}) > 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
140 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
141 #find the type
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
142 my $type = substr($key, 0, 2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
143 $type = "CA" if $type eq "CS";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
144 $num{$type}++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
145
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
146 my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key});
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
147 $mean{$type} = $mean{$type} + $change;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
148 $sd{$type} = $change*$change + $sd{$type};
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
149
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
150 $change = sprintf("%.4f",$change);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
151
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
152 my $fout = $filehandles{$type};
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
153 print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
154 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
155 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
156 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
157
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
158 foreach my $type (keys %filehandles)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
159 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
160 close($filehandles{$type});
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
161 if($num{$type} == 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
162 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
163 warn "no AS events passed filters for both files\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
164 next;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
165 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
166 $mean{$type} = $mean{$type}/$num{$type};
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
167 $sd{$type} = sqrt($sd{$type}/$num{$type});
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
168 system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
169 system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
170 print "$num{$type} $type events processed...\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
171 #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
172
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
173 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
174 system("rm $OutputFileName.cache -rf");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
175
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
176