Mercurial > repos > cristian > rbgoa
comparison gomwu_a.pl @ 1:f7287f82602f draft
"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author | cristian |
---|---|
date | Tue, 19 Apr 2022 08:28:43 +0000 |
parents | 91261b42c07e |
children | 5acf9dfdfa27 |
comparison
equal
deleted
inserted
replaced
0:91261b42c07e | 1:f7287f82602f |
---|---|
10 See README_GO_MWU.txt file for details. | 10 See README_GO_MWU.txt file for details. |
11 | 11 |
12 Mikhail Matz, UT Austin; matz@utexas.edu | 12 Mikhail Matz, UT Austin; matz@utexas.edu |
13 | 13 |
14 "; | 14 "; |
15 | |
16 use File::Basename; | |
15 | 17 |
16 print "@ARGV"; | 18 print "@ARGV"; |
17 | 19 |
18 my $onto=$ARGV[0] or die $usage; | 20 my $onto=$ARGV[0] or die $usage; |
19 my $gen2go=$ARGV[1] or die $usage; | 21 my $gen2go=$ARGV[1] or die $usage; |
45 if ($div eq "BP") { $division="biological_process";} | 47 if ($div eq "BP") { $division="biological_process";} |
46 elsif ($div eq "MF") { $division="molecular_function";} | 48 elsif ($div eq "MF") { $division="molecular_function";} |
47 elsif ($div eq "CC") { $division="cellular_component";} | 49 elsif ($div eq "CC") { $division="cellular_component";} |
48 else { die "unrecognized division: $div\n";} | 50 else { die "unrecognized division: $div\n";} |
49 | 51 |
50 my $inname2=$measure.".".$div.".tmp"; | 52 ($mname,$mdir,$mext) = fileparse($measure,'\..*'); |
51 my $inname3=$div."_".$measure; | 53 ($aname,$adir,$aext) = fileparse($gen2go,'\..*'); |
52 my $inname31="dissim0_".$div."_".$gen2go; | 54 print "$mname - $mdir - $mext\n"; |
53 my $inname4="dissim_".$div."_".$measure."_".$gen2go; | 55 my $inname2=$mdir.$mname.".".$div.".tmp"; |
56 my $inname3=$mdir.$mname."_".$div.".tsv"; | |
57 my $inname31=$mdir."dissim0_".$div."_".$aname.$aext; | |
58 my $inname4=$mdir."dissim_".$div."_".$mname."_".$aname.$aext; | |
54 | 59 |
55 my @donealready=(); | 60 my @donealready=(); |
56 | 61 |
57 open GO, $onto or die "cannot open ontology $onto\n"; | 62 open GO, $onto or die "cannot open ontology $onto\n"; |
58 open CORAL, $gen2go or die "cannot open genes2go table $gen2go\n"; | 63 open CORAL, $gen2go or die "cannot open genes2go table $gen2go\n"; |
129 if (!$l){ | 134 if (!$l){ |
130 $l++; | 135 $l++; |
131 next; | 136 next; |
132 } | 137 } |
133 chomp; | 138 chomp; |
134 ($seq,$ns)=split(/,/, $_); | 139 ($seq,$ns)=split(/\t/, $_); |
135 if ($seq=~/SEQ/) { $seq.="_s";} | 140 if ($seq=~/SEQ/) { $seq.="_s";} |
136 $dnds{$seq}=$ns; | 141 $dnds{$seq}=$ns; |
137 } | 142 } |
138 | 143 |
139 #reformatting | 144 #reformatting |
215 my %level={}; | 220 my %level={}; |
216 my %desc={}; | 221 my %desc={}; |
217 my %value={}; | 222 my %value={}; |
218 my $des; | 223 my $des; |
219 my $go; | 224 my $go; |
220 my $l; | 225 my $ll; |
221 my $gn; | 226 my $gn; |
222 my $val; | 227 my $val; |
223 my @gos=(); | 228 my @gos=(); |
224 my %genes={}; | 229 my %genes={}; |
225 my @gcount=(); | 230 my @gcount=(); |
226 my %gci={}; | 231 my %gci={}; |
227 my %goi={}; | 232 my %goi={}; |
228 | 233 |
229 while (<TAB>){ | 234 while (<TAB>){ |
230 chomp; | 235 chomp; |
231 ($des,$go,$l,$val,$gn)=split(/\t/,$_); | 236 ($des,$go,$ll,$val,$gn)=split(/\t/,$_); |
232 $value{$gn}=$val; | 237 $value{$gn}=$val; |
233 $desc{$go}=$des; | 238 $desc{$go}=$des; |
234 push @{$genes{$go}},$gn; | 239 push @{$genes{$go}},$gn; |
235 push @gcount, $gn unless ($gci{$gn}==1) ; | 240 push @gcount, $gn unless ($gci{$gn}==1) ; |
236 $gci{$gn}=1; | 241 $gci{$gn}=1; |
314 $overlap{$go2,$go}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1)); | 319 $overlap{$go2,$go}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1)); |
315 } | 320 } |
316 } | 321 } |
317 } | 322 } |
318 | 323 |
319 open OUT, ">$inname31" or die "gomwu_b: cannot create output $inname31\n"; | 324 open OUT, ">$inname31" or die "gomwu_a: cannot create output $inname31\n"; |
320 | 325 |
321 print {OUT} join("\t",@gos),"\n"; | 326 print {OUT} join("\t",@gos),"\n"; |
322 | 327 |
323 foreach $go (@gos) { | 328 foreach $go (@gos) { |
324 $overlap{$go,$go}=1; | 329 $overlap{$go,$go}=1; |