comparison gomwu_a.pl @ 1:f7287f82602f draft

"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author cristian
date Tue, 19 Apr 2022 08:28:43 +0000
parents 91261b42c07e
children 5acf9dfdfa27
comparison
equal deleted inserted replaced
0:91261b42c07e 1:f7287f82602f
10 See README_GO_MWU.txt file for details. 10 See README_GO_MWU.txt file for details.
11 11
12 Mikhail Matz, UT Austin; matz@utexas.edu 12 Mikhail Matz, UT Austin; matz@utexas.edu
13 13
14 "; 14 ";
15
16 use File::Basename;
15 17
16 print "@ARGV"; 18 print "@ARGV";
17 19
18 my $onto=$ARGV[0] or die $usage; 20 my $onto=$ARGV[0] or die $usage;
19 my $gen2go=$ARGV[1] or die $usage; 21 my $gen2go=$ARGV[1] or die $usage;
45 if ($div eq "BP") { $division="biological_process";} 47 if ($div eq "BP") { $division="biological_process";}
46 elsif ($div eq "MF") { $division="molecular_function";} 48 elsif ($div eq "MF") { $division="molecular_function";}
47 elsif ($div eq "CC") { $division="cellular_component";} 49 elsif ($div eq "CC") { $division="cellular_component";}
48 else { die "unrecognized division: $div\n";} 50 else { die "unrecognized division: $div\n";}
49 51
50 my $inname2=$measure.".".$div.".tmp"; 52 ($mname,$mdir,$mext) = fileparse($measure,'\..*');
51 my $inname3=$div."_".$measure; 53 ($aname,$adir,$aext) = fileparse($gen2go,'\..*');
52 my $inname31="dissim0_".$div."_".$gen2go; 54 print "$mname - $mdir - $mext\n";
53 my $inname4="dissim_".$div."_".$measure."_".$gen2go; 55 my $inname2=$mdir.$mname.".".$div.".tmp";
56 my $inname3=$mdir.$mname."_".$div.".tsv";
57 my $inname31=$mdir."dissim0_".$div."_".$aname.$aext;
58 my $inname4=$mdir."dissim_".$div."_".$mname."_".$aname.$aext;
54 59
55 my @donealready=(); 60 my @donealready=();
56 61
57 open GO, $onto or die "cannot open ontology $onto\n"; 62 open GO, $onto or die "cannot open ontology $onto\n";
58 open CORAL, $gen2go or die "cannot open genes2go table $gen2go\n"; 63 open CORAL, $gen2go or die "cannot open genes2go table $gen2go\n";
129 if (!$l){ 134 if (!$l){
130 $l++; 135 $l++;
131 next; 136 next;
132 } 137 }
133 chomp; 138 chomp;
134 ($seq,$ns)=split(/,/, $_); 139 ($seq,$ns)=split(/\t/, $_);
135 if ($seq=~/SEQ/) { $seq.="_s";} 140 if ($seq=~/SEQ/) { $seq.="_s";}
136 $dnds{$seq}=$ns; 141 $dnds{$seq}=$ns;
137 } 142 }
138 143
139 #reformatting 144 #reformatting
215 my %level={}; 220 my %level={};
216 my %desc={}; 221 my %desc={};
217 my %value={}; 222 my %value={};
218 my $des; 223 my $des;
219 my $go; 224 my $go;
220 my $l; 225 my $ll;
221 my $gn; 226 my $gn;
222 my $val; 227 my $val;
223 my @gos=(); 228 my @gos=();
224 my %genes={}; 229 my %genes={};
225 my @gcount=(); 230 my @gcount=();
226 my %gci={}; 231 my %gci={};
227 my %goi={}; 232 my %goi={};
228 233
229 while (<TAB>){ 234 while (<TAB>){
230 chomp; 235 chomp;
231 ($des,$go,$l,$val,$gn)=split(/\t/,$_); 236 ($des,$go,$ll,$val,$gn)=split(/\t/,$_);
232 $value{$gn}=$val; 237 $value{$gn}=$val;
233 $desc{$go}=$des; 238 $desc{$go}=$des;
234 push @{$genes{$go}},$gn; 239 push @{$genes{$go}},$gn;
235 push @gcount, $gn unless ($gci{$gn}==1) ; 240 push @gcount, $gn unless ($gci{$gn}==1) ;
236 $gci{$gn}=1; 241 $gci{$gn}=1;
314 $overlap{$go2,$go}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1)); 319 $overlap{$go2,$go}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1));
315 } 320 }
316 } 321 }
317 } 322 }
318 323
319 open OUT, ">$inname31" or die "gomwu_b: cannot create output $inname31\n"; 324 open OUT, ">$inname31" or die "gomwu_a: cannot create output $inname31\n";
320 325
321 print {OUT} join("\t",@gos),"\n"; 326 print {OUT} join("\t",@gos),"\n";
322 327
323 foreach $go (@gos) { 328 foreach $go (@gos) {
324 $overlap{$go,$go}=1; 329 $overlap{$go,$go}=1;