annotate gomwu_b.pl @ 1:f7287f82602f draft

"planemo upload commit 486235d6560c9e95bd42152ad19bf7c3941cdc1b"
author cristian
date Tue, 19 Apr 2022 08:28:43 +0000
parents 91261b42c07e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
#!/usr/bin/env perl

use File::Basename;

# Usage banner, shown when the script is invoked with too few arguments.
my $usage= "

gomwu_b.pl (v. Feb 2015):

This is the second script in the GO database slimming and reformatting procedure,
called automatically by goStats.R

See README_GO_MWU.txt file for details.

Mikhail Matz, UT Austin; matz@utexas.edu

";

# Positional arguments, in order:
#   1. path of the gene-to-GO annotation table
#   2. path of the measure table
#   3. GO division label (used as a component of every intermediate file name)
my $gen2go  = shift;
my $measure = shift;
my $div     = shift;

# Check the argument count BEFORE splitting the paths: fileparse() croaks on
# an undefined path, which would otherwise hide the usage message.
$div or die "$usage\nNot enough arguments for gomwu_b.pl\n";

# Split both paths into (base name, directory, extension); the extension is
# everything from the first dot of the file name onwards.
my ($mname, $mdir, $mext) = fileparse($measure, '\..*');
my ($aname, $adir, $aext) = fileparse($gen2go,  '\..*');

# Primary clustering file: a header line followed by comma-separated rows
# whose first field is a GO term and whose second field is its cluster id.
my $clfile = $mdir . "cl_dissim0_" . $div . "_" . $aname . $aext;
open my $clf_fh, '<', $clfile
    or die "cannot locate primary clustering file $clfile\n";

# cluster id => list of GO terms assigned to that cluster
my %clgo;

scalar(<$clf_fh>);    # discard the header line
while (my $row = <$clf_fh>) {
    # Strip the line ending so a final line without a newline does not end
    # up under a different cluster key than the rest of its cluster.
    chomp $row;
    my ($term, $cluster) = split(/,/, $row);
    next unless defined $cluster;    # blank or malformed row
    push @{ $clgo{$cluster} }, $term;
}
close $clf_fh;

# The clustering file and its dissimilarity matrix are consumed here.
unlink $clfile;
unlink $mdir . "dissim0_" . $div . "_" . $aname . $aext;

# Names in the working directory that contain the annotation path.
# $dones is not consulted by any live code in this script; the listing is
# best-effort and silently stays empty when the directory cannot be read.
my @donealready;
if (opendir my $dir_fh, $mdir) {
    # \Q..\E: match the path literally rather than as a regex.
    @donealready = grep { /\Q$gen2go\E/ } readdir $dir_fh;
    closedir $dir_fh;
}
my $dones = " " . "@donealready" . " ";
#print "DONE: $dones\n";

# Per-division input table: a header line followed by tab-separated rows read
# as (description, GO term, level, value, gene). The file is deleted once read.
my $inname2 = $mdir . $mname . "." . $div . ".tmp";
open my $nrify_fh, '<', $inname2
    or die "go_nrify: cannot open input table $inname2\n";
scalar(<$nrify_fh>);    # discard the header line

my %level;    # GO term => level taken from the first row seen for the term
my %desc;     # GO term => description (the last row for the term wins)
my %value;    # gene    => value (the last row for the gene wins)
my %genes;    # GO term => genes, one entry per input row

while (my $row = <$nrify_fh>) {
    chomp $row;
    my ($des, $go, $l, $val, $gn) = split(/\t/, $row);
    $value{$gn} = $val;
    $desc{$go}  = $des;
    push @{ $genes{$go} }, $gn;
    $level{$go} = $l unless exists $level{$go};
}
close $nrify_fh;
unlink $inname2;

#--------------------


# Collapse every primary cluster into one non-redundant category.
my @nrgos = ();    # category ids: the cluster's GO terms joined with ";"
my %nrlev;         # category id => level
my %nrgenes;       # category id => genes of all member terms, without repeats
my %nrdesc;        # category id => description of the representative term

foreach my $cl (keys %clgo) {
    my $members = $clgo{$cl};

    # Skip entries that do not hold a non-empty term list, so they are not
    # reported (and counted) as categories.
    next unless ref($members) eq 'ARRAY' && @{$members};

    # Start from the first term; the representative is the term with the
    # most genes, ties being decided by the higher level.
    my $largest  = $members->[0];
    my @nrgens   = @{ $genes{$largest} || [] };
    my $maxgenes = scalar @nrgens;
    my $maxlevel = $level{$largest};

    if (@{$members} > 1) {
        # Hash lookup instead of matching each gene name as a regex against
        # the joined list: names containing regex metacharacters or spaces
        # are compared literally, and the union is linear in the gene count.
        my %seen = map { $_ => 1 } @nrgens;

        foreach my $go (@{$members}) {
            my $ngenes = scalar @{ $genes{$go} || [] };
            if ($maxgenes < $ngenes) {
                # NOTE(review): $maxlevel is not refreshed when a larger term
                # takes over, so the level recorded for the category may come
                # from an earlier term - confirm whether that is intended.
                $maxgenes = $ngenes;
                $largest  = $go;
            }
            elsif ($maxgenes == $ngenes) {
                if ($maxlevel < $level{$go}) {
                    $maxlevel = $level{$go};
                    $largest  = $go;
                }
            }
            foreach my $gene (@{ $genes{$go} || [] }) {
                push @nrgens, $gene unless $seen{$gene}++;
            }
        }
    }

    my $goos = join(";", @{$members});
    push @nrgos, $goos;
    $nrdesc{$goos}  = $desc{$largest};
    $nrlev{$goos}   = $maxlevel;
    $nrgenes{$goos} = [@nrgens];
}

# Report the number of categories, then write the non-redundant table:
# one row per (category, gene) pair under a fixed header.
print scalar(@nrgos), " non-redundant GO categories of good size\n-------------\n";
my $outname = $mdir . $mname . "_" . $div . ".tsv";
print("creating $outname");
open my $nr_out_fh, '>', $outname
    or die "gomwu_b: cannot create output $outname\n";
print {$nr_out_fh} "name\tterm\tlev\tseq\tvalue\n";

foreach my $category (@nrgos) {
    foreach my $gene (@{ $nrgenes{$category} }) {
        print {$nr_out_fh} "$nrdesc{$category}\t$category\t$nrlev{$category}\t$gene\t$value{$gene}\n";
    }
}

# The table is read back right after this point, so a failed flush must not
# go unnoticed.
close $nr_out_fh
    or die "gomwu_b: cannot write output $outname: $!\n";

####################
# building dissimilarity matrix

# $inname4: dissimilarity matrix to be written.
# $inname3: the non-redundant table (name, term, lev, seq, value) to read.
my $inname4 = $mdir . "dissim_" . $div . "_" . $mname . "_" . $aname . $aext;
my $inname3 = $mdir . $mname . "_" . $div . ".tsv";

#if($dones!~/ $inname4 /) {

use List::Util qw[min max];
open my $cluster_fh, '<', $inname3
    or die "go_cluster: cannot open input table $inname3\n";
scalar(<$cluster_fh>);    # discard the header line

# Fresh containers for the second pass; they deliberately shadow the ones
# filled while reading the per-division input table.
my @gos = ();    # categories in order of first appearance
my %genes;       # category => genes, one entry per table row
my %gosi;        # categories already appended to @gos

print"\nSecondary clustering:\ncalculating similarities....\n";
while (my $row = <$cluster_fh>) {
    chomp $row;
    my ($des, $go, $l, $gn, $val) = split(/\t/, $row);
    push @{ $genes{$go} }, $gn;
    push @gos, $go unless $gosi{$go}++;
}
close $cluster_fh;

# Pairwise dissimilarity between categories, stored symmetrically under the
# two-part keys {$a,$b} and {$b,$a}:
#   1 - (genes shared by both) / (gene count of the smaller category),
# formatted to three decimals. A pair whose second member is the category
# "unknown" is set to 1 without looking at the genes.
my %dissim;
for my $g1 (0 .. $#gos - 1) {
    my $go    = $gos[$g1];
    my $size1 = scalar @{ $genes{$go} };
    #if ($go eq "unknown") { print "unknown as go\n";}
    for my $g2 ($g1 + 1 .. $#gos) {
        my $go2 = $gos[$g2];
        if ($go2 eq "unknown") {
            #print "$go against $go2\n";
            $dissim{$go,$go2} = $dissim{$go2,$go} = 1;
            next;
        }
        my $size2 = scalar @{ $genes{$go2} };

        # Distinct genes over both lists; the overlap is whatever the two
        # list lengths add up to beyond that.
        my %union;
        @union{ @{ $genes{$go} }, @{ $genes{$go2} } } = ();
        my $shared = $size1 + $size2 - scalar(keys %union);

        my $ref = min($size1, $size2);
        $dissim{$go,$go2} = $dissim{$go2,$go} = sprintf("%.3f", 1 - $shared / $ref);
    }
}

# Write the square dissimilarity matrix: a header row with the category ids,
# then one tab-separated row per category, in the same order as the header.
open my $matrix_fh, '>', $inname4
    or die "gomwu_b: cannot create output $inname4\n";
print {$matrix_fh} join("\t", @gos), "\n";

foreach my $row_go (@gos) {
    $dissim{$row_go,$row_go} = 0;    # diagonal
    print {$matrix_fh} join("\t", map { $dissim{$row_go,$_} } @gos), "\n";
}

# Closing explicitly surfaces write errors (e.g. a full disk) that would
# otherwise be lost when the handle is dropped at exit.
close $matrix_fh
    or die "gomwu_b: cannot write output $inname4: $!\n";
#}