annotate gomwu_a.pl @ 0:91261b42c07e draft

"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
author cristian
date Thu, 14 Apr 2022 13:28:05 +0000
parents
children f7287f82602f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
1 #!/usr/bin/env perl
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
2
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
3 my $usage= "
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
4
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
5 gomwu_a.pl (v. Feb 2015):
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
6
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
7 This is the fist script in the GO database slimming and reformatting procedure,
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
8 called automatically by goStats.R
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
9
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
10 See README_GO_MWU.txt file for details.
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
11
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
12 Mikhail Matz, UT Austin; matz@utexas.edu
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
13
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
14 ";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
15
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
16 print "@ARGV";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
17
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
18 my $onto=$ARGV[0] or die $usage;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
19 my $gen2go=$ARGV[1] or die $usage;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
20 my $measure=$ARGV[2] or die $usage;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
21 my $div=$ARGV[3] or die $usage;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
22 my $altern="t";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
23 if ("@ARGV"=~/alternative=(\w)/) { $altern=$1; }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
24 my $toomany=0.1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
25 if ("@ARGV"=~/largest=(0\.\d+)/) { $toomany=$1; }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
26 my $mingenes=5;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
27 if ("@ARGV"=~/smallest=(\d+)/) { $mingenes=$1; }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
28 my $cutHeight=0.25;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
29 if ("@ARGV"=~/cutHeight=(\S+)/) { $cutHeight=$1; }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
30 my $padj="BH";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
31 if ("@ARGV"=~/p.adjust=(\w+)/) { $padj=$1; }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
32
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
33
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
34 print "
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
35
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
36 Run parameters:
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
37
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
38 largest GO category as fraction of all genes (largest) : $toomany
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
39 smallest GO category as # of genes (smallest) : $mingenes
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
40 clustering threshold (clusterCutHeight) : $cutHeight
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
41
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
42 ";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
43
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
44 my $division;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
45 if ($div eq "BP") { $division="biological_process";}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
46 elsif ($div eq "MF") { $division="molecular_function";}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
47 elsif ($div eq "CC") { $division="cellular_component";}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
48 else { die "unrecognized division: $div\n";}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
49
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
50 my $inname2=$measure.".".$div.".tmp";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
51 my $inname3=$div."_".$measure;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
52 my $inname31="dissim0_".$div."_".$gen2go;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
53 my $inname4="dissim_".$div."_".$measure."_".$gen2go;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
54
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
55 my @donealready=();
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
56
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
57 open GO, $onto or die "cannot open ontology $onto\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
58 open CORAL, $gen2go or die "cannot open genes2go table $gen2go\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
59 open DNDS, $measure or die "cannot open measure table $measure\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
60 my $outname=$inname2;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
61 open VOOL, ">$outname" or die "cannot create output file $outname\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
62
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
63 #################################################
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
64 # reading GO hierarchy, only the $divterm division
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
65 print "-----------------\nretrieving GO hierarchy, reformatting data...\n\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
66 my %parents;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
67 my %name;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
68 my $term;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
69 my %goodterms;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
70 #my $divterm;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
71 #my $division="molecular_function";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
72
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
73 $bads=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
74
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
75 while (<GO>){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
76 if ($_=~/^id:\s(GO\S+)/){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
77 $term=$1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
78 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
79 elsif ($_=~/^namespace:\s(\S+)/){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
80 if ($1 ne $division) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
81 $term="";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
82 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
83 else {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
84 $goodterms{$term}="OK" unless (!$term);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
85 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
86 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
87
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
88 elsif ($_=~/^is_a:\s(GO\S+)\s/) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
89 push @{$parents{$term}}, $1 unless (!$term);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
90 push @allparents, $1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
91 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
92 elsif ($_=~/^name:\s(.+)/) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
93 $name{$term}=$1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
94 chomp($name{$term}) unless (!$term);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
95 if ($name{$term} eq $division) { $divterm=$term;}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
96 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
97 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
98 $goodterms{$term}="OK" unless (!$term);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
99
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
100 # calculating hierarchy levels, putting all parents together
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
101
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
102 my %golevel;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
103 $golevel{$divterm}=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
104
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
105 foreach $term (keys %parents) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
106 $extra=$#{$parents{$term}}+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
107 for ($in=$#{$parents{$term}};$extra;$in+=$extra){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
108 $golevel{$term}++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
109 $tstart=$in-$extra+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
110 $extra=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
111 for ($tt=$tstart; $tt<=$in;$tt++){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
112 $t=${$parents{$term}}[$tt];
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
113 foreach $t0 (@{$parents{$t}}) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
114 next if ("@{$parents{$term}}"=~/$t0/ || !$goodterms{$t0});
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
115 push @{$parents{$term}}, $t0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
116 $extra++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
117 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
118 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
119 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
120 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
121 ###################################################
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
122
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
123 # reading measures
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
124 my $l=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
125 my %dnds;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
126 my $seq;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
127
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
128 while (<DNDS>){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
129 if (!$l){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
130 $l++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
131 next;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
132 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
133 chomp;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
134 ($seq,$ns)=split(/,/, $_);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
135 if ($seq=~/SEQ/) { $seq.="_s";}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
136 $dnds{$seq}=$ns;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
137 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
138
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
139 #reformatting
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
140
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
141 $bads=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
142 $goods=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
143
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
144 my $seq;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
145 my $goline;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
146 my @terms;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
147 my @orphans;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
148 my @nolevel;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
149
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
150 print {VOOL} "id\tterm\tlev\tvalue\tseq\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
151
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
152 my $count;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
153 while (<CORAL>){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
154 chomp;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
155 ($seq,$goline)=split(/\t/,$_);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
156 if ($dnds{$seq}!~/\d/) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
157 push @orphans, $seq;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
158 next;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
159 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
160 if ($goline=~/unknown/){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
161 print {VOOL} "\"$goline\"\t$goline\t5\t$dnds{$seq}\t$seq\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
162 next;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
163 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
164 @terms=split(/;/,$goline);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
165 $nt0=$#terms+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
166 $count++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
167 #print "-------------------\n$count | $seq terms: $nt0\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
168 my @collect;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
169 foreach $term (@terms) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
170 if (!$goodterms{$term}) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
171 $bads++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
172 #print "$term is not $division$1\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
173 next;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
174 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
175 #print "$term\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
176 $goods++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
177 if (!$golevel{$term}){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
178 push @nolevel,$term;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
179 $golevel{$term}=-1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
180 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
181 push @collect,($term,@{$parents{$term}});
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
182 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
183
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
184 $ncoll=$#collect+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
185 #print "collected terms: $ncoll\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
186
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
187 my @nrcollect;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
188 foreach $term (@collect) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
189 push @nrcollect, $term unless ("@nrcollect"=~/$term/);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
190 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
191
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
192 $ncoll=$#nrcollect+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
193 #print " nr terms: $ncoll\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
194
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
195 foreach $term (@nrcollect) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
196 print {VOOL} "\"$name{$term}\"\t$term\t$golevel{$term}\t$dnds{$seq}\t$seq\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
197 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
198 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
199 #print "terms matching division: $goods\nterms not matching division: $bads\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
200 $nor=$#orphans+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
201 $nnl=$#nolevel+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
202 print "-------------\ngo_reformat:\nGenes with GO annotations, but not listed in measure table: $nor\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
203 print "\nTerms without defined level (old ontology?..): $nnl\n-------------\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
204 my %parents;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
205 my %name;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
206 my $term;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
207 my %goodterms;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
208 close VOOL;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
209 #########################
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
210 # selecting good sized categroies, collapsing redundant ones
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
211 #if($dones!~/ $inname3 /) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
212 open TAB, $inname2 or die "go_nrify: cannot open input table $inname2\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
213 <TAB>;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
214
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
215 my %level={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
216 my %desc={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
217 my %value={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
218 my $des;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
219 my $go;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
220 my $l;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
221 my $gn;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
222 my $val;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
223 my @gos=();
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
224 my %genes={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
225 my @gcount=();
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
226 my %gci={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
227 my %goi={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
228
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
229 while (<TAB>){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
230 chomp;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
231 ($des,$go,$l,$val,$gn)=split(/\t/,$_);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
232 $value{$gn}=$val;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
233 $desc{$go}=$des;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
234 push @{$genes{$go}},$gn;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
235 push @gcount, $gn unless ($gci{$gn}==1) ;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
236 $gci{$gn}=1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
237 # push @genes,$gn;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
238 unless ($goi{$go}==1){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
239 $desc{$go}=$des;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
240 $level{$go}=$l;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
241 push @gos, $go;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
242 $goi{$go}=1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
243 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
244 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
245
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
246 close TAB;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
247 #unlink $inname2;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
248
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
249 $gc=$#gcount+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
250 $goc=$#gos+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
251 print "-------------\ngo_nrify:\n$goc categories, $gc genes; size range $mingenes-",$gc*$toomany,"\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
252
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
253 # excluding too large and too small categories
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
254 my %gonego={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
255 my $toobroad=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
256 my $toosmall=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
257 for ($g1=0;$g1<=$#gos;$g1++){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
258 $go=@gos[$g1];
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
259 my $golen=$#{$genes{$go}}+1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
260 if ($golen > $gc*$toomany) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
261 unless ($go eq "unknown") {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
262 $gonego{$go}=1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
263 $toobroad++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
264 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
265 #warn "$go: too broad ($golen genes)\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
266 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
267 elsif ($golen < $mingenes) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
268 $gonego{$go}=1 ;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
269 $toosmall++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
270 #warn "\t\t$go: too narrow ($golen genes) @{$genes{$go}}\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
271 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
272 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
273 print "\t$toobroad too broad\n\t$toosmall too small\n\t", $goc-$toobroad-$toosmall," remaining
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
274
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
275 removing redundancy:
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
276
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
277 calculating GO term similarities based on shared genes...\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
278
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
279 my @goodgo=();
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
280 foreach $go (@gos) { push @goodgo, $go unless ($gonego{$go}==1); }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
281 @gos=@goodgo;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
282
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
283 ################################
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
284
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
285 #warn "comparing categories...\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
286 #my $clfile="cl_".$inname31;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
287 #if($dones!~/ $clfile /) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
288
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
289 use List::Util qw[min max];
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
290 for ($g1=0;$g1<=$#gos;$g1++){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
291 my $go=@gos[$g1];
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
292 next if ($gonego{$go}==1);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
293 #warn "----------------\n$go $desc{$go} level $level{$go}\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
294 my $goos=$go;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
295 my $lev=$level{$go};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
296 my $dsc=$desc{$go};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
297 for ($g2=$g1+1;$g2<=$#gos;$g2++){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
298 my $go2=@gos[$g2];
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
299 next if ($gonego{$go2}==1);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
300 next if ($ggi{$go2}==1);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
301 my %seen={};
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
302 my $count=0;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
303 my @combo=();
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
304 if ($lump<=1) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
305 foreach $g (@{$genes{$go}},@{$genes{$go2}}){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
306 unless($seen{$g}==1 ){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
307 $count++;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
308 $seen{$g}=1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
309 push @combo, $g;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
310 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
311 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
312 my $shared=$#{$genes{$go}}+1+$#{$genes{$go2}}+1-$count;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
313 $overlap{$go,$go2}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1));
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
314 $overlap{$go2,$go}=min($shared/($#{$genes{$go}}+1),$shared/($#{$genes{$go2}}+1));
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
315 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
316 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
317 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
318
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
319 open OUT, ">$inname31" or die "gomwu_b: cannot create output $inname31\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
320
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
321 print {OUT} join("\t",@gos),"\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
322
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
323 foreach $go (@gos) {
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
324 $overlap{$go,$go}=1;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
325 foreach $go2 (@gos){
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
326 print {OUT} sprintf("%.3f",1-$overlap{$go,$go2});;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
327 print {OUT} "\t" unless ($go2 eq $gos[$#gos]);
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
328 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
329 print {OUT} "\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
330 }
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
331 close OUT;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
332 #}
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
333
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
334 #print "calling clusteringGOs.R script ....\n";
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
335 # my $err=`Rscript clusteringGOs.R $inname31 $cutHeight `;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
336 # print $err;
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
337
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
338
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
339
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
340
91261b42c07e "planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff changeset
341