Mercurial > repos > cristian > rbgoa
annotate nrify_GOtable.pl @ 2:5acf9dfdfa27 draft default tip
planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author | cristian |
---|---|
date | Wed, 09 Nov 2022 08:57:54 +0000 |
parents | 91261b42c07e |
children |
rev | line source |
---|---|
0
91261b42c07e
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
cristian
parents:
diff
changeset
|
#!/usr/bin/perl
#
# nrify_GOtable.pl
#
# Reads a table of "gene<TAB>semicolon-separated GO terms" and prints one
# line per gene, carrying the union of that gene's terms with duplicates
# removed.
#
# Behaviour:
#   * Terms are compared as exact strings, so a term that merely appears
#     inside another one (e.g. "GO:000" vs "GO:0001") is not treated as a
#     duplicate.
#   * Genes are printed in order of first appearance in the input, and the
#     terms of each gene in order of first appearance as well.
#   * A gene that never has any term is still printed, with an empty term
#     column.
#   * Blank input lines are ignored; a trailing CR (Windows line ending) is
#     stripped together with the newline.
#
# Usage: nrify_GOtable.pl <table>      ("-" reads standard input)

use strict;
use warnings;

# Returns the help text shown when no input file is given.
sub _usage {
    return "

nrify_GOtable.pl:

removes duplicate entries for a gene from gene<tab>semicolon-separated GOterms table
concatenates nonredundant categories for each gene

Misha Matz July 2013, matz\@utexas.edu

";
}

# Folds one input line into the accumulated table.
#
#   $table - hash reference holding the running state; an empty hash is a
#            valid starting point. Keys maintained here:
#              order => [ genes in first-seen order ]
#              terms => { gene => [ unique terms in first-seen order ] }
#              seen  => { gene => { term => 1 } }
#   $line  - raw line from the input, with or without its line ending.
#
# Returns nothing; $table is updated in place.
sub _merge_record {
    my ($table, $line) = @_;

    (my $record = $line) =~ s/\r?\n\z//;
    return if $record eq '';

    my ($gene, $goline) = split /\t/, $record;
    return unless defined $gene;    # the line consisted of tabs only

    # Register the gene on first sight, even when it carries no terms, so
    # that it still shows up in the output.
    if (!exists $table->{terms}{$gene}) {
        push @{ $table->{order} }, $gene;
        $table->{terms}{$gene} = [];
    }

    return unless defined $goline;

    for my $term (split /;/, $goline) {
        next if $term eq '';                          # from "a;;b" or a leading ";"
        next if $table->{seen}{$gene}{$term}++;       # exact-match duplicate
        push @{ $table->{terms}{$gene} }, $term;
    }
    return;
}

# Renders the accumulated table as a list of "gene<TAB>t1;t2;...\n" lines,
# in the order the genes were first seen.
sub _format_table {
    my ($table) = @_;
    return map { $_ . "\t" . join(';', @{ $table->{terms}{$_} }) . "\n" }
           @{ $table->{order} || [] };
}

# Script entry point: takes the command-line arguments, prints the merged
# table on standard output and returns the process exit status.
# Dies with the usage text when no input is named, or with the system error
# when the input cannot be opened.
sub main {
    my @args = @_;

    my $inp = shift @args;
    die _usage() unless defined $inp && length $inp;

    my $in;
    if ($inp eq '-') {
        # "-" meant standard input under the former two-argument open;
        # keep that convention explicitly.
        $in = \*STDIN;
    }
    else {
        # Three-argument open: the name is always taken as a plain file
        # path, never as a mode prefix or a piped command.
        open $in, '<', $inp or die "cannot open input $inp: $!\n";
    }

    my %table;
    while (my $line = <$in>) {
        _merge_record(\%table, $line);
    }
    close $in unless $inp eq '-';

    print for _format_table(\%table);
    return 0;
}

# Run only when executed as a script, so the helpers above can be loaded
# with require/do without triggering the command-line handling.
exit main(@ARGV) unless caller;

1;