view nrify_GOtable.pl @ 2:5acf9dfdfa27 draft default tip

planemo upload commit 66a856bcce69986d9a6f1a39820dd9b3f4f6b0db
author cristian
date Wed, 09 Nov 2022 08:57:54 +0000
parents 91261b42c07e
children
line wrap: on
line source

#!/usr/bin/perl

# nrify_GOtable.pl
#
# Collapses a gene<tab>semicolon-separated-GO-terms table so that every gene
# appears on exactly one output line carrying the union of its GO terms.
#
# Usage: nrify_GOtable.pl <table file>   (result is printed to STDOUT)

use strict;
use warnings;

my $usage= "

nrify_GOtable.pl:

removes duplicate entries for a gene from gene<tab>semicolon-separated GOterms table
concatenates nonredundant categories for each gene

Misha Matz July 2013, matz\@utexas.edu

";

# nrify_go_table($in_fh)
#
# Reads gene<tab>term;term;... records from the filehandle $in_fh and merges
# all records that share a gene.
#
# Returns a list of strings "gene<tab>term;term;..." (no trailing newline),
# one per gene, in the order the genes were first seen. Within a gene the
# terms keep first-seen order and each term appears once.
#
# Notes:
#   * Terms are compared by exact string equality. A regex/substring test
#     would wrongly drop a term that merely occurs inside another term and
#     would interpret regex metacharacters in the data.
#   * Only the first two tab-separated fields of a record are used.
#   * Lines with no fields at all (blank lines) are skipped.
#   * A gene that never has any term is still reported, with an empty list.
sub nrify_go_table {
    my ($in_fh) = @_;

    my @gene_order;    # genes in order of first appearance
    my %terms_of;      # gene => array ref of its unique terms, in order
    my %seen_for;      # gene => { term => count }, for exact-match dedup

    while (my $line = <$in_fh>) {
        chomp $line;
        my ($gene, $goline) = split /\t/, $line;

        # split() yields an empty list for a blank line; nothing to record.
        next unless defined $gene;

        if (!exists $terms_of{$gene}) {
            push @gene_order, $gene;
            $terms_of{$gene} = [];
        }

        # Record has a gene but no term column.
        next unless defined $goline;

        for my $term (split /;/, $goline) {
            next if $term eq '';                 # e.g. "a;;b"
            next if $seen_for{$gene}{$term}++;   # already listed for this gene
            push @{ $terms_of{$gene} }, $term;
        }
    }

    return map { $_ . "\t" . join(';', @{ $terms_of{$_} }) } @gene_order;
}

# main(@args)
#
# Command-line driver: takes the input file name as the first argument,
# prints the merged table to STDOUT. Dies with the usage text when no file
# name is given, and with the system error when the file cannot be opened.
sub main {
    my @args = @_;

    my $inp = shift @args;
    die $usage unless defined $inp && length $inp;

    # Three-argument open: the name is taken literally as a file to read,
    # so characters such as '>' or '|' in it are never treated as a mode.
    open my $in_fh, '<', $inp or die "cannot open input $inp: $!\n";
    my @rows = nrify_go_table($in_fh);
    close $in_fh;

    print "$_\n" for @rows;
    return;
}

# Run the driver only when executed as a script, so the file can also be
# loaded (e.g. by a test) without consuming @ARGV.
main(@ARGV) unless caller;

1;