Mercurial > repos > dereeper > pangenome_explorer
diff Perl/remove_duplicates_in_gff.pl @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl
#
# remove_duplicates_in_gff.pl
# (Perl/remove_duplicates_in_gff.pl, added in changeset 3:e42d30da7a74)
#
# Usage: remove_duplicates_in_gff.pl <input.gff> <output.gff>
#
# Copies a tab-separated GFF file to <output.gff>, dropping every feature that
# belongs to a CDS name occurring more than once in the input:
#   pass 1 - count how many CDS lines carry each Name= value;
#   pass 2 - skip CDS (Name=), mRNA (Parent=), gene (ID=) and exon (Parent=,
#            cut at the first '.') lines whose identifier is one of the
#            duplicated names, and print every other line unchanged.
#
# Dies with a message if an argument is missing or a file cannot be opened,
# written or closed.

use strict;
use warnings;

my ($file, $out) = @ARGV;
die "Usage: $0 <input.gff> <output.gff>\n"
    unless defined $file && defined $out;

# An attribute value runs up to the next ';' or to the end of the line.
# GFF3 does not require a ';' after the last attribute, so the terminator is
# not part of the pattern; excluding CR/LF keeps line endings out of the id.
my $value = qr/([^;\r\n]*)/;

# Returns the feature type (third tab-separated column) of a GFF line, or the
# empty string for lines that have no such column (comments, blank lines).
sub feature_type {
    my ($line) = @_;
    my $type = (split /\t/, $line)[2];
    return defined $type ? $type : '';
}

# Pass 1: count the CDS lines per Name= value.
my %count_of;
open(my $in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
while (my $line = <$in>) {
    if (feature_type($line) eq 'CDS' && $line =~ /Name=$value/) {
        $count_of{$1}++;
    }
}
close($in);

# Names carried by more than one CDS line.
my %is_duplicated = map { $_ => 1 } grep { $count_of{$_} > 1 } keys %count_of;

# Pass 2: copy the input, leaving out the features of duplicated names.
open(my $out_fh, '>', $out) or die "Cannot open '$out' for writing: $!\n";
open($in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
LINE:
while (my $line = <$in>) {
    my $type = feature_type($line);

    # Identifier linking this feature to a CDS name; stays undef for feature
    # types that are always kept.
    my $id;
    if ($type eq 'CDS' && $line =~ /Name=$value/) {
        $id = $1;
    }
    elsif ($type eq 'mRNA' && $line =~ /Parent=$value/) {
        $id = $1;
    }
    elsif ($type eq 'gene' && $line =~ /ID=$value/) {
        $id = $1;
    }
    elsif ($type eq 'exon' && $line =~ /Parent=$value/) {
        # Only the part of the exon's Parent before the first '.' is compared
        # with the CDS names.
        ($id) = split /\./, $1;
    }

    next LINE if defined $id && $is_duplicated{$id};

    print {$out_fh} $line or die "Cannot write to '$out': $!\n";
}
close($in);

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close '$out': $!\n";