comparison Perl/remove_duplicates_in_gff.pl @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
comparison
equal deleted inserted replaced
2:97e4e3e818b6 3:e42d30da7a74
#!/usr/bin/perl

# remove_duplicates_in_gff.pl
#
# Usage: remove_duplicates_in_gff.pl <input.gff> <output.gff>
#
# Copies a tab-separated GFF file to <output.gff>, leaving out every feature
# that is tied to a "duplicated" identifier. An identifier is duplicated when
# it appears as the Name attribute of more than one CDS line in the input.
# The identifier of a line is taken from:
#   CDS   -> Name
#   mRNA  -> Parent
#   gene  -> ID
#   exon  -> Parent, keeping only the text before the first "."
# Lines of any other type (including comment and blank lines) are copied
# unchanged.

use strict;
use warnings;

# Run only when executed as a script, so the helpers below can be loaded and
# exercised on their own without touching @ARGV.
main(@ARGV) unless caller;

# Entry point: count CDS names in a first pass over the input, then rewind and
# write the surviving lines to the output.
# Dies with a message on missing arguments or on any I/O failure.
sub main {
    my ($file, $out) = @_;

    die "Usage: $0 <input.gff> <output.gff>\n"
        unless defined $file && defined $out;

    open my $in_fh, '<', $file
        or die "Cannot open '$file' for reading: $!\n";
    my $is_duplicated = duplicated_ids(count_cds_names($in_fh));

    my $out_fh;
    if ($out eq '-') {
        # The historical 2-arg open(">$out") sent "-" to standard output;
        # keep that working.
        $out_fh = \*STDOUT;
    }
    else {
        open $out_fh, '>', $out
            or die "Cannot open '$out' for writing: $!\n";
    }

    # Second pass over the same input: rewind instead of reopening.
    seek $in_fh, 0, 0
        or die "Cannot rewind '$file': $!\n";
    write_unique_features($in_fh, $out_fh, $is_duplicated);

    close $in_fh
        or die "Cannot close '$file': $!\n";
    # Buffered write errors only surface here.
    close $out_fh
        or die "Cannot close '$out': $!\n";
    return;
}

# Returns the third tab-separated column of $line (the feature type), or the
# empty string when the line has fewer than three columns.
sub feature_type {
    my ($line) = @_;
    my @fields = split /\t/, $line;
    return defined $fields[2] ? $fields[2] : '';
}

# Returns the value following the first "$key=" in $line, up to the next ";"
# or, when no ";" follows, up to the end of the line (line terminator
# excluded). Returns undef when "$key=" does not occur.
sub attribute_value {
    my ($line, $key) = @_;
    # The last attribute of a line is not required to end with ";", so the
    # end of the line is accepted as a terminator too.
    if ($line =~ m{ \Q$key\E = ( [^;]*? ) (?: ; | \r? \n? \z ) }x) {
        return $1;
    }
    return;
}

# Returns the identifier under which $line must be looked up in the set of
# duplicated ids, or undef when the line's type is not one of
# CDS/mRNA/gene/exon or the expected attribute is absent.
sub lookup_id {
    my ($line) = @_;
    my $type = feature_type($line);

    return attribute_value($line, 'Name')   if $type eq 'CDS';
    return attribute_value($line, 'Parent') if $type eq 'mRNA';
    return attribute_value($line, 'ID')     if $type eq 'gene';

    if ($type eq 'exon') {
        my $parent = attribute_value($line, 'Parent');
        return unless defined $parent;
        # Only the part before the first "." is compared with the CDS names.
        my ($id) = split /\./, $parent;
        return defined $id ? $id : '';
    }
    return;
}

# Reads every line from $in_fh and returns a hash reference mapping each CDS
# Name value to the number of CDS lines carrying it.
sub count_cds_names {
    my ($in_fh) = @_;
    my %count_of;
    while (my $line = <$in_fh>) {
        next unless feature_type($line) eq 'CDS';
        my $name = attribute_value($line, 'Name');
        $count_of{$name}++ if defined $name;
    }
    return \%count_of;
}

# Given the hash reference produced by count_cds_names, returns a hash
# reference whose keys are the names counted more than once (values are 1).
sub duplicated_ids {
    my ($count_of) = @_;
    my %is_duplicated =
        map  { $_ => 1 }
        grep { $count_of->{$_} > 1 }
        keys %{$count_of};
    return \%is_duplicated;
}

# Copies lines from $in_fh to $out_fh, skipping every line whose lookup id is
# a key of %$is_duplicated. Dies if a write fails.
sub write_unique_features {
    my ($in_fh, $out_fh, $is_duplicated) = @_;
    while (my $line = <$in_fh>) {
        my $id = lookup_id($line);
        next if defined $id && $is_duplicated->{$id};
        print {$out_fh} $line
            or die "Cannot write output: $!\n";
    }
    return;
}