Mercurial > repos > dereeper > pangenome_explorer
comparison Perl/remove_duplicates_in_gff.pl @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:97e4e3e818b6 | 3:e42d30da7a74 |
---|---|
#!/usr/bin/perl

# Remove features whose identifier is carried by more than one CDS line.
#
# Usage: remove_duplicates_in_gff.pl <input.gff> <output.gff>
#
# The input is read twice:
#   1. count how many CDS lines carry each Name attribute value;
#   2. copy every line to the output, except CDS / mRNA / gene / exon lines
#      whose identifier is one of the Name values seen more than once.
#
# The identifier is taken from a different attribute depending on the
# feature type (see %id_attribute_for below). All other lines (comments,
# directives, other feature types, malformed lines) are copied unchanged.
#
# NOTE(review): any Name shared by several CDS lines counts as duplicated,
# which would also include a single CDS split over several lines -- confirm
# that the expected input has one CDS line per gene.

use strict;
use warnings;

# Feature type (column 3) => attribute (column 9) holding the identifier
# that is compared against the duplicated CDS names.
my %id_attribute_for = (
    CDS  => 'Name',
    mRNA => 'Parent',
    gene => 'ID',
    exon => 'Parent',
);

# Split a GFF line and return (type, attributes), i.e. columns 3 and 9.
# Missing columns are returned as empty strings so that header, blank and
# otherwise short lines can be compared without "uninitialized" warnings.
sub _parse_feature {
    my ($line) = @_;
    my @columns    = split /\t/, $line;
    my $type       = defined $columns[2] ? $columns[2] : '';
    my $attributes = defined $columns[8] ? $columns[8] : '';
    return ($type, $attributes);
}

# Return the value of attribute $key in a column-9 string, or undef when the
# attribute is absent. The key must start the string or follow a ';', so that
# "ID" does not match inside e.g. "Parent_ID". The value ends at the next ';'
# or at end of line: a trailing ';' is not required.
sub _attribute_value {
    my ($attributes, $key) = @_;
    if ($attributes =~ /(?:\A|;)\s*\Q$key\E=([^;\r\n]*)/) {
        return $1;
    }
    return;
}

my ($file, $out) = @ARGV;
die "Usage: $0 <input.gff> <output.gff>\n"
    unless defined $file && defined $out;

# First pass: count the occurrences of each CDS name.
my %cds_count;
open(my $in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
while (my $line = <$in>) {
    my ($type, $attributes) = _parse_feature($line);
    next unless $type eq 'CDS';
    my $name = _attribute_value($attributes, 'Name');
    $cds_count{$name}++ if defined $name;
}
close($in);

# Names carried by more than one CDS line.
my %dup = map { $_ => 1 } grep { $cds_count{$_} > 1 } keys %cds_count;

# Second pass: copy the file, dropping the features tied to a duplicated name.
# The input is re-opened before the output is created so that a failure here
# does not leave an empty output file behind.
open($in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
open(my $out_fh, '>', $out) or die "Cannot open '$out' for writing: $!\n";
while (my $line = <$in>) {
    my ($type, $attributes) = _parse_feature($line);
    my $key = $id_attribute_for{$type};
    if (defined $key) {
        my $id = _attribute_value($attributes, $key);
        if (defined $id && $type eq 'exon') {
            # Only the part of the exon Parent before the first '.' is
            # compared with the duplicated names.
            ($id) = split /\./, $id;
        }
        next if defined $id && $dup{$id};
    }
    print {$out_fh} $line or die "Cannot write to '$out': $!\n";
}
close($in);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close '$out': $!\n";