annotate Perl/remove_duplicates_in_gff.pl @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
#!/usr/bin/perl

# Remove every feature tied to a duplicated CDS name from a tab-separated
# annotation file (GFF-style: feature type in column 3, attributes in
# column 9).
#
# Usage: remove_duplicates_in_gff.pl <input> <output>
#
# The input is read twice:
#   pass 1 counts how many CDS lines carry each "Name" attribute value;
#   pass 2 copies the input to the output, dropping the CDS, mRNA, gene and
#          exon lines whose key (see feature_key) was seen on more than one
#          CDS line.  Every other line is copied unchanged.
#
# Note that ALL lines of a duplicated name are dropped, not only the extra
# copies.

use strict;
use warnings;

# Attribute that carries the key for each feature type that gets filtered.
my %ATTRIBUTE_FOR = (
    CDS  => 'Name',
    mRNA => 'Parent',
    gene => 'ID',
    exon => 'Parent',
);

my $file = $ARGV[0];
my $out  = $ARGV[1];

die "Usage: $0 <input.gff> <output.gff>\n"
    unless defined $file && defined $out;

# Pass 1: count the occurrences of each CDS name.
my %count_of;
open(my $in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
while (my $line = <$in>) {
    my ($type, $attributes) = parse_feature($line);
    next unless $type eq 'CDS';
    my $name = attribute_value($attributes, 'Name');
    $count_of{$name}++ if defined $name;
}
close($in) or die "Cannot close '$file': $!\n";

# Names carried by more than one CDS line.
my %dup = map { $_ => 1 } grep { $count_of{$_} > 1 } keys %count_of;

# Pass 2: copy the input, skipping every line that belongs to a duplicate.
open(my $out_fh, '>', $out) or die "Cannot open '$out' for writing: $!\n";
open($in, '<', $file) or die "Cannot open '$file' for reading: $!\n";
while (my $line = <$in>) {
    my ($type, $attributes) = parse_feature($line);
    my $key = feature_key($type, $attributes);
    next if defined $key && $dup{$key};

    # Print the untouched line so the original line ending is preserved.
    print {$out_fh} $line or die "Cannot write to '$out': $!\n";
}
close($in) or die "Cannot close '$file': $!\n";

# Buffered write errors only surface here, so this close must be checked.
close($out_fh) or die "Cannot close '$out': $!\n";

# Split one input line into (feature type, attribute string).
# Either value is '' when the line has too few columns (comments, blank
# lines), which keeps such lines out of every filter without warnings.
sub parse_feature {
    my ($line) = @_;

    # Work on a copy stripped of its line ending so that a value in last
    # position never captures "\n" or "\r".
    (my $record = $line) =~ s/\r?\n\z//;

    my @columns    = split /\t/, $record, -1;
    my $type       = defined $columns[2] ? $columns[2] : '';
    my $attributes = defined $columns[8] ? $columns[8] : '';
    return ($type, $attributes);
}

# Return the value of attribute $key inside a ';'-separated attribute
# string, or nothing when the attribute is absent.
# The key must start the string or follow a ';' (so "ID" does not match
# inside e.g. "locus_ID"), and the value runs up to the next ';' or the end
# of the string, so a final attribute without a trailing ';' is found too.
sub attribute_value {
    my ($attributes, $key) = @_;
    return $1 if $attributes =~ /(?:\A|;)\s*\Q$key\E=([^;]*)/;
    return;
}

# Return the key used to look a feature up in %dup, or nothing when the
# feature type is not filtered or the expected attribute is missing.
# For exons only the part of Parent before the first '.' is used
# (presumably the parent is "<gene id>.<suffix>" -- TODO confirm against the
# files this script is run on; gene ids that themselves contain a '.' would
# be truncated here).
sub feature_key {
    my ($type, $attributes) = @_;

    my $attribute = $ATTRIBUTE_FOR{$type};
    return unless defined $attribute;

    my $value = attribute_value($attributes, $attribute);
    return unless defined $value;

    if ($type eq 'exon') {
        ($value) = split /\./, $value;

        # split returns an empty list for an empty value.
        $value = '' unless defined $value;
    }
    return $value;
}