Mercurial > repos > dereeper > pangenome_explorer
comparison Perl/Naegleria/calculateFeatureDensitiesFromGFF.pl @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:97e4e3e818b6 | 3:e42d30da7a74 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 | |
5 my $gff = $ARGV[0]; | |
6 my $window_size = $ARGV[1]; | |
7 | |
8 my %counts; | |
9 my %hash; | |
10 open(F,$gff); | |
11 while(<F>){ | |
12 if (/^#/){next;} | |
13 my @infos = split("\t",$_); | |
14 my $chr = $infos[0]; | |
15 #$chr =~s/chr//g; | |
16 my $source = $infos[1]; | |
17 my $feature = $infos[2]; | |
18 my $start = $infos[3]; | |
19 my $end = $infos[4]; | |
20 my $window_num = int($start/$window_size)+1; | |
21 my $window_num2 = int($end/$window_size)+1; | |
22 if ($source eq "repeatmasker" && $feature eq "match"){ | |
23 $feature = "repeats"; | |
24 } | |
25 if ($feature ne "gene" && $feature ne "CDS" && $feature ne "repeats" && $feature !~/UTR/ && $feature ne "exon"){next;} | |
26 $counts{$chr}{$window_num}{$feature}++; | |
27 if ($window_num == $window_num2){ | |
28 for (my $i = $start; $i <= $end; $i++){ | |
29 $hash{$chr}{$window_num}{$feature}{$i} = 1; | |
30 } | |
31 } | |
32 else{ | |
33 for (my $i = $start; $i <= ($window_num*$window_size); $i++){ | |
34 $hash{$chr}{$window_num}{$feature}{$i} = 1; | |
35 } | |
36 for (my $j = ($window_num2-1)*$window_size; $j <= $end; $j++){ | |
37 $hash{$chr}{$window_num2}{$feature}{$j} = 1; | |
38 } | |
39 if (($window_num2 - $window_num) > 1){ | |
40 for (my $window_num_new = $window_num + 1; $window_num_new <= $window_num2 - 1; $window_num_new++){ | |
41 for (my $j = (($window_num_new-1)*$window_size); $j <= ($window_num_new*$window_size); $j++){ | |
42 $hash{$chr}{$window_num_new}{$feature}{$j} = 1; | |
43 } | |
44 } | |
45 } | |
46 | |
47 } | |
48 } | |
49 close(F); | |
50 | |
51 #my $refhash2 = $hash{"Chr13"}{"1"}{"exon"}; | |
52 #my %subhash2 = %$refhash2; | |
53 #print scalar %subhash2; | |
54 #exit; | |
55 | |
56 open(G2,">gene_counts.txt"); | |
57 open(G,">gene_density.txt"); | |
58 open(I,">exon_density.txt"); | |
59 open(R,">repeat_density.txt"); | |
60 foreach my $chr(sort {$a<=>$b} keys(%hash)){ | |
61 #print G "$chr 0 1 0\n"; | |
62 #print G "$chr 1 2 1\n"; | |
63 #print I "$chr 0 1 0\n"; | |
64 #print I "$chr 0 1 0\n"; | |
65 my $refhash = $hash{$chr}; | |
66 my %subhash = %$refhash; | |
67 foreach my $window_num(sort {$a<=>$b} keys(%subhash)){ | |
68 | |
69 # genes | |
70 my $proportion_gene = 0; | |
71 if ($hash{$chr}{$window_num}{"gene"}){ | |
72 my $refhash2 = $hash{$chr}{$window_num}{"gene"}; | |
73 my %subhash2 = %$refhash2; | |
74 $proportion_gene = scalar %subhash2 / $window_size; | |
75 if ($proportion_gene > 1.1){ | |
76 #print "$chr $window_num $proportion_gene ".scalar %subhash2."\n";exit; | |
77 } | |
78 } | |
79 | |
80 # exon | |
81 my $proportion_exon = 0; | |
82 if ($hash{$chr}{$window_num}{"CDS"}){ | |
83 my $refhash2 = $hash{$chr}{$window_num}{"CDS"}; | |
84 my %subhash2 = %$refhash2; | |
85 $proportion_exon = scalar %subhash2 / $window_size; | |
86 } | |
87 | |
88 # repeat | |
89 my $proportion_repeat = 0; | |
90 if ($hash{$chr}{$window_num}{"repeats"}){ | |
91 my $refhash2 = $hash{$chr}{$window_num}{"repeats"}; | |
92 my %subhash2 = %$refhash2; | |
93 $proportion_repeat = scalar %subhash2 / $window_size; | |
94 #$proportion_repeat = $proportion_repeat + $proportion_gene; | |
95 } | |
96 else{ | |
97 #$proportion_repeat = $proportion_gene; | |
98 $proportion_repeat = 0; | |
99 } | |
100 my $start = ($window_num-1)*$window_size; | |
101 my $end = $window_num*$window_size; | |
102 print I "$chr $start $end $proportion_exon\n"; | |
103 print G "$chr $start $end $proportion_gene\n"; | |
104 print R "$chr $start $end $proportion_repeat\n"; | |
105 if ($counts{$chr}{$window_num}{"gene"}){ | |
106 print G2 "$chr $start $end ".$counts{$chr}{$window_num}{"gene"}."\n"; | |
107 } | |
108 else{ | |
109 print G2 "$chr $start $end 0\n"; | |
110 } | |
111 } | |
112 } | |
113 close(G); | |
114 close(I); | |
115 close(R); | |
116 close(G2); | |
117 |