Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/reformatHeatmapSVG.pl @ 1:032f6b3806a3 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:16:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3cbb01081cde | 1:032f6b3806a3 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 | |
5 my $filein = $ARGV[0]; | |
6 my $fileout = $ARGV[1]; | |
7 my $matrix = $ARGV[2]; | |
8 | |
9 | |
10 | |
11 | |
12 my $min_x = 100000; | |
13 my $max_x = 0; | |
14 my $min_y = 100000; | |
15 my $max_y = 0; | |
16 my $n=0; | |
17 open(OUT,">$fileout"); | |
18 open(F,$filein); | |
19 while(<F>){ | |
20 if (/<path style=" stroke:none;fill-rule:nonzero;fill/){ | |
21 | |
22 if (/d=\"M ([\s\d\.]+) L/){ | |
23 my @infos = split(/ /,$1); | |
24 my $x = $infos[0]; | |
25 if ($x < $min_x){$min_x = $x;} | |
26 if ($x > $max_x){$max_x = $x;} | |
27 } | |
28 if (/L ([\s\d\.]+) L/){ | |
29 my @infos = split(/ /,$1); | |
30 my $y = $infos[1]; | |
31 if ($y < $min_y){$min_y = $y;} | |
32 if ($y > $max_y){$max_y = $y;} | |
33 } | |
34 if (/L ([\s\d\.]+) Z/){ | |
35 my @infos = split(/ /,$1); | |
36 my $y = $infos[1]; | |
37 if ($y < $min_y){$min_y = $y;} | |
38 if ($y > $max_y){$max_y = $y;} | |
39 } | |
40 $n++; | |
41 } | |
42 else{ | |
43 if (!/\<\/svg\>/){ | |
44 print OUT $_; | |
45 } | |
46 } | |
47 } | |
48 close(F); | |
49 | |
50 my $nb_dispensable_clusters = `grep -P -c '\t0' $matrix`; | |
51 my $nb_samples = `awk {'print NF-1'} $matrix | head -1`; | |
52 | |
53 my $global_width = $max_x - $min_x; | |
54 my $width_of_one_block = $global_width / $nb_dispensable_clusters; | |
55 | |
56 my $global_height = $max_y - $min_y; | |
57 my $height_of_one_block = $global_height / $nb_samples; | |
58 | |
59 ########################################################### | |
60 # get distinct pattern of presence/absence | |
61 ########################################################### | |
62 my %patterns; | |
63 my $pattern_order = 0; | |
64 my %pattern_orders; | |
65 open(M,$matrix); | |
66 <M>; | |
67 while(<M>){ | |
68 my $line = $_; | |
69 $line =~s/\n//g;$line =~s/\r//g; | |
70 my @infos = split(/\t/,$line); | |
71 | |
72 my $pattern = ""; | |
73 for (my $k=1;$k<=$#infos;$k++){ | |
74 $pattern.=$infos[$k]; | |
75 } | |
76 | |
77 # print only dispensable (at least one absence) | |
78 if ($pattern =~/0/){ | |
79 if (!$patterns{$pattern}){ | |
80 $pattern_order++; | |
81 } | |
82 $patterns{$pattern}++; | |
83 $pattern_orders{$pattern_order} = $pattern; | |
84 } | |
85 } | |
86 close(M); | |
87 | |
88 print "Number of distinct patterns:"; | |
89 print scalar keys(%patterns)."\n"; | |
90 | |
91 my @colors = ("orange","green","red","blue","black","pink","yellow","brown","grey","purple","darkred"); | |
92 | |
93 my $cumul_x = 0; | |
94 foreach my $pattern_order(sort {$a<=>$b} keys(%pattern_orders)){ | |
95 my $pattern = $pattern_orders{$pattern_order}; | |
96 my $size = $patterns{$pattern}; | |
97 my $width = $size * $width_of_one_block; | |
98 my $x = $max_x - $cumul_x - $width; | |
99 | |
100 my $modulo = $pattern_order % 2; | |
101 print "$pattern_order $pattern $size $modulo\n"; | |
102 | |
103 #my $color = $colors[$pattern_order-1]; | |
104 my $color = $colors[$modulo]; | |
105 | |
106 my $pattern_y = $min_y-15; | |
107 print OUT "<rect y='$pattern_y' x='$x' width='$width' height='10' style=\"fill:$color;stroke-width:3;$color;\"/>"; | |
108 | |
109 $cumul_x += $width; | |
110 my @values = split(//,$pattern); | |
111 my $cumul_y = 0; | |
112 foreach my $val(@values){ | |
113 my $y = $max_y - $cumul_y - $height_of_one_block; | |
114 if ($val){ | |
115 print OUT "<rect y='$y' x='$x' width='$width' height='$height_of_one_block' style=\"fill:purple;stroke:purple;\"/>"; | |
116 } | |
117 $cumul_y += $height_of_one_block; | |
118 } | |
119 } | |
120 | |
121 | |
122 print OUT "</svg>\n"; | |
123 close(OUT); | |
124 | |
125 print "Min x : $min_x\n"; | |
126 print "Max x : $max_x\n"; | |
127 print "Min y : $min_y\n"; | |
128 print "Max y : $max_y\n"; | |
129 print "$n\n"; |