annotate Perl/reformatHeatmapSVG.pl @ 8:f919ffe96425 draft

Uploaded
author dereeper
date Thu, 30 May 2024 14:31:04 +0000
parents e42d30da7a74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
2
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
3 use strict;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
4
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
5 my $filein = $ARGV[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
6 my $fileout = $ARGV[1];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
7 my $matrix = $ARGV[2];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
8
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
9
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
10
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
11
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
12 my $min_x = 100000;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
13 my $max_x = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
14 my $min_y = 100000;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
15 my $max_y = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
16 my $n=0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
17 open(OUT,">$fileout");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
18 open(F,$filein);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
19 while(<F>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
20 if (/<path style=" stroke:none;fill-rule:nonzero;fill/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
21
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
22 if (/d=\"M ([\s\d\.]+) L/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
23 my @infos = split(/ /,$1);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
24 my $x = $infos[0];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
25 if ($x < $min_x){$min_x = $x;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
26 if ($x > $max_x){$max_x = $x;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
27 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
28 if (/L ([\s\d\.]+) L/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
29 my @infos = split(/ /,$1);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
30 my $y = $infos[1];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
31 if ($y < $min_y){$min_y = $y;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
32 if ($y > $max_y){$max_y = $y;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
33 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
34 if (/L ([\s\d\.]+) Z/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
35 my @infos = split(/ /,$1);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
36 my $y = $infos[1];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
37 if ($y < $min_y){$min_y = $y;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
38 if ($y > $max_y){$max_y = $y;}
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
39 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
40 $n++;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
41 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
42 else{
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
43 if (!/\<\/svg\>/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
44 print OUT $_;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
45 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
46 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
47 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
48 close(F);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
49
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
50 my $nb_dispensable_clusters = `grep -P -c '\t0' $matrix`;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
51 my $nb_samples = `awk {'print NF-1'} $matrix | head -1`;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
52
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
53 my $global_width = $max_x - $min_x;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
54 my $width_of_one_block = $global_width / $nb_dispensable_clusters;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
55
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
56 my $global_height = $max_y - $min_y;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
57 my $height_of_one_block = $global_height / $nb_samples;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
58
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
59 ###########################################################
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
60 # get distinct pattern of presence/absence
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
61 ###########################################################
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
62 my %patterns;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
63 my $pattern_order = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
64 my %pattern_orders;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
65 open(M,$matrix);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
66 <M>;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
67 while(<M>){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
68 my $line = $_;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
69 $line =~s/\n//g;$line =~s/\r//g;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
70 my @infos = split(/\t/,$line);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
71
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
72 my $pattern = "";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
73 for (my $k=1;$k<=$#infos;$k++){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
74 $pattern.=$infos[$k];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
75 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
76
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
77 # print only dispensable (at least one absence)
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
78 if ($pattern =~/0/){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
79 if (!$patterns{$pattern}){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
80 $pattern_order++;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
81 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
82 $patterns{$pattern}++;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
83 $pattern_orders{$pattern_order} = $pattern;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
84 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
85 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
86 close(M);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
87
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
88 print "Number of distinct patterns:";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
89 print scalar keys(%patterns)."\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
90
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
91 my @colors = ("orange","green","red","blue","black","pink","yellow","brown","grey","purple","darkred");
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
92
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
93 my $cumul_x = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
94 foreach my $pattern_order(sort {$a<=>$b} keys(%pattern_orders)){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
95 my $pattern = $pattern_orders{$pattern_order};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
96 my $size = $patterns{$pattern};
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
97 my $width = $size * $width_of_one_block;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
98 my $x = $max_x - $cumul_x - $width;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
99
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
100 my $modulo = $pattern_order % 2;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
101 print "$pattern_order $pattern $size $modulo\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
102
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
103 #my $color = $colors[$pattern_order-1];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
104 my $color = $colors[$modulo];
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
105
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
106 my $pattern_y = $min_y-15;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
107 print OUT "<rect y='$pattern_y' x='$x' width='$width' height='10' style=\"fill:$color;stroke-width:3;$color;\"/>";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
108
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
109 $cumul_x += $width;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
110 my @values = split(//,$pattern);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
111 my $cumul_y = 0;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
112 foreach my $val(@values){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
113 my $y = $max_y - $cumul_y - $height_of_one_block;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
114 if ($val){
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
115 print OUT "<rect y='$y' x='$x' width='$width' height='$height_of_one_block' style=\"fill:purple;stroke:purple;\"/>";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
116 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
117 $cumul_y += $height_of_one_block;
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
118 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
119 }
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
120
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
121
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
122 print OUT "</svg>\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
123 close(OUT);
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
124
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
125 print "Min x : $min_x\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
126 print "Max x : $max_x\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
127 print "Min y : $min_y\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
128 print "Max y : $max_y\n";
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
129 print "$n\n";