diff PanExplorer_workflow/Perl/reformatHeatmapSVG.pl @ 1:032f6b3806a3 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:16:08 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PanExplorer_workflow/Perl/reformatHeatmapSVG.pl	Thu May 30 11:16:08 2024 +0000
@@ -0,0 +1,129 @@
+#!/usr/bin/perl
+
+use strict;
+
+my $filein = $ARGV[0];
+my $fileout = $ARGV[1];
+my $matrix = $ARGV[2];
+
+
+
+
+my $min_x = 100000;
+my $max_x = 0;
+my $min_y = 100000;
+my $max_y = 0;
+my $n=0;
+open(OUT,">$fileout");
+open(F,$filein);
+while(<F>){
+	if (/<path style=" stroke:none;fill-rule:nonzero;fill/){
+
+		if (/d=\"M ([\s\d\.]+) L/){
+			my @infos = split(/ /,$1);
+			my $x = $infos[0];
+			if ($x < $min_x){$min_x = $x;}
+			if ($x > $max_x){$max_x = $x;}
+		}
+		if (/L ([\s\d\.]+) L/){
+			my @infos = split(/ /,$1);
+			my $y = $infos[1];
+			if ($y < $min_y){$min_y = $y;}
+			if ($y > $max_y){$max_y = $y;}
+                }
+		if (/L ([\s\d\.]+) Z/){
+                        my @infos = split(/ /,$1);
+                        my $y = $infos[1];
+                        if ($y < $min_y){$min_y = $y;}
+                        if ($y > $max_y){$max_y = $y;}
+                }
+		$n++;
+	}
+	else{
+		if (!/\<\/svg\>/){
+			print OUT $_;
+		}
+	}
+}
+close(F);
+
+my $nb_dispensable_clusters = `grep -P -c '\t0' $matrix`;
+my $nb_samples = `awk {'print NF-1'} $matrix | head -1`;
+
+my $global_width = $max_x - $min_x;
+my $width_of_one_block = $global_width / $nb_dispensable_clusters;
+
+my $global_height = $max_y - $min_y;
+my $height_of_one_block = $global_height / $nb_samples;
+
+###########################################################
+# get distinct pattern of presence/absence
+###########################################################
+my %patterns;
+my $pattern_order = 0;
+my %pattern_orders;
+open(M,$matrix);
+<M>;
+while(<M>){
+	my $line = $_;
+	$line =~s/\n//g;$line =~s/\r//g;
+	my @infos = split(/\t/,$line);
+	
+	my $pattern = "";
+	for (my $k=1;$k<=$#infos;$k++){
+		$pattern.=$infos[$k];
+	}
+
+	# print only dispensable (at least one absence)
+	if ($pattern =~/0/){
+		if (!$patterns{$pattern}){
+			$pattern_order++;
+		}
+		$patterns{$pattern}++;
+		$pattern_orders{$pattern_order} = $pattern;
+	}
+}
+close(M);
+
+print "Number of distinct patterns:";
+print scalar keys(%patterns)."\n";
+
+my @colors = ("orange","green","red","blue","black","pink","yellow","brown","grey","purple","darkred");
+
+my $cumul_x = 0;
+foreach my $pattern_order(sort {$a<=>$b} keys(%pattern_orders)){
+	my $pattern = $pattern_orders{$pattern_order};
+	my $size = $patterns{$pattern};
+	my $width = $size * $width_of_one_block;
+	my $x = $max_x - $cumul_x - $width;
+
+	my $modulo = $pattern_order % 2;
+	print "$pattern_order $pattern $size $modulo\n";
+	
+	#my $color = $colors[$pattern_order-1];
+	my $color = $colors[$modulo];
+
+	my $pattern_y = $min_y-15;
+	print OUT "<rect y='$pattern_y' x='$x' width='$width' height='10' style=\"fill:$color;stroke-width:3;$color;\"/>";
+
+	$cumul_x += $width;
+	my @values = split(//,$pattern);
+	my $cumul_y = 0;
+	foreach my $val(@values){
+		my $y = $max_y - $cumul_y - $height_of_one_block;
+		if ($val){
+			print OUT "<rect y='$y' x='$x' width='$width' height='$height_of_one_block' style=\"fill:purple;stroke:purple;\"/>";
+		}
+		$cumul_y += $height_of_one_block;
+	}
+}
+
+
+print OUT "</svg>\n";
+close(OUT);
+
+print "Min x : $min_x\n";
+print "Max x : $max_x\n";
+print "Min y : $min_y\n";
+print "Max y : $max_y\n";
+print "$n\n";