Mercurial > repos > dereeper > pangenome_explorer
comparison Perl/GenerateHeatmapFromPAV.pl @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:97e4e3e818b6 | 3:e42d30da7a74 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use File::Basename; | |
5 my $dirname = dirname(__FILE__); | |
6 | |
7 my $infile = $ARGV[0]; | |
8 my $heatmap = $ARGV[1]; | |
9 | |
10 my %core_cluster; | |
11 my %samples; | |
12 my %sequences; | |
13 my %matrix; | |
14 my %hash; | |
15 my $cl_num = 0; | |
16 my $nb_strains = 1; | |
17 open(M,">$heatmap.accessory_01matrix.txt"); | |
18 open(PAN,">$heatmap.pangenes_01matrix.txt"); | |
19 open(F,"$infile"); | |
20 my $firstline = <F>; | |
21 $firstline =~s/\n//g;$firstline =~s/\r//g; | |
22 #my @infos = split(/","/,$firstline); | |
23 my @infos = split(/\t/,$firstline); | |
24 print M "Gene"; | |
25 print PAN "Gene"; | |
26 #for (my $j=14; $j <= $#infos; $j++){ | |
27 for (my $j=1; $j <= $#infos; $j++){ | |
28 my $gbfile = $infos[$j]; | |
29 $gbfile =~s/\"//g; | |
30 $gbfile =~s/\.gb\.filt//g; | |
31 | |
32 my $strain = $gbfile; | |
33 my @words = split(/_/,$strain); | |
34 my $genus = $words[0]; | |
35 my $species = $words[1]; | |
36 my $shortname = substr($genus,0,3) . "_". substr($species,0,2); | |
37 for (my $j = 2; $j <= $#words; $j++){ | |
38 $shortname.="_".$words[$j]; | |
39 } | |
40 #$shortname = substr($shortname,0,25); | |
41 | |
42 print M "\t".$strain; | |
43 print PAN "\t".$strain; | |
44 $samples{$j} = $strain; | |
45 $nb_strains++; | |
46 } | |
47 print M "\n"; | |
48 print PAN "\n"; | |
49 while(<F>){ | |
50 $cl_num++; | |
51 my $line = $_; | |
52 $line =~s/\n//g;$line =~s/\r//g; | |
53 #my @infos = split(/","/,$line); | |
54 my @infos = split(/\t/,$line); | |
55 my $concat_accessory = ""; | |
56 #for (my $i = 14; $i <= $#infos; $i++){ | |
57 for (my $i = 1; $i <= $#infos; $i++){ | |
58 my $val = $infos[$i]; | |
59 my $sample = $samples{$i}; | |
60 $matrix{$sample}{$cl_num} = $val; | |
61 $val =~s/\"//g; | |
62 if ($val =~/\w+/){ | |
63 $concat_accessory .= "\t1"; | |
64 $hash{$sample}{$cl_num} = 1; | |
65 $sequences{$sample}.= "T"; | |
66 } | |
67 else{ | |
68 $concat_accessory .= "\t0"; | |
69 $hash{$sample}{$cl_num} = 0; | |
70 $sequences{$sample}.= "A"; | |
71 } | |
72 } | |
73 if ($concat_accessory =~/0/){ | |
74 print M $cl_num.$concat_accessory."\n"; | |
75 } | |
76 else{ | |
77 $core_cluster{$cl_num}=1; | |
78 } | |
79 print PAN $cl_num.$concat_accessory."\n"; | |
80 #if ($cl_num > 1000){last;} | |
81 } | |
82 close(F); | |
83 close(M); | |
84 close(PAN); | |
85 | |
86 | |
87 system("ulimit -s 163840;Rscript $dirname/../R/heatmap.R -f $heatmap.accessory_01matrix.txt -o $heatmap.complete.pdf"); | |
88 #system("ulimit -s 163840;Rscript $dirname/../R/heatmaply.R -f $heatmap.accessory_01matrix.txt -o $heatmap.complete2.svg"); | |
89 system("pdf2svg $heatmap.complete.pdf $heatmap.complete.svg"); | |
90 | |
91 | |
92 open(A,">$heatmap.alignment.fa"); | |
93 foreach my $sample(keys(%sequences)){ | |
94 my $seq = $sequences{$sample}; | |
95 print A ">$sample\n"; | |
96 print A "$seq\n"; | |
97 } | |
98 close(A); | |
99 | |
100 | |
101 | |
102 my $min_y = 10000000; | |
103 my $min_x = 10000000; | |
104 my $max_y = 0; | |
105 my $max_x = 0; | |
106 my $step_x; | |
107 my $step_y; | |
108 open(SVG,"$heatmap.complete.svg"); | |
109 open(SVGNEW,">$heatmap.complete.new.svg"); | |
110 while(<SVG>){ | |
111 if (/rgb\(100%,0%,0%\)/){ | |
112 $_ =~s/rgb\(100%,0%,0%\)/rgb\(100%,100%,100%\)/g; | |
113 } | |
114 if (/rgb\(100%,63.529968%,0%\)/){ | |
115 $_ =~s/rgb\(100%,63.529968%,0%\)/rgb\(41%,72%,64%\)/g; | |
116 } | |
117 print SVGNEW $_; | |
118 } | |
119 close(SVG); | |
120 close(SVGNEW); | |
121 | |
122 | |
123 | |
124 my @clusters; | |
125 open(F,"$heatmap.complete.pdf.cols.csv"); | |
126 <F>; | |
127 while(<F>){ | |
128 my $line = $_; | |
129 $line =~s/\n//g;$line =~s/\r//g; | |
130 push(@clusters,$line); | |
131 print $line; | |
132 } | |
133 close(F); | |
134 | |
135 my @strains; | |
136 open(F,"$heatmap.complete.pdf.rows.csv"); | |
137 <F>; | |
138 while(<F>){ | |
139 my $line = $_; | |
140 $line =~s/\n//g;$line =~s/\r//g; | |
141 push(@strains,$line); | |
142 print $line; | |
143 } | |
144 close(F); | |
145 | |
146 | |
147 open(O,">$infile.sorted"); | |
148 open(HP,">$infile.sorted.for_heatmap_plotly.txt"); | |
149 print O "ClutserID"."\t".join("\t",@strains)."\n"; | |
150 print HP "ClutserID"."\t".join("\t",@strains)."\n"; | |
151 foreach my $cl(reverse(@clusters)){ | |
152 print O $cl; | |
153 my $clnb = $cl; | |
154 if (length($clnb) == 1){$clnb = "000".$clnb;} | |
155 elsif (length($clnb) == 2){$clnb = "00".$clnb;} | |
156 elsif (length($clnb) == 3){$clnb = "0".$clnb;} | |
157 my $name = "CLUSTER".$clnb; | |
158 | |
159 print HP $name; | |
160 foreach my $strain(@strains){ | |
161 my $val = $matrix{$strain}{$cl}; | |
162 print O "\t$val"; | |
163 if ($val =~/\w/){print HP "\t1";} | |
164 else{print HP "\t0";} | |
165 } | |
166 print O "\n"; | |
167 print HP "\n"; | |
168 } | |
169 foreach my $cl(keys(%core_cluster)){ | |
170 print O $cl; | |
171 my $clnb = $cl; | |
172 if (length($clnb) == 1){$clnb = "000".$clnb;} | |
173 elsif (length($clnb) == 2){$clnb = "00".$clnb;} | |
174 elsif (length($clnb) == 3){$clnb = "0".$clnb;} | |
175 my $name = "CLUSTER".$clnb; | |
176 print HP $name; | |
177 foreach my $strain(@strains){ | |
178 my $val = $matrix{$strain}{$cl}; | |
179 print O "\t$val"; | |
180 if ($val =~/\w/){print HP "\t1";} | |
181 else{print HP "\t0";} | |
182 | |
183 } | |
184 print O "\n"; | |
185 print HP "\n"; | |
186 } | |
187 close(O); | |
188 close(HP); | |
189 | |
190 my $svg_section = ""; | |
191 | |
192 if (scalar @clusters == 0){ | |
193 system("touch $heatmap.upsetr.svg"); | |
194 if (!-e "$heatmap"){ | |
195 system("touch $heatmap"); | |
196 } | |
197 exit; | |
198 } | |
199 system("cp -rf $infile $infile.bkp"); | |
200 system("cp -rf $infile.sorted $infile"); | |
201 | |
202 my %combinaisons; | |
203 my $x = $min_x; | |
204 my $length_matrix = ($max_x-$min_x); | |
205 my $height_matrix = ($max_y-$min_y); | |
206 my $step_x = $length_matrix / scalar @clusters; | |
207 #my $step_y = $height_matrix / scalar @strains; | |
208 | |
209 print "$height_matrix $length_matrix $step_x $step_y\n"; | |
210 my $previous_combinaison = ""; | |
211 my $combinaison_id = 0; | |
212 my %combinaison_ids; | |
213 open(M,">$heatmap.upsetr.txt"); | |
214 print M "ClutserID"; | |
215 foreach my $strain(@strains){ | |
216 my @words = split(/_/,$strain); | |
217 my $genus = $words[0]; | |
218 my $species = $words[1]; | |
219 my $shortname = substr($genus,0,3) . "_". substr($species,0,2); | |
220 for (my $j = 2; $j <= $#words; $j++){ | |
221 $shortname.="_".$words[$j]; | |
222 } | |
223 $shortname = substr($shortname,0,25); | |
224 print M "\t".$shortname; | |
225 } | |
226 print M "\n"; | |
227 foreach my $cluster(@clusters){ | |
228 $x = $x+$step_x; | |
229 my $combinaison = ""; | |
230 my $numstrain; | |
231 print M "$cluster"; | |
232 foreach my $strain(@strains){ | |
233 $numstrain++; | |
234 my $val = $hash{$strain}{$cluster}; | |
235 print M "\t".$val; | |
236 if ($val == 1){ | |
237 $combinaison .= ",$numstrain"; | |
238 } | |
239 } | |
240 if ($combinaison ne $previous_combinaison){ | |
241 $combinaison_id++; | |
242 } | |
243 print M "\n"; | |
244 $previous_combinaison = $combinaison; | |
245 $combinaison_ids{$combinaison_id}=$combinaison; | |
246 #print "Cluster:$cluster Combinaison: $combinaison X:$x $combinaison_id\n"; | |
247 $combinaisons{$combinaison_id} .= "|".$x; | |
248 | |
249 } | |
250 close(M); | |
251 | |
252 | |
253 my $nb_strains = scalar @strains; | |
254 system("Rscript $dirname/../R/upsetr.R $heatmap.upsetr.txt $heatmap.upsetr.pdf $nb_strains >>$heatmap.upsetr.log 2>&1"); | |
255 system("pdf2svg $heatmap.upsetr.pdf $heatmap.upsetr.svg 2"); | |
256 | |
257 system("python3 $dirname/../Python/Heatmap.py -i $infile.sorted.for_heatmap_plotly.txt -o $heatmap.heatmap_plotly.html >>$heatmap.heatmap_plotly.log 2>&1"); | |
258 | |
259 if (!-e "$heatmap.upsetr.svg"){ | |
260 system("touch $heatmap.upsetr.svg"); | |
261 } | |
262 system("perl $dirname/../Perl/reformatHeatmapSVG.pl $heatmap.complete.svg $heatmap.complete.new.svg $infile.sorted.for_heatmap_plotly.txt"); | |
263 system("cp -rf $heatmap.complete.new.svg $heatmap"); | |
264 system("gzip $heatmap"); |