3
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4 use File::Basename;
|
|
5 my $dirname = dirname(__FILE__);
|
|
6
|
|
7 my $infile = $ARGV[0];
|
|
8 my $heatmap = $ARGV[1];
|
|
9
|
|
10 my %core_cluster;
|
|
11 my %samples;
|
|
12 my %sequences;
|
|
13 my %matrix;
|
|
14 my %hash;
|
|
15 my $cl_num = 0;
|
|
16 my $nb_strains = 1;
|
|
17 open(M,">$heatmap.accessory_01matrix.txt");
|
|
18 open(PAN,">$heatmap.pangenes_01matrix.txt");
|
|
19 open(F,"$infile");
|
|
20 my $firstline = <F>;
|
|
21 $firstline =~s/\n//g;$firstline =~s/\r//g;
|
|
22 #my @infos = split(/","/,$firstline);
|
|
23 my @infos = split(/\t/,$firstline);
|
|
24 print M "Gene";
|
|
25 print PAN "Gene";
|
|
26 #for (my $j=14; $j <= $#infos; $j++){
|
|
27 for (my $j=1; $j <= $#infos; $j++){
|
|
28 my $gbfile = $infos[$j];
|
|
29 $gbfile =~s/\"//g;
|
|
30 $gbfile =~s/\.gb\.filt//g;
|
|
31
|
|
32 my $strain = $gbfile;
|
|
33 my @words = split(/_/,$strain);
|
|
34 my $genus = $words[0];
|
|
35 my $species = $words[1];
|
|
36 my $shortname = substr($genus,0,3) . "_". substr($species,0,2);
|
|
37 for (my $j = 2; $j <= $#words; $j++){
|
|
38 $shortname.="_".$words[$j];
|
|
39 }
|
|
40 #$shortname = substr($shortname,0,25);
|
|
41
|
|
42 print M "\t".$strain;
|
|
43 print PAN "\t".$strain;
|
|
44 $samples{$j} = $strain;
|
|
45 $nb_strains++;
|
|
46 }
|
|
47 print M "\n";
|
|
48 print PAN "\n";
|
|
49 while(<F>){
|
|
50 $cl_num++;
|
|
51 my $line = $_;
|
|
52 $line =~s/\n//g;$line =~s/\r//g;
|
|
53 #my @infos = split(/","/,$line);
|
|
54 my @infos = split(/\t/,$line);
|
|
55 my $concat_accessory = "";
|
|
56 #for (my $i = 14; $i <= $#infos; $i++){
|
|
57 for (my $i = 1; $i <= $#infos; $i++){
|
|
58 my $val = $infos[$i];
|
|
59 my $sample = $samples{$i};
|
|
60 $matrix{$sample}{$cl_num} = $val;
|
|
61 $val =~s/\"//g;
|
|
62 if ($val =~/\w+/){
|
|
63 $concat_accessory .= "\t1";
|
|
64 $hash{$sample}{$cl_num} = 1;
|
|
65 $sequences{$sample}.= "T";
|
|
66 }
|
|
67 else{
|
|
68 $concat_accessory .= "\t0";
|
|
69 $hash{$sample}{$cl_num} = 0;
|
|
70 $sequences{$sample}.= "A";
|
|
71 }
|
|
72 }
|
|
73 if ($concat_accessory =~/0/){
|
|
74 print M $cl_num.$concat_accessory."\n";
|
|
75 }
|
|
76 else{
|
|
77 $core_cluster{$cl_num}=1;
|
|
78 }
|
|
79 print PAN $cl_num.$concat_accessory."\n";
|
|
80 #if ($cl_num > 1000){last;}
|
|
81 }
|
|
82 close(F);
|
|
83 close(M);
|
|
84 close(PAN);
|
|
85
|
|
86
|
|
87 system("ulimit -s 163840;Rscript $dirname/../R/heatmap.R -f $heatmap.accessory_01matrix.txt -o $heatmap.complete.pdf");
|
|
88 #system("ulimit -s 163840;Rscript $dirname/../R/heatmaply.R -f $heatmap.accessory_01matrix.txt -o $heatmap.complete2.svg");
|
|
89 system("pdf2svg $heatmap.complete.pdf $heatmap.complete.svg");
|
|
90
|
|
91
|
|
92 open(A,">$heatmap.alignment.fa");
|
|
93 foreach my $sample(keys(%sequences)){
|
|
94 my $seq = $sequences{$sample};
|
|
95 print A ">$sample\n";
|
|
96 print A "$seq\n";
|
|
97 }
|
|
98 close(A);
|
|
99
|
|
100
|
|
101
|
|
102 my $min_y = 10000000;
|
|
103 my $min_x = 10000000;
|
|
104 my $max_y = 0;
|
|
105 my $max_x = 0;
|
|
106 my $step_x;
|
|
107 my $step_y;
|
|
108 open(SVG,"$heatmap.complete.svg");
|
|
109 open(SVGNEW,">$heatmap.complete.new.svg");
|
|
110 while(<SVG>){
|
|
111 if (/rgb\(100%,0%,0%\)/){
|
|
112 $_ =~s/rgb\(100%,0%,0%\)/rgb\(100%,100%,100%\)/g;
|
|
113 }
|
|
114 if (/rgb\(100%,63.529968%,0%\)/){
|
|
115 $_ =~s/rgb\(100%,63.529968%,0%\)/rgb\(41%,72%,64%\)/g;
|
|
116 }
|
|
117 print SVGNEW $_;
|
|
118 }
|
|
119 close(SVG);
|
|
120 close(SVGNEW);
|
|
121
|
|
122
|
|
123
|
|
124 my @clusters;
|
|
125 open(F,"$heatmap.complete.pdf.cols.csv");
|
|
126 <F>;
|
|
127 while(<F>){
|
|
128 my $line = $_;
|
|
129 $line =~s/\n//g;$line =~s/\r//g;
|
|
130 push(@clusters,$line);
|
|
131 print $line;
|
|
132 }
|
|
133 close(F);
|
|
134
|
|
135 my @strains;
|
|
136 open(F,"$heatmap.complete.pdf.rows.csv");
|
|
137 <F>;
|
|
138 while(<F>){
|
|
139 my $line = $_;
|
|
140 $line =~s/\n//g;$line =~s/\r//g;
|
|
141 push(@strains,$line);
|
|
142 print $line;
|
|
143 }
|
|
144 close(F);
|
|
145
|
|
146
|
|
147 open(O,">$infile.sorted");
|
|
148 open(HP,">$infile.sorted.for_heatmap_plotly.txt");
|
|
149 print O "ClutserID"."\t".join("\t",@strains)."\n";
|
|
150 print HP "ClutserID"."\t".join("\t",@strains)."\n";
|
|
151 foreach my $cl(reverse(@clusters)){
|
|
152 print O $cl;
|
|
153 my $clnb = $cl;
|
|
154 if (length($clnb) == 1){$clnb = "000".$clnb;}
|
|
155 elsif (length($clnb) == 2){$clnb = "00".$clnb;}
|
|
156 elsif (length($clnb) == 3){$clnb = "0".$clnb;}
|
|
157 my $name = "CLUSTER".$clnb;
|
|
158
|
|
159 print HP $name;
|
|
160 foreach my $strain(@strains){
|
|
161 my $val = $matrix{$strain}{$cl};
|
|
162 print O "\t$val";
|
|
163 if ($val =~/\w/){print HP "\t1";}
|
|
164 else{print HP "\t0";}
|
|
165 }
|
|
166 print O "\n";
|
|
167 print HP "\n";
|
|
168 }
|
|
169 foreach my $cl(keys(%core_cluster)){
|
|
170 print O $cl;
|
|
171 my $clnb = $cl;
|
|
172 if (length($clnb) == 1){$clnb = "000".$clnb;}
|
|
173 elsif (length($clnb) == 2){$clnb = "00".$clnb;}
|
|
174 elsif (length($clnb) == 3){$clnb = "0".$clnb;}
|
|
175 my $name = "CLUSTER".$clnb;
|
|
176 print HP $name;
|
|
177 foreach my $strain(@strains){
|
|
178 my $val = $matrix{$strain}{$cl};
|
|
179 print O "\t$val";
|
|
180 if ($val =~/\w/){print HP "\t1";}
|
|
181 else{print HP "\t0";}
|
|
182
|
|
183 }
|
|
184 print O "\n";
|
|
185 print HP "\n";
|
|
186 }
|
|
187 close(O);
|
|
188 close(HP);
|
|
189
|
|
190 my $svg_section = "";
|
|
191
|
|
192 if (scalar @clusters == 0){
|
|
193 system("touch $heatmap.upsetr.svg");
|
|
194 if (!-e "$heatmap"){
|
|
195 system("touch $heatmap");
|
|
196 }
|
|
197 exit;
|
|
198 }
|
|
199 system("cp -rf $infile $infile.bkp");
|
|
200 system("cp -rf $infile.sorted $infile");
|
|
201
|
|
202 my %combinaisons;
|
|
203 my $x = $min_x;
|
|
204 my $length_matrix = ($max_x-$min_x);
|
|
205 my $height_matrix = ($max_y-$min_y);
|
|
206 my $step_x = $length_matrix / scalar @clusters;
|
|
207 #my $step_y = $height_matrix / scalar @strains;
|
|
208
|
|
209 print "$height_matrix $length_matrix $step_x $step_y\n";
|
|
210 my $previous_combinaison = "";
|
|
211 my $combinaison_id = 0;
|
|
212 my %combinaison_ids;
|
|
213 open(M,">$heatmap.upsetr.txt");
|
|
214 print M "ClutserID";
|
|
215 foreach my $strain(@strains){
|
|
216 my @words = split(/_/,$strain);
|
|
217 my $genus = $words[0];
|
|
218 my $species = $words[1];
|
|
219 my $shortname = substr($genus,0,3) . "_". substr($species,0,2);
|
|
220 for (my $j = 2; $j <= $#words; $j++){
|
|
221 $shortname.="_".$words[$j];
|
|
222 }
|
|
223 $shortname = substr($shortname,0,25);
|
|
224 print M "\t".$shortname;
|
|
225 }
|
|
226 print M "\n";
|
|
227 foreach my $cluster(@clusters){
|
|
228 $x = $x+$step_x;
|
|
229 my $combinaison = "";
|
|
230 my $numstrain;
|
|
231 print M "$cluster";
|
|
232 foreach my $strain(@strains){
|
|
233 $numstrain++;
|
|
234 my $val = $hash{$strain}{$cluster};
|
|
235 print M "\t".$val;
|
|
236 if ($val == 1){
|
|
237 $combinaison .= ",$numstrain";
|
|
238 }
|
|
239 }
|
|
240 if ($combinaison ne $previous_combinaison){
|
|
241 $combinaison_id++;
|
|
242 }
|
|
243 print M "\n";
|
|
244 $previous_combinaison = $combinaison;
|
|
245 $combinaison_ids{$combinaison_id}=$combinaison;
|
|
246 #print "Cluster:$cluster Combinaison: $combinaison X:$x $combinaison_id\n";
|
|
247 $combinaisons{$combinaison_id} .= "|".$x;
|
|
248
|
|
249 }
|
|
250 close(M);
|
|
251
|
|
252
|
|
253 my $nb_strains = scalar @strains;
|
|
254 system("Rscript $dirname/../R/upsetr.R $heatmap.upsetr.txt $heatmap.upsetr.pdf $nb_strains >>$heatmap.upsetr.log 2>&1");
|
|
255 system("pdf2svg $heatmap.upsetr.pdf $heatmap.upsetr.svg 2");
|
|
256
|
|
257 system("python3 $dirname/../Python/Heatmap.py -i $infile.sorted.for_heatmap_plotly.txt -o $heatmap.heatmap_plotly.html >>$heatmap.heatmap_plotly.log 2>&1");
|
|
258
|
|
259 if (!-e "$heatmap.upsetr.svg"){
|
|
260 system("touch $heatmap.upsetr.svg");
|
|
261 }
|
|
262 system("perl $dirname/../Perl/reformatHeatmapSVG.pl $heatmap.complete.svg $heatmap.complete.new.svg $infile.sorted.for_heatmap_plotly.txt");
|
|
263 system("cp -rf $heatmap.complete.new.svg $heatmap");
|
|
264 system("gzip $heatmap");
|