comparison DrawMapOfOccurences.pl @ 0:58111b3965b2 draft default tip

Uploaded
author dereeper
date Thu, 01 Nov 2012 09:35:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:58111b3965b2
1 #!/usr/bin/perl
2
3 use strict;
4 use Switch;
5 use Getopt::Long;
6 use lib ".";
7
# Help text printed when a mandatory option is missing or an option is invalid.
# The defaults quoted here match the values assigned below.
my $usage = qq~Usage:$0 <args>
where <args> are:
-c, --count <SNP count SNiPloid output>
-a, --annotation <annotation file in GFF3>
-o, --output_png <output PNG file>
-s, --scale <scale. Default:10000>
-t, --type_analysis <type of analysis: polyploid_diploid or polyploid_polyploid. Default:polyploid_diploid>
-m, --max_nb_chrom <maximum number of chromomsome to display. Default:20>
-n, --nb_min_snp <minimal number of SNP to calculate ratio. Default:10>
-d, --display_cat <display ratio for each category instead of intra-polyploid and inter-diploid (yes/no). Default:no>
~;
$usage .= "\n";

# Mandatory options (no default).
my ($snp_count, $annotation, $output_png);

# Optional settings, each declared exactly once together with its default.
my $global_scale  = 10000;                  # divisor applied to genomic coordinates
my $max_nb_chrom  = 20;                     # upper bound on the number of chromosomes drawn
my $nb_min_snp    = 10;                     # genes with fewer SNPs are ignored
my $display_cat   = "no";                   # "yes" draws one curve per SNP category
my $type_analysis = "polyploid_diploid";    # default announced by the usage text

# Unknown or malformed options abort with the usage text instead of being
# silently ignored.
GetOptions(
    "count=s"         => \$snp_count,
    "annotation=s"    => \$annotation,
    "output_png=s"    => \$output_png,
    "scale=s"         => \$global_scale,
    "max_nb_chrom=s"  => \$max_nb_chrom,
    "type_analysis=s" => \$type_analysis,
    "nb_min_snp=s"    => \$nb_min_snp,
    "display_cat=s"   => \$display_cat
) or die $usage;

die $usage
    if ( !$snp_count || !$annotation || !$output_png );

# Only these two analysis types are handled by the rest of the script.
die "Unknown type of analysis '$type_analysis'\n$usage"
    unless ( $type_analysis eq "polyploid_diploid" || $type_analysis eq "polyploid_polyploid" );

# The scale is used as a divisor, so it must be a strictly positive number.
die "Scale must be a positive number (got '$global_scale')\n$usage"
    unless ( $global_scale > 0 );
43
# Per-gene statistics collected from the SNP count file.
my %proportions_categories;    # gene => { category => count / total SNPs of the gene }
my %ratios;                    # gene => value of column 14 (called ratio_g1 below)
my %ratios_poly_diploid;       # declared for the drawing code; never filled in this script
my %nb_snps;                   # gene => total number of SNPs (column 3)

# Three-argument open with an explicit error, so a missing or unreadable
# file stops the script here instead of silently yielding an empty map.
open(my $COUNT, '<', $snp_count)
    or die "Cannot open SNP count file '$snp_count': $!\n";

# The first line is discarded (presumably a header line -- confirm against
# the SNiPloid output format).
my $header = <$COUNT>;

while (my $line = <$COUNT>)
{
    chomp($line);
    my @infos = split(/\t/, $line);

    # Blank or truncated lines carry no gene name / SNP total: skip them.
    next if (@infos < 3);

    my $gene   = $infos[0];
    my $nb_snp = $infos[2];

    # Counts that are turned into proportions, keyed by the category name
    # expected by the drawing code. Column meanings follow the variable names
    # of the original script -- confirm against the SNiPloid output format.
    my %count_of;
    if ($type_analysis eq "polyploid_diploid")
    {
        %count_of = (
            "1"    => $infos[3],
            "2"    => $infos[4],
            "3or4" => $infos[5],
            "5"    => $infos[8],
        );

        # Column 14 is kept only when it holds a true (non-empty, non-zero) value.
        my $ratio_g1 = $infos[13];
        if ($ratio_g1)
        {
            $ratios{$gene} = $ratio_g1;
        }
    }
    elsif ($type_analysis eq "polyploid_polyploid")
    {
        %count_of = (
            "equal"      => $infos[3] + $infos[4],
            "difference" => $infos[7],
        );
    }
    else
    {
        # Any other analysis type records nothing.
        next;
    }

    $nb_snps{$gene} = $nb_snp;

    # Proportions are only computed for genes with enough SNPs; the extra
    # "> 0" test avoids a division by zero when --nb_min_snp is 0.
    if ($nb_snp >= $nb_min_snp && $nb_snp > 0)
    {
        foreach my $category (keys %count_of)
        {
            # Zero or missing counts are not stored.
            next if (!$count_of{$category});
            $proportions_categories{$gene}{$category} = $count_of{$category} / $nb_snp;
        }
    }
}
close($COUNT);
126
127
128
129
# Layout information collected from the GFF3 annotation.
my $max_pos = 0;         # largest scaled end coordinate over all chromosomes
my %chrom_sizes;         # chromosome => largest scaled end coordinate seen
my $chrom_particule;     # textual prefix of chromosome names (e.g. "xxx_" in "xxx_12")
my %genes;               # gene => "chromosome:midpoint"
my %gene_positions;      # chromosome => { midpoint => gene }

open(my $ANNOT, '<', $annotation)
    or die "Cannot open annotation file '$annotation': $!\n";

while (my $line = <$ANNOT>)
{
    chomp($line);

    # Keep non-comment lines that mention "gene" anywhere.
    # NOTE(review): this is a substring test on the whole line, not a test of
    # the GFF3 feature-type column -- confirm that this is intended.
    next if ($line =~ /^#/);
    next if ($line !~ /gene/);

    my @infos = split(/\t/, $line);

    # Chromosome names of the form <prefix>_<number> are reduced to their
    # number; the prefix is remembered for the labels. Other names are kept
    # unchanged.
    my $chrom = $infos[0];
    if ($chrom =~ /^(\w+_)(\d+)$/)
    {
        $chrom_particule = $1;
        $chrom = $2;
    }

    # Gene identifier from the attributes column. The ID may be followed by
    # ';' or end the column (GFF3 does not require a trailing separator).
    my $attributes = $infos[8];
    my $gene_name;
    if (defined $attributes && $attributes =~ /ID=([^;]+)/)
    {
        $gene_name = $1;
    }

    # Only identifiers containing "_G1" are used; the part before the last
    # "_G1" is the gene name looked up in the SNP counts.
    next if (!defined $gene_name);
    next if ($gene_name !~ /(.*)_G1/);
    $gene_name = $1;

    # Genes absent from the SNP count file are not placed on the map.
    next if (not defined $nb_snps{$gene_name});

    my $start = $infos[3];
    my $end   = $infos[4];

    # Midpoint of the feature, rounded to an integer.
    my $pos = sprintf("%.0f", ($start + (($end - $start) / 2)));

    # Track the scaled length of each chromosome and the overall maximum.
    $end = $end / $global_scale;
    if (!$chrom_sizes{$chrom} || $end > $chrom_sizes{$chrom})
    {
        $chrom_sizes{$chrom} = $end;
    }
    if ($end > $max_pos)
    {
        $max_pos = $end;
    }

    $genes{$gene_name} = "$chrom:$pos";
    $gene_positions{$chrom}{$pos} = $gene_name;
}
close($ANNOT);
199
200
201
202 use GD;
203 use GD::Simple;
204 use GD::Image;
205
206
207
208
####################
# drawing
####################

# Without any placed gene $max_pos is still 0 and the scale below cannot be
# computed; stop with an explicit message instead of
# "Illegal division by zero".
die "No gene of the annotation file could be placed on the map: "
  . "check that gene IDs (<name>_G1) match the SNP count file\n"
    if (!$max_pos);

# Horizontal factor chosen so that the longest chromosome spans 800 pixels.
my $scale = 800 / $max_pos;

# Layout constants, in pixels.
my $margin_left = 80;
my $margin_right = 50;
my $margin_top = 50;
my $margin_legend = 100;
my $margin_bottom = 10;
my $margin_between_chromosomes = 25;
my $margin_between_section = 50;
my $chrom_width = 10;
my $gene_width = 1;

my $nb_group = 1;

# Number of chromosome rows the picture is sized for, capped by --max_nb_chrom.
my $width_of_picture = scalar keys(%gene_positions);
if ($width_of_picture > $max_nb_chrom)
{
    $width_of_picture = $max_nb_chrom;
}

# Canvas: margins plus the longest chromosome in width; one section per
# chromosome row plus margins and legend area in height.
my $diagram_img = GD::Simple->new(
    ($margin_left + $margin_right + ($max_pos * $scale)),
    ($margin_top
        + ((($chrom_width * $nb_group) + ($margin_between_chromosomes * ($nb_group - 1))) * $width_of_picture)
        + ($margin_between_section * $width_of_picture)
        + $margin_bottom
        + $margin_legend)
);

# Colours used for the per-gene ratio marks and their legend.
my $yellow = $diagram_img->colorAllocate(247,254,46);
my $orange_light = $diagram_img->colorAllocate(250,204,46);
my $red_light = $diagram_img->colorAllocate(254,100,46);
my $red = $diagram_img->colorAllocate(254,46,46);
my $orange = $diagram_img->colorAllocate(254,154,46);
242
243
# draw chromosomes
my $num_chrom = 0;
my @sorted_chrom = sort {$a <=> $b} keys(%gene_positions);

my $nombre_genes = 0;    # number of gene marks drawn (not used further in this script)
my $y_end;               # y coordinate where the legend starts, updated per chromosome

# Draws the vertical mark of one gene inside the chromosome bar.
#   $x            horizontal centre of the mark
#   $top, $bottom vertical extent of the mark
#   $color        GD colour index or colour name
my $draw_gene_mark = sub {
    my ($x, $top, $bottom, $color) = @_;
    $diagram_img->fgcolor($color);
    $diagram_img->bgcolor($color);
    $diagram_img->rectangle($x - ($gene_width / 2), $top, $x + ($gene_width / 2), $bottom);
};

# Extends one proportion curve to the point ($x, $y) and remembers that point.
#   $last_point   hash ref: series name => [x, y] of the previous point
#   $series       name of the curve
#   $color        colour name of the curve
# Nothing is drawn for the first point of a series on a chromosome.
my $extend_curve = sub {
    my ($last_point, $series, $x, $y, $color) = @_;
    my $from = $last_point->{$series};
    $diagram_img->fgcolor($color);
    $diagram_img->bgcolor($color);
    if ($from)
    {
        $diagram_img->moveTo($from->[0], $from->[1]);
        $diagram_img->lineTo($x, $y);
    }
    $last_point->{$series} = [$x, $y];
};

foreach my $chrom (@sorted_chrom)
{
    # Chromosome names that are empty or "0" are skipped.
    next if (!$chrom);

    # Stop once --max_nb_chrom chromosomes have been drawn.
    last if ($num_chrom > ($max_nb_chrom - 1));

    my $section_size = $chrom_width + (($margin_between_chromosomes + $chrom_width) * ($nb_group - 1));

    # Vertical extent of this chromosome's bar.
    my $section_top    = $margin_top + (($section_size + $margin_between_section) * $num_chrom);
    my $section_bottom = $section_top + $chrom_width;

    # draw chromosome (X number of groups)
    $diagram_img->fgcolor('black');
    $diagram_img->bgcolor('white');
    $diagram_img->setThickness(1);
    $diagram_img->rectangle(
        $margin_left,
        $section_top,
        $margin_left + ($chrom_sizes{$chrom} * $scale),
        $section_bottom
    );

    # Chromosome label in the left margin.
    $diagram_img->fgcolor('black');
    $diagram_img->moveTo(5, $section_bottom - 1);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string($chrom_particule . $chrom);

    # The legend starts one section below the last chromosome drawn.
    $y_end = $section_bottom + $section_size + $margin_between_section - 1;

    # Previous point of every curve; reset for each chromosome so curves never
    # join two chromosomes.
    my %last_point;

    # draw genes, ordered by position
    foreach my $pos (sort {$a <=> $b} keys(%{$gene_positions{$chrom}}))
    {
        my $gene = $gene_positions{$chrom}{$pos};
        if (not defined $nb_snps{$gene} or $nb_snps{$gene} < $nb_min_snp)
        {
            next;
        }

        # Horizontal pixel position of the gene midpoint.
        my $x = $margin_left + (($pos / $global_scale) * $scale);

        # Curves are drawn in a 20 pixel high band that ends 7 pixels above
        # the bar; a proportion of 1 reaches the top of the band.
        my $curve_base = $section_top + 1;

        if ($type_analysis eq "polyploid_diploid")
        {
            #####################################################
            # draw ratio (subgenomic contribution)
            #####################################################
            my $color = "gray";
            if ($ratios{$gene})
            {
                my $ratio_g1 = $ratios{$gene};
                if ($ratio_g1 <= 30)
                {
                    $color = $red;
                }
                elsif ($ratio_g1 <= 40)
                {
                    $color = $red_light;
                }
                elsif ($ratio_g1 <= 60)
                {
                    $color = $orange;
                }
                elsif ($ratio_g1 <= 70)
                {
                    $color = $orange_light;
                }
                elsif ($ratio_g1 > 70)
                {
                    $color = $yellow;
                }
            }
            $draw_gene_mark->($x, $section_top + 1, $section_bottom - 1, $color);

            #####################################################
            # draw SNP categories
            #####################################################
            # Thickness 2 stays in effect for the following gene marks of this
            # chromosome; it is reset to 1 when the next chromosome starts.
            $diagram_img->setThickness(2);

            # Category 5 is always drawn; 1, 2 and 3or4 only on request.
            my @series = (["5", "red"]);
            if ($display_cat eq "yes")
            {
                push(@series, ["1", "orange"], ["2", "purple"], ["3or4", "green"]);
            }
            foreach my $serie (@series)
            {
                my ($category, $curve_color) = @$serie;
                # A category without stored proportion counts as 0.
                my $proportion = $proportions_categories{$gene}{$category} || 0;
                $extend_curve->(\%last_point, $category, $x - 1,
                                $curve_base - ($proportion * 20) - 7, $curve_color);
            }
        }

        if ($type_analysis eq "polyploid_polyploid")
        {
            $draw_gene_mark->($x, $section_top + 1, $section_bottom - 1, "gray");

            ##################################################
            # SNP category : equality between 2 polyploids
            ##################################################
            $diagram_img->setThickness(2);
            my $proportion_equal = $proportions_categories{$gene}{"equal"} || 0;
            $extend_curve->(\%last_point, "equal", $x - 1,
                            $curve_base - ($proportion_equal * 20) - 7, "red");
        }

        $nombre_genes++;
    }

    $num_chrom++;
}
498
####################
# legend
####################

# Top of the legend area; 0 when no chromosome was drawn.
my $legend_top = defined $y_end ? $y_end : 0;

# Draws one "curve" legend entry: a 20 pixel long line of thickness 2 in
# $color starting at ($x, $y), followed by $label in black.
my $legend_curve = sub {
    my ($x, $y, $color, $label) = @_;
    $diagram_img->moveTo($x, $y);
    $diagram_img->setThickness(2);
    $diagram_img->fgcolor($color);
    $diagram_img->bgcolor($color);
    $diagram_img->lineTo($x + 20, $y);
    $diagram_img->fgcolor("black");
    $diagram_img->moveTo($x + 25, $y + 5);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string($label);
};

# Draws one "gene mark" legend entry: a small block of thickness 10 in
# $color starting at ($x, $y), followed by $label in black.
my $legend_block = sub {
    my ($x, $y, $color, $label) = @_;
    $diagram_img->moveTo($x, $y);
    $diagram_img->setThickness(10);
    $diagram_img->fgcolor($color);
    $diagram_img->bgcolor($color);
    $diagram_img->lineTo($x + 5, $y);
    $diagram_img->fgcolor("black");
    $diagram_img->moveTo($x + 10, $y + 5);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string($label);
};

if ($type_analysis eq "polyploid_polyploid")
{
    $legend_curve->(5, $legend_top, "red", "% SNP where P1 = P2");
}
elsif ($type_analysis eq "polyploid_diploid")
{
    if ($display_cat eq "yes")
    {
        # The four category entries share the first legend row. Stacking them
        # vertically would cover the contribution legend drawn below, which
        # starts 30 pixels under the top of the legend area.
        my $x = 5;
        foreach my $entry (
            ["orange", "% SNP type 1"],
            ["purple", "% SNP type 2"],
            ["green",  "% SNP type 3 or 4"],
            ["red",    "% SNP type 5"],
        )
        {
            $legend_curve->($x, $legend_top, $entry->[0], $entry->[1]);
            $x += 130;
        }
    }
    else
    {
        $legend_curve->(5, $legend_top, "red",
            "% SNP Class 5 per gene (SNP Intra-Diploids = SNP Intra-Polyploid)");
    }

    # Title of the gene mark colour scale.
    # NOTE(review): the label says %G2 while the plotted value is read from
    # the column the parsing code calls ratio_g1 -- confirm which is right.
    $diagram_img->moveTo(5, $legend_top + 30);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string("Estimate of subgenomic contribution to the transcriptome for each gene (%G2)");

    # Colour scale, one entry every 70 pixels, same thresholds as the gene marks.
    my $x = 25;
    foreach my $entry (
        [$red,          "0-30%"],
        [$red_light,    "30-40%"],
        [$orange,       "40-60%"],
        [$orange_light, "60-70%"],
        [$yellow,       "70-100%"],
    )
    {
        $legend_block->($x, $legend_top + 45, $entry->[0], $entry->[1]);
        $x += 70;
    }

    # Gray marks: genes without a stored ratio.
    $legend_block->(25, $legend_top + 60, "gray",
        "No ratio information, no SNP class 5 in this gene");
}
647
648
# Write the finished diagram as PNG. A lexical handle and three-argument open
# are used, and failures to create or flush the file are reported instead of
# being ignored.
open(my $DIAGRAM_PICT, '>', $output_png)
    or die "Cannot write PNG file '$output_png': $!\n";
binmode($DIAGRAM_PICT);
print {$DIAGRAM_PICT} $diagram_img->png;
close($DIAGRAM_PICT)
    or die "Error while writing PNG file '$output_png': $!\n";