comparison DrawMapOfOccurences.pl @ 0:e94de0ea3351 draft default tip

Uploaded
author dereeper
date Wed, 11 Sep 2013 09:08:15 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e94de0ea3351
1 #!/usr/bin/perl
2
3 use strict;
4 use Switch;
5 use Getopt::Long;
6 use lib ".";
7
# Help text printed (through die) whenever the command line is incomplete or
# invalid. The single-letter forms work because Getopt::Long accepts unique
# abbreviations of the long option names.
my $usage = qq~Usage:$0 <args>
where <args> are:
    -c, --count          <SNP count SNiPloid output>
    -a, --annotation     <annotation file in GFF3>
    -o, --output_png     <output PNG file>
    -s, --scale          <scale. Default:10000>
    -t, --type_analysis  <type of analysis: polyploid_diploid or polyploid_polyploid. Default:polyploid_diploid>
    -m, --max_nb_chrom   <maximum number of chromomsome to display. Default:20>
    -n, --nb_min_snp     <minimal number of SNP to calculate ratio. Default:10>
    -d, --display_cat    <display ratio for each category instead of intra-polyploid and inter-diploid (yes/no). Default:no>
~;
$usage .= "\n";

# Mandatory arguments: there is no sensible default for the three files.
my ($snp_count, $annotation, $output_png);

# Optional arguments, initialised with the defaults advertised in the usage.
# Each variable is declared exactly once (the previous code declared
# $global_scale and $max_nb_chrom twice with "my").
my $global_scale  = 10000;
my $max_nb_chrom  = 20;
my $nb_min_snp    = 10;
my $display_cat   = "no";
my $type_analysis = "polyploid_diploid";

# GetOptions returns false when the command line could not be parsed
# (unknown option, missing value): show the usage instead of going on.
GetOptions(
    "count=s"         => \$snp_count,
    "annotation=s"    => \$annotation,
    "output_png=s"    => \$output_png,
    "scale=s"         => \$global_scale,
    "max_nb_chrom=s"  => \$max_nb_chrom,
    "type_analysis=s" => \$type_analysis,
    "nb_min_snp=s"    => \$nb_min_snp,
    "display_cat=s"   => \$display_cat
) or die $usage;

die $usage
    if ( !$snp_count || !$annotation || !$output_png || !$type_analysis );

# Only two kinds of analysis are handled by the parsing and drawing code.
die "Unknown type of analysis: $type_analysis\n\n$usage"
    if ( $type_analysis ne "polyploid_diploid" && $type_analysis ne "polyploid_polyploid" );

# The scale is used as a divisor for every coordinate, so it must be positive.
die "The scale must be a positive number\n\n$usage"
    unless ( $global_scale > 0 );
43
# Per-gene data collected from the SNP count file (tab-separated):
#   %proportions_categories : gene => { category => count / number of SNPs }
#   %ratios                 : gene => value of column 14 (stored when true)
#   %ratios_poly_diploid    : read by the drawing code, never filled here
#   %nb_snps                : gene => number of SNPs (column 3)
my %proportions_categories;
my %ratios;
my %ratios_poly_diploid;
my %nb_snps;

# polyploid_diploid layout, as named by the original code (0-based index):
#   3: nb_1   4: nb_2   5: nb_3or4   6: nb_3   7: nb_4   8: nb_5   9: nb_other
#   10: nb_heterozygot_diploid   11: nb_snp_diploid   12: nb_snp_polyploid
#   13: ratio_g1   14: ratio_g2
# Only the categories below are turned into proportions.
my %column_of_category = (
    "1"    => 3,
    "2"    => 4,
    "3or4" => 5,
    "5"    => 8,
);

open(my $COUNT, '<', $snp_count)
    or die "Cannot open SNP count file '$snp_count': $!\n";

# The first line is discarded (presumably a header line -- TODO confirm).
<$COUNT>;

while (my $line = <$COUNT>)
{
    chomp($line);
    my @infos = split(/\t/, $line);

    # Ignore blank lines and lines without a gene identifier.
    next if ( !@infos || $infos[0] eq '' );

    my $gene   = $infos[0];
    my $nb_snp = $infos[2];

    # Proportions are only computed for genes with enough SNPs. The explicit
    # "> 0" protects the divisions when --nb_min_snp is set to 0.
    my $enough_snps = ( $nb_snp >= $nb_min_snp && $nb_snp > 0 );

    if ($type_analysis eq "polyploid_diploid")
    {
        $nb_snps{$gene} = $nb_snp;

        if ($enough_snps)
        {
            foreach my $category (keys %column_of_category)
            {
                my $count = $infos[ $column_of_category{$category} ];

                # A category is stored only when its count is non-zero.
                $proportions_categories{$gene}{$category} = $count / $nb_snp if ($count);
            }
        }

        my $ratio_g1 = $infos[13];
        $ratios{$gene} = $ratio_g1 if ($ratio_g1);
    }
    elsif ($type_analysis eq "polyploid_polyploid")
    {
        $nb_snps{$gene} = $nb_snp;

        # Columns 4 and 5 are added up as "equal", column 8 is "difference".
        my $nb_equal = $infos[3] + $infos[4];
        my $nb_diff  = $infos[7];

        if ($enough_snps)
        {
            $proportions_categories{$gene}{"equal"}      = $nb_equal / $nb_snp if ($nb_equal);
            $proportions_categories{$gene}{"difference"} = $nb_diff / $nb_snp  if ($nb_diff);
        }
    }
}
close($COUNT);
126
127
128
129
# Data collected from the GFF3 annotation:
#   $max_pos         : largest scaled gene end over all chromosomes
#   %chrom_sizes     : chromosome => largest scaled gene end on it
#   $chrom_particule : textual prefix of the chromosome names (last one seen)
#   %genes           : gene => "chromosome:position"
#   %gene_positions  : chromosome => { position => gene }
my $max_pos = 0;
my %chrom_sizes;
my $chrom_particule;
my %genes;
my %gene_positions;

open(my $ANNOT, '<', $annotation)
    or die "Cannot open annotation file '$annotation': $!\n";

while (my $line = <$ANNOT>)
{
    chomp($line);

    # Keep non-comment lines mentioning "gene" anywhere on the line.
    # NOTE(review): this is not restricted to the feature-type column, so
    # other features whose attributes contain "gene" are also examined.
    next if ( $line =~ /^#/ || $line !~ /gene/ );

    my @infos = split(/\t/, $line);

    # The attributes are in the ninth column: shorter lines cannot be used.
    next if ( @infos < 9 );

    # Chromosome names such as "prefix_12" are split into prefix and number.
    my $chrom = $infos[0];
    if ($chrom =~ /^(\w+_)(\d+)$/)
    {
        $chrom_particule = $1;
        $chrom           = $2;
    }

    # Gene name: Name attribute first, ID attribute as fallback. The ID does
    # not need a trailing ";" so that it is also found when it is the last
    # attribute of the line.
    my $attributes = $infos[8];
    my $gene_name;
    if ($attributes =~ /Name=([^;]+);/)
    {
        $gene_name = $1;
    }
    if (!$gene_name && $attributes =~ /ID=([^;]+)/)
    {
        $gene_name = $1;
    }

    # Only names containing "_G1" are kept; the part before it is the gene
    # identifier looked up in the SNP count data.
    my ($base_name) = defined $gene_name ? ( $gene_name =~ /(.*)_G1/ ) : ();
    next if ( !defined $base_name );
    $gene_name = $base_name;

    # Genes absent from the SNP count file are ignored.
    next if ( not defined $nb_snps{$gene_name} );

    # The gene is placed at the middle of its start-end interval.
    my $start = $infos[3];
    my $end   = $infos[4];
    my $pos   = sprintf("%.0f", ($start + (($end - $start) / 2)));

    # Track the largest scaled end per chromosome and overall.
    $end = $end / $global_scale;
    if ( !$chrom_sizes{$chrom} || $end > $chrom_sizes{$chrom} )
    {
        $chrom_sizes{$chrom} = $end;
        $max_pos = $end if ( $end > $max_pos );
    }

    $genes{$gene_name} = "$chrom:$pos";
    $gene_positions{$chrom}{$pos} = $gene_name;
}
close($ANNOT);
203
204
205
206 use GD;
207 use GD::Simple;
208 use GD::Image;
209
210
211
212
213 ####################
214 # drawing
215 ####################
216
# Without any usable gene $max_pos is still 0 and the scale below would die
# with "Illegal division by zero": stop with an explicit message instead.
die "Nothing to draw: no gene of the annotation file matches the SNP count file\n"
    if ( !$max_pos );

# Horizontal scale: the longest chromosome is drawn 800 pixels wide.
my $scale = 800 / $max_pos;

# Layout constants, in pixels.
my $margin_left                = 80;
my $margin_right               = 50;
my $margin_top                 = 50;
my $margin_legend              = 100;
my $margin_bottom              = 10;
my $margin_between_chromosomes = 25;
my $margin_between_section     = 50;
my $chrom_width                = 10;
my $gene_width                 = 1;

my $nb_group = 1;

# Number of chromosome tracks the picture is sized for, capped by
# --max_nb_chrom.
my $width_of_picture = scalar keys(%gene_positions);
if ($width_of_picture > $max_nb_chrom)
{
    $width_of_picture = $max_nb_chrom;
}

# Canvas: 800 pixels of chromosome plus side margins in width; one section
# per chromosome plus top, bottom and legend margins in height.
my $diagram_img = GD::Simple->new(
    ($margin_left + $margin_right + ($max_pos * $scale)),
    ($margin_top
        + ((($chrom_width * $nb_group) + ($margin_between_chromosomes * ($nb_group - 1))) * $width_of_picture)
        + ($margin_between_section * $width_of_picture)
        + $margin_bottom
        + $margin_legend)
);

# Colours of the gene ticks, one per ratio interval (see the legend).
my $yellow       = $diagram_img->colorAllocate(247, 254, 46);
my $orange_light = $diagram_img->colorAllocate(250, 204, 46);
my $red_light    = $diagram_img->colorAllocate(254, 100, 46);
my $red          = $diagram_img->colorAllocate(254, 46, 46);
my $orange       = $diagram_img->colorAllocate(254, 154, 46);


# draw chromosomes
my $num_chrom    = 0;                                          # tracks drawn so far
my @sorted_chrom = sort { $a <=> $b } keys(%gene_positions);   # numerical order

my $nombre_genes = 0;                                          # genes drawn so far
my $y_end;                                                     # y of the legend, set while drawing
# Draw one horizontal track per chromosome: the chromosome outline, its
# label, one coloured tick per gene and, above the track, the curves joining
# the per-gene SNP proportions.
CHROMOSOME:
foreach my $chrom (@sorted_chrom)
{
    # Chromosome identifiers that are false (empty or 0) are not drawn.
    next CHROMOSOME if ( !$chrom );

    # Never draw more than --max_nb_chrom tracks.
    last CHROMOSOME if ( $num_chrom > ($max_nb_chrom - 1) );

    # Vertical geometry of the current track.
    my $section_size = $chrom_width + (($margin_between_chromosomes + $chrom_width) * ($nb_group - 1));
    my $track_top    = $margin_top + (($section_size + $margin_between_section) * $num_chrom);
    my $track_bottom = $margin_top + $chrom_width + (($section_size + $margin_between_section) * $num_chrom);

    # Chromosome outline: black border, white inside.
    $diagram_img->fgcolor('black');
    $diagram_img->bgcolor('white');
    $diagram_img->setThickness(1);
    $diagram_img->rectangle(
        $margin_left,
        $track_top,
        $margin_left + ($chrom_sizes{$chrom} * $scale),
        $track_bottom
    );

    # Chromosome label in the left margin.
    $diagram_img->fgcolor('black');
    $diagram_img->moveTo(5, $track_bottom - 1);

    # The legend is drawn one section below the last chromosome drawn.
    $y_end = $margin_top + $chrom_width + (($section_size + $margin_between_section) * ($num_chrom + 1)) - 1;

    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string($chrom_particule . $chrom);

    # Last point of every curve on this chromosome: series => [x, y].
    my %last_point;

    # Append a point to a curve. When $connect is true a segment is drawn
    # from the previous point of the series to the new one; the optional
    # thickness is applied before the colours are selected.
    my $extend_curve = sub {
        my ($series, $colour, $x, $y, $connect, $thickness) = @_;

        $diagram_img->moveTo(@{ $last_point{$series} }) if ($connect);
        $diagram_img->setThickness($thickness) if (defined $thickness);
        $diagram_img->fgcolor($colour);
        $diagram_img->bgcolor($colour);
        $last_point{$series} = [$x, $y];
        $diagram_img->lineTo($x, $y) if ($connect);
    };

    # draw genes, from left to right
    GENE:
    foreach my $raw_pos (sort { $a <=> $b } keys %{ $gene_positions{$chrom} })
    {
        my $gene = $gene_positions{$chrom}{$raw_pos};

        # Genes without enough SNPs are left out of the picture.
        next GENE if ( !defined($nb_snps{$gene}) || $nb_snps{$gene} < $nb_min_snp );

        # Horizontal centre of the gene tick and baseline of the curves.
        my $x_gene     = $margin_left + (($raw_pos / $global_scale) * $scale);
        my $curve_base = $track_top + 1;

        if ($type_analysis eq "polyploid_diploid")
        {
            #####################################################
            # gene tick coloured by ratio (subgenomic contribution)
            #####################################################
            my $tick_colour = "gray";
            if ($ratios{$gene})
            {
                my $ratio_g1 = $ratios{$gene};
                $tick_colour =
                      $ratio_g1 <= 30 ? $red
                    : $ratio_g1 <= 40 ? $red_light
                    : $ratio_g1 <= 60 ? $orange
                    : $ratio_g1 <= 70 ? $orange_light
                    : $ratio_g1 > 70  ? $yellow
                    :                   $tick_colour;
            }

            $diagram_img->fgcolor($tick_colour);
            $diagram_img->bgcolor($tick_colour);
            $diagram_img->rectangle(
                $x_gene - ($gene_width / 2),
                $track_top + 1,
                $x_gene + ($gene_width / 2),
                $track_bottom - 1
            );

            #####################################################
            # curves of the SNP categories
            #####################################################

            # The first gene of a chromosome only records its points;
            # segments are drawn from the second gene on.
            my $connect = defined $last_point{"5"} ? 1 : 0;

            # Category 5 is always drawn, with a thickness of 2 that stays
            # in effect for the other curves.
            $extend_curve->(
                "5", "red",
                $x_gene - 1,
                $curve_base - ($proportions_categories{$gene}{"5"} * 20) - 7,
                $connect, 2
            );

            if ($display_cat eq "yes")
            {
                # Categories 1, 2 and "3 or 4", in that order.
                foreach my $series (["1", "orange"], ["2", "purple"], ["3or4", "green"])
                {
                    my ($category, $colour) = @$series;
                    $extend_curve->(
                        $category, $colour,
                        $x_gene - 1,
                        $curve_base - ($proportions_categories{$gene}{$category} * 20) - 7,
                        $connect
                    );
                }
            }
        }
        elsif ($type_analysis eq "polyploid_polyploid")
        {
            # Gene ticks are always gray in this mode.
            $diagram_img->fgcolor("gray");
            $diagram_img->bgcolor("gray");
            $diagram_img->rectangle(
                $x_gene - ($gene_width / 2),
                $track_top + 1,
                $x_gene + ($gene_width / 2),
                $track_bottom - 1
            );

            ##################################################
            # SNP category : equality between 2 polyploids
            ##################################################
            my $connect = defined $last_point{"equal"} ? 1 : 0;
            $extend_curve->(
                "equal", "red",
                $x_gene - 1,
                $curve_base - ($proportions_categories{$gene}{"equal"} * 20) - 7,
                $connect, 2
            );
        }

        $nombre_genes++;
    }

    $num_chrom++;
}
502
####################
# legend
####################

# $y_end is only set while a chromosome is drawn; fall back to the top
# margin so that the legend coordinates stay defined when none was drawn.
$y_end = $margin_top unless (defined $y_end);

# Write a black legend text in Times 12 at the given position.
my $legend_text = sub {
    my ($x, $y, $text) = @_;

    $diagram_img->fgcolor("black");
    $diagram_img->moveTo($x, $y);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string($text);
};

# Draw a legend key: a horizontal stroke of the given length, thickness and
# colour starting at ($x, $y), followed by its caption 5 pixels further
# right and 5 pixels lower.
my $legend_key = sub {
    my ($x, $y, $length, $thickness, $colour, $caption) = @_;

    $diagram_img->moveTo($x, $y);
    $diagram_img->setThickness($thickness);
    $diagram_img->fgcolor($colour);
    $diagram_img->bgcolor($colour);
    $diagram_img->lineTo($x + $length, $y);
    $legend_text->($x + $length + 5, $y + 5, $caption);
};

if ($type_analysis eq "polyploid_polyploid")
{
    $legend_key->(5, $y_end, 20, 2, "red", "% SNP where P1 = P2");
}
elsif ($type_analysis eq "polyploid_diploid")
{
    if ($display_cat eq "yes")
    {
        # The four category keys share one row. Stacked vertically (20
        # pixels apart) they ran into the ratio legend, which starts 30
        # pixels below $y_end.
        my @category_keys = (
            ["orange", "% SNP type 1"],
            ["purple", "% SNP type 2"],
            ["green",  "% SNP type 3 or 4"],
            ["red",    "% SNP type 5"],
        );
        my $x_key = 5;
        foreach my $key (@category_keys)
        {
            $legend_key->($x_key, $y_end, 20, 2, @$key);
            $x_key += 170;
        }
    }
    else
    {
        $legend_key->(5, $y_end, 20, 2, "red",
            "% SNP Class 5 per gene (SNP Intra-Diploids = SNP Intra-Polyploid)");
    }

    # Colour scale of the gene ticks.
    # NOTE(review): the caption says %G2 while the tick colour is derived
    # from the column the parsing code names ratio_g1 -- confirm which one
    # is meant.
    $legend_text->(5, $y_end + 30,
        "Estimate of subgenomic contribution to the transcriptome for each gene (%G2)");

    my @ratio_keys = (
        [$red,          "0-30%"],
        [$red_light,    "30-40%"],
        [$orange,       "40-60%"],
        [$orange_light, "60-70%"],
        [$yellow,       "70-100%"],
    );
    my $x_swatch = 25;
    foreach my $key (@ratio_keys)
    {
        $legend_key->($x_swatch, $y_end + 45, 5, 10, @$key);
        $x_swatch += 70;
    }

    $legend_key->(25, $y_end + 60, 5, 10, "gray",
        "No ratio information, no SNP class 5 in this gene");
}
651
652
# Write the picture as PNG. The three-argument open keeps special characters
# of the file name from being read as an open mode, and failures to create
# or flush the file are reported instead of being ignored.
open(my $DIAGRAM_PICT, '>', $output_png)
    or die "Cannot create PNG file '$output_png': $!\n";
binmode($DIAGRAM_PICT);    # PNG is binary data
print {$DIAGRAM_PICT} $diagram_img->png;
close($DIAGRAM_PICT)
    or die "Cannot write PNG file '$output_png': $!\n";