view DrawMapOfOccurences.pl @ 0:58111b3965b2 draft default tip

Uploaded
author dereeper
date Thu, 01 Nov 2012 09:35:05 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
use Switch;
use Getopt::Long;
use lib ".";

my $usage = qq~Usage:$0 <args>
where <args> are:
    -c, --count           <SNP count SNiPloid output>
    -a, --annotation      <annotation file in GFF3>
    -o, --output_png      <output PNG file>
    -s, --scale           <scale. Default:100000>
    -t, --type_analysis   <type of analysis: polyploid_diploid or polyploid_polyploid. Default:polyploid_diploid>
    -m, --max_nb_chrom    <maximum number of chromomsome to display. Default:20>
    -n, --nb_min_snp      <minimal number of SNP to calculate ratio. Default:10>
    -d, --display_cat     <display ratio for each category instead of intra-polyploid and inter-diploid (yes/no). Default:no>
~;
$usage .= "\n";

my ($snp_count,$annotation,$output_png,$global_scale,$max_nb_chrom,$type_analysis);

my $global_scale = 10000;
my $max_nb_chrom = 20;
my $nb_min_snp   = 10;
my $display_cat = "no";


GetOptions(
	"count=s"           => \$snp_count,
	"annotation=s"      => \$annotation,
	"output_png=s"      => \$output_png,
	"scale=s"           => \$global_scale,
	"max_nb_chrom=s"    => \$max_nb_chrom,
	"type_analysis=s"   => \$type_analysis,
	"nb_min_snp=s"      => \$nb_min_snp,
	"display_cat=s"     => \$display_cat
);


die $usage
  if ( !$snp_count || !$annotation || !$output_png || !$type_analysis);
  
my %proportions_categories;
my %ratios; 
my %ratios_poly_diploid;
my %nb_snps;
open(my $COUNT,$snp_count);
<$COUNT>;
while(<$COUNT>)
{
	my $line =$_;
	chomp($line);
	my @infos = split(/\t/,$line);
	
	
	if ($type_analysis eq "polyploid_diploid")
	{
		my $gene = $infos[0];
		my $nb_snp = $infos[2];
		my $nb_1 = $infos[3];
		my $nb_2 = $infos[4];
		my $nb_3or4 = $infos[5];
		my $nb_3 = $infos[6];
		my $nb_4 = $infos[7];
		my $nb_5 = $infos[8];
		my $nb_other = $infos[9];
		my $nb_heterozygot_diploid = $infos[10];
		my $nb_snp_diploid = $infos[11];
		my $nb_snp_polyploid = $infos[12];
		
		$nb_snps{$gene} = $nb_snp;	
		my $sum = $nb_1 + $nb_2 + $nb_3or4 + $nb_5 + $nb_3 + $nb_4 + $nb_other + $nb_heterozygot_diploid;
		
		if ($nb_snp >= $nb_min_snp)
		{
			if ($nb_1)
			{
				$proportions_categories{$gene}{"1"} = $nb_1/$nb_snp;
			}
			if ($nb_2)
			{
				$proportions_categories{$gene}{"2"} = $nb_2/$nb_snp;
			}
			if ($nb_5)
			{
				$proportions_categories{$gene}{"5"} = $nb_5/$nb_snp;
			}
			if ($nb_3or4)
			{
				$proportions_categories{$gene}{"3or4"} = $nb_3or4/$nb_snp;
			}
		}

		my $ratio_g1 = $infos[13];
		my $ratio_g2 = $infos[14];
	
		if ($ratio_g1)
		{
			$ratios{$gene} = $ratio_g1;
		}
	}
	
	if ($type_analysis eq "polyploid_polyploid")
	{
		my $gene = $infos[0];
		my $nb_snp = $infos[2];
		my $nb_equal = $infos[3] + $infos[4];
		my $nb_diff = $infos[7];
		$nb_snps{$gene} = $nb_snp;
		if ($nb_snp >= $nb_min_snp)
		{
			if ($nb_equal)
			{
				$proportions_categories{$gene}{"equal"} = $nb_equal/$nb_snp;
			}
			if ($nb_diff)
			{
				$proportions_categories{$gene}{"difference"} = $nb_diff/$nb_snp;
			}
		}
	}
	
}
close($COUNT);


  

my $max_pos = 0;
my %chrom_sizes;
my $chrom_particule;
my %genes;
my %gene_positions;

open(my $ANNOT,$annotation);
while(<$ANNOT>)
{
	my $line =$_;
	chomp($line);
	if (!/^#/ && /gene/)
	{
		my @infos = split(/\t/,$line);
		my $chrom = $infos[0];
		if ($chrom =~/^(\w+_)(\d+)$/)
		{
			$chrom_particule = $1;
			$chrom = $2;
		}
		
		my $attributes = $infos[8];
		my $gene_name;
		if ($attributes =~/ID=([^;]+);/)
		{
			$gene_name = $1;
		}
		if ($gene_name =~/(.*)_G1/)
		{
			$gene_name = $1;
		}
		else
		{
			next;
		}
		if (not defined $nb_snps{$gene_name})
		{
			next;
		}
		
		my $start = $infos[3];
		my $end = $infos[4];
		my $pos = sprintf("%.0f", ($start + (($end - $start) / 2)));
		
		$end = $end / $global_scale;
		if ($chrom_sizes{$chrom})
		{
			if ($end > $chrom_sizes{$chrom})
			{
				$chrom_sizes{$chrom} = $end;
				if ($end > $max_pos)
				{
					$max_pos = $end;
				}
			}
		}
		else
		{
			$chrom_sizes{$chrom} = $end;
			if ($end > $max_pos)
			{
				$max_pos = $end;
			}
		}
		$genes{$gene_name} = "$chrom:$pos";
		$gene_positions{$chrom}{$pos}= $gene_name;
	}
}
close($ANNOT);



use GD;
use GD::Simple;
use GD::Image;




####################
# drawing
####################

my $scale = 800 / $max_pos;

my $margin_left = 80;
my $margin_right = 50;
my $margin_top = 50;
my $margin_legend = 100;
my $margin_bottom = 10;
my $margin_between_chromosomes = 25;
my $margin_between_section = 50;
my $chrom_width = 10;
my $gene_width = 1;

my $nb_group = 1;

my $width_of_picture = scalar keys(%gene_positions);
if (scalar keys(%gene_positions) > $max_nb_chrom)
{
	$width_of_picture = $max_nb_chrom;
}
                                        
my $diagram_img = GD::Simple->new(($margin_left + $margin_right + ($max_pos*$scale)),
                                         ($margin_top + ((($chrom_width * $nb_group) + ($margin_between_chromosomes * ($nb_group-1))) * $width_of_picture) + ($margin_between_section * $width_of_picture) + $margin_bottom + $margin_legend)
                                        );                                        

my $yellow =  $diagram_img->colorAllocate(247,254,46);
my $orange_light = $diagram_img->colorAllocate(250,204,46);
my $red_light = $diagram_img->colorAllocate(254,100,46);
my $red = $diagram_img->colorAllocate(254,46,46);
my $orange = $diagram_img->colorAllocate(254,154,46);


# draw chromosomes
my $num_chrom = 0;
my @sorted_chrom = sort {$a <=> $b} keys(%gene_positions);

my $nombre_genes = 0;
my $y_end;
foreach my $chrom(@sorted_chrom)
{
		if (!$chrom)
		{
			next;
		}
	        
	    if ($num_chrom > ($max_nb_chrom - 1))
	    {
	    	last;
	    }
        my $ref_hash = $gene_positions{$chrom};
        my %hash = %$ref_hash;

        my $section_size = $chrom_width + (($margin_between_chromosomes + $chrom_width) * ($nb_group - 1));

        # draw chromosome (X number of groups)

        $diagram_img->fgcolor('black');
        $diagram_img->bgcolor('white');
        $diagram_img->setThickness(1); 
        my $chrom_chain = $chrom_particule . $chrom;

        $diagram_img->rectangle( $margin_left,
                                        $margin_top + (($section_size + $margin_between_section) * $num_chrom),
                                        $margin_left + ($chrom_sizes{$chrom}*$scale),
                                        $margin_top + $chrom_width + (($section_size + $margin_between_section) * $num_chrom)
                                        );

        $diagram_img->fgcolor('black');
        $diagram_img->moveTo(5,$margin_top + $chrom_width + (($section_size + $margin_between_section) * $num_chrom) - 1);
        $y_end = $margin_top + $chrom_width + (($section_size + $margin_between_section) * ($num_chrom+1)) - 1;
        $diagram_img->fontsize(12);
        $diagram_img->font('Times');
        $diagram_img->string($chrom_particule . $chrom);
        
        
        my $previous_x_5;
        my $previous_x_1;
        my $previous_x_2;
        my $previous_x_3or4;
		my $previous_y_5;
		my $previous_y_1;
		my $previous_y_2;
		my $previous_y_3or4;
		
		my $previous_x_equal;
        my $previous_x_diff;
		my $previous_y_equal;
		my $previous_y_diff;
		
		my $previous_x_snp_diplo;
        my $previous_x_snp_poly;
		my $previous_y_snp_diplo;
		my $previous_y_snp_poly;

		my $previous_x_ratio_diplo_poly;
		my $previous_y_ratio_diplo_poly;
        
        # draw genes
        foreach my $pos(sort{$a <=> $b}keys(%hash))
        {
        	my $gene = $gene_positions{$chrom}{$pos};
        	if (not defined $nb_snps{$gene} or $nb_snps{$gene} < $nb_min_snp)
        	{
        		next;
        	}
        	
        	if ($type_analysis eq "polyploid_diploid")
			{
	        	#####################################################
	        	# draw ratio (subgenomic contribution)
	        	#####################################################
	        	my $color = "gray";
	        	if ($ratios{$gene})
	        	{
	        		my $ratio_g1 = $ratios{$gene};
	        		if ($ratio_g1 <= 30)
	        		{
	        			$color = $red;
	        		}
	        		elsif ($ratio_g1 > 30 && $ratio_g1 <= 40)
	        		{
	        			$color = $red_light;
	        		}
	        		elsif ($ratio_g1 > 40 && $ratio_g1 <= 60)
	        		{
	        			$color = $orange;
	        		}
	        		elsif ($ratio_g1 > 60 && $ratio_g1 <= 70)
	        		{
	        			$color = $orange_light;
	        		}
	        		elsif ($ratio_g1 > 70)
	        		{
	        			$color = $yellow;
	        		}
	        	}
	
	            $pos = $pos / $global_scale;
	            
	            $diagram_img->fgcolor($color);
	            $diagram_img->bgcolor($color);
	            $diagram_img->rectangle( $margin_left + ($pos*$scale) - ($gene_width / 2),
	                                                $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1,
	                                                $margin_left + ($pos*$scale) + ($gene_width / 2),
	                                                $margin_top + $chrom_width + (($section_size + $margin_between_section) * $num_chrom) - 1
	                                                );
	                                                
	                 
	                                           
	            #####################################################
	        	# draw SNP categories
	        	#####################################################
	        	
				my $proportion_5 = $proportions_categories{$gene}{"5"};
				my $proportion_1 = $proportions_categories{$gene}{"1"};
				my $proportion_2 = $proportions_categories{$gene}{"2"};
				my $proportion_3or4 = $proportions_categories{$gene}{"3or4"};
				my $ratio_poly_diplo = $ratios_poly_diploid{$gene};
		
				my $draw = 0;
				if (defined $previous_x_5)
				{
					$draw = 1;
				}
					
					
				#######################
				# SNP category 5
				#######################
				if ($draw)
				{
					$diagram_img->moveTo($previous_x_5,$previous_y_5);
				}    
				$previous_x_5 = $margin_left + ($pos*$scale) - 1;    
				$diagram_img->setThickness(2);                          
				$diagram_img->fgcolor("red");
				$diagram_img->bgcolor("red");
				$previous_y_5 = $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1 - ($proportion_5 * 20) - 7;
				if ($draw)
				{
			        $diagram_img->lineTo($previous_x_5,$previous_y_5);  
				}
					
				if ($display_cat eq "yes")
				{	
					#######################
					# SNP category 1
					#######################
					if ($draw)
					{
						$diagram_img->moveTo($previous_x_1,$previous_y_1);
					}    
					$previous_x_1 = $margin_left + ($pos*$scale) - 1;                                
			        $diagram_img->fgcolor("orange");
		            $diagram_img->bgcolor("orange");
			        $previous_y_1 = $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1 - ($proportion_1 * 20) - 7;
			        if ($draw)
					{
			        	$diagram_img->lineTo($previous_x_1,$previous_y_1); 
					}
					
					
					#######################
					# SNP category 2
					#######################
					if ($draw)
					{
						$diagram_img->moveTo($previous_x_2,$previous_y_2);
					}    
					$previous_x_2 = $margin_left + ($pos*$scale) - 1;                                
			        $diagram_img->fgcolor("purple");
		            $diagram_img->bgcolor("purple");
			        $previous_y_2 = $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1 - ($proportion_2 * 20) - 7;
			        if ($draw)
					{
			        	$diagram_img->lineTo($previous_x_2,$previous_y_2); 
					}
					
					
					#######################
					# SNP category 3 or 4
					#######################
					if ($draw)
					{
						$diagram_img->moveTo($previous_x_3or4,$previous_y_3or4);
					}    
					$previous_x_3or4 = $margin_left + ($pos*$scale) - 1;                                
			        $diagram_img->fgcolor("green");
		            $diagram_img->bgcolor("green");
			        $previous_y_3or4 = $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1 - ($proportion_3or4 * 20) - 7;
			        if ($draw)
					{
			        	$diagram_img->lineTo($previous_x_3or4,$previous_y_3or4); 
					}
				}
			
			}
			
			if ($type_analysis eq "polyploid_polyploid")
			{
				my $color = "gray";
				$pos = $pos / $global_scale;
	            
	            $diagram_img->fgcolor($color);
	            $diagram_img->bgcolor($color);
	            $diagram_img->rectangle( $margin_left + ($pos*$scale) - ($gene_width / 2),
	                                                $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1,
	                                                $margin_left + ($pos*$scale) + ($gene_width / 2),
	                                                $margin_top + $chrom_width + (($section_size + $margin_between_section) * $num_chrom) - 1
	                                                );
	                                                
	                                                
	            
				my $proportion_equal = $proportions_categories{$gene}{"equal"};
				my $proportion_diff = $proportions_categories{$gene}{"difference"};
	
				my $draw = 0;
				if (defined $previous_x_equal)
				{
					$draw = 1;
				}
				
				
				##################################################
				# SNP category : equality between 2 polyploids
				##################################################
				if ($draw)
				{
					$diagram_img->moveTo($previous_x_equal,$previous_y_equal);
				}    
				$previous_x_equal = $margin_left + ($pos*$scale) - 1;    
				$diagram_img->setThickness(2);                          
		        $diagram_img->fgcolor("red");
	            $diagram_img->bgcolor("red");
		        $previous_y_equal = $margin_top + (($section_size + $margin_between_section) * $num_chrom) + 1 - ($proportion_equal * 20) - 7;
		        if ($draw)
				{
		        	$diagram_img->lineTo($previous_x_equal,$previous_y_equal);  
				}
			}

			$nombre_genes++;
        }

		$num_chrom++;
}     

if ($type_analysis eq "polyploid_polyploid")
{
	$diagram_img->moveTo(5,$y_end);
	$diagram_img->setThickness(2); 
	$diagram_img->fgcolor("red");
	$diagram_img->bgcolor("red");
	$diagram_img->lineTo(25,$y_end);  
	$diagram_img->fgcolor("black");
    $diagram_img->moveTo(30,$y_end + 5);
    $diagram_img->fontsize(12);
    $diagram_img->font('Times');
    $diagram_img->string("% SNP where P1 = P2");
}
elsif ($type_analysis eq "polyploid_diploid")
{
	if ($display_cat eq "yes")
	{
		$diagram_img->moveTo(5,$y_end);
		$diagram_img->setThickness(2); 
		$diagram_img->fgcolor("orange");
		$diagram_img->bgcolor("orange");
		$diagram_img->lineTo(25,$y_end);  
		$diagram_img->fgcolor("black");
	    $diagram_img->moveTo(30,$y_end + 5);
	    $diagram_img->fontsize(12);
	    $diagram_img->font('Times');
	    $diagram_img->string("% SNP type 1");
	    
	    $diagram_img->moveTo(5,$y_end + 20);
		$diagram_img->setThickness(2); 
		$diagram_img->fgcolor("purple");
		$diagram_img->bgcolor("purple");
		$diagram_img->lineTo(25,$y_end + 20);  
		$diagram_img->fgcolor("black");
	    $diagram_img->moveTo(30,$y_end + 25);
	    $diagram_img->fontsize(12);
	    $diagram_img->font('Times');
	    $diagram_img->string("% SNP type 2");
	    
	    $diagram_img->moveTo(5,$y_end + 40);
		$diagram_img->setThickness(2); 
		$diagram_img->fgcolor("green");
		$diagram_img->bgcolor("green");
		$diagram_img->lineTo(25,$y_end + 40);  
		$diagram_img->fgcolor("black");
	    $diagram_img->moveTo(30,$y_end + 45);
	    $diagram_img->fontsize(12);
	    $diagram_img->font('Times');
	    $diagram_img->string("% SNP type 3 or 4");
	    
	    $diagram_img->moveTo(5,$y_end + 60);
		$diagram_img->setThickness(2); 
		$diagram_img->fgcolor("red");
		$diagram_img->bgcolor("red");
		$diagram_img->lineTo(25,$y_end + 60);  
		$diagram_img->fgcolor("black");
	    $diagram_img->moveTo(30,$y_end + 65);
	    $diagram_img->fontsize(12);
	    $diagram_img->font('Times');
	    $diagram_img->string("% SNP type 5");
	}
	else
	{
		$diagram_img->moveTo(5,$y_end);
        $diagram_img->setThickness(2);
        $diagram_img->fgcolor("red");
        $diagram_img->bgcolor("red");
        $diagram_img->lineTo(25,$y_end);
        $diagram_img->fgcolor("black");
        $diagram_img->moveTo(30,$y_end + 5);
        $diagram_img->fontsize(12);
        $diagram_img->font('Times');
        $diagram_img->string("% SNP Class 5 per gene (SNP Intra-Diploids = SNP Intra-Polyploid)");
	}

	$diagram_img->moveTo(5,$y_end + 30);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("Estimate of subgenomic contribution to the transcriptome for each gene (%G2)");


	$diagram_img->moveTo(25,$y_end + 45);
	$diagram_img->setThickness(10);
	$diagram_img->fgcolor($red);
	$diagram_img->bgcolor($red);
	$diagram_img->lineTo(30,$y_end + 45);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(35,$y_end + 50);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("0-30%");

	$diagram_img->moveTo(95,$y_end + 45);
	$diagram_img->setThickness(10);
	$diagram_img->fgcolor($red_light);
	$diagram_img->bgcolor($red_light);
	$diagram_img->lineTo(100,$y_end + 45);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(105,$y_end + 50);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("30-40%");

	$diagram_img->moveTo(165,$y_end + 45);
	$diagram_img->setThickness(10);
	$diagram_img->fgcolor($orange);
	$diagram_img->bgcolor($orange);
	$diagram_img->lineTo(170,$y_end + 45);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(175,$y_end + 50);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("40-60%");

	$diagram_img->moveTo(235,$y_end + 45);
	$diagram_img->setThickness(10);
	$diagram_img->fgcolor($orange_light);
	$diagram_img->bgcolor($orange_light);
	$diagram_img->lineTo(240,$y_end + 45);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(245,$y_end + 50);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("60-70%");

	$diagram_img->moveTo(305,$y_end + 45);
	$diagram_img->setThickness(10);
	$diagram_img->fgcolor($yellow);
	$diagram_img->bgcolor($yellow);
	$diagram_img->lineTo(310,$y_end + 45);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(315,$y_end + 50);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("70-100%");

	$diagram_img->moveTo(25,$y_end + 60);
    $diagram_img->setThickness(10);
    $diagram_img->fgcolor("gray");
	$diagram_img->bgcolor("gray");
	$diagram_img->lineTo(30,$y_end + 60);
	$diagram_img->fgcolor("black");
	$diagram_img->moveTo(35,$y_end + 65);
	$diagram_img->fontsize(12);
	$diagram_img->font('Times');
	$diagram_img->string("No ratio information, no SNP class 5 in this gene");

}                             


open( DIAGRAM_PICT, ">$output_png" );
binmode(DIAGRAM_PICT);
print DIAGRAM_PICT $diagram_img->png;
close DIAGRAM_PICT;