view sRNA_plot.pl @ 32:399d8dbdedd6 draft

Uploaded
author big-tiandm
date Sat, 15 Nov 2014 01:33:20 -0500
parents e0884a4b996b
children
line wrap: on
line source

#!/usr/bin/perl -w
#==========================================================================================
# Date: 
# Title: 
# Comment: Program to plot gene structure
# Input: 1. 
#        2. 
#        3. 
# Output: output file of gene structure graph by html or svg formt
# Test Usage: 
#========================================================================================
#use strict;
my $version=1.00;
use SVG;
use Getopt::Long;
my %opt;
GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h");
if (!( defined $opt{o}) || defined $opt{h}) {
&usage;
}
my $span=$opt{span};
#my $sample_cloumn=$opt{n};
my $mark=$opt{mark};
my @mark=split/\#/,$mark;
my $genelist=$opt{g};
#===============================Define Attribute==========================================
my %attribute=(
	canvas=>{
		'width'=>1500,
		'height'=>1800
	},
	text=>{
		'stroke'=>"#000000",
		'fill'=>"none",
		'stroke-width'=>0.5
	},
	line=>{
		'stroke'=>"black",
		'stroke-width'=>1
	},
	csv=>{
		'stroke'=>"red",
		'stroke-width'=>0.5
	},
	exon=>{
		'stroke'=>"black",
		'stroke-width'=>1
	},
	intron=>{
		'stroke'=>"black",
		'stroke-width'=>1.5
	},
	font=>{
		'fill'=>"#000000",
		'font-size'=>12,
		'font-size2'=>10,
		#'font-weight'=>'bold',
		'font-family'=>"Arial"
		#'font-family'=>"ArialNarrow-bold"
	},
	rect=>{
		'fill'=>"lightgreen",
		'stroke'=>"black",
		'stroke-width'=>0.5
	},
	readwidth=>0.5
);
#############################s#define start coordinate and scale
open(TXT,">$opt{out}");
open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
my %length;
while (my $aline=<LENGTH>) {
	chomp $aline;
	next if($aline=~/^\#/);
	my @temp=split/\t/,$aline;
	$temp[0]=~s/^c/C/;
	$length{$temp[0]}=$temp[1];
}
close LENGTH;
#---------------------------------------------------------------
open(GENE,"$opt{g}")||die"cannot open the file $opt{g}";
my %genelist;
while (my $aline=<GENE>) {
	chomp $aline;#LOC_Os01g01280  Chr1    133291  134685  +
	next if($aline=~/^\#/);
	my @temp=split/\t/,$aline;
	if ($temp[1]=~/^Chr(\d)$/) {
		$temp[1]="Chr0$1";
	}
	push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]];

}
close GENE;
#my %have_gene;
#foreach my $chr (sort keys %genelist) {
#	my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
#	my $start=$genelist[0][1];
#	my $end=$genelist[0][2];
#	for (my $i=0;$i<@genelist ;$i++) {
#		if ($gene) {
#		}
#	}
#}

my %gene_desity;
foreach my $chr (sort keys %genelist) {
	my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
	for (my $i=0;$i<@genelist ;$i++) {
		my $start=int($genelist[$i][1]/$span);
		my $end=int($genelist[$i][2]/$span);
		#my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]};
		if ($start==$end) {
			$gene_desity{$chr}[$start]++;
		}
		else{
			for (my $k=$start;$k<=$end ;$k++) {
				$gene_desity{$chr}[$k]++;
			}
		}
	}
}
#------------------------------------------region_gene_number-------------------------
my $max_gene_number=0;
my $total=0;
foreach my $chr (sort keys %genelist) {
	for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) {
		if (!(defined($gene_desity{$chr}[$i]))) {
			$gene_desity{$chr}[$i]=0;
		}
		if ($gene_desity{$chr}[$i]>$max_gene_number) {
			$max_gene_number=$gene_desity{$chr}[$i];
			#print "$gene_desity{$chr}[$i]\n";
		}
		#print TXT "$i\t$gene_desity[$i]\n";
		$total+=$gene_desity{$chr}[$i];
		#print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
	}
}
#print "Gene max:$max_gene_number\ntotal:$total\n";

#---------------------------------------------------------------
my %centromere;
if (defined($opt{cen})) {
	open CEN,"$opt{cen}";
	while (my $aline=<CEN>) {
		chomp $aline;
		next if($aline=~/^\#/);
		my @temp=split/\t/,$aline;
		$temp[0]=~s/^c/C/;
		$centromere{$temp[0]}[0]=$temp[1];
		$centromere{$temp[0]}[1]=$temp[2];
	}
	close CEN;
}

#---------------------------------------------------------------
my $max_length=0;
foreach my $chr (keys %length) {
	if ($max_length<$length{$chr}) {
		$max_length=$length{$chr};
	}
	print "$chr\n";
}
#====================================cluster data=======================================
open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
my %cluster;
my %cluster_density;
#my @sample=qw(39B3 3PA3 3LC3);
my @cluster_non_add;
while (my $aline=<CLUSTER>) {
	next if($aline=~/^\#/);
	chomp $aline;##Chr	MajorLength	Percent	end	19B1
	my @temp=split/\t/,$aline;
	my @ID=split/\:/,$temp[0];
	my @posi=split/\-/,$ID[1];
	my @all_rpkm=@temp;
	shift @all_rpkm;
	shift @all_rpkm;
	shift @all_rpkm;
#	for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer
#		$all_rpkm[$s]=log2($all_rpkm[$s]);
#	}
	push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID	start	end	rpkm(19B1,1PA1,1LC1);
}
close CLUSTER;
my %max_cluster;
my $chr_number=0;
print "@mark\n$mark\n";
foreach my $chr (sort keys %cluster) {
	for (my $i=0;$i<@mark ;$i++) {
		$max_cluster{$chr}[$i]=0;
	}	
	$chr_number++;
}
foreach my $chr (sort keys %cluster) {
	@{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
	for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
		for (my $s=0;$s<@mark;$s++) {
			if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) {
				$max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s];
			}
		}
	}

}
foreach my $chr (sort keys %max_cluster) {
	for (my $s=0; $s<@mark;$s++) {
	#	print "$max_cluster{$chr}[$s]\n";
	}
}
#---------------------------------------------------------------------------------------
foreach my $chr(keys %cluster) {
	for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
		my $start=int($cluster{$chr}[$i][1]/$span);
		my $end=int($cluster{$chr}[$i][2]/$span);
		if ($start==$end) {
			for (my $s=0;$s<@mark ;$s++) {
				$cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s];
			}
		
		}
		else{
			for (my $m=$start;$m<=$end ;$m++) {
				for (my $s=0;$s<@mark ;$s++) {
					$cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s];
				}
			}
		}
	}
}
my %max_cluster_density;
my $max_all_density=0;
foreach my $chr (sort keys %cluster) {#
	for (my $s=0;$s<@mark ;$s++) {
		for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
			$max_cluster_density{$chr}[$s]=0;
		}
	}	
	
}
foreach my $chr (sort keys %cluster_density) {
	print "$#{$cluster_density{$chr}}\n";
	for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
		print TXT "$chr\t$k";
		for (my $s=0;$s<@mark;$s++) {
			if (!(defined($cluster_density{$chr}[$k][$s]))) {
				$cluster_density{$chr}[$k][$s]=0;
			}
			if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) {
				$max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s];
			}
			if ($cluster_density{$chr}[$k][$s]>$max_all_density) {
				$max_all_density=$cluster_density{$chr}[$k][$s];
			}
			print TXT "\t$cluster_density{$chr}[$k][$s]";
		}	
		print TXT "\n";
	}
}
print "max density: $max_all_density\n";
#--------------------------------------------------------------------
my $top_margin=30;
my $tail_margin=30;
my $XOFFSET=50;
my $YOFFSET=60;
my $chr_length=600;
my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰
#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算
#========================================New canvas============================
####    Starting    ####
#新建画布
my $width=1000;
my $heigth=100+130*$chr_number;
my $svg=SVG->new(width=>$width, height=>$heigth);
#画图起始点
my $canvas_start_x=$XOFFSET;
my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线
my $canvas_start_y=$YOFFSET;
my $canvas_end_y=$YOFFSET;
my $chr_heigth=$heigth-$YOFFSET-$tail_margin;
print "chr number:$chr_number\n";
my $one_chr_heigth=$chr_heigth/$chr_number;
my $Yscale=($one_chr_heigth-15)/$max_all_density;
#my $chr_width=$YOFFSET;
#my $chr_start_y;
#my $chr_end_y;
#my $Yscale=0.01;
#=======================================title of the graph===============================
#my $span_k=$span/1000;
#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution");
#=======================================the top max chr line=============================
$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
$long_scale=int ($max_length/10);#十等分 大刻度
#大坐标刻度
for ($i=0;$i<=10;$i++) {
	my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale;
	my $long_x_end=$long_x_start;
	my $long_y_start=$YOFFSET;
	my $long_y_end=$YOFFSET-5;
	$svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
	my $Bscale=$long_scale*$i;
	my $cdata=int ($Bscale/1000000);
	$svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M");
}
#=========================================================================================
my $cc=1;
foreach my $chr (sort keys %length) {
	my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
	my $chr_start_x=$XOFFSET;
	my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth;
	my $chr_end_y=$chr_start_y;
	#$chr_start_y+=$chr_width;
	#$chr_end_y+=$chr_width;
#	for (my $i=0;$i<@{$gene_desity{$chr}};$i++) {
#		print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
#		my $red=$gene_desity{$chr}[$i]/$max_gene_number*255;
#		my $green=$gene_desity{$chr}[$i]/$max_gene_number*255;
#		print "$red\t$green\t0\n";
#		$svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)");
#	}

	$svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
	$svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
	my $m_length=$length{$chr}%1000000;
	$svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M");
	

	if (defined($centromere{$chr}[0])) {
		$svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
	}
	for (my $s=0;$s<@mark ;$s++) {
		for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) {
			#if ($cluster_density{$chr}[$i]*$Yscale>40) {
				#$cluster_density{$chr}[$i]=40/$Yscale;
				#$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
			#}
			#print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n";
			my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
			my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
			my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale;
			my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale;
			my $c_red=($s+1)/@mark*255;
			$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3);
		}

	}
	#=======Y axis
	$svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
	#=======Y axis ===>3 xiaoge
	my $s10=1;
	my $e10=0;
	my $chr_max=$max_all_density;
	while ($chr_max>10) {
		$chr_max=int($chr_max/10);
		$s10=$s10*10;
		$e10++;
	}
	$chr_max=$chr_max/2;
	#print "*****$max_all_density\t$chr_max\t$s10\n";
	for (my $i=1;$i<3 ;$i++) {
		my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i;
		my $xiaoge_Y=$chr_max*$i;
		$svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
		$svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10);
	}
	$cc++;
}

for (my $s=0;$s<@mark ;$s++) {
	my $c_red=($s+1)/@mark*255;
	print "**$c_red\n";
	$svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1);
	$svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]);
}
#
#
if (defined($opt{cen})) {
	$svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
	$svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
}

close TXT;

open (OUT,">$opt{o}");
print OUT $svg->xmlify();

sub log2 {
	my $n = shift;
	return log($n)/log(2);
}

sub usage{
print <<"USAGE";
Version $version
Usage:
$0 
options:
-g genelist
-span
-n sample cloumn
-mark sample name
-o output graph file name with html or svg extension
-c cluster file input
-out txt output
-l length of chr
-cen centromere
-h help
USAGE
exit(1);
}