diff sRNA_rpkm_distribution_along_genome.pl @ 0:07745c0958dd draft

Uploaded
author big-tiandm
date Thu, 18 Sep 2014 21:40:25 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sRNA_rpkm_distribution_along_genome.pl	Thu Sep 18 21:40:25 2014 -0400
@@ -0,0 +1,264 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date: 
+# Title: 
+# Comment: Program to plot gene structure
+# Input: 1. 
+#        2. 
+#        3. 
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage: 
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"span=s","c=s","o=s","out=s","l=s","cen:s","n=s","mark=s","h");
+if (!( defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+my $span=$opt{span};
+my $sample_cloumn=$opt{n};
+my $mark=$opt{mark};
+#===============================Define Attribute==========================================
+my %attribute=(
+	canvas=>{
+		'width'=>1500,
+		'height'=>1800
+	},
+	text=>{
+		'stroke'=>"#000000",
+		'fill'=>"none",
+		'stroke-width'=>0.5
+	},
+	line=>{
+		'stroke'=>"black",
+		'stroke-width'=>1
+	},
+	csv=>{
+		'stroke'=>"red",
+		'stroke-width'=>0.5
+	},
+	exon=>{
+		'stroke'=>"black",
+		'stroke-width'=>1
+	},
+	intron=>{
+		'stroke'=>"black",
+		'stroke-width'=>1.5
+	},
+	font=>{
+		'fill'=>"#000000",
+		'font-size'=>12,
+		'font-size2'=>10,
+		#'font-weight'=>'bold',
+		'font-family'=>"Arial"
+		#'font-family'=>"ArialNarrow-bold"
+	},
+	rect=>{
+		'fill'=>"lightgreen",
+		'stroke'=>"black",
+		'stroke-width'=>0.5
+	},
+	readwidth=>0.5
+);
+#############################s#define start coordinate and scale
+open(TXT,">$opt{out}");
+open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
+my %length;
+while (my $aline=<LENGTH>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @temp=split/\t/,$aline;
+	$temp[0]=~s/^c/C/;
+	$length{$temp[0]}=$temp[1];
+}
+close LENGTH;
+#---------------------------------------------------------------
+my %centromere;
+if (defined($opt{cen})) {
+	open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}";
+	while (my $aline=<CEN>) {
+		chomp $aline;
+		next if($aline=~/^\#/);
+		my @temp=split/\t/,$aline;
+		$temp[0]=~s/^c/C/;
+		$centromere{$temp[0]}[0]=$temp[1];
+		$centromere{$temp[0]}[1]=$temp[2];
+	}
+	close CEN;
+}
+
+#---------------------------------------------------------------
+my $max_length=0;
+foreach my $chr (keys %length) {
+	if ($max_length<$length{$chr}) {
+		$max_length=$length{$chr};
+	}
+	print "$chr\n";
+}
+#====================================cluster data=======================================
+open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
+my %cluster;
+my %cluster_density;
+#my @sample=qw(39B3 3PA3 3LC3);
+my @cluster_non_add;
+while (my $aline=<CLUSTER>) {
+	next if($aline=~/^\#/);
+	chomp $aline;##ID	chr	strand	start	end	19B1
+	my @temp=split/\t/,$aline;
+	my @ID=split/\:/,$temp[0];
+	my @posi=split/\-/,$ID[1];
+	push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],$temp[2+$sample_cloumn]];#ID	start	end	rpkm(19B1,1PA1,1LC1);
+}
+close CLUSTER;
+my %max_cluster;
+foreach my $chr (sort keys %cluster) {
+#	for (my $i=0;$i<3 ;$i++) {
+#		$max_cluster{$chr}[$i]=0;
+#	}	
+	$max_cluster{$chr}=0
+}
+foreach my $chr (sort keys %cluster) {
+	@{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
+	#for (my $s=0;$s<3;$s++) {
+		for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+			if ($cluster{$chr}[$i][3]>$max_cluster{$chr}) {
+				$max_cluster{$chr}=$cluster{$chr}[$i][3];
+			}
+		}
+	#}
+
+}
+#---------------------------------------------------------------------------------------
+foreach my $chr(keys %cluster) {
+	for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
+		my $start=int($cluster{$chr}[$i][1]/$span);
+		my $end=int($cluster{$chr}[$i][2]/$span);
+		if ($start==$end) {
+			#for (my $j=0;$j<3 ;$j++) {
+				$cluster_density{$chr}[$start]+=$cluster{$chr}[$i][3];
+			#}
+		
+		}
+		else{
+			for (my $m=$start;$m<=$end ;$m++) {
+				#for (my $j=0;$j<3 ;$j++) {
+					$cluster_density{$chr}[$m]+=$cluster{$chr}[$i][3];
+				#}
+			}
+		}
+	}
+}
+my %max_cluster_density;
+foreach my $chr (sort keys %cluster) {#
+	#for (my $i=0;$i<3 ;$i++) {
+		for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+			$max_cluster_density{$chr}=0;
+		}
+	#}	
+}
+foreach my $chr (sort keys %cluster) {
+	#for (my $i=0;$i<3;$i++) {
+		for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
+			if (!(defined($cluster_density{$chr}[$k]))) {
+				$cluster_density{$chr}[$k]=0;
+			}
+			if ($cluster_density{$chr}[$k]>$max_cluster_density{$chr}) {
+				$max_cluster_density{$chr}=$cluster_density{$chr}[$k];
+			}
+			print TXT "$chr\t$k\t$cluster_density{$chr}[$k]\n";
+		}	
+	#}
+}
+#--------------------------------------------------------------------
+my $XOFFSET=50;
+my $YOFFSET=60;
+#my $length=$end-$start+1;
+my $Xscale=600/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
+#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰
+#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算
+#========================================New canvas============================
+####    Starting    ####
+#新建画布
+my $svg=SVG->new();
+#画图起始点
+my $canvas_start_x=$XOFFSET;
+my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线
+my $canvas_start_y=$YOFFSET;
+my $canvas_end_y=$YOFFSET;
+
+my $chr_width=$YOFFSET;
+my $chr_start_y;
+my $chr_end_y;
+my $Yscale=0.01;
+foreach my $chr (sort keys %length) {
+	my $chr_start_x=$XOFFSET;
+	my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
+	$chr_start_y+=$chr_width;
+	$chr_end_y+=$chr_width;
+	$svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	$svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
+	$svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$length{$chr});
+	
+
+	if (defined($centromere{$chr}[0])) {
+		$svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+	}
+	for (my $i=0;$i<$#{$cluster_density{$chr}} ;$i++) {
+		if ($cluster_density{$chr}[$i]*$Yscale>40) {
+			$cluster_density{$chr}[$i]=40/$Yscale;
+			$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
+		}
+		my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
+		my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
+		my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i]*$Yscale;
+		#my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][0]*$Yscale;
+		#$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'});
+		$svg->rect('x',$cluster_density_start_x,'y',$chr_start_y-$cluster_density{$chr}[$i]*$Yscale,'width',$span*$Xscale,'height',$cluster_density{$chr}[$i]*$Yscale,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red");
+	}
+	
+	$chr_width=50;
+	
+	#$svg->rect('x',$c_non_add_start_x,'y',$c_non_add_start_y,'width',$cluster_non_add_width,'height',$cluster_non_add_heigth,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+}
+
+my $span_k=$span/1000;
+$svg->text('x',200,'y',$chr_start_y+20,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"$mark sRNA rpmk \/ $span_k kb");
+
+$svg->rect('x',600,'y',500,'width',10,'height',10,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red");
+$svg->text('x',620,'y',510,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"sRNA rpkm");
+
+if (defined($opt{cen})) {
+	$svg->rect('x',600,'y',520,'width',10,'height',10,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+	$svg->text('x',620,'y',530,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
+}
+
+close TXT;
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub log2 {
+	my $n = shift;
+	return log($n)/log(2);
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 
+options:
+-span
+-n sample cloumn
+-mark sample name
+-o output graph file name with html or svg extension
+-c cluster file input
+-out txt output
+-l length of chr
+-cen centromere
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file