Mercurial > repos > big-tiandm > sirna_plant
diff sRNA_rpkm_distribution_along_genome.pl @ 0:07745c0958dd draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 18 Sep 2014 21:40:25 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sRNA_rpkm_distribution_along_genome.pl Thu Sep 18 21:40:25 2014 -0400 @@ -0,0 +1,264 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. +# 2. +# 3. +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"span=s","c=s","o=s","out=s","l=s","cen:s","n=s","mark=s","h"); +if (!( defined $opt{o}) || defined $opt{h}) { +&usage; +} +my $span=$opt{span}; +my $sample_cloumn=$opt{n}; +my $mark=$opt{mark}; +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + csv=>{ + 'stroke'=>"red", + 'stroke-width'=>0.5 + }, + exon=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + intron=>{ + 'stroke'=>"black", + 'stroke-width'=>1.5 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + #'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#############################s#define start coordinate and scale +open(TXT,">$opt{out}"); +open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; +my %length; +while (my $aline=<LENGTH>) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $length{$temp[0]}=$temp[1]; +} +close LENGTH; +#--------------------------------------------------------------- +my %centromere; +if (defined($opt{cen})) { + open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}"; + while (my $aline=<CEN>) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $centromere{$temp[0]}[0]=$temp[1]; + $centromere{$temp[0]}[1]=$temp[2]; + } + close CEN; +} + +#--------------------------------------------------------------- +my $max_length=0; +foreach my $chr (keys %length) { + if ($max_length<$length{$chr}) { + $max_length=$length{$chr}; + } + print "$chr\n"; +} +#====================================cluster data======================================= +open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; +my %cluster; +my %cluster_density; +#my @sample=qw(39B3 3PA3 3LC3); +my @cluster_non_add; +while (my $aline=<CLUSTER>) { + next if($aline=~/^\#/); + chomp $aline;##ID chr strand start end 19B1 + my @temp=split/\t/,$aline; + my @ID=split/\:/,$temp[0]; + my @posi=split/\-/,$ID[1]; + push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],$temp[2+$sample_cloumn]];#ID start end rpkm(19B1,1PA1,1LC1); +} +close CLUSTER; +my %max_cluster; +foreach my $chr (sort keys %cluster) { +# for (my $i=0;$i<3 ;$i++) { +# $max_cluster{$chr}[$i]=0; +# } + $max_cluster{$chr}=0 +} +foreach my $chr (sort keys %cluster) { + @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; + #for (my $s=0;$s<3;$s++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + if ($cluster{$chr}[$i][3]>$max_cluster{$chr}) { + $max_cluster{$chr}=$cluster{$chr}[$i][3]; + } + } + #} + +} +#--------------------------------------------------------------------------------------- +foreach my $chr(keys %cluster) { + for(my $i=0;$i<$#{$cluster{$chr}};$i++) { + my $start=int($cluster{$chr}[$i][1]/$span); + my $end=int($cluster{$chr}[$i][2]/$span); + if ($start==$end) { + #for (my $j=0;$j<3 ;$j++) { + $cluster_density{$chr}[$start]+=$cluster{$chr}[$i][3]; + #} + + } + else{ + for (my $m=$start;$m<=$end ;$m++) { + #for (my $j=0;$j<3 ;$j++) { + $cluster_density{$chr}[$m]+=$cluster{$chr}[$i][3]; + #} + } + } + } +} +my %max_cluster_density; +foreach my $chr (sort keys %cluster) {# + #for (my $i=0;$i<3 ;$i++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + $max_cluster_density{$chr}=0; + } + #} +} +foreach my $chr (sort keys %cluster) { + #for (my $i=0;$i<3;$i++) { + for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { + if (!(defined($cluster_density{$chr}[$k]))) { + $cluster_density{$chr}[$k]=0; + } + if ($cluster_density{$chr}[$k]>$max_cluster_density{$chr}) { + $max_cluster_density{$chr}=$cluster_density{$chr}[$k]; + } + print TXT "$chr\t$k\t$cluster_density{$chr}[$k]\n"; + } + #} +} +#-------------------------------------------------------------------- +my $XOFFSET=50; +my $YOFFSET=60; +#my $length=$end-$start+1; +my $Xscale=600/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 +#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 +#========================================New canvas============================ +#### Starting #### +#新建画布 +my $svg=SVG->new(); +#画图起始点 +my $canvas_start_x=$XOFFSET; +my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线 +my $canvas_start_y=$YOFFSET; +my $canvas_end_y=$YOFFSET; + +my $chr_width=$YOFFSET; +my $chr_start_y; +my $chr_end_y; +my $Yscale=0.01; +foreach my $chr (sort keys %length) { + my $chr_start_x=$XOFFSET; + my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; + $chr_start_y+=$chr_width; + $chr_end_y+=$chr_width; + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); + $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$length{$chr}); + + + if (defined($centromere{$chr}[0])) { + $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + } + for (my $i=0;$i<$#{$cluster_density{$chr}} ;$i++) { + if ($cluster_density{$chr}[$i]*$Yscale>40) { + $cluster_density{$chr}[$i]=40/$Yscale; + $svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + } + my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i]*$Yscale; + #my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][0]*$Yscale; + #$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'}); + $svg->rect('x',$cluster_density_start_x,'y',$chr_start_y-$cluster_density{$chr}[$i]*$Yscale,'width',$span*$Xscale,'height',$cluster_density{$chr}[$i]*$Yscale,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); + } + + $chr_width=50; + + #$svg->rect('x',$c_non_add_start_x,'y',$c_non_add_start_y,'width',$cluster_non_add_width,'height',$cluster_non_add_heigth,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); +} + +my $span_k=$span/1000; +$svg->text('x',200,'y',$chr_start_y+20,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"$mark sRNA rpmk \/ $span_k kb"); + +$svg->rect('x',600,'y',500,'width',10,'height',10,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); +$svg->text('x',620,'y',510,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"sRNA rpkm"); + +if (defined($opt{cen})) { + $svg->rect('x',600,'y',520,'width',10,'height',10,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + $svg->text('x',620,'y',530,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); +} + +close TXT; +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub log2 { + my $n = shift; + return log($n)/log(2); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-span +-n sample cloumn +-mark sample name +-o output graph file name with html or svg extension +-c cluster file input +-out txt output +-l length of chr +-cen centromere +-h help +USAGE +exit(1); +} \ No newline at end of file