Mercurial > repos > big-tiandm > sirna_plant
changeset 19:e0884a4b996b draft
Uploaded
author | big-tiandm |
---|---|
date | Wed, 05 Nov 2014 01:17:26 -0500 |
parents | 22d79320085c |
children | 2ed1e1728299 |
files | Length_Distibution.pl html.pl nibls.pl sRNA_plot.pl siRNA.pl tool_dependencies.xml |
diffstat | 5 files changed, 708 insertions(+), 75 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Length_Distibution.pl Wed Nov 05 01:17:26 2014 -0500 @@ -0,0 +1,219 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. input file of Gene region annotation which format like GenePred +# 2. input file of Transcripts region annotation which format like GenePred +# 3. input file of gene snp detail info +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"i=s","o=s",,"h"); +if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) { +&usage; +} +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + #'stroke-width2'=>1 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + 'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family2'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#========================================data============================ +open(IN,"$opt{i}")||die"cannot open the file $opt{i}"; +my @R_length; +my @T_length; +my $R_number=0; +my $T_number=0; +my $R_max=0; +my $T_max=0; + +my $title=<IN>; +chomp $title; +my @title=split/\t/,$title; +my @mark=split/\s+/,$title[1]; +my $sample_number=@mark; +while (my $aline=<IN>) { + if ($aline=~/^\s/) { + my $T_title=<IN>; + chomp $T_title; + while (my $a_aline=<IN>) { + chomp $a_aline; + my @temp=split/\t/,$a_aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($R_max<$number[$i]) { + $R_max=$number[$i]; + } + } + push @R_length,[$temp[0],@number]; + $R_number++; + } + } + else { + chomp $aline; + my @temp=split/\t/,$aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($T_max<$number[$i]) { + $T_max=$number[$i]; + } + } + push @T_length,[$temp[0],@number]; + $T_number++; + } +} +close IN; +print "Tag max: $T_max\nRead max: $R_max\n"; +my $kd_number=5; +##=======================Reads 纵坐标刻度========================== +my $r=1; +my $rr=1; +my $R=$R_max; +while ($R>10) { + $R=$R/10; + $r=$r*10; + $rr++; +} +$R=int($R+0.5); +my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小(一共10格) +my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字 +##=======================Tags 纵坐标刻度========================== +my $t=1; +my $tt=1; +my $T=$T_max; +while ($T>10) { + $T=$T/10; + $t=$t*10; + $tt++; +} +$T=int($T+0.5); +my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小(一共10格) +my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字 + +#############################s#define start coordinate and scale +my $XOFFSET=50; +my $YOFFSET=60; +my $width=800; +my $heigth=800; +my $X_width=600; +#my $height=1600; +#### Starting #### +#新建画布 +my $svg=SVG->new(width=>$width,height=>$heigth); +####坐标轴 +my $axisL=300;#read 纵坐标长度 +my $x_margin = 50; +#=========Reads number setting========================================== +my $Y_R_title=30;#标题的纵向宽度 +my $Y_R_0=$YOFFSET+$axisL+$Y_R_title; +my $X_R_0=$XOFFSET+$x_margin; +my $R_Yscale=$axisL/$R_xg/$kd_number; +my $R_Xscale=$X_width/$R_number/($sample_number+1); +#=====================================Reads Y axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg); +} +#=====================================Reads X axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$R_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$YOFFSET,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} +####================================================================================== +#=========================================Tag s +my $Y_T_title=30;#标题的纵向宽度 +my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size +my $X_T_0=$XOFFSET+$x_margin; +my $T_Yscale=$axisL/$T_xg/$kd_number; +my $T_Xscale=$X_width/$T_number/($sample_number+1); +#=====================================Tags Y axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg); +} +#=====================================Tags X axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$T_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} + + + + +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-i +-o svg output +-h help +USAGE +exit(1); +} \ No newline at end of file
--- a/html.pl Thu Oct 30 21:31:55 2014 -0400 +++ b/html.pl Wed Nov 05 01:17:26 2014 -0500 @@ -13,11 +13,11 @@ my %opts; GetOptions(\%opts,"i=s","format=s","o=s","h"); -if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments +if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments &usage; } -my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$degpath); -my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$degdir); +my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$plotpath,$degpath); +my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$plotdir,$degdir); open IN,"<$opts{i}"; $config=<IN>; chomp $config; $prepath=<IN>; chomp $prepath; @@ -25,6 +25,7 @@ $genomepath=<IN>; chomp $genomepath; $clusterpath=<IN>; chomp $clusterpath; $annotatepath=<IN>; chomp $annotatepath; +$plotpath=<IN>; chomp $plotpath; my $deg_tag=1; if(eof){$deg_tag=0;} else{ @@ -41,8 +42,8 @@ $clusterdir=$tmp[-1]; @tmp=split/\//,$annotatepath; $annotatedir=$tmp[-1]; -#@tmp=split/\//,$degpath; -#$degdir=$tmp[-1]; +@tmp=split/\//,$plotpath; +$plotdir=$tmp[-1]; my $dir=dirname($opts{'o'}); @@ -201,13 +202,18 @@ The filter (remain total reads>3) data file path is: <b>$filter</b><br /> </p> <h2> 1. Sequence length count</h2> -<h3> 1.1 Reads length</h3> "; +print OUT "\n"; -print OUT "<img src=\"./$predir/Reads_length_after_count_filter.png\" alt=\"Reads_length_after_count_filter.png\" width=\"400\" height=\"300\"/> -<h3> 1.2 Tags length count</h3> -<img src=\"./$predir/Tags_length_after_count_filter.png\" alt=\"Tags_length_after_count_filter.png\" width=\"400\" height=\"300\"/> -<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution_after_count_filter.txt\"> length file</a> +my $length=$prepath."length.html"; +open IN,"<$length"; +while (my $aline=<IN>) { + chomp $aline; + print OUT "$aline\n"; +} +close IN; + +print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution_after_count_filter.txt\"> length file</a> </p> "; @@ -361,8 +367,8 @@ my $cluster_number=`less $cluster |wc -l `; $cluster_number=$cluster_number-1; my (%cluster_length,@exp,@rpkm); -my @exp_range=qw(0 1--9 10--99 100--999 1000--9999 10000--99999 100000--**); -my @rpkm_range=qw(0 0--0.25 0.25--0.5 0.5--1 1.0--5.0 5.0--10 10--50 50--100 100--500 500--1000 1000--**); +my @exp_range=qw(0 \(0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**\)); +my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]); open IN,"<$cluster"; while (my $aline=<IN>) { @@ -372,13 +378,13 @@ my @id=split/:|-/,$temp[0]; $cluster_length{$id[2]-$id[1]+1}++; for (my $i=0;$i<@marks ;$i++) { - if ($temp[$i+3] == 0) {$exp[$i][0]++;} - elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[$i][1]++;} - elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[$i][2]++;} - elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[$i][3]++;} - elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[$i][4]++;} - elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[$i][5]++;} - elsif ($temp[$i+3]>100000){$exp[$i][6]++;} + if ($temp[$i+3] == 0) {$exp[0][$i]++;} + elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[1][$i]++;} + elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;} + elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;} + elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;} + elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;} + elsif ($temp[$i+3]>100000){$exp[6][$i]++;} } } close IN; @@ -390,17 +396,17 @@ chomp $aline; my @temp=split/\t/,$aline; for (my $i=0;$i<@marks ;$i++) { - if ($temp[$i+3]==0) {$rpkm[$i][0]++;} - elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[$i][1]++;} - elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[$i][2]++;} - elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[$i][3]++;} - elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[$i][4]++;} - elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[$i][5]++;} - elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[$i][6]++;} - elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[$i][7]++;} - elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[$i][8]++;} - elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[$i][9]++;} - else{$rpkm[$i][10]++;} + if ($temp[$i+3]==0) {$rpkm[0][$i]++;} + elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;} + elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;} + elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;} + elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;} + elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;} + elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;} + elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;} + elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;} + elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;} + else{$rpkm[10][$i]++;} } } @@ -484,7 +490,7 @@ $nat[$j]=0; } -my $class_anno=6; +my $class_anno=1; open ANNO,"<$annotate"; while (my $aline=<ANNO>) { chomp $aline; @@ -694,6 +700,17 @@ } +print OUT "<h2>6. Graph of Clusters of all samples</h2> \n"; + +my $plot=$plotpath."cluster.html"; +open IN,"<$plot"; +while (my $aline=<IN>) { + chomp $aline; + print OUT "$aline\n"; +} +close IN; + + if ($deg_tag) { my $deg_file=`ls $degpath`; chomp $deg_file; @@ -722,7 +739,7 @@ close IN; } - print OUT "<h2>6. DEG</h2> + print OUT "<h2>7. DEG</h2> <table border=\"1\"> <tr align=\"center\"> <th align=\"left\">Genes number</th>\n @@ -743,7 +760,7 @@ print OUT "</tr>\n</table>"; } else{ - print OUT "<h2>6. DEG</h2> + print OUT "<h2>7. DEG</h2> <br />Do not do DE clusters <br />"; }
--- a/nibls.pl Thu Oct 30 21:31:55 2014 -0400 +++ b/nibls.pl Wed Nov 05 01:17:26 2014 -0500 @@ -80,8 +80,8 @@ # $length1=40; # } my $total; - foreach (0..$#data) { - $total+=$_; + for (my $s=0;$s<@data ;$s++) { + $total+=$data[$s]; } push @data,$total; # push @data,$length1; @@ -258,10 +258,10 @@ my $c=$1; my $s=$2; my $e=$3; - my @data; + # my @data; foreach my $str (keys %{$molecules{$c}{$s}{$e}}) { - push @tag,($s.",".$e.",".$str); - @data=split(/;/,$molecules{$c}{$s}{$e}{$str}); + my @data=split(/;/,$molecules{$c}{$s}{$e}{$str}); + push @tag,($s.",".$e.",".$str.",".$data[-1]); # for (my $i=0;$i<$#old_data ;$i++) { # $data[$i]+=$old_data[$i]; # } @@ -279,14 +279,14 @@ $end = $e if $e > $end; } my $tag=join";",@tag; - + my $tag_number=@tag; my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample); if ($max_length==40) { $max_length="\>30"; } my $cluster_exp=join"\t",@cluster_exp; my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp; - print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; + print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n"; print OUT $gff, "\n"; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sRNA_plot.pl Wed Nov 05 01:17:26 2014 -0500 @@ -0,0 +1,411 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. +# 2. +# 3. +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h"); +if (!( defined $opt{o}) || defined $opt{h}) { +&usage; +} +my $span=$opt{span}; +#my $sample_cloumn=$opt{n}; +my $mark=$opt{mark}; +my @mark=split/\#/,$mark; +my $genelist=$opt{g}; +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + csv=>{ + 'stroke'=>"red", + 'stroke-width'=>0.5 + }, + exon=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + intron=>{ + 'stroke'=>"black", + 'stroke-width'=>1.5 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + #'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#############################s#define start coordinate and scale +open(TXT,">$opt{out}"); +open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; +my %length; +while (my $aline=<LENGTH>) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $length{$temp[0]}=$temp[1]; +} +close LENGTH; +#--------------------------------------------------------------- +open(GENE,"$opt{g}")||die"cannot open the file $opt{g}"; +my %genelist; +while (my $aline=<GENE>) { + chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 + + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + if ($temp[1]=~/^Chr(\d)$/) { + $temp[1]="Chr0$1"; + } + push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]]; + +} +close GENE; +#my %have_gene; +#foreach my $chr (sort keys %genelist) { +# my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; +# my $start=$genelist[0][1]; +# my $end=$genelist[0][2]; +# for (my $i=0;$i<@genelist ;$i++) { +# if ($gene) { +# } +# } +#} + +my %gene_desity; +foreach my $chr (sort keys %genelist) { + my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; + for (my $i=0;$i<@genelist ;$i++) { + my $start=int($genelist[$i][1]/$span); + my $end=int($genelist[$i][2]/$span); + #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]}; + if ($start==$end) { + $gene_desity{$chr}[$start]++; + } + else{ + for (my $k=$start;$k<=$end ;$k++) { + $gene_desity{$chr}[$k]++; + } + } + } +} +#------------------------------------------region_gene_number------------------------- +my $max_gene_number=0; +my $total=0; +foreach my $chr (sort keys %genelist) { + for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) { + if (!(defined($gene_desity{$chr}[$i]))) { + $gene_desity{$chr}[$i]=0; + } + if ($gene_desity{$chr}[$i]>$max_gene_number) { + $max_gene_number=$gene_desity{$chr}[$i]; + #print "$gene_desity{$chr}[$i]\n"; + } + #print TXT "$i\t$gene_desity[$i]\n"; + $total+=$gene_desity{$chr}[$i]; + #print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; + } +} +#print "Gene max:$max_gene_number\ntotal:$total\n"; + +#--------------------------------------------------------------- +my %centromere; +if (defined($opt{cen})) { + open CEN,"$opt{cen}"; + while (my $aline=<CEN>) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $centromere{$temp[0]}[0]=$temp[1]; + $centromere{$temp[0]}[1]=$temp[2]; + } + close CEN; +} + +#--------------------------------------------------------------- +my $max_length=0; +foreach my $chr (keys %length) { + if ($max_length<$length{$chr}) { + $max_length=$length{$chr}; + } + print "$chr\n"; +} +#====================================cluster data======================================= +open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; +my %cluster; +my %cluster_density; +#my @sample=qw(39B3 3PA3 3LC3); +my @cluster_non_add; +while (my $aline=<CLUSTER>) { + next if($aline=~/^\#/); + chomp $aline;##Chr MajorLength Percent end 19B1 + my @temp=split/\t/,$aline; + my @ID=split/\:/,$temp[0]; + my @posi=split/\-/,$ID[1]; + my @all_rpkm=@temp; + shift @all_rpkm; + shift @all_rpkm; + shift @all_rpkm; +# for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer +# $all_rpkm[$s]=log2($all_rpkm[$s]); +# } + push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1); +} +close CLUSTER; +my %max_cluster; +my $chr_number=0; +print "@mark\n$mark\n"; +foreach my $chr (sort keys %cluster) { + for (my $i=0;$i<@mark ;$i++) { + $max_cluster{$chr}[$i]=0; + } + $chr_number++; +} +foreach my $chr (sort keys %cluster) { + @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + for (my $s=0;$s<@mark;$s++) { + if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) { + $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s]; + } + } + } + +} +foreach my $chr (sort keys %max_cluster) { + for (my $s=0; $s<@mark;$s++) { + # print "$max_cluster{$chr}[$s]\n"; + } +} +#--------------------------------------------------------------------------------------- +foreach my $chr(keys %cluster) { + for(my $i=0;$i<$#{$cluster{$chr}};$i++) { + my $start=int($cluster{$chr}[$i][1]/$span); + my $end=int($cluster{$chr}[$i][2]/$span); + if ($start==$end) { + for (my $s=0;$s<@mark ;$s++) { + $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s]; + } + + } + else{ + for (my $m=$start;$m<=$end ;$m++) { + for (my $s=0;$s<@mark ;$s++) { + $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s]; + } + } + } + } +} +my %max_cluster_density; +my $max_all_density=0; +foreach my $chr (sort keys %cluster) {# + for (my $s=0;$s<@mark ;$s++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + $max_cluster_density{$chr}[$s]=0; + } + } + +} +foreach my $chr (sort keys %cluster_density) { + print "$#{$cluster_density{$chr}}\n"; + for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { + print TXT "$chr\t$k"; + for (my $s=0;$s<@mark;$s++) { + if (!(defined($cluster_density{$chr}[$k][$s]))) { + $cluster_density{$chr}[$k][$s]=0; + } + if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) { + $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s]; + } + if ($cluster_density{$chr}[$k][$s]>$max_all_density) { + $max_all_density=$cluster_density{$chr}[$k][$s]; + } + print TXT "\t$cluster_density{$chr}[$k][$s]"; + } + print TXT "\n"; + } +} +print "max density: $max_all_density\n"; +#-------------------------------------------------------------------- +my $top_margin=30; +my $tail_margin=30; +my $XOFFSET=50; +my $YOFFSET=60; +my $chr_length=600; +my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 +#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 +#========================================New canvas============================ +#### Starting #### +#新建画布 +my $width=1000; +my $heigth=100+130*$chr_number; +my $svg=SVG->new(width=>$width, height=>$heigth); +#画图起始点 +my $canvas_start_x=$XOFFSET; +my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线 +my $canvas_start_y=$YOFFSET; +my $canvas_end_y=$YOFFSET; +my $chr_heigth=$heigth-$YOFFSET-$tail_margin; +print "chr number:$chr_number\n"; +my $one_chr_heigth=$chr_heigth/$chr_number; +my $Yscale=($one_chr_heigth-15)/$max_all_density; +#my $chr_width=$YOFFSET; +#my $chr_start_y; +#my $chr_end_y; +#my $Yscale=0.01; +#=======================================title of the graph=============================== +#my $span_k=$span/1000; +#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution"); +#=======================================the top max chr line============================= +$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +$long_scale=int ($max_length/10);#十等分 大刻度 +#大坐标刻度 +for ($i=0;$i<=10;$i++) { + my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale; + my $long_x_end=$long_x_start; + my $long_y_start=$YOFFSET; + my $long_y_end=$YOFFSET-5; + $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + my $Bscale=$long_scale*$i; + my $cdata=int ($Bscale/1000000); + $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M"); +} +#========================================================================================= +my $cc=1; +foreach my $chr (sort keys %length) { + my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; + my $chr_start_x=$XOFFSET; + my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth; + my $chr_end_y=$chr_start_y; + #$chr_start_y+=$chr_width; + #$chr_end_y+=$chr_width; +# for (my $i=0;$i<@{$gene_desity{$chr}};$i++) { +# print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; +# my $red=$gene_desity{$chr}[$i]/$max_gene_number*255; +# my $green=$gene_desity{$chr}[$i]/$max_gene_number*255; +# print "$red\t$green\t0\n"; +# $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)"); +# } + + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); + my $m_length=$length{$chr}%1000000; + $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M"); + + + if (defined($centromere{$chr}[0])) { + $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + } + for (my $s=0;$s<@mark ;$s++) { + for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) { + #if ($cluster_density{$chr}[$i]*$Yscale>40) { + #$cluster_density{$chr}[$i]=40/$Yscale; + #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + #} + #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n"; + my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale; + my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale; + my $c_red=($s+1)/@mark*255; + $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3); + } + + } + #=======Y axis + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + #=======Y axis ===>3 xiaoge + my $s10=1; + my $e10=0; + my $chr_max=$max_all_density; + while ($chr_max>10) { + $chr_max=int($chr_max/10); + $s10=$s10*10; + $e10++; + } + $chr_max=$chr_max/2; + #print "*****$max_all_density\t$chr_max\t$s10\n"; + for (my $i=1;$i<3 ;$i++) { + my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i; + my $xiaoge_Y=$chr_max*$i; + $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10); + } + $cc++; +} + +for (my $s=0;$s<@mark ;$s++) { + my $c_red=($s+1)/@mark*255; + print "**$c_red\n"; + $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1); + $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]); +} +# +# +if (defined($opt{cen})) { + $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); +} + +close TXT; + +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub log2 { + my $n = shift; + return log($n)/log(2); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-g genelist +-span +-n sample cloumn +-mark sample name +-o output graph file name with html or svg extension +-c cluster file input +-out txt output +-l length of chr +-cen centromere +-h help +USAGE +exit(1); +} \ No newline at end of file
--- a/siRNA.pl Thu Oct 30 21:31:55 2014 -0400 +++ b/siRNA.pl Wed Nov 05 01:17:26 2014 -0500 @@ -139,12 +139,20 @@ my $cluster_file; my $annotate_dir; my $deg_dir; +my $plot_dir; my %id; for (my $i=0;$i<@mark ;$i++) { $id{$mark[$i]}=$i+4; } - +print "\n######## tiandm test start ###########\n"; +print "\@mark: @mark\n\%id keys number:"; +print scalar keys %id; +print "\n"; +foreach my $kyess (keys %id){ + print $kyess," --> $id{$kyess}\n"; +} +print "\n######## tiandm test end ############\n"; group_and_filter(); #collapse reads to tags rfam(); @@ -280,6 +288,8 @@ print "perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark\n\n"; my $cout_f=`perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark`; print "perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark\n\n"; + my $plot_l_D=`perl $path/Length_Distibution.pl -i $dir/preProcess/reads_length_distribution_after_count_filter.txt -o $dir/preProcess/length.html `; + print "perl $path\/Length_Distibution.pl -i $dir\/preProcess\/reads_length_distribution_after_count_filter.txt -o $dir\/preProcess\/length\.html\n\n"; return 0; } @@ -341,8 +351,8 @@ print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n"; } elsif($cluster_mothod eq "T"){ - my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -mark $sample_mark`; - print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -mark $sample_mark\n\n"; + my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`; + print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n"; } else{print "\-p is wrong!\n\n";} return 0; @@ -466,41 +476,16 @@ } sub plot{ - my $plot_file="$dir\/plot\/"; - mkdir ("$plot_file"); - my $genome_plot="$dir\/plot\/genome\/"; - mkdir ("$genome_plot"); - #genome cluster + $plot_dir="$dir\/plot\/"; + mkdir ("$plot_dir"); my $span=defined($options{span})?$options{span}:50000; - foreach (1..$sample_number) { - my $mark=$mark[$_-1]; - my $cen=""; - if (defined $options{cen}) { - $cen="-cen $options{cen}"; - } - my $plot=`perl $path\/sRNA_rpkm_distribution_along_genome.pl -c $rpkm -n $_ -mark $mark -span $span -l $dir\/ref\/genome\.length $cen -o $genome_plot\/$mark\.html -out $genome_plot\/$mark\.txt`; - print "perl $path\/sRNA_rpkm_distribution_along_genome.pl -c $rpkm -n $_ -mark $mark -span $span -l $dir\/ref\/genome\.length $cen -o $genome_plot\/$mark\.html -out $genome_plot\/$mark\.txt\n\n"; + my $cen=""; + if (defined $options{cen}) { + $cen="-cen $options{cen}"; } + my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `; + "print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n"; - my $chr_plot_dir="$dir\/plot\/chr\/"; - mkdir("$chr_plot_dir"); - my %chr; - open LEN,"<$dir\/ref\/genome\.length"; - while (my $aline=<LEN>) { - next if($aline=~/^\#/); - chomp $aline; - my @temp=split/\t/,$aline; - $chr{$temp[0]}=$temp[1]; - } - close LEN; - foreach my $chr (sort keys %chr) { - my $cen=""; - if (defined $options{cen}) { - $cen="-cen $options{cen}"; - } - my $chr_plot=`perl $path\/chr_plot.pl -l $chr{$chr} -chro $chr -g $dir\/ref\/genelist.txt -span $span -c $rpkm -mark $sample_mark -o $chr_plot_dir\/$chr\.html`; - print "perl $path\/chr_plot.pl -l $chr{$chr} -chro $chr -g $dir\/ref\/genelist.txt -span $span -c $rpkm -mark $sample_mark -o $chr_plot_dir\/$chr\.html\n"; - } } sub html{ @@ -512,6 +497,7 @@ print PA "$dir"."genome_match\n"; print PA "$cluster_file\n"; print PA "$annotate_dir\n"; + print PA "$plot_dir\n"; if (defined($deg_dir)) { print PA "$deg_dir\n"; }